mirror of
https://github.com/ToruNiina/toml11.git
synced 2025-09-17 09:08:08 +08:00
1118 lines
39 KiB
C++
1118 lines
39 KiB
C++
#ifndef TOML11_PARSER_HPP
|
|
#define TOML11_PARSER_HPP
|
|
#include "result.hpp"
|
|
#include "region.hpp"
|
|
#include "combinator.hpp"
|
|
#include "lexer.hpp"
|
|
#include "types.hpp"
|
|
#include "value.hpp"
|
|
|
|
namespace toml
|
|
{
|
|
namespace detail
|
|
{
|
|
|
|
template<typename Container>
|
|
result<boolean, std::string> parse_boolean(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_boolean::invoke(loc))
|
|
{
|
|
const auto reg = token.unwrap();
|
|
if (reg.str() == "true") {return ok(true);}
|
|
else if(reg.str() == "false") {return ok(false);}
|
|
else // internal error.
|
|
{
|
|
throw toml::internal_error(format_underline(
|
|
"[error] toml::parse_boolean: internal error", reg,
|
|
"invalid token"));
|
|
}
|
|
}
|
|
loc.iter() = first; //rollback
|
|
return err(format_underline("[error] toml::parse_boolean", loc,
|
|
"token is not boolean", {"boolean is `true` or `false`"}));
|
|
}
|
|
|
|
template<typename Container>
|
|
result<integer, std::string> parse_binary_integer(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_bin_int::invoke(loc))
|
|
{
|
|
auto str = token.unwrap().str();
|
|
assert(str.size() > 2); // minimum -> 0b1
|
|
integer retval(0), base(1);
|
|
for(auto i(str.rbegin()), e(str.rend() - 2); i!=e; ++i)
|
|
{
|
|
if (*i == '1'){retval += base; base *= 2;}
|
|
else if(*i == '0'){base *= 2;}
|
|
else if(*i == '_'){/* do nothing. */}
|
|
else // internal error.
|
|
{
|
|
throw toml::internal_error(format_underline(
|
|
"[error] toml::parse_integer: internal error",
|
|
token.unwrap(), "invalid token"));
|
|
}
|
|
}
|
|
return ok(retval);
|
|
}
|
|
loc.iter() = first;
|
|
return err(format_underline("[error] toml::parse_binary_integer", loc,
|
|
"token is not binary integer", {"binary integer is like: 0b0011"}));
|
|
}
|
|
|
|
template<typename Container>
|
|
result<integer, std::string> parse_octal_integer(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_oct_int::invoke(loc))
|
|
{
|
|
auto str = token.unwrap().str();
|
|
str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
|
|
str.erase(str.begin()); str.erase(str.begin()); // remove `0o` prefix
|
|
|
|
std::istringstream iss(str);
|
|
integer retval(0);
|
|
iss >> std::oct >> retval;
|
|
return ok(retval);
|
|
}
|
|
loc.iter() = first;
|
|
|
|
return err(format_underline("[error] toml::parse_octal_integer", loc,
|
|
"token is not octal integer", {"octal integer is like: 0o775"}));
|
|
}
|
|
|
|
template<typename Container>
|
|
result<integer, std::string> parse_hexadecimal_integer(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_hex_int::invoke(loc))
|
|
{
|
|
auto str = token.unwrap().str();
|
|
str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
|
|
str.erase(str.begin()); str.erase(str.begin()); // remove `0x` prefix
|
|
|
|
std::istringstream iss(str);
|
|
integer retval(0);
|
|
iss >> std::hex >> retval;
|
|
return ok(retval);
|
|
}
|
|
loc.iter() = first;
|
|
return err(format_underline("[error] toml::parse_hexadecimal_integer", loc,
|
|
"token is not hex integer", {"hex integer is like: 0xC0FFEE"}));
|
|
}
|
|
|
|
template<typename Container>
|
|
result<integer, std::string> parse_integer(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(first != loc.end() && *first == '0')
|
|
{
|
|
if(const auto bin = parse_binary_integer (loc)) {return bin;}
|
|
if(const auto oct = parse_octal_integer (loc)) {return oct;}
|
|
if(const auto hex = parse_hexadecimal_integer(loc)) {return hex;}
|
|
}
|
|
|
|
if(const auto token = lex_dec_int::invoke(loc))
|
|
{
|
|
auto str = token.unwrap().str();
|
|
str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
|
|
|
|
std::istringstream iss(str);
|
|
integer retval(0);
|
|
iss >> retval;
|
|
return ok(retval);
|
|
}
|
|
loc.iter() = first;
|
|
return err(format_underline("[error] toml::parse_integer", loc,
|
|
"token is not integer", {"integer is like: +42",
|
|
"hex integer is like: 0xC0FFEE", "octal integer is like: 0o775",
|
|
"binary integer is like: 0b0011"}));
|
|
}
|
|
|
|
template<typename Container>
|
|
result<floating, std::string> parse_floating(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_float::invoke(loc))
|
|
{
|
|
auto str = token.unwrap().str();
|
|
if(str == "inf" || str == "+inf")
|
|
{
|
|
if(std::numeric_limits<floating>::has_infinity)
|
|
{
|
|
return ok(std::numeric_limits<floating>::infinity());
|
|
}
|
|
else
|
|
{
|
|
throw std::domain_error("toml::parse_floating: inf value found"
|
|
" but the current environment does not support inf. Please"
|
|
" make sure that the floating-point implementation conforms"
|
|
" IEEE 754/ISO 60559 international standard.");
|
|
}
|
|
}
|
|
else if(str == "-inf")
|
|
{
|
|
if(std::numeric_limits<floating>::has_infinity)
|
|
{
|
|
return ok(-std::numeric_limits<floating>::infinity());
|
|
}
|
|
else
|
|
{
|
|
throw std::domain_error("toml::parse_floating: inf value found"
|
|
" but the current environment does not support inf. Please"
|
|
" make sure that the floating-point implementation conforms"
|
|
" IEEE 754/ISO 60559 international standard.");
|
|
}
|
|
}
|
|
else if(str == "nan" || str == "+nan")
|
|
{
|
|
if(std::numeric_limits<floating>::has_quiet_NaN)
|
|
{
|
|
return ok(std::numeric_limits<floating>::quiet_NaN());
|
|
}
|
|
else if(std::numeric_limits<floating>::has_signaling_NaN)
|
|
{
|
|
return ok(std::numeric_limits<floating>::signaling_NaN());
|
|
}
|
|
else
|
|
{
|
|
throw std::domain_error("toml::parse_floating: NaN value found"
|
|
" but the current environment does not support NaN. Please"
|
|
" make sure that the floating-point implementation conforms"
|
|
" IEEE 754/ISO 60559 international standard.");
|
|
}
|
|
}
|
|
else if(str == "-nan")
|
|
{
|
|
if(std::numeric_limits<floating>::has_quiet_NaN)
|
|
{
|
|
return ok(-std::numeric_limits<floating>::quiet_NaN());
|
|
}
|
|
else if(std::numeric_limits<floating>::has_signaling_NaN)
|
|
{
|
|
return ok(-std::numeric_limits<floating>::signaling_NaN());
|
|
}
|
|
else
|
|
{
|
|
throw std::domain_error("toml::parse_floating: NaN value found"
|
|
" but the current environment does not support NaN. Please"
|
|
" make sure that the floating-point implementation conforms"
|
|
" IEEE 754/ISO 60559 international standard.");
|
|
}
|
|
}
|
|
str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
|
|
std::istringstream iss(str);
|
|
floating v(0.0);
|
|
iss >> v;
|
|
return ok(v);
|
|
}
|
|
loc.iter() = first;
|
|
return err(format_underline("[error] toml::parse_floating: ", loc,
|
|
"token is not a float", {"floating point is like: -3.14e+1"}));
|
|
}
|
|
|
|
// Converts a code point written as hexadecimal digits (e.g. "00E9") into its
// UTF-8 byte sequence.
//
// Values above 0x10FFFF but below 0x200000 are still encoded as four bytes
// after a warning is written to std::cerr. Throws std::range_error for values
// of 0x200000 and above.
inline std::string read_utf8_codepoint(const std::string& str)
{
    std::uint_least32_t cp;
    std::istringstream hex_reader(str);
    hex_reader >> std::hex >> cp;

    std::string utf8;

    if(cp < 0x80) // U+0000 ... U+007F ; plain ASCII, one byte.
    {
        utf8 += static_cast<char>(cp);
        return utf8;
    }

    if(cp < 0x800) // U+0080 ... U+07FF ; 110xxxxx 10xxxxxx
    {
        utf8 += static_cast<unsigned char>(0xC0 | (cp >> 6));
        utf8 += static_cast<unsigned char>(0x80 | (cp & 0x3F));
        return utf8;
    }

    if(cp < 0x10000) // U+0800 ... U+FFFF ; 1110xxxx 10xxxxxx 10xxxxxx
    {
        utf8 += static_cast<unsigned char>(0xE0 | (cp >> 12));
        utf8 += static_cast<unsigned char>(0x80 | ((cp >> 6) & 0x3F));
        utf8 += static_cast<unsigned char>(0x80 | (cp & 0x3F));
        return utf8;
    }

    if(cp >= 0x200000) // does not fit into a four-byte sequence
    {
        throw std::range_error("toml::read_utf8_codepoint: input codepoint `" +
            str + "` is too large to decode as utf-8. It should be in range"
            " 0x00 ... 0x1FFFFF.");
    }

    if(cp > 0x10FFFF) // encodable, but outside of the Unicode range
    {
        std::cerr << "WARNING: input codepoint " << str << " is too large "
                  << "to decode as a unicode character. It should be in "
                  << "range [0x00 .. 0x10FFFF]. The result may not be able "
                  << "to be rendered to your screen." << std::endl;
    }

    // U+10000 ... U+1FFFFF ; 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    utf8 += static_cast<unsigned char>(0xF0 | (cp >> 18));
    utf8 += static_cast<unsigned char>(0x80 | ((cp >> 12) & 0x3F));
    utf8 += static_cast<unsigned char>(0x80 | ((cp >> 6) & 0x3F));
    utf8 += static_cast<unsigned char>(0x80 | (cp & 0x3F));
    return utf8;
}
|
|
|
|
template<typename Container>
|
|
result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(*first != '\\')
|
|
{
|
|
return err(format_underline("[error]: "
|
|
"toml::parse_escape_sequence: location does not points \"\\\"",
|
|
loc, "should be \"\\\""));
|
|
}
|
|
++loc.iter();
|
|
switch(*loc.iter())
|
|
{
|
|
case '\\':{++loc.iter(); return ok(std::string("\\"));}
|
|
case '"' :{++loc.iter(); return ok(std::string("\""));}
|
|
case 'b' :{++loc.iter(); return ok(std::string("\b"));}
|
|
case 't' :{++loc.iter(); return ok(std::string("\t"));}
|
|
case 'n' :{++loc.iter(); return ok(std::string("\n"));}
|
|
case 'f' :{++loc.iter(); return ok(std::string("\f"));}
|
|
case 'r' :{++loc.iter(); return ok(std::string("\r"));}
|
|
case 'u' :
|
|
{
|
|
++loc.iter();
|
|
if(const auto token = repeat<lex_hex_dig, exactly<4>>::invoke(loc))
|
|
{
|
|
return ok(read_utf8_codepoint(token.unwrap().str()));
|
|
}
|
|
else
|
|
{
|
|
return err(format_underline("[error] parse_escape_sequence: "
|
|
"invalid token found in UTF-8 codepoint uXXXX.",
|
|
loc, token.unwrap_err()));
|
|
}
|
|
}
|
|
case 'U':
|
|
{
|
|
++loc.iter();
|
|
if(const auto token = repeat<lex_hex_dig, exactly<8>>::invoke(loc))
|
|
{
|
|
return ok(read_utf8_codepoint(token.unwrap().str()));
|
|
}
|
|
else
|
|
{
|
|
return err(format_underline("[error] parse_escape_sequence: "
|
|
"invalid token found in UTF-8 codepoint Uxxxxxxxx",
|
|
loc, token.unwrap_err()));
|
|
}
|
|
}
|
|
}
|
|
|
|
const auto msg = format_underline("[error] parse_escape_sequence: "
|
|
"unknown escape sequence appeared.", loc, "escape sequence is one of"
|
|
" \\, \", b, t, n, f, r, uxxxx, Uxxxxxxxx", {"if you want to write "
|
|
"backslash as just one backslash, use literal string like:",
|
|
"regex = '<\\i\\c*\\s*>'"});
|
|
loc.iter() = first;
|
|
return err(msg);
|
|
}
|
|
|
|
template<typename Container>
|
|
result<toml::string, std::string>
|
|
parse_ml_basic_string(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_ml_basic_string::invoke(loc))
|
|
{
|
|
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
|
|
|
std::string retval;
|
|
retval.reserve(inner_loc.source()->size());
|
|
|
|
auto delim = lex_ml_basic_string_delim::invoke(inner_loc);
|
|
if(!delim)
|
|
{
|
|
throw internal_error(format_underline("[error] "
|
|
"parse_ml_basic_string: invalid token",
|
|
inner_loc, "should be \"\"\""));
|
|
}
|
|
// immediate newline is ignored (if exists)
|
|
/* discard return value */ lex_newline::invoke(inner_loc);
|
|
|
|
delim = err("tmp");
|
|
while(!delim)
|
|
{
|
|
using lex_unescaped_seq = repeat<
|
|
either<lex_ml_basic_unescaped, lex_newline>, unlimited>;
|
|
if(auto unescaped = lex_unescaped_seq::invoke(inner_loc))
|
|
{
|
|
retval += unescaped.unwrap().str();
|
|
}
|
|
if(auto escaped = parse_escape_sequence(inner_loc))
|
|
{
|
|
retval += escaped.unwrap();
|
|
}
|
|
if(auto esc_nl = lex_ml_basic_escaped_newline::invoke(inner_loc))
|
|
{
|
|
// ignore newline after escape until next non-ws char
|
|
}
|
|
if(inner_loc.iter() == inner_loc.end())
|
|
{
|
|
throw internal_error(format_underline("[error] "
|
|
"parse_ml_basic_string: unexpected end of region",
|
|
inner_loc, "not sufficient token"));
|
|
}
|
|
delim = lex_ml_basic_string_delim::invoke(inner_loc);
|
|
}
|
|
return ok(toml::string(retval));
|
|
}
|
|
else
|
|
{
|
|
loc.iter() = first;
|
|
return err(token.unwrap_err());
|
|
}
|
|
}
|
|
|
|
template<typename Container>
|
|
result<toml::string, std::string> parse_basic_string(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_basic_string::invoke(loc))
|
|
{
|
|
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
|
|
|
auto quot = lex_quotation_mark::invoke(inner_loc);
|
|
if(!quot)
|
|
{
|
|
throw internal_error(format_underline("[error] parse_basic_string: "
|
|
"invalid token", inner_loc, "should be \""));
|
|
}
|
|
|
|
std::string retval;
|
|
retval.reserve(inner_loc.source()->size());
|
|
|
|
quot = err("tmp");
|
|
while(!quot)
|
|
{
|
|
using lex_unescaped_seq = repeat<lex_basic_unescaped, unlimited>;
|
|
if(auto unescaped = lex_unescaped_seq::invoke(inner_loc))
|
|
{
|
|
retval += unescaped.unwrap().str();
|
|
}
|
|
if(auto escaped = parse_escape_sequence(inner_loc))
|
|
{
|
|
retval += escaped.unwrap();
|
|
}
|
|
if(inner_loc.iter() == inner_loc.end())
|
|
{
|
|
throw internal_error(format_underline("[error] "
|
|
"parse_ml_basic_string: unexpected end of region",
|
|
inner_loc, "not sufficient token"));
|
|
}
|
|
quot = lex_quotation_mark::invoke(inner_loc);
|
|
}
|
|
return ok(toml::string(retval));
|
|
}
|
|
else
|
|
{
|
|
loc.iter() = first; // rollback
|
|
return err(token.unwrap_err());
|
|
}
|
|
}
|
|
|
|
template<typename Container>
|
|
result<toml::string, std::string>
|
|
parse_ml_literal_string(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_ml_literal_string::invoke(loc))
|
|
{
|
|
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
|
|
|
const auto open = lex_ml_literal_string_delim::invoke(inner_loc);
|
|
if(!open)
|
|
{
|
|
throw internal_error(format_underline("[error] "
|
|
"parse_ml_literal_string: invalid token",
|
|
inner_loc, "should be '''"));
|
|
}
|
|
// immediate newline is ignored (if exists)
|
|
/* discard return value */ lex_newline::invoke(inner_loc);
|
|
|
|
const auto body = lex_ml_literal_body::invoke(inner_loc);
|
|
|
|
const auto close = lex_ml_literal_string_delim::invoke(inner_loc);
|
|
if(!close)
|
|
{
|
|
throw internal_error(format_underline("[error] "
|
|
"parse_ml_literal_string: invalid token",
|
|
inner_loc, "should be '''"));
|
|
}
|
|
return ok(toml::string(body.unwrap().str()));
|
|
}
|
|
else
|
|
{
|
|
loc.iter() = first; // rollback
|
|
return err(token.unwrap_err());
|
|
}
|
|
}
|
|
|
|
template<typename Container>
|
|
result<toml::string, std::string> parse_literal_string(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_literal_string::invoke(loc))
|
|
{
|
|
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
|
|
|
const auto open = lex_apostrophe::invoke(inner_loc);
|
|
if(!open)
|
|
{
|
|
throw internal_error(format_underline("[error] "
|
|
"parse_literal_string: invalid token",
|
|
inner_loc, "should be '"));
|
|
}
|
|
|
|
const auto body = repeat<lex_literal_char, unlimited>::invoke(inner_loc);
|
|
|
|
const auto close = lex_apostrophe::invoke(inner_loc);
|
|
if(!close)
|
|
{
|
|
throw internal_error(format_underline("[error] "
|
|
"parse_literal_string: invalid token",
|
|
inner_loc, "should be '"));
|
|
}
|
|
return ok(toml::string(body.unwrap().str()));
|
|
}
|
|
else
|
|
{
|
|
loc.iter() = first; // rollback
|
|
return err(token.unwrap_err());
|
|
}
|
|
}
|
|
|
|
template<typename Container>
|
|
result<toml::string, std::string> parse_string(location<Container>& loc)
|
|
{
|
|
if(const auto rslt = parse_ml_basic_string(loc))
|
|
{
|
|
return ok(rslt.unwrap());
|
|
}
|
|
if(const auto rslt = parse_ml_literal_string(loc))
|
|
{
|
|
return ok(rslt.unwrap());
|
|
}
|
|
if(const auto rslt = parse_basic_string(loc))
|
|
{
|
|
return ok(rslt.unwrap());
|
|
}
|
|
if(const auto rslt = parse_literal_string(loc))
|
|
{
|
|
return ok(rslt.unwrap());
|
|
}
|
|
return err(format_underline("[error] toml::parse_string: not a string",
|
|
loc, "not a string"));
|
|
}
|
|
|
|
template<typename Container>
|
|
result<local_date, std::string> parse_local_date(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_local_date::invoke(loc))
|
|
{
|
|
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
|
|
|
const auto y = lex_date_fullyear::invoke(inner_loc);
|
|
if(!y || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-')
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_inner_local_date: invalid year format",
|
|
inner_loc, y.map_err_or_else([](const std::string& msg) {
|
|
return msg;
|
|
}, "should be `-`")));
|
|
}
|
|
++inner_loc.iter();
|
|
const auto m = lex_date_month::invoke(inner_loc);
|
|
if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-')
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_local_date: invalid month format",
|
|
inner_loc, m.map_err_or_else([](const std::string& msg) {
|
|
return msg;
|
|
}, "should be `-`")));
|
|
}
|
|
++inner_loc.iter();
|
|
const auto d = lex_date_mday::invoke(inner_loc);
|
|
if(!d)
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_local_date: invalid day format",
|
|
inner_loc, d.unwrap_err()));
|
|
}
|
|
return ok(local_date(
|
|
static_cast<std::int16_t>(from_string<int>(y.unwrap().str(), 0)),
|
|
static_cast<month_t>(
|
|
static_cast<std::int8_t>(from_string<int>(m.unwrap().str(), 0)-1)),
|
|
static_cast<std::int8_t>(from_string<int>(d.unwrap().str(), 0))));
|
|
}
|
|
else
|
|
{
|
|
auto msg = format_underline("[error]: toml::parse_local_date: "
|
|
"invalid format", loc, token.unwrap_err(),
|
|
{"local date is like: 1979-05-27"});
|
|
loc.iter() = first;
|
|
return err(std::move(msg));
|
|
}
|
|
}
|
|
|
|
template<typename Container>
|
|
result<local_time, std::string> parse_local_time(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_local_time::invoke(loc))
|
|
{
|
|
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
|
|
|
const auto h = lex_time_hour::invoke(inner_loc);
|
|
if(!h || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':')
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_local_time: invalid year format",
|
|
inner_loc, h.map_err_or_else([](const std::string& msg) {
|
|
return msg;
|
|
}, "should be `:`")));
|
|
}
|
|
++inner_loc.iter();
|
|
const auto m = lex_time_minute::invoke(inner_loc);
|
|
if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':')
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_local_time: invalid month format",
|
|
inner_loc, m.map_err_or_else([](const std::string& msg) {
|
|
return msg;
|
|
}, "should be `:`")));
|
|
}
|
|
++inner_loc.iter();
|
|
const auto s = lex_time_second::invoke(inner_loc);
|
|
if(!s)
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_local_time: invalid second format",
|
|
inner_loc, s.unwrap_err()));
|
|
}
|
|
local_time time(
|
|
static_cast<std::int8_t>(from_string<int>(h.unwrap().str(), 0)),
|
|
static_cast<std::int8_t>(from_string<int>(m.unwrap().str(), 0)),
|
|
static_cast<std::int8_t>(from_string<int>(s.unwrap().str(), 0)), 0, 0);
|
|
|
|
const auto before_secfrac = inner_loc.iter();
|
|
if(const auto secfrac = lex_time_secfrac::invoke(inner_loc))
|
|
{
|
|
auto sf = secfrac.unwrap().str();
|
|
sf.erase(sf.begin()); // sf.front() == '.'
|
|
switch(sf.size() % 3)
|
|
{
|
|
case 2: sf += '0'; break;
|
|
case 1: sf += "00"; break;
|
|
case 0: break;
|
|
default: break;
|
|
}
|
|
if(sf.size() >= 6)
|
|
{
|
|
time.millisecond = from_string<std::int16_t>(sf.substr(0, 3), 0);
|
|
time.microsecond = from_string<std::int16_t>(sf.substr(3, 3), 0);
|
|
}
|
|
else if(sf.size() >= 3)
|
|
{
|
|
time.millisecond = from_string<std::int16_t>(sf, 0);
|
|
time.microsecond = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(before_secfrac != loc.iter())
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_local_time: invalid subsecond format",
|
|
inner_loc, secfrac.unwrap_err()));
|
|
}
|
|
}
|
|
return ok(time);
|
|
}
|
|
else
|
|
{
|
|
auto msg = format_underline("[error]: toml::parse_local_time: "
|
|
"invalid format", loc, token.unwrap_err(),
|
|
{"local time is like: 00:32:00.999999"});
|
|
loc.iter() = first;
|
|
return err(std::move(msg));
|
|
}
|
|
}
|
|
|
|
template<typename Container>
|
|
result<local_datetime, std::string>
|
|
parse_local_datetime(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_local_date_time::invoke(loc))
|
|
{
|
|
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
|
const auto date = parse_local_date(inner_loc);
|
|
if(!date || inner_loc.iter() == inner_loc.end())
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_local_datetime: invalid datetime format",
|
|
inner_loc, date.map_err_or_else([](const std::string& msg){
|
|
return msg;
|
|
}, "date, not datetime")));
|
|
}
|
|
const char delim = *(inner_loc.iter()++);
|
|
if(delim != 'T' && delim != 't' && delim != ' ')
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_local_datetime: invalid datetime format",
|
|
inner_loc, "should be `T` or ` ` (space)"));
|
|
}
|
|
const auto time = parse_local_time(inner_loc);
|
|
if(!time)
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_local_datetime: invalid datetime format",
|
|
inner_loc, "invalid time fomrat"));
|
|
}
|
|
return ok(local_datetime(date.unwrap(), time.unwrap()));
|
|
}
|
|
else
|
|
{
|
|
auto msg = format_underline("[error]: toml::parse_local_datetime: "
|
|
"invalid format", loc, token.unwrap_err(),
|
|
{"local datetime is like: 1979-05-27T00:32:00.999999"});
|
|
loc.iter() = first;
|
|
return err(std::move(msg));
|
|
}
|
|
}
|
|
|
|
template<typename Container>
|
|
result<offset_datetime, std::string>
|
|
parse_offset_datetime(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(const auto token = lex_offset_date_time::invoke(loc))
|
|
{
|
|
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
|
const auto datetime = parse_local_datetime(inner_loc);
|
|
if(!datetime || inner_loc.iter() == inner_loc.end())
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_offset_datetime: invalid datetime format",
|
|
inner_loc, datetime.map_err_or_else([](const std::string& msg){
|
|
return msg;
|
|
}, "date, not datetime")));
|
|
}
|
|
time_offset offset(0, 0);
|
|
if(const auto ofs = lex_time_numoffset::invoke(inner_loc))
|
|
{
|
|
const auto str = ofs.unwrap().str();
|
|
if(str.front() == '+')
|
|
{
|
|
offset.hour = static_cast<std::int8_t>(from_string<int>(str.substr(1,2), 0));
|
|
offset.minute = static_cast<std::int8_t>(from_string<int>(str.substr(4,2), 0));
|
|
}
|
|
else
|
|
{
|
|
offset.hour = -static_cast<std::int8_t>(from_string<int>(str.substr(1,2), 0));
|
|
offset.minute = -static_cast<std::int8_t>(from_string<int>(str.substr(4,2), 0));
|
|
}
|
|
}
|
|
else if(*inner_loc.iter() != 'Z' && *inner_loc.iter() != 'z')
|
|
{
|
|
throw internal_error(format_underline("[error]: "
|
|
"toml::parse_offset_datetime: invalid datetime format",
|
|
inner_loc, "should be `Z` or `+HH:MM`"));
|
|
}
|
|
return ok(offset_datetime(datetime.unwrap(), offset));
|
|
}
|
|
else
|
|
{
|
|
auto msg = format_underline("[error]: toml::parse_offset_datetime: "
|
|
"invalid format", loc, token.unwrap_err(),
|
|
{"offset datetime is like: 1979-05-27T00:32:00-07:00",
|
|
"or in UTC (w/o offset) : 1979-05-27T00:32:00Z"});
|
|
loc.iter() = first;
|
|
return err(std::move(msg));
|
|
}
|
|
}
|
|
|
|
template<typename Container>
|
|
result<key, std::string> parse_simple_key(location<Container>& loc)
|
|
{
|
|
if(const auto bstr = parse_basic_string(loc))
|
|
{
|
|
return ok(bstr.unwrap().str);
|
|
}
|
|
if(const auto lstr = parse_literal_string(loc))
|
|
{
|
|
return ok(lstr.unwrap().str);
|
|
}
|
|
if(const auto bare = lex_unquoted_key::invoke(loc))
|
|
{
|
|
return ok(bare.unwrap().str());
|
|
}
|
|
return err(format_underline("[error] toml::parse_simple_key: "
|
|
"the next token is not a simple key", loc, "not a key"));
|
|
}
|
|
|
|
// dotted key become vector of keys
|
|
template<typename Container>
|
|
result<std::vector<key>, std::string> parse_key(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
// dotted key -> foo.bar.baz
|
|
if(const auto token = lex_dotted_key::invoke(loc))
|
|
{
|
|
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
|
std::vector<key> keys;
|
|
|
|
while(inner_loc.iter() != inner_loc.end())
|
|
{
|
|
if(const auto k = parse_simple_key(inner_loc))
|
|
{
|
|
keys.push_back(k.unwrap());
|
|
}
|
|
else
|
|
{
|
|
throw internal_error(format_underline("[error] "
|
|
"toml::detail::parse_key: dotted key contains invalid key",
|
|
inner_loc, k.unwrap_err()));
|
|
}
|
|
|
|
if(inner_loc.iter() == inner_loc.end())
|
|
{
|
|
break;
|
|
}
|
|
else if(*inner_loc.iter() == '.')
|
|
{
|
|
++inner_loc.iter(); // to skip `.`
|
|
}
|
|
else
|
|
{
|
|
throw internal_error(format_underline("[error] toml::parse_key: "
|
|
"dotted key contains invalid key ", inner_loc,
|
|
"should be `.`"));
|
|
}
|
|
}
|
|
return ok(keys);
|
|
}
|
|
loc.iter() = first;
|
|
|
|
// simple key -> foo
|
|
if(const auto smpl = parse_simple_key(loc))
|
|
{
|
|
return ok(std::vector<key>(1, smpl.unwrap()));
|
|
}
|
|
return err(format_underline("toml::parse_key: the next token is not a key",
|
|
loc, "not a key"));
|
|
}
|
|
|
|
// forward-decl to implement parse_array and parse_table
|
|
template<typename Container>
|
|
result<value, std::string> parse_value(location<Container>&);
|
|
|
|
template<typename Container>
|
|
result<array, std::string> parse_array(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(loc.iter() == loc.end())
|
|
{
|
|
return err("[error] toml::parse_array: input is empty");
|
|
}
|
|
if(*loc.iter() != '[')
|
|
{
|
|
return err(format_underline("[error] toml::parse_array: "
|
|
"token is not an array", loc, "should be ["));
|
|
}
|
|
++loc.iter();
|
|
|
|
using lex_ws_comment_newline = repeat<
|
|
either<lex_wschar, lex_newline, lex_comment>, unlimited>;
|
|
|
|
array retval;
|
|
while(loc.iter() != loc.end())
|
|
{
|
|
lex_ws_comment_newline::invoke(loc); // skip
|
|
|
|
if(loc.iter() != loc.end() && *loc.iter() == ']')
|
|
{
|
|
++loc.iter(); // skip ']'
|
|
return ok(retval);
|
|
}
|
|
|
|
if(auto val = parse_value(loc))
|
|
{
|
|
retval.push_back(std::move(val.unwrap()));
|
|
}
|
|
else
|
|
{
|
|
return err("[error] toml::parse_array: while reading an element of "
|
|
"an array\n" + val.unwrap_err());
|
|
}
|
|
|
|
using lex_array_separator = sequence<maybe<lex_ws>, character<','>>;
|
|
const auto sp = lex_array_separator::invoke(loc);
|
|
if(!sp)
|
|
{
|
|
lex_ws_comment_newline::invoke(loc);
|
|
if(loc.iter() != loc.end() && *loc.iter() == ']')
|
|
{
|
|
++loc.iter(); // skip ']'
|
|
return ok(retval);
|
|
}
|
|
else
|
|
{
|
|
return err(format_underline("[error] toml::parse_array: "
|
|
"missing array separator `,`", loc, "should be `,`"));
|
|
}
|
|
}
|
|
}
|
|
loc.iter() = first;
|
|
return err(format_underline("[error] toml::parse_array: "
|
|
"array did not closed by `]`", loc, "should be closed"));
|
|
}
|
|
|
|
template<typename Container>
|
|
result<std::pair<std::vector<key>, value>, std::string>
|
|
parse_key_value_pair(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
auto key = parse_key(loc);
|
|
if(!key)
|
|
{
|
|
loc.iter() = first;
|
|
return err("[error] toml::parse_key_value_pair: while reading key-value"
|
|
" pair" + key.unwrap_err());
|
|
}
|
|
|
|
const auto kvsp = lex_keyval_sep::invoke(loc);
|
|
if(!kvsp)
|
|
{
|
|
const auto msg = format_underline("[error] toml::parse_key_value_pair: "
|
|
"missing key-value separator `=`", loc, "should be `=`");
|
|
loc.iter() = first;
|
|
return err(msg);
|
|
}
|
|
|
|
auto val = parse_value(loc);
|
|
if(!val)
|
|
{
|
|
loc.iter() = first;
|
|
return err("[error] toml::parse_key_value_pair: while reading key-value"
|
|
" pair" + val.unwrap_err());
|
|
}
|
|
return ok(std::make_pair(std::move(key.unwrap()), std::move(val.unwrap())));
|
|
}
|
|
|
|
// for error messages.
|
|
template<typename InputIterator>
|
|
std::string format_dotted_keys(InputIterator first, const InputIterator last)
|
|
{
|
|
static_assert(std::is_same<key,
|
|
typename std::iterator_traits<InputIterator>::value_type>::value,"");
|
|
|
|
std::string retval(*first++);
|
|
for(; first != last; ++first)
|
|
{
|
|
retval += '.';
|
|
retval += *first;
|
|
}
|
|
return retval;
|
|
}
|
|
|
|
template<typename InputIterator>
|
|
result<bool, std::string>
|
|
insert_nested_key(table& root, const toml::value& v,
|
|
InputIterator iter, const InputIterator last,
|
|
const bool is_array_of_table = false)
|
|
{
|
|
static_assert(std::is_same<key,
|
|
typename std::iterator_traits<InputIterator>::value_type>::value,"");
|
|
|
|
const auto first = iter;
|
|
assert(iter != last);
|
|
|
|
table* tab = std::addressof(root);
|
|
for(; iter != last; ++iter) // search recursively
|
|
{
|
|
const key& k = *iter;
|
|
if(std::next(iter) == last) // k is the last key
|
|
{
|
|
// XXX if the value is array-of-tables, there can be several
|
|
// tables that are in the same array. in that case, we need to
|
|
// find the last element and insert it to there.
|
|
if(is_array_of_table)
|
|
{
|
|
if(tab->count(k) == 1) // there is already an array of table
|
|
{
|
|
if(!(tab->at(k).is(value_t::Array)))
|
|
{
|
|
throw syntax_error("toml::detail::insert_nested_key: "
|
|
"target is not an array of table: " +
|
|
format_dotted_keys(first, last));
|
|
}
|
|
array& a = tab->at(k).template cast<toml::value_t::Array>();
|
|
if(!(a.front().is(value_t::Table)))
|
|
{
|
|
throw syntax_error("toml::detail::insert_nested_key: "
|
|
"target is not an array of table: " +
|
|
format_dotted_keys(first, last));
|
|
}
|
|
a.push_back(v);
|
|
return ok(true);
|
|
}
|
|
else // if not, we need to create the array of table
|
|
{
|
|
array aot(1, v); // array having one table
|
|
tab->insert(std::make_pair(k, value(aot)));
|
|
return ok(true);
|
|
}
|
|
}
|
|
if(tab->count(k) == 1)
|
|
{
|
|
throw syntax_error("[error] toml::detail::insert_nested_key: "
|
|
"while inserting value to table: value already exists. " +
|
|
format_dotted_keys(first, last));
|
|
}
|
|
tab->insert(std::make_pair(k, v));
|
|
return ok(true);
|
|
}
|
|
else
|
|
{
|
|
// if there is no corresponding value, insert it first.
|
|
if(tab->count(k) == 0) {(*tab)[k] = table{};}
|
|
|
|
// type checking...
|
|
if(tab->at(k).is(value_t::Table))
|
|
{
|
|
tab = std::addressof((*tab)[k].template cast<value_t::Table>());
|
|
}
|
|
else if(tab->at(k).is(value_t::Array)) // array-of-table case
|
|
{
|
|
array& a = (*tab)[k].template cast<value_t::Array>();
|
|
if(!a.back().is(value_t::Table))
|
|
{
|
|
throw syntax_error("toml::detail::insert_nested_key: value "
|
|
"is not a table but an array: " +
|
|
format_dotted_keys(first, last));
|
|
}
|
|
tab = std::addressof(a.back().template cast<value_t::Table>());
|
|
}
|
|
else
|
|
{
|
|
throw syntax_error("toml::detail::insert_nested_key: value "
|
|
"is not a table but an array: " +
|
|
format_dotted_keys(first, last));
|
|
}
|
|
}
|
|
}
|
|
return err(std::string("toml::detail::insert_nested_key: never reach here"));
|
|
}
|
|
|
|
template<typename Container>
|
|
result<table, std::string> parse_inline_table(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
table retval;
|
|
if(!(loc.iter() != loc.end() && *loc.iter() == '{'))
|
|
{
|
|
return err(format_underline("[error] toml::parse_inline_table: "
|
|
"the next token is not an inline table", loc, "not `{`."));
|
|
}
|
|
++loc.iter();
|
|
while(loc.iter() != loc.end())
|
|
{
|
|
maybe<lex_ws>::invoke(loc);
|
|
if(loc.iter() != loc.end() && *loc.iter() == '}')
|
|
{
|
|
++loc.iter(); // skip `}`
|
|
return ok(retval);
|
|
}
|
|
|
|
const auto kv_r = parse_key_value_pair(loc);
|
|
if(!kv_r)
|
|
{
|
|
return err(kv_r.unwrap_err());
|
|
}
|
|
const std::vector<key>& keys = kv_r.unwrap().first;
|
|
const value& val = kv_r.unwrap().second;
|
|
|
|
const auto inserted =
|
|
insert_nested_key(retval, val, keys.begin(), keys.end());
|
|
if(!inserted)
|
|
{
|
|
throw internal_error("[error] toml::parse_inline_table: "
|
|
"failed to insert value into table: " + inserted.unwrap_err());
|
|
}
|
|
|
|
using lex_table_separator = sequence<maybe<lex_ws>, character<','>>;
|
|
const auto sp = lex_table_separator::invoke(loc);
|
|
if(!sp)
|
|
{
|
|
maybe<lex_ws>::invoke(loc);
|
|
if(loc.iter() != loc.end() && *loc.iter() == '}')
|
|
{
|
|
++loc.iter(); // skip `}`
|
|
return ok(retval);
|
|
}
|
|
else
|
|
{
|
|
return err(format_underline("[error] toml:::parse_inline_table:"
|
|
" missing table separator `,` ", loc, "should be `,`"));
|
|
}
|
|
}
|
|
}
|
|
loc.iter() = first;
|
|
return err(format_underline("[error] toml::parse_inline_table: "
|
|
"inline table did not closed by `}`", loc, "should be closed"));
|
|
}
|
|
|
|
template<typename Container>
|
|
result<value, std::string> parse_value(location<Container>& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
if(first == loc.end())
|
|
{
|
|
return err(std::string("toml::parse_value: input is empty"));
|
|
}
|
|
|
|
std::vector<std::string> helps;
|
|
if(auto r = parse_string(loc)) {return ok(value(r.unwrap()));}
|
|
else {helps.push_back(r.unwrap_err());}
|
|
if(auto r = parse_array(loc)) {return ok(value(r.unwrap()));}
|
|
else {helps.push_back(r.unwrap_err());}
|
|
if(auto r = parse_inline_table(loc)) {return ok(value(r.unwrap()));}
|
|
else {helps.push_back(r.unwrap_err());}
|
|
if(auto r = parse_boolean(loc)) {return ok(value(r.unwrap()));}
|
|
else {helps.push_back(r.unwrap_err());}
|
|
if(auto r = parse_offset_datetime(loc)) {return ok(value(r.unwrap()));}
|
|
else {helps.push_back(r.unwrap_err());}
|
|
if(auto r = parse_local_datetime(loc)) {return ok(value(r.unwrap()));}
|
|
else {helps.push_back(r.unwrap_err());}
|
|
if(auto r = parse_local_date(loc)) {return ok(value(r.unwrap()));}
|
|
else {helps.push_back(r.unwrap_err());}
|
|
if(auto r = parse_local_time(loc)) {return ok(value(r.unwrap()));}
|
|
else {helps.push_back(r.unwrap_err());}
|
|
if(auto r = parse_floating(loc)) {return ok(value(r.unwrap()));}
|
|
else {helps.push_back(r.unwrap_err());}
|
|
if(auto r = parse_integer(loc)) {return ok(value(r.unwrap()));}
|
|
else {helps.push_back(r.unwrap_err());}
|
|
const auto msg = format_underline("[error] toml::parse_value: "
|
|
"unknown token appeared", loc, "unknown", std::move(helps));
|
|
loc.iter() = first;
|
|
return err(msg);
|
|
}
|
|
|
|
} // detail
|
|
} // toml
|
|
#endif// TOML11_PARSER_HPP
|