diff --git a/toml/combinator.hpp b/toml/combinator.hpp new file mode 100644 index 0000000..a81eeed --- /dev/null +++ b/toml/combinator.hpp @@ -0,0 +1,354 @@ +#ifndef TOML11_COMBINATOR_HPP +#define TOML11_COMBINATOR_HPP +#include "traits.hpp" +#include "result.hpp" +#include "utility.hpp" +#include "region.hpp" +#include +#include +#include +#include +#include + +// they scans characters and returns region if it matches to the condition. +// when they fail, it does not change the location. +// in lexer.hpp, these are used. + +namespace toml +{ +namespace detail +{ + +// to output character as an error message. +inline std::string show_char(const char c) +{ + if(std::isgraph(c)) + { + return std::string(1, c); + } + else + { + std::ostringstream oss; + oss << std::hex << std::setfill('0') << std::setw(2) << "0x" + << static_cast(c); + return oss.str(); + } +} + +template +struct character +{ + static constexpr char target = C; + + template + static result, std::string> invoke(location& loc) + { + static_assert(std::is_same::value, + "internal error: container::value_type should be `char`."); + + if(loc.iter() == loc.end()) {return err("empty input");} + const auto first = loc.iter(); + + const char c = *(loc.iter()); + if(c != target) + { + return err(concat_to_string("expected '", show_char(target), + "' but got '", show_char(c), "'.")); + } + ++(loc.iter()); // update location + + return ok(region(loc, first, loc.iter())); + } + + static std::string pattern() {return show_char(target);} +}; +template +constexpr char character::target; + +// closed interval [Low, Up]. both Low and Up are included. +template +struct in_range +{ + // assuming ascii part of UTF-8... + static_assert(Low <= Up, "lower bound should be less than upper bound."); + + static constexpr char upper = Up; + static constexpr char lower = Low; + + template + static result, std::string> invoke(location& loc) + { + static_assert(std::is_same::value, + "internal error: container::value_type should be `char`."); + + if(loc.iter() == loc.end()) {return err("empty input");} + const auto first = loc.iter(); + + const char c = *(loc.iter()); + if(c < lower || upper < c) + { + return err(concat_to_string("expected character in range " + "[", show_char(lower), ", ", show_char(upper), "] but got ", + "'", show_char(c), "'.")); + } + + ++(loc.iter()); + return ok(region(loc, first, loc.iter())); + } + + static std::string pattern() + { + return concat_to_string("[",show_char(lower),"-",show_char(upper),"]"); + } +}; +template constexpr char in_range::upper; +template constexpr char in_range::lower; + +// keep iterator if `Combinator` matches. otherwise, increment `iter` by 1 char. +// for detecting invalid characters, like control sequences in toml string. +template +struct exclude +{ + template + static result, std::string> invoke(location& loc) + { + static_assert(std::is_same::value, + "internal error: container::value_type should be `char`."); + + if(loc.iter() == loc.end()) {return err("empty input");} + auto first = loc.iter(); + + auto rslt = Combinator::invoke(loc); + if(rslt.is_ok()) + { + loc.iter() = first; // rollback + return err(concat_to_string( + "invalid pattern (", Combinator::pattern(), ") appeared ", + rslt.unwrap().str())); + } + loc.iter() = std::next(first); + return ok(region(loc, first, loc.iter())); + } + + static std::string pattern() + { + return concat_to_string("^(", Combinator::pattern(), ')'); + } +}; + +// increment `iter`, if matches. otherwise, just return empty string. +template +struct maybe +{ + template + static result, std::string> invoke(location& loc) + { + static_assert(std::is_same::value, + "internal error: container::value_type should be `char`."); + + const auto rslt = Combinator::invoke(loc); + if(rslt.is_ok()) + { + return rslt; + } + return ok(region(loc)); + } + + static std::string pattern() + { + return concat_to_string('(', Combinator::pattern(), ")?"); + } +}; + +template +struct sequence; + +template +struct sequence +{ + template + static result, std::string> invoke(location& loc) + { + static_assert(std::is_same::value, + "internal error: container::value_type should be `char`."); + + const auto first = loc.iter(); + const auto rslt = Head::invoke(loc); + if(rslt.is_err()) + { + loc.iter() = first; + return err(rslt.unwrap_err()); + } + return sequence::invoke(loc, std::move(rslt.unwrap()), first); + } + + // called from the above function only, recursively. + template + static result, std::string> + invoke(location& loc, region reg, Iterator first) + { + const auto rslt = Head::invoke(loc); + if(rslt.is_err()) + { + loc.iter() = first; + return err(rslt.unwrap_err()); + } + reg += rslt.unwrap(); // concat regions + return sequence::invoke(loc, std::move(reg), first); + } + + static std::string pattern() + { + return concat_to_string(Head::pattern(), sequence::pattern()); + } +}; + +template +struct sequence +{ + // would be called from sequence::invoke only. + template + static result, std::string> + invoke(location& loc, region reg, Iterator first) + { + const auto rslt = Head::invoke(loc); + if(rslt.is_err()) + { + loc.iter() = first; + return err(rslt.unwrap_err()); + } + reg += rslt.unwrap(); // concat regions + return ok(reg); + } + static std::string pattern() {return Head::pattern();} +}; + +template +struct either; + +template +struct either +{ + template + static result, std::string> invoke(location& loc) + { + static_assert(std::is_same::value, + "internal error: container::value_type should be `char`."); + + const auto rslt = Head::invoke(loc); + if(rslt.is_ok()) {return rslt;} + return either::invoke(loc); + } + + static std::string pattern() + { + return concat_to_string('(', Head::pattern(), ")|", either::pattern()); + } +}; +template +struct either +{ + template + static result, std::string> invoke(location& loc) + { + static_assert(std::is_same::value, + "internal error: container::value_type should be `char`."); + return Head::invoke(loc); + } + static std::string pattern() + { + return concat_to_string('(', Head::pattern(), ')'); + } +}; + +template +struct repeat; + +template struct exactly{}; +template struct at_least{}; +struct unlimited{}; + +template +struct repeat> +{ + template + static result, std::string> invoke(location& loc) + { + region retval(loc); + const auto first = loc.iter(); + for(std::size_t i=0; i +struct repeat> +{ + template + static result, std::string> invoke(location& loc) + { + region retval(loc); + + const auto first = loc.iter(); + for(std::size_t i=0; i +struct repeat +{ + template + static result, std::string> invoke(location& loc) + { + region retval(loc); + while(true) + { + auto rslt = T::invoke(loc); + if(rslt.is_err()) + { + return ok(std::move(retval)); + } + retval += rslt.unwrap(); + } + } + static std::string pattern() {return concat_to_string('(', T::pattern(), ")*");} +}; + +} // detail +} // toml +#endif// TOML11_COMBINATOR_HPP