From e6f90434bf95c9d51d2cd00683ae5b4f8862528c Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Thu, 13 Feb 2025 17:10:33 +0000 Subject: [PATCH] feat [skip ci]: update single_include --- single_include/toml.hpp | 1232 ++++++++++++++++++++++----------------- 1 file changed, 709 insertions(+), 523 deletions(-) diff --git a/single_include/toml.hpp b/single_include/toml.hpp index 9f88170..36c882a 100644 --- a/single_include/toml.hpp +++ b/single_include/toml.hpp @@ -10399,22 +10399,10 @@ class character_either final : public scanner_base public: - explicit character_either(std::initializer_list cs) noexcept - : chars_(std::move(cs)) - { - assert(! this->chars_.empty()); - } - template explicit character_either(const char (&cs)[N]) noexcept - : chars_(N-1, '\0') - { - static_assert(N >= 1, ""); - for(std::size_t i=0; i+1 chars_; + const char* value_; + std::size_t size_; }; // ---------------------------------------------------------------------------- @@ -10514,12 +10501,6 @@ class sequence final: public scanner_base scanner_base* clone() const override; - template - void push_back(Scanner&& other_scanner) - { - this->others_.emplace_back(std::forward(other_scanner)); - } - std::string name() const override; private: @@ -10566,12 +10547,6 @@ class either final: public scanner_base scanner_base* clone() const override; - template - void push_back(Scanner&& other_scanner) - { - this->others_.emplace_back(std::forward(other_scanner)); - } - std::string name() const override; private: @@ -10779,8 +10754,9 @@ TOML11_INLINE region character_either::scan(location& loc) const { if(loc.eof()) {return region{};} - for(const auto c : this->chars_) + for(std::size_t i=0; isize_; ++i) { + const auto c = char_type(this->value_[i]); if(loc.current() == c) { const auto first = loc; @@ -10793,30 +10769,32 @@ TOML11_INLINE region character_either::scan(location& loc) const TOML11_INLINE std::string character_either::expected_chars(location&) const { - assert( ! chars_.empty()); + assert( this->value_ ); + assert( this->size_ != 0 ); std::string expected; - if(chars_.size() == 1) + if(this->size_ == 1) { - expected += show_char(chars_.at(0)); + expected += show_char(char_type(value_[0])); } - else if(chars_.size() == 2) + else if(this->size_ == 2) { - expected += show_char(chars_.at(0)) + " or " + show_char(chars_.at(1)); + expected += show_char(char_type(value_[0])) + " or " + + show_char(char_type(value_[1])); } else { - for(std::size_t i=0; isize_; ++i) { if(i != 0) { expected += ", "; } - if(i + 1 == chars_.size()) + if(i + 1 == this->size_) { expected += "or "; } - expected += show_char(chars_.at(i)); + expected += show_char(char_type(value_[i])); } } return expected; @@ -10827,20 +10805,16 @@ TOML11_INLINE scanner_base* character_either::clone() const return new character_either(*this); } -TOML11_INLINE void character_either::push_back(const char_type c) -{ - chars_.push_back(c); -} - TOML11_INLINE std::string character_either::name() const { std::string n("character_either{"); - for(const auto c : this->chars_) + for(std::size_t i=0; isize_; ++i) { + const auto c = char_type(this->value_[i]); n += show_char(c); n += ", "; } - if( ! this->chars_.empty()) + if(this->size_ != 0) { n.pop_back(); n.pop_back(); @@ -11187,10 +11161,10 @@ using char_type = location::char_type; // avoid redundant representation and out-of-unicode sequence -character_in_range utf8_1byte (const spec&); -sequence utf8_2bytes(const spec&); -sequence utf8_3bytes(const spec&); -sequence utf8_4bytes(const spec&); +character_in_range const& utf8_1byte (const spec&); +sequence const& utf8_2bytes(const spec&); +sequence const& utf8_3bytes(const spec&); +sequence const& utf8_4bytes(const spec&); class non_ascii final : public scanner_base { @@ -11200,12 +11174,28 @@ class non_ascii final : public scanner_base public: - explicit non_ascii(const spec& s) noexcept; + explicit non_ascii(const spec& s) noexcept + : utf8_2B_(utf8_2bytes(s)), + utf8_3B_(utf8_3bytes(s)), + utf8_4B_(utf8_4bytes(s)) + {} ~non_ascii() override = default; region scan(location& loc) const override { - return scanner_.scan(loc); + { + const auto reg = utf8_2B_.scan(loc); + if(reg.is_ok()) {return reg;} + } + { + const auto reg = utf8_3B_.scan(loc); + if(reg.is_ok()) {return reg;} + } + { + const auto reg = utf8_4B_.scan(loc); + if(reg.is_ok()) {return reg;} + } + return region{}; } std::string expected_chars(location&) const override @@ -11224,34 +11214,35 @@ class non_ascii final : public scanner_base } private: - - either scanner_; + sequence utf8_2B_; + sequence utf8_3B_; + sequence utf8_4B_; }; // =========================================================================== // Whitespace -character_either wschar(const spec&); +character_either const& wschar(const spec&); -repeat_at_least ws(const spec& s); +repeat_at_least const& ws(const spec& s); // =========================================================================== // Newline -either newline(const spec&); +either const& newline(const spec&); // =========================================================================== // Comments -either allowed_comment_char(const spec& s); +either const& allowed_comment_char(const spec& s); // XXX Note that it does not take newline -sequence comment(const spec& s); +sequence const& comment(const spec& s); // =========================================================================== // Boolean -either boolean(const spec&); +either const& boolean(const spec&); // =========================================================================== // Integer @@ -11264,7 +11255,11 @@ class digit final : public scanner_base public: - explicit digit(const spec&) noexcept; + explicit digit(const spec&) noexcept + : scanner_(char_type('0'), char_type('9')) + {} + + ~digit() override = default; region scan(location& loc) const override @@ -11300,12 +11295,23 @@ class alpha final : public scanner_base public: - explicit alpha(const spec&) noexcept; + explicit alpha(const spec&) noexcept + : lowercase_(char_type('a'), char_type('z')), + uppercase_(char_type('A'), char_type('Z')) + {} ~alpha() override = default; region scan(location& loc) const override { - return scanner_.scan(loc); + { + const auto reg = lowercase_.scan(loc); + if(reg.is_ok()) {return reg;} + } + { + const auto reg = uppercase_.scan(loc); + if(reg.is_ok()) {return reg;} + } + return region{}; } std::string expected_chars(location&) const override @@ -11325,7 +11331,8 @@ class alpha final : public scanner_base private: - either scanner_; + character_in_range lowercase_; + character_in_range uppercase_; }; class hexdig final : public scanner_base @@ -11336,12 +11343,28 @@ class hexdig final : public scanner_base public: - explicit hexdig(const spec& s) noexcept; + explicit hexdig(const spec& s) noexcept + : digit_(s), + lowercase_(char_type('a'), char_type('f')), + uppercase_(char_type('A'), char_type('F')) + {} ~hexdig() override = default; region scan(location& loc) const override { - return scanner_.scan(loc); + { + const auto reg = digit_.scan(loc); + if(reg.is_ok()) {return reg;} + } + { + const auto reg = lowercase_.scan(loc); + if(reg.is_ok()) {return reg;} + } + { + const auto reg = uppercase_.scan(loc); + if(reg.is_ok()) {return reg;} + } + return region{}; } std::string expected_chars(location&) const override @@ -11361,61 +11384,63 @@ class hexdig final : public scanner_base private: - either scanner_; + digit digit_; + character_in_range lowercase_; + character_in_range uppercase_; }; -sequence num_suffix(const spec& s); +sequence const& num_suffix(const spec& s); -sequence dec_int(const spec& s); -sequence hex_int(const spec& s); -sequence oct_int(const spec&); -sequence bin_int(const spec&); -either integer(const spec& s); +sequence const& dec_int(const spec& s); +sequence const& hex_int(const spec& s); +sequence const& oct_int(const spec&); +sequence const& bin_int(const spec&); +either const& integer(const spec& s); // =========================================================================== // Floating -sequence zero_prefixable_int(const spec& s); -sequence fractional_part(const spec& s); -sequence exponent_part(const spec& s); -sequence hex_floating(const spec& s); -either floating(const spec& s); +sequence const& zero_prefixable_int(const spec& s); +sequence const& fractional_part(const spec& s); +sequence const& exponent_part(const spec& s); +sequence const& hex_floating(const spec& s); +either const& floating(const spec& s); // =========================================================================== // Datetime -sequence local_date(const spec& s); -sequence local_time(const spec& s); -either time_offset(const spec& s); -sequence full_time(const spec& s); -character_either time_delim(const spec&); -sequence local_datetime(const spec& s); -sequence offset_datetime(const spec& s); +sequence const& local_date(const spec& s); +sequence const& local_time(const spec& s); +either const& time_offset(const spec& s); +sequence const& full_time(const spec& s); +character_either const& time_delim(const spec&); +sequence const& local_datetime(const spec& s); +sequence const& offset_datetime(const spec& s); // =========================================================================== // String -sequence escaped(const spec& s); +sequence const& escaped_x2(const spec& s); +sequence const& escaped_u4(const spec& s); +sequence const& escaped_U8(const spec& s); -either basic_char(const spec& s); - -sequence basic_string(const spec& s); +sequence const& escaped (const spec& s); +either const& basic_char (const spec& s); +sequence const& basic_string(const spec& s); // --------------------------------------------------------------------------- // multiline string -sequence escaped_newline(const spec& s); -sequence ml_basic_string(const spec& s); +sequence const& escaped_newline(const spec& s); +sequence const& ml_basic_string(const spec& s); // --------------------------------------------------------------------------- // literal string -either literal_char(const spec& s); -sequence literal_string(const spec& s); - -sequence ml_literal_string(const spec& s); - -either string(const spec& s); +either const& literal_char(const spec& s); +sequence const& literal_string(const spec& s); +sequence const& ml_literal_string(const spec& s); +either const& string(const spec& s); // =========================================================================== // Keys @@ -11459,16 +11484,11 @@ class non_ascii_key_char final : public scanner_base }; -repeat_at_least unquoted_key(const spec& s); - -either quoted_key(const spec& s); - -either simple_key(const spec& s); - -sequence dot_sep(const spec& s); - -sequence dotted_key(const spec& s); - +repeat_at_least const& unquoted_key(const spec& s); +either const& quoted_key(const spec& s); +either const& simple_key(const spec& s); +sequence const& dot_sep(const spec& s); +sequence const& dotted_key(const spec& s); class key final : public scanner_base { @@ -11478,12 +11498,23 @@ class key final : public scanner_base public: - explicit key(const spec& s) noexcept; + explicit key(const spec& s) noexcept + : dotted_(dotted_key(s)), + simple_(simple_key(s)) + {} ~key() override = default; region scan(location& loc) const override { - return scanner_.scan(loc); + { + const auto reg = dotted_.scan(loc); + if(reg.is_ok()) {return reg;} + } + { + const auto reg = simple_.scan(loc); + if(reg.is_ok()) {return reg;} + } + return region{}; } std::string expected_chars(location&) const override @@ -11503,22 +11534,23 @@ class key final : public scanner_base private: - either scanner_; + sequence dotted_; + either simple_; }; -sequence keyval_sep(const spec& s); +sequence const& keyval_sep(const spec& s); // =========================================================================== // Table key -sequence std_table(const spec& s); +sequence const& std_table(const spec& s); -sequence array_table(const spec& s); +sequence const& array_table(const spec& s); // =========================================================================== // extension: null -literal null_value(const spec&); +literal const& null_value(const spec&); } // namespace syntax } // namespace detail @@ -11539,535 +11571,669 @@ namespace syntax using char_type = location::char_type; +template +struct syntax_cache +{ + using value_type = cxx::return_type_of_t; + static_assert(std::is_base_of::value, ""); + + explicit syntax_cache(F f) + : func_(std::move(f)), cache_(cxx::make_nullopt()) + {} + + value_type const& at(const spec& s) + { + if( ! this->cache_.has_value() || this->cache_.value().first != s) + { + this->cache_ = std::make_pair(s, func_(s)); + } + return this->cache_.value().second; + } + + private: + F func_; + cxx::optional> cache_; +}; + +template +syntax_cache> make_cache(F&& f) +{ + return syntax_cache>(std::forward(f)); +} + // =========================================================================== // UTF-8 // avoid redundant representation and out-of-unicode sequence -TOML11_INLINE character_in_range utf8_1byte(const spec&) +TOML11_INLINE character_in_range const& utf8_1byte(const spec&) { - return character_in_range(0x00, 0x7F); + static thread_local character_in_range cache(0x00, 0x7F); + return cache; } -TOML11_INLINE sequence utf8_2bytes(const spec&) +TOML11_INLINE sequence const& utf8_2bytes(const spec&) { - return sequence(character_in_range(0xC2, 0xDF), - character_in_range(0x80, 0xBF)); + static thread_local sequence cache( + character_in_range(0xC2, 0xDF), + character_in_range(0x80, 0xBF)); + return cache; } -TOML11_INLINE sequence utf8_3bytes(const spec&) +TOML11_INLINE sequence const& utf8_3bytes(const spec&) { - return sequence(/*1~2 bytes = */either( + static thread_local sequence cache(/*1~2 bytes = */either( sequence(character (0xE0), character_in_range(0xA0, 0xBF)), sequence(character_in_range(0xE1, 0xEC), character_in_range(0x80, 0xBF)), sequence(character (0xED), character_in_range(0x80, 0x9F)), sequence(character_in_range(0xEE, 0xEF), character_in_range(0x80, 0xBF)) ), /*3rd byte = */ character_in_range(0x80, 0xBF)); + + return cache; } -TOML11_INLINE sequence utf8_4bytes(const spec&) +TOML11_INLINE sequence const& utf8_4bytes(const spec&) { - return sequence(/*1~2 bytes = */either( + static thread_local sequence cache(/*1~2 bytes = */either( sequence(character (0xF0), character_in_range(0x90, 0xBF)), sequence(character_in_range(0xF1, 0xF3), character_in_range(0x80, 0xBF)), sequence(character (0xF4), character_in_range(0x80, 0x8F)) ), character_in_range(0x80, 0xBF), character_in_range(0x80, 0xBF)); + + return cache; } -TOML11_INLINE non_ascii::non_ascii(const spec& s) noexcept - : scanner_(utf8_2bytes(s), utf8_3bytes(s), utf8_4bytes(s)) -{} - - // =========================================================================== // Whitespace -TOML11_INLINE character_either wschar(const spec&) +TOML11_INLINE character_either const& wschar(const spec&) { - return character_either{char_type(' '), char_type('\t')}; + static thread_local character_either cache(" \t"); + return cache; } -TOML11_INLINE repeat_at_least ws(const spec& s) +TOML11_INLINE repeat_at_least const& ws(const spec& sp) { - return repeat_at_least(0, wschar(s)); + static thread_local auto cache = make_cache([](const spec& s){ + return repeat_at_least(0, wschar(s)); + }); + return cache.at(sp); } // =========================================================================== // Newline -TOML11_INLINE either newline(const spec&) +TOML11_INLINE either const& newline(const spec&) { - return either(character(char_type('\n')), literal("\r\n")); + static thread_local either cache(character(char_type('\n')), literal("\r\n")); + return cache; } // =========================================================================== // Comments -TOML11_INLINE either allowed_comment_char(const spec& s) +TOML11_INLINE either const& allowed_comment_char(const spec& sp) { - if(s.v1_1_0_allow_control_characters_in_comments) - { - return either( - character_in_range(0x01, 0x09), - character_in_range(0x0E, 0x7F), - non_ascii(s) - ); - } - else - { - return either( - character(0x09), - character_in_range(0x20, 0x7E), - non_ascii(s) - ); - } + static thread_local auto cache = make_cache([](const spec& s){ + if(s.v1_1_0_allow_control_characters_in_comments) + { + return either( + character_in_range(0x01, 0x09), + character_in_range(0x0E, 0x7F), + non_ascii(s) + ); + } + else + { + return either( + character(0x09), + character_in_range(0x20, 0x7E), + non_ascii(s) + ); + } + }); + return cache.at(sp); } // XXX Note that it does not take newline -TOML11_INLINE sequence comment(const spec& s) +TOML11_INLINE sequence const& comment(const spec& sp) { - return sequence(character(char_type('#')), + static thread_local auto cache = make_cache([](const spec& s){ + return sequence(character(char_type('#')), repeat_at_least(0, allowed_comment_char(s))); + }); + return cache.at(sp); } // =========================================================================== // Boolean -TOML11_INLINE either boolean(const spec&) +TOML11_INLINE either const& boolean(const spec&) { - return either(literal("true"), literal("false")); + static thread_local either cache(literal("true"), literal("false")); + return cache; } // =========================================================================== // Integer -TOML11_INLINE digit::digit(const spec&) noexcept - : scanner_(char_type('0'), char_type('9')) -{} - -TOML11_INLINE alpha::alpha(const spec&) noexcept - : scanner_( - character_in_range(char_type('a'), char_type('z')), - character_in_range(char_type('A'), char_type('Z')) - ) -{} - -TOML11_INLINE hexdig::hexdig(const spec& s) noexcept - : scanner_( - digit(s), - character_in_range(char_type('a'), char_type('f')), - character_in_range(char_type('A'), char_type('F')) - ) -{} - // non-digit-graph = ([a-zA-Z]|unicode mb char) // graph = ([a-zA-Z0-9]|unicode mb char) // suffix = _ non-digit-graph (graph | _graph) -TOML11_INLINE sequence num_suffix(const spec& s) +TOML11_INLINE sequence const& num_suffix(const spec& sp) { - const auto non_digit_graph = [&s]() { - return either( - alpha(s), - non_ascii(s) - ); - }; - const auto graph = [&s]() { - return either( - alpha(s), - digit(s), - non_ascii(s) - ); - }; + static thread_local auto cache = make_cache([](const spec& s) { + const auto non_digit_graph = [&s]() { + return either( + alpha(s), + non_ascii(s) + ); + }; + const auto graph = [&s]() { + return either( + alpha(s), + digit(s), + non_ascii(s) + ); + }; - return sequence( - character(char_type('_')), - non_digit_graph(), - repeat_at_least(0, - either( - sequence(character(char_type('_')), graph()), - graph() - ) - ) - ); -} - -TOML11_INLINE sequence dec_int(const spec& s) -{ - const auto digit19 = []() { - return character_in_range(char_type('1'), char_type('9')); - }; - return sequence( - maybe(character_either{char_type('-'), char_type('+')}), - either( - sequence( - digit19(), - repeat_at_least(1, - either( - digit(s), - sequence(character(char_type('_')), digit(s)) - ) + return sequence( + character(char_type('_')), + non_digit_graph(), + repeat_at_least(0, + either( + sequence(character(char_type('_')), graph()), + graph() ) - ), - digit(s) - ) - ); -} - -TOML11_INLINE sequence hex_int(const spec& s) -{ - return sequence( - literal("0x"), - hexdig(s), - repeat_at_least(0, - either( - hexdig(s), - sequence(character(char_type('_')), hexdig(s)) ) - ) - ); + ); + }); + return cache.at(sp); } -TOML11_INLINE sequence oct_int(const spec&) +TOML11_INLINE sequence const& dec_int(const spec& sp) { - const auto digit07 = []() { - return character_in_range(char_type('0'), char_type('7')); - }; - return sequence( - literal("0o"), - digit07(), - repeat_at_least(0, + static thread_local auto cache = make_cache([](const spec& s) { + const auto digit19 = []() { + return character_in_range(char_type('1'), char_type('9')); + }; + return sequence( + maybe(character_either("+-")), either( - digit07(), - sequence(character(char_type('_')), digit07()) + sequence( + digit19(), + repeat_at_least(1, + either( + digit(s), + sequence(character(char_type('_')), digit(s)) + ) + ) + ), + digit(s) ) - ) - ); + ); + }); + return cache.at(sp); } -TOML11_INLINE sequence bin_int(const spec&) +TOML11_INLINE sequence const& hex_int(const spec& sp) { - const auto digit01 = []() { - return character_either{char_type('0'), char_type('1')}; - }; - return sequence( - literal("0b"), - digit01(), - repeat_at_least(0, - either( - digit01(), - sequence(character(char_type('_')), digit01()) + static thread_local auto cache = make_cache([](const spec& s) { + return sequence( + literal("0x"), + hexdig(s), + repeat_at_least(0, + either( + hexdig(s), + sequence(character(char_type('_')), hexdig(s)) + ) ) - ) - ); + ); + }); + return cache.at(sp); } -TOML11_INLINE either integer(const spec& s) +TOML11_INLINE sequence const& oct_int(const spec& s) { - return either( - hex_int(s), - oct_int(s), - bin_int(s), - dec_int(s) - ); + static thread_local auto cache = make_cache([](const spec&) { + const auto digit07 = []() { + return character_in_range(char_type('0'), char_type('7')); + }; + return sequence( + literal("0o"), + digit07(), + repeat_at_least(0, + either( + digit07(), + sequence(character(char_type('_')), digit07()) + ) + ) + ); + }); + return cache.at(s); +} + +TOML11_INLINE sequence const& bin_int(const spec& s) +{ + static thread_local auto cache = make_cache([](const spec&) { + const auto digit01 = []() { + return character_either("01"); + }; + return sequence( + literal("0b"), + digit01(), + repeat_at_least(0, + either( + digit01(), + sequence(character(char_type('_')), digit01()) + ) + ) + ); + }); + return cache.at(s); +} + +TOML11_INLINE either const& integer(const spec& sp) +{ + static thread_local auto cache = make_cache([](const spec& s) { + return either( + hex_int(s), + oct_int(s), + bin_int(s), + dec_int(s) + ); + }); + return cache.at(sp); } // =========================================================================== // Floating -TOML11_INLINE sequence zero_prefixable_int(const spec& s) +TOML11_INLINE sequence const& zero_prefixable_int(const spec& sp) { - return sequence( - digit(s), - repeat_at_least(0, - either( - digit(s), - sequence(character('_'), digit(s)) + static thread_local auto cache = make_cache([](const spec& s) { + return sequence( + digit(s), + repeat_at_least(0, + either( + digit(s), + sequence(character('_'), digit(s)) + ) ) - ) - ); + ); + }); + return cache.at(sp); } -TOML11_INLINE sequence fractional_part(const spec& s) +TOML11_INLINE sequence const& fractional_part(const spec& sp) { - return sequence( - character('.'), - zero_prefixable_int(s) - ); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence( + character('.'), + zero_prefixable_int(s) + ); + }); + return cache.at(sp); } -TOML11_INLINE sequence exponent_part(const spec& s) +TOML11_INLINE sequence const& exponent_part(const spec& sp) { - return sequence( - character_either{char_type('e'), char_type('E')}, - maybe(character_either{char_type('+'), char_type('-')}), - zero_prefixable_int(s) - ); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence( + character_either("eE"), + maybe(character_either("+-")), + zero_prefixable_int(s) + ); + }); + return cache.at(sp); } -TOML11_INLINE sequence hex_floating(const spec& s) +TOML11_INLINE sequence const& hex_floating(const spec& sp) { - // C99 hexfloat (%a) - // [+-]? 0x ( [0-9a-fA-F]*\.[0-9a-fA-F]+ | [0-9a-fA-F]+\.? ) [pP] [+-]? [0-9]+ + static thread_local auto cache = make_cache([](const spec& s) { + // C99 hexfloat (%a) + // [+-]? 0x ( [0-9a-fA-F]*\.[0-9a-fA-F]+ | [0-9a-fA-F]+\.? ) [pP] [+-]? [0-9]+ - // - 0x(int).(frac)p[+-](int) - // - 0x(int).p[+-](int) - // - 0x.(frac)p[+-](int) - // - 0x(int)p[+-](int) + // - 0x(int).(frac)p[+-](int) + // - 0x(int).p[+-](int) + // - 0x.(frac)p[+-](int) + // - 0x(int)p[+-](int) - return sequence( - maybe(character_either{char_type('+'), char_type('-')}), - character('0'), - character_either{char_type('x'), char_type('X')}, - either( + return sequence( + maybe(character_either("+-")), + character('0'), + character_either("xX"), + either( + sequence( + repeat_at_least(0, hexdig(s)), + character('.'), + repeat_at_least(1, hexdig(s)) + ), + sequence( + repeat_at_least(1, hexdig(s)), + maybe(character('.')) + ) + ), + character_either("pP"), + maybe(character_either("+-")), + repeat_at_least(1, character_in_range('0', '9')) + ); + }); + return cache.at(sp); +} + +TOML11_INLINE either const& floating(const spec& sp) +{ + static thread_local auto cache = make_cache([](const spec& s) { + return either( sequence( - repeat_at_least(0, hexdig(s)), - character('.'), - repeat_at_least(1, hexdig(s)) + dec_int(s), + either( + exponent_part(s), + sequence(fractional_part(s), maybe(exponent_part(s))) + ) ), sequence( - repeat_at_least(1, hexdig(s)), - maybe(character('.')) + maybe(character_either("+-")), + either(literal("inf"), literal("nan")) ) - ), - character_either{char_type('p'), char_type('P')}, - maybe(character_either{char_type('+'), char_type('-')}), - repeat_at_least(1, character_in_range('0', '9')) - ); -} - -TOML11_INLINE either floating(const spec& s) -{ - return either( - sequence( - dec_int(s), - either( - exponent_part(s), - sequence(fractional_part(s), maybe(exponent_part(s))) - ) - ), - sequence( - maybe(character_either{char_type('-'), char_type('+')}), - either(literal("inf"), literal("nan")) - ) - ); + ); + }); + return cache.at(sp); } // =========================================================================== // Datetime -TOML11_INLINE sequence local_date(const spec& s) +TOML11_INLINE sequence const& local_date(const spec& sp) { - return sequence( - repeat_exact(4, digit(s)), - character('-'), - repeat_exact(2, digit(s)), - character('-'), - repeat_exact(2, digit(s)) - ); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence( + repeat_exact(4, digit(s)), + character('-'), + repeat_exact(2, digit(s)), + character('-'), + repeat_exact(2, digit(s)) + ); + }); + return cache.at(sp); } -TOML11_INLINE sequence local_time(const spec& s) +TOML11_INLINE sequence const& local_time(const spec& sp) { - auto time = sequence( - repeat_exact(2, digit(s)), - character(':'), - repeat_exact(2, digit(s)) - ); - - if(s.v1_1_0_make_seconds_optional) - { - time.push_back(maybe(sequence( + static thread_local auto cache = make_cache([](const spec& s) { + if(s.v1_1_0_make_seconds_optional) + { + return sequence( + repeat_exact(2, digit(s)), + character(':'), + repeat_exact(2, digit(s)), + maybe(sequence( + character(':'), + repeat_exact(2, digit(s)), + maybe(sequence(character('.'), repeat_at_least(1, digit(s)))) + ))); + } + else + { + return sequence( + repeat_exact(2, digit(s)), + character(':'), + repeat_exact(2, digit(s)), character(':'), repeat_exact(2, digit(s)), maybe(sequence(character('.'), repeat_at_least(1, digit(s)))) - ))); - } - else - { - time.push_back(character(':')); - time.push_back(repeat_exact(2, digit(s))); - time.push_back( - maybe(sequence(character('.'), repeat_at_least(1, digit(s)))) - ); - } - - return time; + ); + } + }); + return cache.at(sp); } -TOML11_INLINE either time_offset(const spec& s) +TOML11_INLINE either const& time_offset(const spec& sp) { - return either( - character_either{'Z', 'z'}, - sequence(character_either{'+', '-'}, - repeat_exact(2, digit(s)), - character(':'), - repeat_exact(2, digit(s)) - ) - ); + static thread_local auto cache = make_cache([](const spec& s) { + return either( + character_either("zZ"), + sequence(character_either("+-"), + repeat_exact(2, digit(s)), + character(':'), + repeat_exact(2, digit(s)) + ) + ); + }); + return cache.at(sp); } -TOML11_INLINE sequence full_time(const spec& s) +TOML11_INLINE sequence const& full_time(const spec& sp) { - return sequence(local_time(s), time_offset(s)); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence(local_time(s), time_offset(s)); + }); + return cache.at(sp); } -TOML11_INLINE character_either time_delim(const spec&) +TOML11_INLINE character_either const& time_delim(const spec& sp) { - return character_either{'T', 't', ' '}; + static thread_local auto cache = make_cache([](const spec&) { + return character_either("Tt "); + }); + return cache.at(sp); } -TOML11_INLINE sequence local_datetime(const spec& s) +TOML11_INLINE sequence const& local_datetime(const spec& sp) { - return sequence(local_date(s), time_delim(s), local_time(s)); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence(local_date(s), time_delim(s), local_time(s)); + }); + return cache.at(sp); } -TOML11_INLINE sequence offset_datetime(const spec& s) +TOML11_INLINE sequence const& offset_datetime(const spec& sp) { - return sequence(local_date(s), time_delim(s), full_time(s)); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence(local_date(s), time_delim(s), full_time(s)); + }); + return cache.at(sp); } // =========================================================================== // String -TOML11_INLINE sequence escaped(const spec& s) +TOML11_INLINE sequence const& escaped_x2(const spec& sp) { - character_either escape_char{ - '\"','\\', 'b', 'f', 'n', 'r', 't' - }; - if(s.v1_1_0_add_escape_sequence_e) - { - escape_char.push_back(char_type('e')); - } - - either escape_seq( - std::move(escape_char), - sequence(character('u'), repeat_exact(4, hexdig(s))), - sequence(character('U'), repeat_exact(8, hexdig(s))) - ); - - if(s.v1_1_0_add_escape_sequence_x) - { - escape_seq.push_back( - sequence(character('x'), repeat_exact(2, hexdig(s))) - ); - } - - return sequence( - character('\\'), - std::move(escape_seq) - ); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence(character('x'), repeat_exact(2, hexdig(s))); + }); + return cache.at(sp); +} +TOML11_INLINE sequence const& escaped_u4(const spec& sp) +{ + static thread_local auto cache = make_cache([](const spec& s) { + return sequence(character('u'), repeat_exact(4, hexdig(s))); + }); + return cache.at(sp); +} +TOML11_INLINE sequence const& escaped_U8(const spec& sp) +{ + static thread_local auto cache = make_cache([](const spec& s) { + return sequence(character('U'), repeat_exact(8, hexdig(s))); + }); + return cache.at(sp); } -TOML11_INLINE either basic_char(const spec& s) +TOML11_INLINE sequence const& escaped(const spec& sp) { - const auto basic_unescaped = [&s]() { - return either( - wschar(s), - character(0x21), // 22 is " - character_in_range(0x23, 0x5B), // 5C is backslash - character_in_range(0x5D, 0x7E), // 7F is DEL - non_ascii(s) + static thread_local auto cache = make_cache([](const spec& s) { + const auto escape_char = [&s] { + if(s.v1_1_0_add_escape_sequence_e) + { + return character_either("\"\\bfnrte"); + } + else + { + return character_either("\"\\bfnrt"); + } + }; + + const auto escape_seq = [&s, &escape_char] { + if(s.v1_1_0_add_escape_sequence_x) + { + return either( + escape_char(), + escaped_u4(s), + escaped_U8(s), + escaped_x2(s) + ); + } + else + { + return either( + escape_char(), + escaped_u4(s), + escaped_U8(s) + ); + } + }; + + return sequence(character('\\'), escape_seq()); + }); + return cache.at(sp); +} + +TOML11_INLINE either const& basic_char(const spec& sp) +{ + static thread_local auto cache = make_cache([](const spec& s) { + const auto basic_unescaped = [&s]() { + return either( + wschar(s), + character(0x21), // 22 is " + character_in_range(0x23, 0x5B), // 5C is backslash + character_in_range(0x5D, 0x7E), // 7F is DEL + non_ascii(s) + ); + }; + return either(basic_unescaped(), escaped(s)); + }); + return cache.at(sp); +} + +TOML11_INLINE sequence const& basic_string(const spec& sp) +{ + static thread_local auto cache = make_cache([](const spec& s) { + return sequence( + character('"'), + repeat_at_least(0, basic_char(s)), + character('"') ); - }; - return either(basic_unescaped(), escaped(s)); -} - -TOML11_INLINE sequence basic_string(const spec& s) -{ - return sequence( - character('"'), - repeat_at_least(0, basic_char(s)), - character('"') - ); + }); + return cache.at(sp); } // --------------------------------------------------------------------------- // multiline string -TOML11_INLINE sequence escaped_newline(const spec& s) +TOML11_INLINE sequence const& escaped_newline(const spec& sp) { - return sequence( - character('\\'), ws(s), newline(s), - repeat_at_least(0, either(wschar(s), newline(s))) - ); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence( + character('\\'), ws(s), newline(s), + repeat_at_least(0, either(wschar(s), newline(s))) + ); + }); + return cache.at(sp); } -TOML11_INLINE sequence ml_basic_string(const spec& s) +TOML11_INLINE sequence const& ml_basic_string(const spec& sp) { - const auto mlb_content = [&s]() { - return either(basic_char(s), newline(s), escaped_newline(s)); - }; - const auto mlb_quotes = []() { - return either(literal("\"\""), character('\"')); - }; + static thread_local auto cache = make_cache([](const spec& s) { + const auto mlb_content = [&s]() { + return either(basic_char(s), newline(s), escaped_newline(s)); + }; + const auto mlb_quotes = []() { + return either(literal("\"\""), character('\"')); + }; - return sequence( - literal("\"\"\""), - maybe(newline(s)), - repeat_at_least(0, mlb_content()), - repeat_at_least(0, - sequence( - mlb_quotes(), - repeat_at_least(1, mlb_content()) - ) - ), - // XXX """ and mlb_quotes are intentionally reordered to avoid - // unexpected match of mlb_quotes - literal("\"\"\""), - maybe(mlb_quotes()) - ); + return sequence( + literal("\"\"\""), + maybe(newline(s)), + repeat_at_least(0, mlb_content()), + repeat_at_least(0, + sequence( + mlb_quotes(), + repeat_at_least(1, mlb_content()) + ) + ), + // XXX """ and mlb_quotes are intentionally reordered to avoid + // unexpected match of mlb_quotes + literal("\"\"\""), + maybe(mlb_quotes()) + ); + }); + return cache.at(sp); } // --------------------------------------------------------------------------- // literal string -TOML11_INLINE either literal_char(const spec& s) +TOML11_INLINE either const& literal_char(const spec& sp) { - return either( - character (0x09), - character_in_range(0x20, 0x26), - character_in_range(0x28, 0x7E), - non_ascii(s) - ); + static thread_local auto cache = make_cache([](const spec& s) { + return either( + character (0x09), + character_in_range(0x20, 0x26), + character_in_range(0x28, 0x7E), + non_ascii(s) + ); + }); + return cache.at(sp); } -TOML11_INLINE sequence literal_string(const spec& s) +TOML11_INLINE sequence const& literal_string(const spec& sp) { - return sequence( - character('\''), - repeat_at_least(0, literal_char(s)), - character('\'') - ); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence( + character('\''), + repeat_at_least(0, literal_char(s)), + character('\'') + ); + }); + return cache.at(sp); } -TOML11_INLINE sequence ml_literal_string(const spec& s) +TOML11_INLINE sequence const& ml_literal_string(const spec& sp) { - const auto mll_quotes = []() { - return either(literal("''"), character('\'')); - }; - const auto mll_content = [&s]() { - return either(literal_char(s), newline(s)); - }; + static thread_local auto cache = make_cache([](const spec& s) { + const auto mll_quotes = []() { + return either(literal("''"), character('\'')); + }; + const auto mll_content = [&s]() { + return either(literal_char(s), newline(s)); + }; - return sequence( - literal("'''"), - maybe(newline(s)), - repeat_at_least(0, mll_content()), - repeat_at_least(0, sequence( - mll_quotes(), - repeat_at_least(1, mll_content()) - ) - ), - literal("'''"), - maybe(mll_quotes()) - // XXX ''' and mll_quotes are intentionally reordered to avoid - // unexpected match of mll_quotes - ); + return sequence( + literal("'''"), + maybe(newline(s)), + repeat_at_least(0, mll_content()), + repeat_at_least(0, sequence( + mll_quotes(), + repeat_at_least(1, mll_content()) + ) + ), + literal("'''"), + maybe(mll_quotes()) + // XXX ''' and mll_quotes are intentionally reordered to avoid + // unexpected match of mll_quotes + ); + }); + return cache.at(sp); } -TOML11_INLINE either string(const spec& s) +TOML11_INLINE either const& string(const spec& sp) { - return either( - ml_basic_string(s), - ml_literal_string(s), - basic_string(s), - literal_string(s) - ); + static thread_local auto cache = make_cache([](const spec& s) { + return either( + ml_basic_string(s), + ml_literal_string(s), + basic_string(s), + literal_string(s) + ); + }); + return cache.at(sp); } // =========================================================================== @@ -12184,71 +12350,94 @@ TOML11_INLINE region non_ascii_key_char::scan(location& loc) const return region{}; } -TOML11_INLINE repeat_at_least unquoted_key(const spec& s) +TOML11_INLINE repeat_at_least const& unquoted_key(const spec& sp) { - auto keychar = either( - alpha(s), digit(s), character{0x2D}, character{0x5F} - ); - - if(s.v1_1_0_allow_non_english_in_bare_keys) - { - keychar.push_back(non_ascii_key_char(s)); - } - - return repeat_at_least(1, std::move(keychar)); + static thread_local auto cache = make_cache([](const spec& s) { + const auto keychar = [&s] { + if(s.v1_1_0_allow_non_english_in_bare_keys) + { + return either(alpha(s), digit(s), character{0x2D}, character{0x5F}, + non_ascii_key_char(s)); + } + else + { + return either(alpha(s), digit(s), character{0x2D}, character{0x5F}); + } + }; + return repeat_at_least(1, keychar()); + }); + return cache.at(sp); } -TOML11_INLINE either quoted_key(const spec& s) +TOML11_INLINE either const& quoted_key(const spec& sp) { + static thread_local auto cache = make_cache([](const spec& s) { return either(basic_string(s), literal_string(s)); + }); + return cache.at(sp); } -TOML11_INLINE either simple_key(const spec& s) +TOML11_INLINE either const& simple_key(const spec& sp) { - return either(unquoted_key(s), quoted_key(s)); + static thread_local auto cache = make_cache([](const spec& s) { + return either(unquoted_key(s), quoted_key(s)); + }); + return cache.at(sp); } -TOML11_INLINE sequence dot_sep(const spec& s) +TOML11_INLINE sequence const& dot_sep(const spec& sp) { - return sequence(ws(s), character('.'), ws(s)); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence(ws(s), character('.'), ws(s)); + }); + return cache.at(sp); } -TOML11_INLINE sequence dotted_key(const spec& s) +TOML11_INLINE sequence const& dotted_key(const spec& sp) { - return sequence( - simple_key(s), - repeat_at_least(1, sequence(dot_sep(s), simple_key(s))) - ); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence( + simple_key(s), + repeat_at_least(1, sequence(dot_sep(s), simple_key(s))) + ); + }); + return cache.at(sp); } -TOML11_INLINE key::key(const spec& s) noexcept - : scanner_(dotted_key(s), simple_key(s)) -{} - -TOML11_INLINE sequence keyval_sep(const spec& s) +TOML11_INLINE sequence const& keyval_sep(const spec& sp) { - return sequence(ws(s), character('='), ws(s)); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence(ws(s), character('='), ws(s)); + }); + return cache.at(sp); } // =========================================================================== // Table key -TOML11_INLINE sequence std_table(const spec& s) +TOML11_INLINE sequence const& std_table(const spec& sp) { - return sequence(character('['), ws(s), key(s), ws(s), character(']')); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence(character('['), ws(s), key(s), ws(s), character(']')); + }); + return cache.at(sp); } -TOML11_INLINE sequence array_table(const spec& s) +TOML11_INLINE sequence const& array_table(const spec& sp) { - return sequence(literal("[["), ws(s), key(s), ws(s), literal("]]")); + static thread_local auto cache = make_cache([](const spec& s) { + return sequence(literal("[["), ws(s), key(s), ws(s), literal("]]")); + }); + return cache.at(sp); } // =========================================================================== // extension: null -TOML11_INLINE literal null_value(const spec&) +TOML11_INLINE literal const& null_value(const spec&) { - return literal("null"); + static thread_local literal cache("null"); + return cache; } } // namespace syntax @@ -13111,7 +13300,7 @@ parse_floating(location& loc, const context& ctx) bool is_hex = false; std::string str; region reg; - if(spec.ext_hex_float && sequence(character('0'), character('x')).scan(loc).is_ok()) + if(spec.ext_hex_float && literal("0x").scan(loc).is_ok()) { loc = first; is_hex = true; @@ -13884,8 +14073,7 @@ parse_escape_sequence(location& loc, const context& ctx) } else if(spec.v1_1_0_add_escape_sequence_x && loc.current() == 'x') { - auto scanner = sequence(character('x'), repeat_exact(2, syntax::hexdig(spec))); - const auto reg = scanner.scan(loc); + const auto reg = syntax::escaped_x2(spec).scan(loc); if( ! reg.is_ok()) { auto src = source_location(region(loc)); @@ -13902,8 +14090,7 @@ parse_escape_sequence(location& loc, const context& ctx) } else if(loc.current() == 'u') { - auto scanner = sequence(character('u'), repeat_exact(4, syntax::hexdig(spec))); - const auto reg = scanner.scan(loc); + const auto reg = syntax::escaped_u4(spec).scan(loc); if( ! reg.is_ok()) { auto src = source_location(region(loc)); @@ -13920,8 +14107,7 @@ parse_escape_sequence(location& loc, const context& ctx) } else if(loc.current() == 'U') { - auto scanner = sequence(character('U'), repeat_exact(8, syntax::hexdig(spec))); - const auto reg = scanner.scan(loc); + const auto reg = syntax::escaped_U8(spec).scan(loc); if( ! reg.is_ok()) { auto src = source_location(region(loc)); @@ -15317,7 +15503,7 @@ guess_number_type(const location& first, const context& ctx) if('0' <= c && c <= '9') { return err(make_syntax_error("bad datetime: missing T or space", - character_either{'T', 't', ' '}, loc, std::string( + character_either("Tt "), loc, std::string( "Hint: valid : 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999\n" "Hint: invalid: 1979-05-27T7:32:00, 1979-05-27 17:32\n"))); }