diff --git a/PROPOSAL.md b/PROPOSAL.md deleted file mode 100644 index bbdc424..0000000 --- a/PROPOSAL.md +++ /dev/null @@ -1,59 +0,0 @@ - -### encoding user's data - -You can encode your data to toml format. - -```cpp -const toml::value integer(1); -const toml::value array{3.1, 3.14, 3.141, 3.1415}; -const toml::value table{{"answer", 42}, {"pi", 3.14}, {"string", "foobar"}}; - -std::cout << toml::format("integer", integer) << std::endl; -std::cout << toml::format("array", array) << std::endl; -std::cout << toml::format("table", table) << std::endl; -``` - -this program will output as below. - -```toml -integer = 1 -array = [3.1, 3.14, 3.141, 3.1415] -[table] -answer = 42 -pi = 3.14 -string = "foobar" -``` - -Without key name, you can make string formatted as toml. - -```cpp -const std::string integer_ = toml::format(integer); // "1" -const std::string array_ = toml::format(array); // "[3.1, 3.14, 3.141, 3.1415]" -const std::string table_ = toml::format(table); // "answer = 42\npi=3.14\nstring=foobar" -``` - -### inlinize - -You can make `toml::Table` inline. - -```cpp -const toml::value table{{"answer", 42}, {"pi", 3.14}, {"string", "foobar"}}; -// if the inline-table format length is less than 80, the table will be inlined -std::cout << toml::format("table", table, toml::make_inline(80)) << std::endl; -// In any case, the table will be inlined. -std::cout << toml::format("table", table, toml::forceinline) << std::endl; -``` - -```toml -table = {answer = 42, pi = 3.14, string = "foobar"} -``` - -And there are some stream manipulators for toml format. - -```cpp -const toml::value table{{"answer", 42}, {"pi", 3.14}, {"string", "foobar"}}; -// if the inline-table format length is less than 80, the table will be inlined -std::cout << toml::make_inline(80) << table << std::endl; -// In any case, the table will be inlined. 
-std::cout << toml::forceinline << table << std::endl; -``` diff --git a/README.md b/README.md index ab950b6..d26cc2e 100644 --- a/README.md +++ b/README.md @@ -423,6 +423,22 @@ int i = 0; toml::from_toml(i, data.at("something")); ``` +### visiting toml::value + +toml11 v2.1.0+ provides `toml::visit` to apply a function to `toml::value` in the +same way as `std::visit` does for `std::variant`. + +```cpp +const toml::value v(3.14); +toml::visit([](const auto& val) -> void { + std::cout << val << std::endl; + }, v); +``` + +The function object that is passed to `toml::visit` must be able to +receive all the possible TOML types. Also, the result type must be the same +for all of them. + ### Sanitizing UTF-8 codepoints toml11 shows warning if a value of an escape sequence used @@ -498,6 +514,97 @@ you will get an error message like this. | ~~ maximum number here ``` +### Serializing TOML data + +toml11 v2.1.0 enables you to serialize data into toml format. + +```cpp +const auto data = toml::table{{"foo", 42}, {"bar", "baz"}}; + +const std::string serial = toml::format(data); +assert(serial == "bar = \"baz\"\nfoo = 42"); + +std::cout << data << std::endl; +// bar = "baz" +// foo = 42 +``` + +toml11 automatically makes a tiny table and array inline. +You can specify the width to make them inline by `std::setw` for streams. + +```cpp +const auto data = toml::table{ + {"qux", toml::table{{"foo", 42}, {"bar", "baz"}}}, + {"quux", toml::array{"small", "array", "of", "strings"}}, + {"foobar", toml::array{"this", "array", "of", "strings", "is", "too", "long", + "to", "print", "into", "single", "line", "isn't", "it?"}}, +}; + +// the threshold becomes 80. +std::cout << std::setw(80) << data << std::endl; +// foobar = [ +// "this","array","of","strings","is","too","long","to","print","into", +// "single","line","isn't","it?", +// ] +// quux = ["small","array","of","strings"] +// qux = {bar="baz",foo=42} + + +// the width is 0. nothing becomes inline. 
+std::cout << std::setw(0) << data << std::endl; +// foobar = [ +// "this", +// ... (snip) +// "it?", +// ] +// quux = [ +// "small", +// "array", +// "of", +// "strings", +// ] +// [qux] +// bar = "baz" +// foo = 42 +``` + +It is recommended to set the width before printing data. Some I/O functions reset the +width to 0, which makes everything (including `toml::array`) multiline, and +the resulting file becomes too long. + +`toml::format` accepts an optional second argument to set the width. +By default, it is 80. + +```cpp +const auto data = toml::table{ + {"qux", toml::table{{"foo", 42}, {"bar", "baz"}}} +}; + +const std::string serial = toml::format(data, /*width = */ 0); +// [qux] +// bar = "baz" +// foo = 42 +``` + +To control the precision of floating point numbers, you need to pass +`std::setprecision` to the stream or pass an `int` as the optional third argument of +`toml::format` (by default, it is `std::numeric_limits<toml::floating>::max_digits10`). + +```cpp +const auto data = toml::table{ + {"pi", 3.141592653589793}, + {"e", 2.718281828459045} +}; +std::cout << std::setprecision(17) << data << std::endl; +// e = 2.7182818284590451 +// pi = 3.1415926535897931 +std::cout << std::setprecision( 7) << data << std::endl; +// e = 2.718282 +// pi = 3.141593 + +const std::string serial = toml::format(data, /*width = */ 0, /*prec = */ 17); +``` + ## Underlying types The toml types (can be used as `toml::*` in this library) and corresponding `enum` names are listed in the table below. 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2e12fe9..0664150 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -25,6 +25,7 @@ set(TEST_NAMES test_to_toml test_from_toml test_parse_file + test_serialize_file test_parse_unicode test_error_detection ) diff --git a/tests/test_serialize_file.cpp b/tests/test_serialize_file.cpp new file mode 100644 index 0000000..0f6e546 --- /dev/null +++ b/tests/test_serialize_file.cpp @@ -0,0 +1,54 @@ +#define BOOST_TEST_MODULE "test_serialize_file" +#ifdef UNITTEST_FRAMEWORK_LIBRARY_EXIST +#include +#else +#define BOOST_TEST_NO_LIB +#include +#endif +#include +#include +#include + + +BOOST_AUTO_TEST_CASE(test_example) +{ + const auto data = toml::parse("toml/tests/example.toml"); + { + std::ofstream ofs("tmp1.toml"); + ofs << std::setw(80) << data; + } + + auto serialized = toml::parse("tmp1.toml"); + { + auto& owner = toml::get(serialized.at("owner")); + auto& bio = toml::get(owner.at("bio")); + const auto CR = std::find(bio.begin(), bio.end(), '\r'); + if(CR != bio.end()) + { + bio.erase(CR); + } + } + BOOST_CHECK(data == serialized); +} + +BOOST_AUTO_TEST_CASE(test_fruit) +{ + const auto data = toml::parse("toml/tests/fruit.toml"); + { + std::ofstream ofs("tmp2.toml"); + ofs << std::setw(80) << data; + } + const auto serialized = toml::parse("tmp2.toml"); + BOOST_CHECK(data == serialized); +} + +BOOST_AUTO_TEST_CASE(test_hard_example) +{ + const auto data = toml::parse("toml/tests/hard_example.toml"); + { + std::ofstream ofs("tmp3.toml"); + ofs << std::setw(80) << data; + } + const auto serialized = toml::parse("tmp3.toml"); + BOOST_CHECK(data == serialized); +} diff --git a/toml.hpp b/toml.hpp index cfae56b..80e62e2 100644 --- a/toml.hpp +++ b/toml.hpp @@ -34,6 +34,7 @@ #endif #include "toml/parser.hpp" +#include "toml/serializer.hpp" #include "toml/to_toml.hpp" #include "toml/from_toml.hpp" #include "toml/get.hpp" diff --git a/toml/serializer.hpp b/toml/serializer.hpp new file mode 100644 index 
0000000..1944ead --- /dev/null +++ b/toml/serializer.hpp @@ -0,0 +1,511 @@ +// Copyright Toru Niina 2019. +// Distributed under the MIT License. +#ifndef TOML11_SERIALIZER_HPP +#define TOML11_SERIALIZER_HPP +#include "value.hpp" +#include "lexer.hpp" +#include + +namespace toml +{ + +struct serializer +{ + serializer(const std::size_t w = 80, + const int float_prec = std::numeric_limits::max_digits10, + const bool can_be_inlined = false, + std::vector ks = {}) + : can_be_inlined_(can_be_inlined), float_prec_(float_prec), width_(w), + keys_(std::move(ks)) + {} + ~serializer() = default; + + std::string operator()(const toml::boolean& b) const + { + return b ? "true" : "false"; + } + std::string operator()(const integer i) const + { + return std::to_string(i); + } + std::string operator()(const toml::floating f) const + { + std::string token = [=] { + // every float value needs decimal point (or exponent). + std::ostringstream oss; + oss << std::setprecision(float_prec_) << std::showpoint << f; + return oss.str(); + }(); + + if(token.back() == '.') // 1. => 1.0 + { + token += '0'; + } + const auto e = std::find_if(token.cbegin(), token.cend(), + [](const char c) -> bool { + return c == 'E' || c == 'e'; + }); + if(e == token.cend()) + { + return token; // there is no exponent part. just return it. + } + + // zero-prefix in an exponent is not allowed in TOML. + // remove it if it exists. + bool sign_exists = false; + std::size_t zero_prefix = 0; + for(auto iter = std::next(e), iend = token.cend(); iter != iend; ++iter) + { + if(*iter == '+' || *iter == '-'){sign_exists = true; continue;} + if(*iter == '0'){zero_prefix += 1;} + else {break;} + } + if(zero_prefix != 0) + { + const auto offset = std::distance(token.cbegin(), e) + + (sign_exists ? 
2 : 1); + token.erase(offset, zero_prefix); + } + return token; + } + std::string operator()(const string& s) const + { + if(s.kind == string_t::basic) + { + if(std::find(s.str.cbegin(), s.str.cend(), '\n') != s.str.cend()) + { + // if linefeed is contained, make it multiline-string. + const std::string open("\"\"\"\n"); + const std::string close("\\\n\"\"\""); + return open + this->escape_ml_basic_string(s.str) + close; + } + + // no linefeed. try to make it oneline-string. + std::string oneline = this->escape_basic_string(s.str); + if(oneline.size() + 2 < width_ || width_ < 2) + { + const std::string quote("\""); + return quote + oneline + quote; + } + + // the line is too long compared to the specified width. + // split it into multiple lines. + std::string token("\"\"\"\n"); + while(!oneline.empty()) + { + if(oneline.size() < width_) + { + token += oneline; + oneline.clear(); + } + else if(oneline.at(width_-2) == '\\') + { + token += oneline.substr(0, width_-2); + token += "\\\n"; + oneline.erase(0, width_-2); + } + else + { + token += oneline.substr(0, width_-1); + token += "\\\n"; + oneline.erase(0, width_-1); + } + } + return token + std::string("\\\n\"\"\""); + } + else // the string `s` is literal-string. 
+ { + if(std::find(s.str.cbegin(), s.str.cend(), '\n') != s.str.cend() || + std::find(s.str.cbegin(), s.str.cend(), '\'') != s.str.cend() ) + { + const std::string open("'''\n"); + const std::string close("'''"); + return open + s.str + close; + } + else + { + const std::string quote("'"); + return quote + s.str + quote; + } + } + } + + std::string operator()(const local_date& d) const + { + std::ostringstream oss; + oss << d; + return oss.str(); + } + std::string operator()(const local_time& t) const + { + std::ostringstream oss; + oss << t; + return oss.str(); + } + std::string operator()(const local_datetime& dt) const + { + std::ostringstream oss; + oss << dt; + return oss.str(); + } + std::string operator()(const offset_datetime& odt) const + { + std::ostringstream oss; + oss << odt; + return oss.str(); + } + + std::string operator()(const array& v) const + { + if(!v.empty() && v.front().is(value_t::Table))// v is an array of tables + { + // if it's not inlined, we need to add `[[table.key]]`. + // but if it can be inlined, we need `table.key = [...]`. + if(this->can_be_inlined_) + { + std::string token; + if(!keys_.empty()) + { + token += this->serialize_key(keys_.back()); + token += " = "; + } + bool width_exceeds = false; + token += "[\n"; + for(const auto& item : v) + { + const auto t = + this->make_inline_table(item.cast()); + + if(t.size() + 1 > width_ || // +1 for the last comma {...}, + std::find(t.cbegin(), t.cend(), '\n') != t.cend()) + { + width_exceeds = true; + break; + } + token += t; + token += ",\n"; + } + if(!width_exceeds) + { + token += "]\n"; + return token; + } + // if width_exceeds, serialize it as [[array.of.tables]]. + } + + std::string token; + for(const auto& item : v) + { + token += "[["; + token += this->serialize_dotted_key(keys_); + token += "]]\n"; + token += this->make_multiline_table(item.cast()); + } + return token; + } + if(v.empty()) + { + return std::string("[]"); + } + + // not an array of tables. normal array. 
first, try to make it inline. + { + const auto inl = this->make_inline_array(v); + if(inl.size() < this->width_ && + std::find(inl.cbegin(), inl.cend(), '\n') == inl.cend()) + { + return inl; + } + } + + // if the length exceeds this->width_, print multiline array + std::string token; + std::string current_line; + token += "[\n"; + for(const auto& item : v) + { + auto next_elem = toml::visit(*this, item); + // newline between array-value and comma is not allowed + if(next_elem.back() == '\n'){next_elem.pop_back();} + + if(current_line.size() + next_elem.size() + 1 < this->width_) + { + current_line += next_elem; + current_line += ','; + } + else if(current_line.empty()) + { + // the next elem cannot be within the width. + token += next_elem; + token += ",\n"; + // keep current line empty + } + else // current_line has some tokens and it exceeds width + { + assert(current_line.back() == ','); + token += current_line; + token += '\n'; + current_line = next_elem; + current_line += ','; + } + } + if(!current_line.empty()) + { + if(current_line.back() != '\n') {current_line += '\n';} + token += current_line; + } + token += "]\n"; + return token; + } + + std::string operator()(const table& v) const + { + if(this->can_be_inlined_) + { + std::string token; + if(!this->keys_.empty()) + { + token += this->serialize_key(this->keys_.back()); + token += " = "; + } + token += this->make_inline_table(v); + if(token.size() < this->width_) + { + return token; + } + } + + std::string token; + if(!keys_.empty()) + { + token += '['; + token += this->serialize_dotted_key(keys_); + token += "]\n"; + } + token += this->make_multiline_table(v); + return token; + } + + private: + + std::string serialize_key(const toml::key& key) const + { + detail::location loc(key, key); + detail::lex_unquoted_key::invoke(loc); + if(loc.iter() == loc.end()) + { + return key; // all the tokens are consumed. the key is unquoted-key. 
+ } + std::string token("\""); + token += this->escape_basic_string(key); + token += "\""; + return token; + } + + std::string serialize_dotted_key(const std::vector& keys) const + { + std::string token; + if(keys.empty()){return token;} + + for(const auto& k : keys) + { + token += this->serialize_key(k); + token += '.'; + } + token.erase(token.size() - 1, 1); // remove trailing `.` + return token; + } + + std::string escape_basic_string(const std::string& s) const + { + //XXX assuming `s` is a valid utf-8 sequence. + std::string retval; + for(const char c : s) + { + switch(c) + { + case '\\': {retval += "\\\\"; break;} + case '\"': {retval += "\\\""; break;} + case '\b': {retval += "\\b"; break;} + case '\t': {retval += "\\t"; break;} + case '\f': {retval += "\\f"; break;} + case '\n': {retval += "\\n"; break;} + case '\r': {retval += "\\r"; break;} + default : {retval += c; break;} + } + } + return retval; + } + + std::string escape_ml_basic_string(const std::string& s) const + { + std::string retval; + for(auto i=s.cbegin(), e=s.cend(); i!=e; ++i) + { + switch(*i) + { + case '\\': {retval += "\\\\"; break;} + case '\"': {retval += "\\\""; break;} + case '\b': {retval += "\\b"; break;} + case '\t': {retval += "\\t"; break;} + case '\f': {retval += "\\f"; break;} + case '\n': {retval += "\n"; break;} + case '\r': + { + if(std::next(i) != e && *std::next(i) == '\n') + { + retval += "\r\n"; + ++i; + } + else + { + retval += "\\r"; + } + break; + } + default: {retval += *i; break;} + } + } + return retval; + } + + std::string make_inline_array(const array& v) const + { + std::string token; + token += '['; + bool is_first = true; + for(const auto& item : v) + { + if(is_first) {is_first = false;} else {token += ',';} + token += visit(serializer(std::numeric_limits::max(), + this->float_prec_, true), item); + } + token += ']'; + return token; + } + + std::string make_inline_table(const table& v) const + { + assert(this->can_be_inlined_); + std::string token; + token += 
'{'; + bool is_first = true; + for(const auto& kv : v) + { + // in inline tables, trailing comma is not allowed (toml-lang #569). + if(is_first) {is_first = false;} else {token += ',';} + token += this->serialize_key(kv.first); + token += '='; + token += visit(serializer(std::numeric_limits::max(), + this->float_prec_, true), kv.second); + } + token += '}'; + return token; + } + + std::string make_multiline_table(const table& v) const + { + std::string token; + + // print non-table stuff first. because after printing [foo.bar], the + // remaining non-table values will be assigned into [foo.bar], not [foo] + for(const auto kv : v) + { + if(kv.second.is(value_t::Table) || is_array_of_tables(kv.second)) + { + continue; + } + + const auto key_and_sep = this->serialize_key(kv.first) + " = "; + const auto residual_width = (this->width_ > key_and_sep.size()) ? + this->width_ - key_and_sep.size() : 0; + token += key_and_sep; + token += visit(serializer(residual_width, this->float_prec_, true), + kv.second); + if(token.back() != '\n') + { + token += '\n'; + } + } + + // normal tables / array of tables + + // after multiline table appeared, the other tables cannot be inline + // because the table would be assigned into the table. + // [foo] + // ... + // bar = {...} # <- bar will be a member of [foo]. + bool multiline_table_printed = false; + for(const auto& kv : v) + { + if(!kv.second.is(value_t::Table) && !is_array_of_tables(kv.second)) + { + continue; // other stuff are already serialized. skip them. + } + + std::vector ks(this->keys_); + ks.push_back(kv.first); + + auto tmp = visit(serializer( + this->width_, this->float_prec_, !multiline_table_printed, ks), + kv.second); + + if((!multiline_table_printed) && + std::find(tmp.cbegin(), tmp.cend(), '\n') != tmp.cend()) + { + multiline_table_printed = true; + } + else + { + // still inline tables only. 
+ tmp += '\n'; + } + token += tmp; + } + return token; + } + + bool is_array_of_tables(const value& v) const + { + if(!v.is(value_t::Array)) {return false;} + + const auto& a = v.cast(); + return !a.empty() && a.front().is(value_t::Table); + } + + private: + + bool can_be_inlined_; + int float_prec_; + std::size_t width_; + std::vector keys_; +}; + +inline std::string +format(const value& v, std::size_t w = 80, + int fprec = std::numeric_limits::max_digits10) +{ + return visit(serializer(w, fprec, true), v); +} +inline std::string +format(const table& t, std::size_t w = 80, + int fprec = std::numeric_limits::max_digits10) +{ + return serializer(w, fprec, true)(t); +} + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const value& v) +{ + // get status of std::setw(). + const std::size_t w = os.width(); + const int fprec = os.precision(); + + // the root object can't be an inline table. so pass `false`. + os << visit(serializer(w, fprec, false), v); + return os; +} + +} // toml +#endif// TOML11_SERIALIZER_HPP diff --git a/toml/value.hpp b/toml/value.hpp index 4241f4e..d71813c 100644 --- a/toml/value.hpp +++ b/toml/value.hpp @@ -820,5 +820,77 @@ inline std::string format_error(const std::string& err_msg, std::move(hints)); } +template +detail::return_type_of_t +visit(Visitor&& visitor, const toml::value& v) +{ + switch(v.type()) + { + case value_t::Boolean : {return visitor(v.cast());} + case value_t::Integer : {return visitor(v.cast());} + case value_t::Float : {return visitor(v.cast());} + case value_t::String : {return visitor(v.cast());} + case value_t::OffsetDatetime: {return visitor(v.cast());} + case value_t::LocalDatetime : {return visitor(v.cast());} + case value_t::LocalDate : {return visitor(v.cast());} + case value_t::LocalTime : {return visitor(v.cast());} + case value_t::Array : {return visitor(v.cast());} + case value_t::Table : {return visitor(v.cast());} + case value_t::Empty : break; + case value_t::Unknown : break; + default: 
break; + } + throw std::runtime_error(format_error("[error] toml::visit: toml::value " + "does not have any valid value.", v, "here")); +} + +template +detail::return_type_of_t +visit(Visitor&& visitor, toml::value& v) +{ + switch(v.type()) + { + case value_t::Boolean : {return visitor(v.cast());} + case value_t::Integer : {return visitor(v.cast());} + case value_t::Float : {return visitor(v.cast());} + case value_t::String : {return visitor(v.cast());} + case value_t::OffsetDatetime: {return visitor(v.cast());} + case value_t::LocalDatetime : {return visitor(v.cast());} + case value_t::LocalDate : {return visitor(v.cast());} + case value_t::LocalTime : {return visitor(v.cast());} + case value_t::Array : {return visitor(v.cast());} + case value_t::Table : {return visitor(v.cast());} + case value_t::Empty : break; + case value_t::Unknown : break; + default: break; + } + throw std::runtime_error(format_error("[error] toml::visit: toml::value " + "does not have any valid value.", v, "here")); +} + +template +detail::return_type_of_t +visit(Visitor&& visitor, toml::value&& v) +{ + switch(v.type()) + { + case value_t::Boolean : {return visitor(std::move(v.cast()));} + case value_t::Integer : {return visitor(std::move(v.cast()));} + case value_t::Float : {return visitor(std::move(v.cast()));} + case value_t::String : {return visitor(std::move(v.cast()));} + case value_t::OffsetDatetime: {return visitor(std::move(v.cast()));} + case value_t::LocalDatetime : {return visitor(std::move(v.cast()));} + case value_t::LocalDate : {return visitor(std::move(v.cast()));} + case value_t::LocalTime : {return visitor(std::move(v.cast()));} + case value_t::Array : {return visitor(std::move(v.cast()));} + case value_t::Table : {return visitor(std::move(v.cast()));} + case value_t::Empty : break; + case value_t::Unknown : break; + default: break; + } + throw std::runtime_error(format_error("[error] toml::visit: toml::value " + "does not have any valid value.", v, "here")); +} + }// toml 
#endif// TOML11_VALUE