Merge pull request #26 from ToruNiina/serialize

add serializer
This commit is contained in:
Toru Niina
2019-02-16 23:27:05 +09:00
committed by GitHub
7 changed files with 746 additions and 59 deletions

View File

@@ -1,59 +0,0 @@
### encoding user's data
You can encode your data to toml format.
```cpp
const toml::value integer(1);
const toml::value array{3.1, 3.14, 3.141, 3.1415};
const toml::value table{{"answer", 42}, {"pi", 3.14}, {"string", "foobar"}};
std::cout << toml::format("integer", integer) << std::endl;
std::cout << toml::format("array", array) << std::endl;
std::cout << toml::format("table", table) << std::endl;
```
this program will output as below.
```toml
integer = 1
array = [3.1, 3.14, 3.141, 3.1415]
[table]
answer = 42
pi = 3.14
string = "foobar"
```
Without key name, you can make string formatted as toml.
```cpp
const std::string integer_ = toml::format(integer); // "1"
const std::string array_ = toml::format(array); // "[3.1, 3.14, 3.141, 3.1415]"
const std::string table_ = toml::format(table); // "answer = 42\npi=3.14\nstring=foobar"
```
### inlinize
You can make `toml::Table` inline.
```cpp
const toml::value table{{"answer", 42}, {"pi", 3.14}, {"string", "foobar"}};
// if the inline-table format length is less than 80, the table will be inlined
std::cout << toml::format("table", table, toml::make_inline(80)) << std::endl;
// In any case, the table will be inlined.
std::cout << toml::format("table", table, toml::forceinline) << std::endl;
```
```toml
table = {answer = 42, pi = 3.14, string = "foobar"}
```
And there are some stream manipulators for toml format.
```cpp
const toml::value table{{"answer", 42}, {"pi", 3.14}, {"string", "foobar"}};
// if the inline-table format length is less than 80, the table will be inlined
std::cout << toml::make_inline(80) << table << std::endl;
// In any case, the table will be inlined.
std::cout << toml::forceinline << table << std::endl;
```

107
README.md
View File

@@ -423,6 +423,22 @@ int i = 0;
toml::from_toml(i, data.at("something"));
```
### visiting toml::value
TOML v2.1.0+ provides `toml::visit` to apply a function to `toml::value` in the
same way as `std::variant`.
```cpp
const toml::value v(3.14);
toml::visit([](const auto& val) -> void {
std::cout << val << std::endl;
}, v);
```
The function object that would be passed to `toml::visit` must be able to
recieve all the possible TOML types. Also, the result types should be the same
each other.
### Sanitizing UTF-8 codepoints
toml11 shows warning if a value of an escape sequence used
@@ -498,6 +514,97 @@ you will get an error message like this.
| ~~ maximum number here
```
### Serializing TOML data
toml11 v2.1.0 enables you to serialize data into toml format.
```cpp
const auto data = toml::table{{"foo", 42}, {"bar", "baz"}};
const std::string serial = toml::format(data);
assert(serial == "bar = \"baz\"\nfoo = 42");
std::cout << data << std::endl;
// bar = "baz"
// foo = 42
```
toml11 automatically makes a tiny table and array inline.
You can specify the width to make them inline by `std::setw` for streams.
```cpp
const auto data = toml::table{
{"qux", toml::table{{"foo", 42}, {"bar", "baz"}}},
{"quux", toml::array{"small", "array", "of", "strings"}},
{"foobar", toml::array{"this", "array", "of", "strings", "is", "too", "long",
"to", "print", "into", "single", "line", "isn't", "it?"}},
};
// the threshold becomes 80.
std::cout << std::setw(80) << data << std::endl;
// foobar = [
// "this","array","of","strings","is","too","long","to","print","into",
// "single","line","isn't","it?",
// ]
// quux = ["small","array","of","strings"]
// qux = {bar="baz",foo=42}
// the width is 0. nothing become inline.
std::cout << std::setw(0) << data << std::endl;
// foobar = [
// "this",
// ... (snip)
// "it?",
// ]
// quux = [
// "small",
// "array",
// "of",
// "strings",
// ]
// [qux]
// bar = "baz"
// foo = 42
```
It is recommended to set width before printing data. Some I/O functions changes
width to 0, and it makes all the stuff (including `toml::array`) multiline.
The resulting files becomes too long.
`toml::format` receives optional second argument to set the width.
By default, it is 80.
```cpp
const auto data = toml::table{
{"qux", toml::table{{"foo", 42}, {"bar", "baz"}}}
};
const std::string serial = toml::format(data, /*width = */ 0);
// [qux]
// bar = "baz"
// foo = 42
```
To control the precision of floating point numbers, you need to pass
`std::setprecision` to stream or pass `int` to the optional third argument of
`toml::format` (by default, it is `std::numeric_limits<double>::max_digit10`).
```cpp
const auto data = toml::table{
{"pi", 3.141592653589793},
{"e", 2.718281828459045}
};
std::cout << std::setprecision(17) << data << std::endl;
// e = 2.7182818284590451
// pi = 3.1415926535897931
std::cout << std::setprecision( 7) << data << std::endl;
// e = 2.718282
// pi = 3.141593
const std::string serial = toml::format(data, /*width = */ 0, /*prec = */ 17);
```
## Underlying types
The toml types (can be used as `toml::*` in this library) and corresponding `enum` names are listed in the table below.

View File

@@ -25,6 +25,7 @@ set(TEST_NAMES
test_to_toml
test_from_toml
test_parse_file
test_serialize_file
test_parse_unicode
test_error_detection
)

View File

@@ -0,0 +1,54 @@
#define BOOST_TEST_MODULE "test_serialize_file"
#ifdef UNITTEST_FRAMEWORK_LIBRARY_EXIST
#include <boost/test/unit_test.hpp>
#else
#define BOOST_TEST_NO_LIB
#include <boost/test/included/unit_test.hpp>
#endif
#include <toml.hpp>
#include <iostream>
#include <fstream>
BOOST_AUTO_TEST_CASE(test_example)
{
const auto data = toml::parse("toml/tests/example.toml");
{
std::ofstream ofs("tmp1.toml");
ofs << std::setw(80) << data;
}
auto serialized = toml::parse("tmp1.toml");
{
auto& owner = toml::get<toml::table>(serialized.at("owner"));
auto& bio = toml::get<std::string>(owner.at("bio"));
const auto CR = std::find(bio.begin(), bio.end(), '\r');
if(CR != bio.end())
{
bio.erase(CR);
}
}
BOOST_CHECK(data == serialized);
}
BOOST_AUTO_TEST_CASE(test_fruit)
{
const auto data = toml::parse("toml/tests/fruit.toml");
{
std::ofstream ofs("tmp2.toml");
ofs << std::setw(80) << data;
}
const auto serialized = toml::parse("tmp2.toml");
BOOST_CHECK(data == serialized);
}
BOOST_AUTO_TEST_CASE(test_hard_example)
{
const auto data = toml::parse("toml/tests/hard_example.toml");
{
std::ofstream ofs("tmp3.toml");
ofs << std::setw(80) << data;
}
const auto serialized = toml::parse("tmp3.toml");
BOOST_CHECK(data == serialized);
}

View File

@@ -34,6 +34,7 @@
#endif
#include "toml/parser.hpp"
#include "toml/serializer.hpp"
#include "toml/to_toml.hpp"
#include "toml/from_toml.hpp"
#include "toml/get.hpp"

511
toml/serializer.hpp Normal file
View File

@@ -0,0 +1,511 @@
// Copyright Toru Niina 2019.
// Distributed under the MIT License.
#ifndef TOML11_SERIALIZER_HPP
#define TOML11_SERIALIZER_HPP
#include "value.hpp"
#include "lexer.hpp"
#include <limits>
namespace toml
{
struct serializer
{
serializer(const std::size_t w = 80,
const int float_prec = std::numeric_limits<toml::floating>::max_digits10,
const bool can_be_inlined = false,
std::vector<toml::key> ks = {})
: can_be_inlined_(can_be_inlined), float_prec_(float_prec), width_(w),
keys_(std::move(ks))
{}
~serializer() = default;
std::string operator()(const toml::boolean& b) const
{
return b ? "true" : "false";
}
std::string operator()(const integer i) const
{
return std::to_string(i);
}
std::string operator()(const toml::floating f) const
{
std::string token = [=] {
// every float value needs decimal point (or exponent).
std::ostringstream oss;
oss << std::setprecision(float_prec_) << std::showpoint << f;
return oss.str();
}();
if(token.back() == '.') // 1. => 1.0
{
token += '0';
}
const auto e = std::find_if(token.cbegin(), token.cend(),
[](const char c) -> bool {
return c == 'E' || c == 'e';
});
if(e == token.cend())
{
return token; // there is no exponent part. just return it.
}
// zero-prefix in an exponent is not allowed in TOML.
// remove it if it exists.
bool sign_exists = false;
std::size_t zero_prefix = 0;
for(auto iter = std::next(e), iend = token.cend(); iter != iend; ++iter)
{
if(*iter == '+' || *iter == '-'){sign_exists = true; continue;}
if(*iter == '0'){zero_prefix += 1;}
else {break;}
}
if(zero_prefix != 0)
{
const auto offset = std::distance(token.cbegin(), e) +
(sign_exists ? 2 : 1);
token.erase(offset, zero_prefix);
}
return token;
}
std::string operator()(const string& s) const
{
if(s.kind == string_t::basic)
{
if(std::find(s.str.cbegin(), s.str.cend(), '\n') != s.str.cend())
{
// if linefeed is contained, make it multiline-string.
const std::string open("\"\"\"\n");
const std::string close("\\\n\"\"\"");
return open + this->escape_ml_basic_string(s.str) + close;
}
// no linefeed. try to make it oneline-string.
std::string oneline = this->escape_basic_string(s.str);
if(oneline.size() + 2 < width_ || width_ < 2)
{
const std::string quote("\"");
return quote + oneline + quote;
}
// the line is too long compared to the specified width.
// split it into multiple lines.
std::string token("\"\"\"\n");
while(!oneline.empty())
{
if(oneline.size() < width_)
{
token += oneline;
oneline.clear();
}
else if(oneline.at(width_-2) == '\\')
{
token += oneline.substr(0, width_-2);
token += "\\\n";
oneline.erase(0, width_-2);
}
else
{
token += oneline.substr(0, width_-1);
token += "\\\n";
oneline.erase(0, width_-1);
}
}
return token + std::string("\\\n\"\"\"");
}
else // the string `s` is literal-string.
{
if(std::find(s.str.cbegin(), s.str.cend(), '\n') != s.str.cend() ||
std::find(s.str.cbegin(), s.str.cend(), '\'') != s.str.cend() )
{
const std::string open("'''\n");
const std::string close("'''");
return open + s.str + close;
}
else
{
const std::string quote("'");
return quote + s.str + quote;
}
}
}
std::string operator()(const local_date& d) const
{
std::ostringstream oss;
oss << d;
return oss.str();
}
std::string operator()(const local_time& t) const
{
std::ostringstream oss;
oss << t;
return oss.str();
}
std::string operator()(const local_datetime& dt) const
{
std::ostringstream oss;
oss << dt;
return oss.str();
}
std::string operator()(const offset_datetime& odt) const
{
std::ostringstream oss;
oss << odt;
return oss.str();
}
std::string operator()(const array& v) const
{
if(!v.empty() && v.front().is(value_t::Table))// v is an array of tables
{
// if it's not inlined, we need to add `[[table.key]]`.
// but if it can be inlined, we need `table.key = [...]`.
if(this->can_be_inlined_)
{
std::string token;
if(!keys_.empty())
{
token += this->serialize_key(keys_.back());
token += " = ";
}
bool width_exceeds = false;
token += "[\n";
for(const auto& item : v)
{
const auto t =
this->make_inline_table(item.cast<value_t::Table>());
if(t.size() + 1 > width_ || // +1 for the last comma {...},
std::find(t.cbegin(), t.cend(), '\n') != t.cend())
{
width_exceeds = true;
break;
}
token += t;
token += ",\n";
}
if(!width_exceeds)
{
token += "]\n";
return token;
}
// if width_exceeds, serialize it as [[array.of.tables]].
}
std::string token;
for(const auto& item : v)
{
token += "[[";
token += this->serialize_dotted_key(keys_);
token += "]]\n";
token += this->make_multiline_table(item.cast<value_t::Table>());
}
return token;
}
if(v.empty())
{
return std::string("[]");
}
// not an array of tables. normal array. first, try to make it inline.
{
const auto inl = this->make_inline_array(v);
if(inl.size() < this->width_ &&
std::find(inl.cbegin(), inl.cend(), '\n') == inl.cend())
{
return inl;
}
}
// if the length exceeds this->width_, print multiline array
std::string token;
std::string current_line;
token += "[\n";
for(const auto& item : v)
{
auto next_elem = toml::visit(*this, item);
// newline between array-value and comma is not allowed
if(next_elem.back() == '\n'){next_elem.pop_back();}
if(current_line.size() + next_elem.size() + 1 < this->width_)
{
current_line += next_elem;
current_line += ',';
}
else if(current_line.empty())
{
// the next elem cannot be within the width.
token += next_elem;
token += ",\n";
// keep current line empty
}
else // current_line has some tokens and it exceeds width
{
assert(current_line.back() == ',');
token += current_line;
token += '\n';
current_line = next_elem;
current_line += ',';
}
}
if(!current_line.empty())
{
if(current_line.back() != '\n') {current_line += '\n';}
token += current_line;
}
token += "]\n";
return token;
}
std::string operator()(const table& v) const
{
if(this->can_be_inlined_)
{
std::string token;
if(!this->keys_.empty())
{
token += this->serialize_key(this->keys_.back());
token += " = ";
}
token += this->make_inline_table(v);
if(token.size() < this->width_)
{
return token;
}
}
std::string token;
if(!keys_.empty())
{
token += '[';
token += this->serialize_dotted_key(keys_);
token += "]\n";
}
token += this->make_multiline_table(v);
return token;
}
private:
std::string serialize_key(const toml::key& key) const
{
detail::location<toml::key> loc(key, key);
detail::lex_unquoted_key::invoke(loc);
if(loc.iter() == loc.end())
{
return key; // all the tokens are consumed. the key is unquoted-key.
}
std::string token("\"");
token += this->escape_basic_string(key);
token += "\"";
return token;
}
std::string serialize_dotted_key(const std::vector<toml::key>& keys) const
{
std::string token;
if(keys.empty()){return token;}
for(const auto& k : keys)
{
token += this->serialize_key(k);
token += '.';
}
token.erase(token.size() - 1, 1); // remove trailing `.`
return token;
}
std::string escape_basic_string(const std::string& s) const
{
//XXX assuming `s` is a valid utf-8 sequence.
std::string retval;
for(const char c : s)
{
switch(c)
{
case '\\': {retval += "\\\\"; break;}
case '\"': {retval += "\\\""; break;}
case '\b': {retval += "\\b"; break;}
case '\t': {retval += "\\t"; break;}
case '\f': {retval += "\\f"; break;}
case '\n': {retval += "\\n"; break;}
case '\r': {retval += "\\r"; break;}
default : {retval += c; break;}
}
}
return retval;
}
std::string escape_ml_basic_string(const std::string& s) const
{
std::string retval;
for(auto i=s.cbegin(), e=s.cend(); i!=e; ++i)
{
switch(*i)
{
case '\\': {retval += "\\\\"; break;}
case '\"': {retval += "\\\""; break;}
case '\b': {retval += "\\b"; break;}
case '\t': {retval += "\\t"; break;}
case '\f': {retval += "\\f"; break;}
case '\n': {retval += "\n"; break;}
case '\r':
{
if(std::next(i) != e && *std::next(i) == '\n')
{
retval += "\r\n";
++i;
}
else
{
retval += "\\r";
}
break;
}
default: {retval += *i; break;}
}
}
return retval;
}
std::string make_inline_array(const array& v) const
{
std::string token;
token += '[';
bool is_first = true;
for(const auto& item : v)
{
if(is_first) {is_first = false;} else {token += ',';}
token += visit(serializer(std::numeric_limits<std::size_t>::max(),
this->float_prec_, true), item);
}
token += ']';
return token;
}
std::string make_inline_table(const table& v) const
{
assert(this->can_be_inlined_);
std::string token;
token += '{';
bool is_first = true;
for(const auto& kv : v)
{
// in inline tables, trailing comma is not allowed (toml-lang #569).
if(is_first) {is_first = false;} else {token += ',';}
token += this->serialize_key(kv.first);
token += '=';
token += visit(serializer(std::numeric_limits<std::size_t>::max(),
this->float_prec_, true), kv.second);
}
token += '}';
return token;
}
std::string make_multiline_table(const table& v) const
{
std::string token;
// print non-table stuff first. because after printing [foo.bar], the
// remaining non-table values will be assigned into [foo.bar], not [foo]
for(const auto kv : v)
{
if(kv.second.is(value_t::Table) || is_array_of_tables(kv.second))
{
continue;
}
const auto key_and_sep = this->serialize_key(kv.first) + " = ";
const auto residual_width = (this->width_ > key_and_sep.size()) ?
this->width_ - key_and_sep.size() : 0;
token += key_and_sep;
token += visit(serializer(residual_width, this->float_prec_, true),
kv.second);
if(token.back() != '\n')
{
token += '\n';
}
}
// normal tables / array of tables
// after multiline table appeared, the other tables cannot be inline
// because the table would be assigned into the table.
// [foo]
// ...
// bar = {...} # <- bar will be a member of [foo].
bool multiline_table_printed = false;
for(const auto& kv : v)
{
if(!kv.second.is(value_t::Table) && !is_array_of_tables(kv.second))
{
continue; // other stuff are already serialized. skip them.
}
std::vector<toml::key> ks(this->keys_);
ks.push_back(kv.first);
auto tmp = visit(serializer(
this->width_, this->float_prec_, !multiline_table_printed, ks),
kv.second);
if((!multiline_table_printed) &&
std::find(tmp.cbegin(), tmp.cend(), '\n') != tmp.cend())
{
multiline_table_printed = true;
}
else
{
// still inline tables only.
tmp += '\n';
}
token += tmp;
}
return token;
}
bool is_array_of_tables(const value& v) const
{
if(!v.is(value_t::Array)) {return false;}
const auto& a = v.cast<value_t::Array>();
return !a.empty() && a.front().is(value_t::Table);
}
private:
bool can_be_inlined_;
int float_prec_;
std::size_t width_;
std::vector<toml::key> keys_;
};
inline std::string
format(const value& v, std::size_t w = 80,
int fprec = std::numeric_limits<toml::floating>::max_digits10)
{
return visit(serializer(w, fprec, true), v);
}
inline std::string
format(const table& t, std::size_t w = 80,
int fprec = std::numeric_limits<toml::floating>::max_digits10)
{
return serializer(w, fprec, true)(t);
}
template<typename charT, typename traits>
std::basic_ostream<charT, traits>&
operator<<(std::basic_ostream<charT, traits>& os, const value& v)
{
// get status of std::setw().
const std::size_t w = os.width();
const int fprec = os.precision();
// the root object can't be an inline table. so pass `false`.
os << visit(serializer(w, fprec, false), v);
return os;
}
} // toml
#endif// TOML11_SERIALIZER_HPP

View File

@@ -820,5 +820,77 @@ inline std::string format_error(const std::string& err_msg,
std::move(hints));
}
template<typename Visitor>
detail::return_type_of_t<Visitor, const toml::boolean&>
visit(Visitor&& visitor, const toml::value& v)
{
switch(v.type())
{
case value_t::Boolean : {return visitor(v.cast<value_t::Boolean >());}
case value_t::Integer : {return visitor(v.cast<value_t::Integer >());}
case value_t::Float : {return visitor(v.cast<value_t::Float >());}
case value_t::String : {return visitor(v.cast<value_t::String >());}
case value_t::OffsetDatetime: {return visitor(v.cast<value_t::OffsetDatetime>());}
case value_t::LocalDatetime : {return visitor(v.cast<value_t::LocalDatetime >());}
case value_t::LocalDate : {return visitor(v.cast<value_t::LocalDate >());}
case value_t::LocalTime : {return visitor(v.cast<value_t::LocalTime >());}
case value_t::Array : {return visitor(v.cast<value_t::Array >());}
case value_t::Table : {return visitor(v.cast<value_t::Table >());}
case value_t::Empty : break;
case value_t::Unknown : break;
default: break;
}
throw std::runtime_error(format_error("[error] toml::visit: toml::value "
"does not have any valid value.", v, "here"));
}
template<typename Visitor>
detail::return_type_of_t<Visitor, toml::boolean&>
visit(Visitor&& visitor, toml::value& v)
{
switch(v.type())
{
case value_t::Boolean : {return visitor(v.cast<value_t::Boolean >());}
case value_t::Integer : {return visitor(v.cast<value_t::Integer >());}
case value_t::Float : {return visitor(v.cast<value_t::Float >());}
case value_t::String : {return visitor(v.cast<value_t::String >());}
case value_t::OffsetDatetime: {return visitor(v.cast<value_t::OffsetDatetime>());}
case value_t::LocalDatetime : {return visitor(v.cast<value_t::LocalDatetime >());}
case value_t::LocalDate : {return visitor(v.cast<value_t::LocalDate >());}
case value_t::LocalTime : {return visitor(v.cast<value_t::LocalTime >());}
case value_t::Array : {return visitor(v.cast<value_t::Array >());}
case value_t::Table : {return visitor(v.cast<value_t::Table >());}
case value_t::Empty : break;
case value_t::Unknown : break;
default: break;
}
throw std::runtime_error(format_error("[error] toml::visit: toml::value "
"does not have any valid value.", v, "here"));
}
template<typename Visitor>
detail::return_type_of_t<Visitor, toml::boolean&>
visit(Visitor&& visitor, toml::value&& v)
{
switch(v.type())
{
case value_t::Boolean : {return visitor(std::move(v.cast<value_t::Boolean >()));}
case value_t::Integer : {return visitor(std::move(v.cast<value_t::Integer >()));}
case value_t::Float : {return visitor(std::move(v.cast<value_t::Float >()));}
case value_t::String : {return visitor(std::move(v.cast<value_t::String >()));}
case value_t::OffsetDatetime: {return visitor(std::move(v.cast<value_t::OffsetDatetime>()));}
case value_t::LocalDatetime : {return visitor(std::move(v.cast<value_t::LocalDatetime >()));}
case value_t::LocalDate : {return visitor(std::move(v.cast<value_t::LocalDate >()));}
case value_t::LocalTime : {return visitor(std::move(v.cast<value_t::LocalTime >()));}
case value_t::Array : {return visitor(std::move(v.cast<value_t::Array >()));}
case value_t::Table : {return visitor(std::move(v.cast<value_t::Table >()));}
case value_t::Empty : break;
case value_t::Unknown : break;
default: break;
}
throw std::runtime_error(format_error("[error] toml::visit: toml::value "
"does not have any valid value.", v, "here"));
}
}// toml
#endif// TOML11_VALUE