18 #include <boost/iostreams/copy.hpp>
19 #include <boost/iostreams/filtering_stream.hpp>
20 #include <boost/iostreams/filter/bzip2.hpp>
21 #include <boost/iostreams/filter/counter.hpp>
22 #include <boost/iostreams/filter/gzip.hpp>
30 #define ERR_SWML LOG_STREAM(err, log_config)
31 #define LOG_SWML LOG_STREAM(info, log_config)
39 void debug_delete(
node*
n) {
45 int nalloc = input.
size();
48 std::istringstream stream(std::string(input.
begin(), input.
end()));
50 boost::iostreams::filtering_stream<boost::iostreams::input>
filter;
53 filter.push(boost::iostreams::bzip2_decompressor());
55 filter.push(boost::iostreams::gzip_decompressor());
60 const std::size_t chunk_size = input.
size() * 10;
62 std::vector<char> buf(chunk_size);
66 while(
filter.good() && (len =
filter.read(&buf[pos], chunk_size).gcount()) == chunk_size) {
68 throw error(
"WML document exceeded size limit during decompression");
72 buf.resize(pos + chunk_size);
77 throw error(
"failed to uncompress");
87 char* small_out =
new char[pos+1];
88 memcpy(small_out, &buf[0], pos);
96 }
catch (
const std::bad_alloc&
e) {
97 ERR_SWML <<
"ERROR: bad_alloc caught in uncompress_buffer() state "
98 << state <<
" alloc bytes " << nalloc <<
" with input: '"
99 << input <<
"' " <<
e.what();
100 throw error(
"Bad allocation request in uncompress_buffer().");
104 char* compress_buffer(
const char* input,
string_span* span,
bool bzip2)
106 int nalloc = strlen(input);
109 std::string
in(input);
111 std::istringstream istream(
in);
113 boost::iostreams::filtering_stream<boost::iostreams::output>
filter;
116 filter.push(boost::iostreams::bzip2_compressor());
118 filter.push(boost::iostreams::gzip_compressor());
121 nalloc =
in.size()*2 + 80;
122 std::vector<char> buf(nalloc);
123 boost::iostreams::array_sink out(&buf[0], buf.size());
124 filter.push(boost::iostreams::counter());
129 boost::iostreams::copy(istream,
filter, buf.size());
130 const int len =
filter.component<boost::iostreams::counter>(1)->characters();
131 assert(len < 128*1024*1024);
132 if((!
filter.eof() && !
filter.good()) || len ==
static_cast<int>(buf.size())) {
133 throw error(
"failed to compress");
141 char* small_out =
new char[len];
142 memcpy(small_out, &buf[0], len);
146 assert(*small_out == (bzip2 ?
'B' : 31));
149 }
catch (
const std::bad_alloc&
e) {
150 ERR_SWML <<
"ERROR: bad_alloc caught in compress_buffer() state "
151 << state <<
" alloc bytes " << nalloc <<
" with input: '"
152 << input <<
"' " <<
e.what();
153 throw error(
"Bad allocation request in compress_buffer().");
162 return default_value;
165 if (
operator==(
"no") ||
operator==(
"off") ||
operator==(
"false") ||
operator==(
"0") ||
operator==(
"0.0"))
173 const int buf_size = 64;
174 if(
size() >= buf_size) {
190 char* buf =
new char[
size() + 1];
204 o << std::string(
s.begin(),
s.end());
219 #pragma warning (push)
220 #pragma warning (disable: 4706)
231 throw error(
"elements nested too deep");
234 const char*&
s = *str;
236 const char*
const begin =
s;
244 throw error(
"end element unterminated");
252 const char* end = strchr(
s,
']');
254 throw error(
"unterminated element");
262 children_[list_index].second.push_back(
new node(doc,
this, str, depth+1));
276 throw error(
"did not find newline after '#'");
280 const char* end = strchr(
s,
'=');
283 throw error(
"did not find '=' after attribute");
291 throw error(
"did not find '\"' after '_'");
296 end = strchr(
s,
'\n');
298 ERR_SWML <<
"ATTR: '" << name <<
"' (((" <<
s <<
")))";
299 throw error(
"did not find end of attribute");
301 if (memchr(
s,
'"', end -
s))
302 throw error(
"found stray quotes in unquoted value");
309 while((end = strchr(end+1,
'"')) && end[1] ==
'"') {
311 #pragma warning (pop)
316 throw error(
"did not find end of attribute");
319 const char *endline = end + 1;
320 while (*endline ==
' ') ++endline;
321 if (*endline ==
'\n')
break;
324 if (*(endline++) !=
'+')
325 throw error(
"did not find newline after end of attribute");
326 if (*(endline++) !=
'\n')
327 throw error(
"did not find newline after '+'");
330 if (*endline ==
'#') {
331 endline = strchr(endline + 1,
'\n');
333 throw error(
"did not find newline after '#'");
338 while (*endline ==
'\t') ++endline;
339 if (*endline ==
'_') ++endline;
341 throw error(
"did not find quotes after '+'");
349 if(
attr_.empty() ==
false && !(
attr_.back().key < name)) {
350 ERR_SWML <<
"attributes: '" <<
attr_.back().key <<
"' < '" << name <<
"'";
351 throw error(
"attributes not in order");
356 attr_.emplace_back(name, value);
375 struct string_span_pair_comparer
381 bool operator()(
const node::attribute& a,
const string_span&
b)
const {
385 bool operator()(
const node::attribute& a,
386 const node::attribute&
b)
const {
387 return a.key <
b.key;
396 std::pair<attribute_list::const_iterator,
397 attribute_list::const_iterator> range = std::equal_range(
attr_.begin(),
attr_.end(), span, string_span_pair_comparer());
398 if(range.first != range.second) {
399 return range.first->value;
408 std::pair<attribute_list::const_iterator,
409 attribute_list::const_iterator> range = std::equal_range(
attr_.begin(),
attr_.end(), span, string_span_pair_comparer());
410 return range.first != range.second;
420 if(range.first != range.second) {
443 std::string temp = std::to_string(value);
453 if(
index > list.size()) {
473 list.push_back(
new node(*
doc_,
this));
490 if(
index >= list.size()) {
496 debug_delete(list[
index]);
497 list.erase(list.begin() +
index);
507 bool inserted =
false;
510 if(
i->child_map_index == child_map_index &&
i->child_list_index > child_list_index) {
511 i->child_list_index++;
512 }
else if(
i->child_map_index == child_map_index &&
i->child_list_index == child_list_index) {
514 i->child_list_index++;
532 if(
i->child_map_index == child_map_index &&
i->child_list_index == child_list_index) {
536 if(
i->child_map_index == child_map_index &&
i->child_list_index > child_list_index) {
537 i->child_list_index--;
543 assert(erase_count == 1);
550 if(
i->child_map_index >= child_map_index) {
551 i->child_map_index++;
560 if(
i->child_map_index == child_map_index) {
564 if(
i->child_map_index > child_map_index) {
565 i->child_map_index--;
576 #ifdef CHECK_ORDERED_CHILDREN
579 assert(
i->child_map_index <
children_.size());
580 assert(
i->child_list_index <
children_[
i->child_map_index].second.size());
585 const unsigned short child_map_index = j -
children_.begin();
586 for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
587 const unsigned short child_list_index = k - j->second.begin();
611 if(
i->first == name) {
612 assert(
i->second.empty() ==
false);
613 return i->second.front();
623 if(
i->first == name) {
624 if(
i->second.empty()) {
627 return i->second.front();
646 if(
i->first == name) {
663 if(
i->first == name) {
674 child_map::const_iterator
i = m.begin();
675 for(;
i != m.end(); ++
i) {
676 if(
i->first ==
attr) {
687 for(;
i != m.end(); ++
i) {
688 if(
i->first ==
attr) {
714 for(attribute_list::const_iterator
i =
attr_.begin();
i !=
attr_.end(); ++
i) {
715 res +=
i->key.size() +
i->value.size() + 4;
718 std::size_t count_children = 0;
720 for(child_list::const_iterator j =
i->second.begin(); j !=
i->second.end(); ++j) {
721 res +=
i->first.size()*2 + 7;
722 res += (*j)->output_size();
740 i->value =
string_span(
i->value.begin() + offset,
i->value.size());
747 (*j)->shift_buffers(offset);
766 memcpy(buf,
i->key.begin(),
i->key.size());
770 buf +=
i->key.size();
773 memcpy(buf,
i->value.begin(),
i->value.size());
777 buf +=
i->value.size();
784 assert(
i->child_map_index <
children_.size());
785 assert(
i->child_list_index <
children_[
i->child_map_index].second.size());
795 children_[
i->child_map_index].second[
i->child_list_index]->output(buf, cache_status);
813 node& mutable_node =
const_cast<node&
>(
n);
817 assert(ptr == &v[0] + v.size());
818 return std::string(v.begin(), v.end());
824 for(attribute_list::const_iterator
i =
attr_.begin();
i !=
attr_.end(); ++
i) {
825 char* key =
i->key.duplicate();
826 char* value =
i->value.duplicate();
827 n.doc_->take_ownership_of_buffer(key);
828 n.doc_->take_ownership_of_buffer(value);
829 n.set_attr(key, value);
834 assert(
i->child_map_index <
children_.size());
835 assert(
i->child_list_index <
children_[
i->child_map_index].second.size());
836 char* buf =
children_[
i->child_map_index].first.duplicate();
837 n.doc_->take_ownership_of_buffer(buf);
838 children_[
i->child_map_index].second[
i->child_list_index]->copy_into(
n.add_child(buf));
845 const node* inserts = diff.
child(
"insert");
846 if(inserts !=
nullptr) {
847 for(attribute_list::const_iterator
i = inserts->
attr_.begin();
i != inserts->
attr_.end(); ++
i) {
848 char* name =
i->key.duplicate();
849 char* value =
i->value.duplicate();
856 const node* deletes = diff.
child(
"delete");
857 if(deletes !=
nullptr) {
858 for(attribute_list::const_iterator
i = deletes->
attr_.begin();
i != deletes->
attr_.end(); ++
i) {
861 if(range.first != range.second) {
862 attr_.erase(range.first);
868 for(child_list::const_iterator
i = child_changes.begin();
i != child_changes.end(); ++
i) {
869 const std::size_t
index = (**i)[
"index"].to_int();
870 for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
872 for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
875 if(index < itor->second.size()) {
876 itor->second[
index]->apply_diff(**k);
884 for(child_list::const_iterator
i = child_inserts.begin();
i != child_inserts.end(); ++
i) {
885 const std::size_t
index = (**i)[
"index"].to_int();
886 for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
888 for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
897 for(child_list::const_iterator
i = child_deletes.begin();
i != child_deletes.end(); ++
i) {
898 const std::size_t
index = (**i)[
"index"].to_int();
899 for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
900 if(j->second.empty()) {
925 for(child_list::const_iterator j =
i->second.begin(); j !=
i->second.end(); ++j) {
927 res += (*j)->nchildren();
936 int res =
attr_.capacity();
938 for(child_list::const_iterator j =
i->second.begin(); j !=
i->second.end(); ++j) {
939 res += (*j)->nattributes_recursive();
948 for(
node*
n =
this;
n !=
nullptr &&
n->output_cache_.is_null() ==
false;
n =
n->parent_) {
957 root_(new
node(*this, nullptr)),
975 const char* cbuf = buf;
1000 compressed_buf_(compressed_buf),
1008 buffers_.push_back(uncompress_buffer(compressed_buf, &uncompressed_buf));
1012 root_ =
new node(*
this,
nullptr, &cbuf);
1030 debug_delete(
root_);
1037 const int len = strlen(str);
1038 char* res =
new char[len+1];
1039 memcpy(res, str, len + 1);
1060 std::vector<char*> bufs;
1066 buf =
new char[buf_size];
1067 }
catch (
const std::bad_alloc&
e) {
1068 ERR_SWML <<
"ERROR: Trying to allocate " << buf_size <<
" bytes. "
1070 throw error(
"Bad allocation request in output().");
1077 assert(buf ==
output_ + buf_size);
1105 debug_delete(
root_);
1108 std::vector<char*> new_buffers;
1113 new_buffers.push_back(*
i);
1130 assert(
root_ ==
nullptr);
1132 root_ =
new node(*
this,
nullptr, &cbuf);
1137 char* buf =
new char[strlen(
output())+1];
1139 return std::make_unique<document>(buf);
1157 debug_delete(
root_);
1183 if(head_doc ==
this) {
1199 std::ostringstream
s;
1201 int ncompressed = 0;
1202 int compressed_size = 0;
1208 int nattributes = 0;
1209 for(
document*
d = head_doc;
d !=
nullptr;
d =
d->next_) {
1211 nbuffers +=
d->buffers_.size();
1213 if(
d->compressed_buf_.is_null() ==
false) {
1215 compressed_size +=
d->compressed_buf_.size();
1220 text_size += strlen(
d->output_);
1224 nnodes += 1 +
d->root_->nchildren();
1225 nattributes +=
d->root_->nattributes_recursive();
1228 if(
d->root_ &&
d->root_->is_dirty()) {
1233 const int nodes_alloc = nnodes*(
sizeof(
node) + 12);
1234 const int attr_alloc = nattributes*(
sizeof(
string_span)*2);
1235 const int total_alloc = compressed_size + text_size + nodes_alloc + attr_alloc;
1237 s <<
"WML documents: " << ndocs <<
"\n"
1238 <<
"Dirty: " << ndirty <<
"\n"
1239 <<
"With compression: " << ncompressed <<
" (" << compressed_size
1241 <<
"With text: " << ntext <<
" (" << text_size
1243 <<
"Nodes: " << nnodes <<
" (" << nodes_alloc <<
" bytes)\n"
1244 <<
"Attr: " << nattributes <<
" (" << attr_alloc <<
" bytes)\n"
1245 <<
"Buffers: " << nbuffers <<
"\n"
1246 <<
"Total allocation: " << total_alloc <<
" bytes\n";
void take_ownership_of_buffer(char *buffer)
std::unique_ptr< document > clone()
static std::string stats()
static std::size_t document_size_limit
std::vector< char * > buffers_
string_span output_compressed(bool bzip2=false)
const char * dup_string(const char *str)
string_span compressed_buf_
const string_span & attr(const char *key) const
void set_doc(document *doc)
void insert_ordered_child(int child_map_index, int child_list_index)
void insert_ordered_child_list(int child_map_index)
void check_ordered_children() const
void remove_child(const char *name, std::size_t index)
node(document &doc, node *parent)
int get_children(const string_span &name)
const string_span & operator[](const char *key) const
const child_list & children(const char *name) const
void apply_diff(const node &diff)
bool has_attr(const char *key) const
node & set_attr_int(const char *key, int value)
std::vector< child_pair > child_map
node * child(const char *name)
std::vector< node * > child_list
void remove_ordered_child_list(int child_map_index)
int nattributes_recursive() const
std::vector< node_pos > ordered_children_
const string_span & first_child() const
void output(char *&buf, CACHE_STATUS status=DO_NOT_MODIFY_CACHE)
node & add_child(const char *name)
node & child_or_add(const char *name)
node & set_attr(const char *key, const char *value)
string_span output_cache_
node & add_child_at(const char *name, std::size_t index)
void copy_into(node &n) const
node & set_attr_dup(const char *key, const char *value)
void remove_ordered_child(int child_map_index, int child_list_index)
void shift_buffers(ptrdiff_t offset)
static child_map::const_iterator find_in_map(const child_map &m, const string_span &attr)
bool to_bool(bool default_value=false) const
std::string to_string() const
const char * begin() const
unsigned in
If equal to search_counter, the node is off the list.
Standard logging facilities (interface).
void swap(document &lhs, document &rhs)
Implement non-member swap function for std::swap (calls document::swap).
std::ostream & operator<<(std::ostream &o, const string_span &s)
std::string node_to_string(const node &n)
std::size_t index(std::string_view str, const std::size_t index)
Codepoint index corresponding to the nth character in a UTF-8 string.
std::string get_unknown_exception_type()
Utility function for finding the type of thing caught with catch(...).
std::string::const_iterator iterator
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
static lg::log_domain log_config("config")
static map_location::direction n
static map_location::direction s