The Battle for Wesnoth  1.19.0-dev
simple_wml.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2008 - 2024
3  by David White <dave@whitevine.net>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 #include <sstream>
17 
18 #include <boost/iostreams/copy.hpp>
19 #include <boost/iostreams/filtering_stream.hpp>
20 #include <boost/iostreams/filter/bzip2.hpp>
21 #include <boost/iostreams/filter/counter.hpp>
22 #include <boost/iostreams/filter/gzip.hpp>
23 
25 
26 #include "log.hpp"
27 #include "utils/general.hpp"
28 
29 static lg::log_domain log_config("config");
30 #define ERR_SWML LOG_STREAM(err, log_config)
31 #define LOG_SWML LOG_STREAM(info, log_config)
32 
33 namespace simple_wml {
34 
35 std::size_t document::document_size_limit = 40000000;
36 
37 namespace {
38 
39 void debug_delete(node* n) {
40  delete n;
41 }
42 
43 char* uncompress_buffer(const string_span& input, string_span* span)
44 {
45  int nalloc = input.size();
46  int state = 0;
47  try {
48  std::istringstream stream(std::string(input.begin(), input.end()));
49  state = 1;
50  boost::iostreams::filtering_stream<boost::iostreams::input> filter;
51  state = 2;
52  if (!span->empty() && *span->begin() == 'B') {
53  filter.push(boost::iostreams::bzip2_decompressor());
54  } else {
55  filter.push(boost::iostreams::gzip_decompressor());
56  }
57  filter.push(stream);
58  state = 3;
59 
60  const std::size_t chunk_size = input.size() * 10;
61  nalloc = chunk_size;
62  std::vector<char> buf(chunk_size);
63  state = 4;
64  std::size_t len = 0;
65  std::size_t pos = 0;
66  while(filter.good() && (len = filter.read(&buf[pos], chunk_size).gcount()) == chunk_size) {
67  if(pos + chunk_size > document::document_size_limit) {
68  throw error("WML document exceeded size limit during decompression");
69  }
70 
71  pos += len;
72  buf.resize(pos + chunk_size);
73  len = 0;
74  }
75 
76  if(!filter.eof() && !filter.good()) {
77  throw error("failed to uncompress");
78  }
79 
80  pos += len;
81  state = 5;
82  nalloc = pos;
83 
84  buf.resize(pos);
85  state = 6;
86 
87  char* small_out = new char[pos+1];
88  memcpy(small_out, &buf[0], pos);
89  state = 7;
90 
91  small_out[pos] = 0;
92 
93  *span = string_span(small_out, pos);
94  state = 8;
95  return small_out;
96  } catch (const std::bad_alloc& e) {
97  ERR_SWML << "ERROR: bad_alloc caught in uncompress_buffer() state "
98  << state << " alloc bytes " << nalloc << " with input: '"
99  << input << "' " << e.what();
100  throw error("Bad allocation request in uncompress_buffer().");
101  }
102 }
103 
104 char* compress_buffer(const char* input, string_span* span, bool bzip2)
105 {
106  int nalloc = strlen(input);
107  int state = 0;
108  try {
109  std::string in(input);
110  state = 1;
111  std::istringstream istream(in);
112  state = 2;
113  boost::iostreams::filtering_stream<boost::iostreams::output> filter;
114  state = 3;
115  if (bzip2) {
116  filter.push(boost::iostreams::bzip2_compressor());
117  } else {
118  filter.push(boost::iostreams::gzip_compressor());
119  }
120  state = 4;
121  nalloc = in.size()*2 + 80;
122  std::vector<char> buf(nalloc);
123  boost::iostreams::array_sink out(&buf[0], buf.size());
124  filter.push(boost::iostreams::counter());
125  filter.push(out);
126 
127  state = 5;
128 
129  boost::iostreams::copy(istream, filter, buf.size());
130  const int len = filter.component<boost::iostreams::counter>(1)->characters();
131  assert(len < 128*1024*1024);
132  if((!filter.eof() && !filter.good()) || len == static_cast<int>(buf.size())) {
133  throw error("failed to compress");
134  }
135  state = 6;
136  nalloc = len;
137 
138  buf.resize(len);
139  state = 7;
140 
141  char* small_out = new char[len];
142  memcpy(small_out, &buf[0], len);
143  state = 8;
144 
145  *span = string_span(small_out, len);
146  assert(*small_out == (bzip2 ? 'B' : 31));
147  state = 9;
148  return small_out;
149  } catch (const std::bad_alloc& e) {
150  ERR_SWML << "ERROR: bad_alloc caught in compress_buffer() state "
151  << state << " alloc bytes " << nalloc << " with input: '"
152  << input << "' " << e.what();
153  throw error("Bad allocation request in compress_buffer().");
154  }
155 }
156 
157 } // namespace
158 
159 bool string_span::to_bool(bool default_value) const
160 {
161  if(empty()) {
162  return default_value;
163  }
164 
165  if (operator==("no") || operator==("off") || operator==("false") || operator==("0") || operator==("0.0"))
166  return false;
167 
168  return true;
169 }
170 
172 {
173  const int buf_size = 64;
174  if(size() >= buf_size) {
175  return 0;
176  }
177  char buf[64];
178  memcpy(buf, begin(), size());
179  buf[size()] = 0;
180  return atoi(buf);
181 }
182 
183 std::string string_span::to_string() const
184 {
185  return std::string(begin(), end());
186 }
187 
189 {
190  char* buf = new char[size() + 1];
191  memcpy(buf, begin(), size());
192  buf[size()] = 0;
193  return buf;
194 }
195 
196 error::error(const char* msg)
197  : game::error(msg)
198 {
199  ERR_SWML << "ERROR: '" << msg << "'";
200 }
201 
202 std::ostream& operator<<(std::ostream& o, const string_span& s)
203 {
204  o << std::string(s.begin(), s.end());
205  return o;
206 }
207 
208 node::node(document& doc, node* parent) :
209  doc_(&doc),
210  attr_(),
211  parent_(parent),
212  children_(),
213  ordered_children_(),
214  output_cache_()
215 {
216 }
217 
218 #ifdef _MSC_VER
219 #pragma warning (push)
220 #pragma warning (disable: 4706)
221 #endif
222 node::node(document& doc, node* parent, const char** str, int depth) :
223  doc_(&doc),
224  attr_(),
225  parent_(parent),
226  children_(),
227  ordered_children_(),
228  output_cache_()
229 {
230  if(depth >= 1000) {
231  throw error("elements nested too deep");
232  }
233 
234  const char*& s = *str;
235 
236  const char* const begin = s;
237  while(*s) {
238  switch(*s) {
239  case '[': {
240  if(s[1] == '/') {
241  output_cache_ = string_span(begin, s - begin);
242  s = strchr(s, ']');
243  if(s == nullptr) {
244  throw error("end element unterminated");
245  }
246 
247  ++s;
248  return;
249  }
250 
251  ++s;
252  const char* end = strchr(s, ']');
253  if(end == nullptr) {
254  throw error("unterminated element");
255  }
256 
257  const int list_index = get_children(string_span(s, end - s));
259 
260  s = end + 1;
261 
262  children_[list_index].second.push_back(new node(doc, this, str, depth+1));
263  ordered_children_.emplace_back(list_index, children_[list_index].second.size() - 1);
265 
266  break;
267  }
268  case ' ':
269  case '\t':
270  case '\n':
271  ++s;
272  break;
273  case '#':
274  s = strchr(s, '\n');
275  if(s == nullptr) {
276  throw error("did not find newline after '#'");
277  }
278  break;
279  default: {
280  const char* end = strchr(s, '=');
281  if(end == nullptr) {
282  ERR_SWML << "attribute: " << s;
283  throw error("did not find '=' after attribute");
284  }
285 
286  string_span name(s, end - s);
287  s = end + 1;
288  if(*s == '_') {
289  s = strchr(s, '"');
290  if(s == nullptr) {
291  throw error("did not find '\"' after '_'");
292  }
293  }
294 
295  if (*s != '"') {
296  end = strchr(s, '\n');
297  if (!end) {
298  ERR_SWML << "ATTR: '" << name << "' (((" << s << ")))";
299  throw error("did not find end of attribute");
300  }
301  if (memchr(s, '"', end - s))
302  throw error("found stray quotes in unquoted value");
303  goto read_attribute;
304  }
305  end = s;
306  while(true)
307  {
308  // Read until the first single double quote.
309  while((end = strchr(end+1, '"')) && end[1] == '"') {
310 #ifdef _MSC_VER
311 #pragma warning (pop)
312 #endif
313  ++end;
314  }
315  if(end == nullptr)
316  throw error("did not find end of attribute");
317 
318  // Stop if newline.
319  const char *endline = end + 1;
320  while (*endline == ' ') ++endline;
321  if (*endline == '\n') break;
322 
323  // Read concatenation marker.
324  if (*(endline++) != '+')
325  throw error("did not find newline after end of attribute");
326  if (*(endline++) != '\n')
327  throw error("did not find newline after '+'");
328 
329  // Read textdomain marker.
330  if (*endline == '#') {
331  endline = strchr(endline + 1, '\n');
332  if (!endline)
333  throw error("did not find newline after '#'");
334  ++endline;
335  }
336 
337  // Read indentation and start of string.
338  while (*endline == '\t') ++endline;
339  if (*endline == '_') ++endline;
340  if (*endline != '"')
341  throw error("did not find quotes after '+'");
342  end = endline;
343  }
344 
345  ++s;
346 
347  read_attribute:
348  string_span value(s, end - s);
349  if(attr_.empty() == false && !(attr_.back().key < name)) {
350  ERR_SWML << "attributes: '" << attr_.back().key << "' < '" << name << "'";
351  throw error("attributes not in order");
352  }
353 
354  s = end + 1;
355 
356  attr_.emplace_back(name, value);
357  }
358  }
359  }
360 
361  output_cache_ = string_span(begin, s - begin);
363 }
364 
366 {
367  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
368  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
369  debug_delete(*j);
370  }
371  }
372 }
373 
374 namespace {
375 struct string_span_pair_comparer
376 {
377  bool operator()(const string_span& a, const node::attribute& b) const {
378  return a < b.key;
379  }
380 
381  bool operator()(const node::attribute& a, const string_span& b) const {
382  return a.key < b;
383  }
384 
385  bool operator()(const node::attribute& a,
386  const node::attribute& b) const {
387  return a.key < b.key;
388  }
389 };
390 }
391 
392 const string_span& node::operator[](const char* key) const
393 {
394  static string_span empty("");
395  string_span span(key);
396  std::pair<attribute_list::const_iterator,
397  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
398  if(range.first != range.second) {
399  return range.first->value;
400  }
401 
402  return empty;
403 }
404 
405 bool node::has_attr(const char* key) const
406 {
407  string_span span(key);
408  std::pair<attribute_list::const_iterator,
409  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
410  return range.first != range.second;
411 }
412 
413 node& node::set_attr(const char* key, const char* value)
414 {
415  set_dirty();
416 
417  string_span span(key);
418  std::pair<attribute_list::iterator,
419  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
420  if(range.first != range.second) {
421  range.first->value = string_span(value);
422  } else {
423  attr_.insert(range.first, attribute(span, string_span(value)));
424  }
425 
426  return *this;
427 }
428 
429 node& node::set_attr_dup(const char* key, const char* value)
430 {
431  return set_attr(key, doc_->dup_string(value));
432 }
433 
434 node& node::set_attr_dup(const char* key, const string_span& value)
435 {
436  char* buf = value.duplicate();
438  return set_attr(key, buf);
439 }
440 
441 node& node::set_attr_int(const char* key, int value)
442 {
443  std::string temp = std::to_string(value);
444  return set_attr_dup(key, temp.c_str());
445 }
446 
447 node& node::add_child_at(const char* name, std::size_t index)
448 {
449  set_dirty();
450 
451  const int list_index = get_children(name);
452  child_list& list = children_[list_index].second;
453  if(index > list.size()) {
454  index = list.size();
455  }
456 
458  list.insert(list.begin() + index, new node(*doc_, this));
459  insert_ordered_child(list_index, index);
460 
462  return *list[index];
463 }
464 
465 
466 node& node::add_child(const char* name)
467 {
468  set_dirty();
469 
470  const int list_index = get_children(name);
472  child_list& list = children_[list_index].second;
473  list.push_back(new node(*doc_, this));
474  ordered_children_.emplace_back(list_index, list.size() - 1);
476  return *list.back();
477 }
478 
479 void node::remove_child(const string_span& name, std::size_t index)
480 {
481  set_dirty();
482 
483  //if we don't already have a vector for this item we don't want to add one.
485  if(itor == children_.end()) {
486  return;
487  }
488 
489  child_list& list = itor->second;
490  if(index >= list.size()) {
491  return;
492  }
493 
494  remove_ordered_child(std::distance(children_.begin(), itor), index);
495 
496  debug_delete(list[index]);
497  list.erase(list.begin() + index);
498 
499  if(list.empty()) {
500  remove_ordered_child_list(std::distance(children_.begin(), itor));
501  children_.erase(itor);
502  }
503 }
504 
505 void node::insert_ordered_child(int child_map_index, int child_list_index)
506 {
507  bool inserted = false;
509  while(i != ordered_children_.end()) {
510  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
511  i->child_list_index++;
512  } else if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
513  inserted = true;
514  i->child_list_index++;
515  i = ordered_children_.insert(i, node_pos(child_map_index, child_list_index));
516  ++i;
517  }
518 
519  ++i;
520  }
521 
522  if(!inserted) {
523  ordered_children_.emplace_back(child_map_index, child_list_index);
524  }
525 }
526 
527 void node::remove_ordered_child(int child_map_index, int child_list_index)
528 {
529  int erase_count = 0;
531  while(i != ordered_children_.end()) {
532  if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
533  i = ordered_children_.erase(i);
534  ++erase_count;
535  } else {
536  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
537  i->child_list_index--;
538  }
539  ++i;
540  }
541  }
542 
543  assert(erase_count == 1);
544 }
545 
546 void node::insert_ordered_child_list(int child_map_index)
547 {
549  while(i != ordered_children_.end()) {
550  if(i->child_map_index >= child_map_index) {
551  i->child_map_index++;
552  }
553  }
554 }
555 
556 void node::remove_ordered_child_list(int child_map_index)
557 {
559  while(i != ordered_children_.end()) {
560  if(i->child_map_index == child_map_index) {
561  assert(false);
562  i = ordered_children_.erase(i);
563  } else {
564  if(i->child_map_index > child_map_index) {
565  i->child_map_index--;
566  }
567 
568  ++i;
569  }
570  }
571 }
572 
574 {
575 // only define this symbol in debug mode to work out child ordering.
576 #ifdef CHECK_ORDERED_CHILDREN
577  std::vector<node_pos>::const_iterator i = ordered_children_.begin();
578  while(i != ordered_children_.end()) {
579  assert(i->child_map_index < children_.size());
580  assert(i->child_list_index < children_[i->child_map_index].second.size());
581  ++i;
582  }
583 
584  for(child_map::const_iterator j = children_.begin(); j != children_.end(); ++j) {
585  const unsigned short child_map_index = j - children_.begin();
586  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
587  const unsigned short child_list_index = k - j->second.begin();
588  bool found = false;
589  for(int n = 0; n != ordered_children_.size(); ++n) {
590  if(ordered_children_[n].child_map_index == child_map_index &&
591  ordered_children_[n].child_list_index == child_list_index) {
592  found = true;
593  break;
594  }
595  }
596 
597  assert(found);
598  }
599  }
600 #endif // CHECK_ORDERED_CHILDREN
601 }
602 
603 void node::remove_child(const char* name, std::size_t index)
604 {
606 }
607 
608 node* node::child(const char* name)
609 {
610  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
611  if(i->first == name) {
612  assert(i->second.empty() == false);
613  return i->second.front();
614  }
615  }
616 
617  return nullptr;
618 }
619 
620 const node* node::child(const char* name) const
621 {
622  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
623  if(i->first == name) {
624  if(i->second.empty()) {
625  return nullptr;
626  } else {
627  return i->second.front();
628  }
629  }
630  }
631 
632  return nullptr;
633 }
634 
635 const node::child_list& node::children(const char* name) const
636 {
637  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
638  if(i->first == name) {
639  return i->second;
640  }
641  }
642 
643  static const node::child_list empty;
644  return empty;
645 }
646 
647 int node::get_children(const char* name)
648 {
649  return get_children(string_span(name));
650 }
651 
653 {
654  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
655  if(i->first == name) {
656  return std::distance(children_.begin(), i);
657  }
658  }
659 
660  children_.emplace_back(string_span(name), child_list());
661  return children_.size() - 1;
662 }
663 
664 node::child_map::const_iterator node::find_in_map(const child_map& m, const string_span& attr)
665 {
666  child_map::const_iterator i = m.begin();
667  for(; i != m.end(); ++i) {
668  if(i->first == attr) {
669  break;
670  }
671  }
672 
673  return i;
674 }
675 
677 {
678  child_map::iterator i = m.begin();
679  for(; i != m.end(); ++i) {
680  if(i->first == attr) {
681  break;
682  }
683  }
684 
685  return i;
686 }
687 
689 {
690  if(children_.empty()) {
691  static const string_span empty;
692  return empty;
693  }
694 
695  return children_.begin()->first;
696 }
697 
698 int node::output_size() const
699 {
701  if(output_cache_.empty() == false) {
702  return output_cache_.size();
703  }
704 
705  int res = 0;
706  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
707  res += i->key.size() + i->value.size() + 4;
708  }
709 
710  std::size_t count_children = 0;
711  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
712  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
713  res += i->first.size()*2 + 7;
714  res += (*j)->output_size();
715  ++count_children;
716  }
717  }
718 
719  assert(count_children == ordered_children_.size());
720 
721  return res;
722 }
723 
724 void node::shift_buffers(ptrdiff_t offset)
725 {
726  if(!output_cache_.empty()) {
728  }
729 
730  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
731  i->key = string_span(i->key.begin() + offset, i->key.size());
732  i->value = string_span(i->value.begin() + offset, i->value.size());
733  }
734 
735  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
736  string_span& key = i->first;
737  key = string_span(key.begin() + offset, key.size());
738  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
739  (*j)->shift_buffers(offset);
740  }
741  }
742 }
743 
744 void node::output(char*& buf, CACHE_STATUS cache_status)
745 {
746  if(output_cache_.empty() == false) {
747  memcpy(buf, output_cache_.begin(), output_cache_.size());
748  if(cache_status == REFRESH_CACHE) {
750  }
751  buf += output_cache_.size();
752  return;
753  }
754 
755  char* begin = buf;
756 
757  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
758  memcpy(buf, i->key.begin(), i->key.size());
759  i->key = string_span(buf, i->key.size());
760  buf += i->key.size();
761  *buf++ = '=';
762  *buf++ = '"';
763  memcpy(buf, i->value.begin(), i->value.size());
764  i->value = string_span(buf, i->value.size());
765  buf += i->value.size();
766  *buf++ = '"';
767  *buf++ = '\n';
768  }
769 
770  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
771  i != ordered_children_.end(); ++i) {
772  assert(i->child_map_index < children_.size());
773  assert(i->child_list_index < children_[i->child_map_index].second.size());
774  string_span& attr = children_[i->child_map_index].first;
775  *buf++ = '[';
776  memcpy(buf, attr.begin(), attr.size());
777  attr = string_span(buf, attr.size());
778  buf += attr.size();
779  *buf++ = ']';
780  *buf++ = '\n';
781  children_[i->child_map_index].second[i->child_list_index]->output(buf, cache_status);
782  *buf++ = '[';
783  *buf++ = '/';
784  memcpy(buf, attr.begin(), attr.size());
785  buf += attr.size();
786  *buf++ = ']';
787  *buf++ = '\n';
788  }
789 
790  if(cache_status == REFRESH_CACHE) {
791  output_cache_ = string_span(begin, buf - begin);
792  }
793 }
794 
795 std::string node_to_string(const node& n)
796 {
797  //calling output with status=DO_NOT_MODIFY_CACHE really doesn't modify the
798  //node, so we can do it safely
799  node& mutable_node = const_cast<node&>(n);
800  std::vector<char> v(mutable_node.output_size());
801  char* ptr = &v[0];
802  mutable_node.output(ptr, node::DO_NOT_MODIFY_CACHE);
803  assert(ptr == &v[0] + v.size());
804  return std::string(v.begin(), v.end());
805 }
806 
807 void node::copy_into(node& n) const
808 {
809  n.set_dirty();
810  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
811  char* key = i->key.duplicate();
812  char* value = i->value.duplicate();
813  n.doc_->take_ownership_of_buffer(key);
814  n.doc_->take_ownership_of_buffer(value);
815  n.set_attr(key, value);
816  }
817 
818  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
819  i != ordered_children_.end(); ++i) {
820  assert(i->child_map_index < children_.size());
821  assert(i->child_list_index < children_[i->child_map_index].second.size());
822  char* buf = children_[i->child_map_index].first.duplicate();
823  n.doc_->take_ownership_of_buffer(buf);
824  children_[i->child_map_index].second[i->child_list_index]->copy_into(n.add_child(buf));
825  }
826 }
827 
828 void node::apply_diff(const node& diff)
829 {
830  set_dirty();
831  const node* inserts = diff.child("insert");
832  if(inserts != nullptr) {
833  for(attribute_list::const_iterator i = inserts->attr_.begin(); i != inserts->attr_.end(); ++i) {
834  char* name = i->key.duplicate();
835  char* value = i->value.duplicate();
836  set_attr(name, value);
839  }
840  }
841 
842  const node* deletes = diff.child("delete");
843  if(deletes != nullptr) {
844  for(attribute_list::const_iterator i = deletes->attr_.begin(); i != deletes->attr_.end(); ++i) {
845  std::pair<attribute_list::iterator,
846  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), i->key, string_span_pair_comparer());
847  if(range.first != range.second) {
848  attr_.erase(range.first);
849  }
850  }
851  }
852 
853  const child_list& child_changes = diff.children("change_child");
854  for(child_list::const_iterator i = child_changes.begin(); i != child_changes.end(); ++i) {
855  const std::size_t index = (**i)["index"].to_int();
856  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
857  const string_span& name = j->first;
858  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
860  if(itor != children_.end()) {
861  if(index < itor->second.size()) {
862  itor->second[index]->apply_diff(**k);
863  }
864  }
865  }
866  }
867  }
868 
869  const child_list& child_inserts = diff.children("insert_child");
870  for(child_list::const_iterator i = child_inserts.begin(); i != child_inserts.end(); ++i) {
871  const std::size_t index = (**i)["index"].to_int();
872  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
873  const string_span& name = j->first;
874  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
875  char* buf = name.duplicate();
877  (*k)->copy_into(add_child_at(buf, index));
878  }
879  }
880  }
881 
882  const child_list& child_deletes = diff.children("delete_child");
883  for(child_list::const_iterator i = child_deletes.begin(); i != child_deletes.end(); ++i) {
884  const std::size_t index = (**i)["index"].to_int();
885  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
886  if(j->second.empty()) {
887  continue;
888  }
889 
890  const string_span& name = j->first;
891  remove_child(name, index);
892  }
893  }
894 }
895 
897 {
898  doc_ = doc;
899 
900  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
901  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
902  (*j)->set_doc(doc);
903  }
904  }
905 }
906 
907 int node::nchildren() const
908 {
909  int res = 0;
910  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
911  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
912  ++res;
913  res += (*j)->nchildren();
914  }
915  }
916 
917  return res;
918 }
919 
921 {
922  int res = attr_.capacity();
923  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
924  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
925  res += (*j)->nattributes_recursive();
926  }
927  }
928 
929  return res;
930 }
931 
933 {
934  for(node* n = this; n != nullptr && n->output_cache_.is_null() == false; n = n->parent_) {
935  n->output_cache_ = string_span();
936  }
937 }
938 
940  compressed_buf_(),
941  output_(nullptr),
942  buffers_(),
943  root_(new node(*this, nullptr)),
944  prev_(nullptr),
945  next_(nullptr)
946 {
947  attach_list();
948 }
949 
951  compressed_buf_(),
952  output_(buf),
953  buffers_(),
954  root_(nullptr),
955  prev_(nullptr),
956  next_(nullptr)
957 {
958  if(control == INIT_TAKE_OWNERSHIP) {
959  buffers_.push_back(buf);
960  }
961  const char* cbuf = buf;
962  root_ = new node(*this, nullptr, &cbuf);
963 
964  attach_list();
965 }
966 
967 document::document(const char* buf, INIT_STATE state) :
968  compressed_buf_(),
969  output_(buf),
970  buffers_(),
971  root_(nullptr),
972  prev_(nullptr),
973  next_(nullptr)
974 {
975  if(state == INIT_COMPRESSED) {
977  output_ = nullptr;
978  } else {
979  root_ = new node(*this, nullptr, &buf);
980  }
981 
982  attach_list();
983 }
984 
986  compressed_buf_(compressed_buf),
987  output_(nullptr),
988  buffers_(),
989  root_(nullptr),
990  prev_(nullptr),
991  next_(nullptr)
992 {
993  string_span uncompressed_buf;
994  buffers_.push_back(uncompress_buffer(compressed_buf, &uncompressed_buf));
995  output_ = uncompressed_buf.begin();
996  const char* cbuf = output_;
997  try {
998  root_ = new node(*this, nullptr, &cbuf);
999  } catch(...) {
1000  ERR_SWML << "Caught exception creating a new simple_wml node: " << utils::get_unknown_exception_type();
1001  delete [] buffers_.front();
1002  buffers_.clear();
1003  throw;
1004  }
1005 
1006  attach_list();
1007 }
1008 
1010 {
1011  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1012  delete [] *i;
1013  }
1014 
1015  buffers_.clear();
1016  debug_delete(root_);
1017 
1018  detach_list();
1019 }
1020 
1021 const char* document::dup_string(const char* str)
1022 {
1023  const int len = strlen(str);
1024  char* res = new char[len+1];
1025  memcpy(res, str, len + 1);
1026  buffers_.push_back(res);
1027  return res;
1028 }
1029 
1030 const char* document::output()
1031 {
1032  if(output_ && (!root_ || root_->is_dirty() == false)) {
1033  return output_;
1034  }
1035  if(!root_) {
1036  assert(compressed_buf_.empty() == false);
1037  string_span uncompressed_buf;
1038  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1039  output_ = uncompressed_buf.begin();
1040  return output_;
1041  }
1042 
1043  //we're dirty, so the compressed buf must also be dirty; clear it.
1045 
1046  std::vector<char*> bufs;
1047  bufs.swap(buffers_);
1048 
1049  const int buf_size = root_->output_size() + 1;
1050  char* buf;
1051  try {
1052  buf = new char[buf_size];
1053  } catch (const std::bad_alloc& e) {
1054  ERR_SWML << "ERROR: Trying to allocate " << buf_size << " bytes. "
1055  << e.what();
1056  throw error("Bad allocation request in output().");
1057  }
1058  buffers_.push_back(buf);
1059  output_ = buf;
1060 
1062  *buf++ = 0;
1063  assert(buf == output_ + buf_size);
1064 
1065  for(std::vector<char*>::iterator i = bufs.begin(); i != bufs.end(); ++i) {
1066  delete [] *i;
1067  }
1068 
1069  bufs.clear();
1070 
1071  return output_;
1072 }
1073 
1075 {
1076  if(compressed_buf_.empty() == false &&
1077  (root_ == nullptr || root_->is_dirty() == false)) {
1078  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1079  return compressed_buf_;
1080  }
1081 
1082  buffers_.push_back(compress_buffer(output(), &compressed_buf_, bzip2));
1083  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1084 
1085  return compressed_buf_;
1086 }
1087 
1089 {
1091  debug_delete(root_);
1092  root_ = nullptr;
1093  output_ = nullptr;
1094  std::vector<char*> new_buffers;
1095  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1096  if(*i != compressed_buf_.begin()) {
1097  delete [] *i;
1098  } else {
1099  new_buffers.push_back(*i);
1100  }
1101  }
1102 
1103  buffers_.swap(new_buffers);
1104  assert(buffers_.size() == 1);
1105 }
1106 
1108 {
1109  if(output_ == nullptr) {
1110  assert(compressed_buf_.empty() == false);
1111  string_span uncompressed_buf;
1112  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1113  output_ = uncompressed_buf.begin();
1114  }
1115 
1116  assert(root_ == nullptr);
1117  const char* cbuf = output_;
1118  root_ = new node(*this, nullptr, &cbuf);
1119 }
1120 
1121 std::unique_ptr<document> document::clone()
1122 {
1123  char* buf = new char[strlen(output())+1];
1124  strcpy(buf, output());
1125  return std::make_unique<document>(buf);
1126 }
1127 
1129 {
1132  buffers_.swap(o.buffers_);
1133  std::swap(root_, o.root_);
1134 
1135  root_->set_doc(this);
1136  o.root_->set_doc(&o);
1137 }
1138 
1140 {
1142  output_ = nullptr;
1143  debug_delete(root_);
1144  root_ = new node(*this, nullptr);
1145  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1146  delete [] *i;
1147  }
1148 
1149  buffers_.clear();
1150 }
1151 
1152 namespace {
1153 document* head_doc = nullptr;
1154 }
1155 
1157 {
1158  prev_ = nullptr;
1159  next_ = head_doc;
1160 
1161  if(next_) {
1162  next_->prev_ = this;
1163  }
1164  head_doc = this;
1165 }
1166 
1168 {
1169  if(head_doc == this) {
1170  head_doc = next_;
1171  }
1172 
1173  if(next_) {
1174  next_->prev_ = prev_;
1175  }
1176 
1177  if(prev_) {
1178  prev_->next_ = next_;
1179  }
1180  next_ = prev_ = nullptr;
1181 }
1182 
1183 std::string document::stats()
1184 {
1185  std::ostringstream s;
1186  int ndocs = 0;
1187  int ncompressed = 0;
1188  int compressed_size = 0;
1189  int ntext = 0;
1190  int text_size = 0;
1191  int nbuffers = 0;
1192  int nnodes = 0;
1193  int ndirty = 0;
1194  int nattributes = 0;
1195  for(document* d = head_doc; d != nullptr; d = d->next_) {
1196  ndocs++;
1197  nbuffers += d->buffers_.size();
1198 
1199  if(d->compressed_buf_.is_null() == false) {
1200  ++ncompressed;
1201  compressed_size += d->compressed_buf_.size();
1202  }
1203 
1204  if(d->output_) {
1205  ++ntext;
1206  text_size += strlen(d->output_);
1207  }
1208 
1209  if(d->root_) {
1210  nnodes += 1 + d->root_->nchildren();
1211  nattributes += d->root_->nattributes_recursive();
1212  }
1213 
1214  if(d->root_ && d->root_->is_dirty()) {
1215  ++ndirty;
1216  }
1217  }
1218 
1219  const int nodes_alloc = nnodes*(sizeof(node) + 12);
1220  const int attr_alloc = nattributes*(sizeof(string_span)*2);
1221  const int total_alloc = compressed_size + text_size + nodes_alloc + attr_alloc;
1222 
1223  s << "WML documents: " << ndocs << "\n"
1224  << "Dirty: " << ndirty << "\n"
1225  << "With compression: " << ncompressed << " (" << compressed_size
1226  << " bytes)\n"
1227  << "With text: " << ntext << " (" << text_size
1228  << " bytes)\n"
1229  << "Nodes: " << nnodes << " (" << nodes_alloc << " bytes)\n"
1230  << "Attr: " << nattributes << " (" << attr_alloc << " bytes)\n"
1231  << "Buffers: " << nbuffers << "\n"
1232  << "Total allocation: " << total_alloc << " bytes\n";
1233 
1234  return s.str();
1235 }
1236 
1237 void swap(document& lhs, document& rhs)
1238 {
1239  lhs.swap(rhs);
1240 }
1241 
1242 }
void take_ownership_of_buffer(char *buffer)
Definition: simple_wml.hpp:281
const char * output()
std::unique_ptr< document > clone()
static std::string stats()
static std::size_t document_size_limit
Definition: simple_wml.hpp:290
const char * output_
Definition: simple_wml.hpp:297
void swap(document &o)
std::vector< char * > buffers_
Definition: simple_wml.hpp:298
string_span output_compressed(bool bzip2=false)
const char * dup_string(const char *str)
string_span compressed_buf_
Definition: simple_wml.hpp:296
const string_span & attr(const char *key) const
Definition: simple_wml.hpp:128
int output_size() const
Definition: simple_wml.cpp:698
void set_doc(document *doc)
Definition: simple_wml.cpp:896
void insert_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:505
void insert_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:546
void check_ordered_children() const
Definition: simple_wml.cpp:573
void remove_child(const char *name, std::size_t index)
Definition: simple_wml.cpp:603
int nchildren() const
Definition: simple_wml.cpp:907
bool is_dirty() const
Definition: simple_wml.hpp:161
node(document &doc, node *parent)
Definition: simple_wml.cpp:208
int get_children(const string_span &name)
Definition: simple_wml.cpp:652
document * doc_
Definition: simple_wml.hpp:190
const string_span & operator[](const char *key) const
Definition: simple_wml.cpp:392
const child_list & children(const char *name) const
Definition: simple_wml.cpp:635
void apply_diff(const node &diff)
Definition: simple_wml.cpp:828
attribute_list attr_
Definition: simple_wml.hpp:193
bool has_attr(const char *key) const
Definition: simple_wml.cpp:405
node & set_attr_int(const char *key, int value)
Definition: simple_wml.cpp:441
std::vector< child_pair > child_map
Definition: simple_wml.hpp:198
node * child(const char *name)
Definition: simple_wml.cpp:608
std::vector< node * > child_list
Definition: simple_wml.hpp:125
void remove_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:556
int nattributes_recursive() const
Definition: simple_wml.cpp:920
std::vector< node_pos > ordered_children_
Definition: simple_wml.hpp:216
const string_span & first_child() const
Definition: simple_wml.cpp:688
child_map children_
Definition: simple_wml.hpp:202
void output(char *&buf, CACHE_STATUS status=DO_NOT_MODIFY_CACHE)
Definition: simple_wml.cpp:744
node & add_child(const char *name)
Definition: simple_wml.cpp:466
node & set_attr(const char *key, const char *value)
Definition: simple_wml.cpp:413
string_span output_cache_
Definition: simple_wml.hpp:225
node & add_child_at(const char *name, std::size_t index)
Definition: simple_wml.cpp:447
void copy_into(node &n) const
Definition: simple_wml.cpp:807
node & set_attr_dup(const char *key, const char *value)
Definition: simple_wml.cpp:429
void remove_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:527
void shift_buffers(ptrdiff_t offset)
Definition: simple_wml.cpp:724
static child_map::const_iterator find_in_map(const child_map &m, const string_span &attr)
Definition: simple_wml.cpp:664
bool to_bool(bool default_value=false) const
Definition: simple_wml.cpp:159
std::string to_string() const
Definition: simple_wml.cpp:183
const char * begin() const
Definition: simple_wml.hpp:90
char * duplicate() const
Definition: simple_wml.cpp:188
const char * end() const
Definition: simple_wml.hpp:91
std::size_t i
Definition: function.cpp:968
unsigned in
If equal to search_counter, the node is off the list.
Standard logging facilities (interface).
void swap(document &lhs, document &rhs)
Implement non-member swap function for std::swap (calls document::swap).
std::ostream & operator<<(std::ostream &o, const string_span &s)
Definition: simple_wml.cpp:202
std::string node_to_string(const node &n)
Definition: simple_wml.cpp:795
@ INIT_TAKE_OWNERSHIP
Definition: simple_wml.hpp:230
std::size_t index(const std::string &str, const std::size_t index)
Codepoint index corresponding to the nth character in a UTF-8 string.
Definition: unicode.cpp:70
std::string get_unknown_exception_type()
Utility function for finding the type of thing caught with catch(...).
Definition: general.cpp:23
std::string::const_iterator iterator
Definition: tokenizer.hpp:25
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
#define ERR_SWML
Definition: simple_wml.cpp:30
static lg::log_domain log_config("config")
static map_location::DIRECTION n
static map_location::DIRECTION s
#define d
#define e
#define a
#define b