The Battle for Wesnoth  1.19.7+dev
simple_wml.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2008 - 2024
3  by David White <dave@whitevine.net>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 #include <sstream>
17 
18 #include <boost/iostreams/copy.hpp>
19 #include <boost/iostreams/filtering_stream.hpp>
20 #include <boost/iostreams/filter/bzip2.hpp>
21 #include <boost/iostreams/filter/counter.hpp>
22 #include <boost/iostreams/filter/gzip.hpp>
23 
25 
26 #include "log.hpp"
27 #include "utils/general.hpp"
28 
// Log domain for this file, plus error-level and info-level stream shortcuts on it.
29 static lg::log_domain log_config("config");
30 #define ERR_SWML LOG_STREAM(err, log_config)
31 #define LOG_SWML LOG_STREAM(info, log_config)
32 
33 namespace simple_wml {
34 
// Upper bound, in bytes, that uncompress_buffer() enforces on decompressed data.
35 std::size_t document::document_size_limit = 40000000;
36 
37 namespace {
38 
39 void debug_delete(node* n) {
40  delete n;
41 }
42 
43 char* uncompress_buffer(const string_span& input, string_span* span)
44 {
45  int nalloc = input.size();
46  int state = 0;
47  try {
48  std::istringstream stream(std::string(input.begin(), input.end()));
49  state = 1;
50  boost::iostreams::filtering_stream<boost::iostreams::input> filter;
51  state = 2;
52  if (!span->empty() && *span->begin() == 'B') {
53  filter.push(boost::iostreams::bzip2_decompressor());
54  } else {
55  filter.push(boost::iostreams::gzip_decompressor());
56  }
57  filter.push(stream);
58  state = 3;
59 
60  const std::size_t chunk_size = input.size() * 10;
61  nalloc = chunk_size;
62  std::vector<char> buf(chunk_size);
63  state = 4;
64  std::size_t len = 0;
65  std::size_t pos = 0;
66  while(filter.good() && (len = filter.read(&buf[pos], chunk_size).gcount()) == chunk_size) {
67  if(pos + chunk_size > document::document_size_limit) {
68  throw error("WML document exceeded size limit during decompression");
69  }
70 
71  pos += len;
72  buf.resize(pos + chunk_size);
73  len = 0;
74  }
75 
76  if(!filter.eof() && !filter.good()) {
77  throw error("failed to uncompress");
78  }
79 
80  pos += len;
81  state = 5;
82  nalloc = pos;
83 
84  buf.resize(pos);
85  state = 6;
86 
87  char* small_out = new char[pos+1];
88  memcpy(small_out, &buf[0], pos);
89  state = 7;
90 
91  small_out[pos] = 0;
92 
93  *span = string_span(small_out, pos);
94  state = 8;
95  return small_out;
96  } catch (const std::bad_alloc& e) {
97  ERR_SWML << "ERROR: bad_alloc caught in uncompress_buffer() state "
98  << state << " alloc bytes " << nalloc << " with input: '"
99  << input << "' " << e.what();
100  throw error("Bad allocation request in uncompress_buffer().");
101  }
102 }
103 
104 char* compress_buffer(const char* input, string_span* span, bool bzip2)
105 {
106  int nalloc = strlen(input);
107  int state = 0;
108  try {
109  std::string in(input);
110  state = 1;
111  std::istringstream istream(in);
112  state = 2;
113  boost::iostreams::filtering_stream<boost::iostreams::output> filter;
114  state = 3;
115  if (bzip2) {
116  filter.push(boost::iostreams::bzip2_compressor());
117  } else {
118  filter.push(boost::iostreams::gzip_compressor());
119  }
120  state = 4;
121  nalloc = in.size()*2 + 80;
122  std::vector<char> buf(nalloc);
123  boost::iostreams::array_sink out(&buf[0], buf.size());
124  filter.push(boost::iostreams::counter());
125  filter.push(out);
126 
127  state = 5;
128 
129  boost::iostreams::copy(istream, filter, buf.size());
130  const int len = filter.component<boost::iostreams::counter>(1)->characters();
131  assert(len < 128*1024*1024);
132  if((!filter.eof() && !filter.good()) || len == static_cast<int>(buf.size())) {
133  throw error("failed to compress");
134  }
135  state = 6;
136  nalloc = len;
137 
138  buf.resize(len);
139  state = 7;
140 
141  char* small_out = new char[len];
142  memcpy(small_out, &buf[0], len);
143  state = 8;
144 
145  *span = string_span(small_out, len);
146  assert(*small_out == (bzip2 ? 'B' : 31));
147  state = 9;
148  return small_out;
149  } catch (const std::bad_alloc& e) {
150  ERR_SWML << "ERROR: bad_alloc caught in compress_buffer() state "
151  << state << " alloc bytes " << nalloc << " with input: '"
152  << input << "' " << e.what();
153  throw error("Bad allocation request in compress_buffer().");
154  }
155 }
156 
157 } // namespace
158 
159 bool string_span::to_bool(bool default_value) const
160 {
161  if(empty()) {
162  return default_value;
163  }
164 
165  if (operator==("no") || operator==("off") || operator==("false") || operator==("0") || operator==("0.0"))
166  return false;
167 
168  return true;
169 }
170 
172 {
// Copies the span into a NUL-terminated stack buffer and parses it with
// atoi(). Spans of 64 characters or more do not fit and yield 0.
173  const int buf_size = 64;
174  if(size() >= buf_size) {
175  return 0;
176  }
177  char buf[64];
178  memcpy(buf, begin(), size());
179  buf[size()] = 0;
180  return atoi(buf);
181 }
182 
183 std::string string_span::to_string() const
184 {
185  return std::string(begin(), end());
186 }
187 
189 {
// Returns a new[]-allocated, NUL-terminated copy of the span's characters.
// The buffer is not registered anywhere here; the caller must arrange for it
// to be released.
190  char* buf = new char[size() + 1];
191  memcpy(buf, begin(), size());
192  buf[size()] = 0;
193  return buf;
194 }
195 
196 error::error(const char* msg)
197  : game::error(msg)
198 {
199  ERR_SWML << "ERROR: '" << msg << "'";
200 }
201 
202 std::ostream& operator<<(std::ostream& o, const string_span& s)
203 {
204  o << std::string(s.begin(), s.end());
205  return o;
206 }
207 
208 node::node(document& doc, node* parent) :
209  doc_(&doc),
210  attr_(),
211  parent_(parent),
212  children_(),
213  ordered_children_(),
214  output_cache_()
215 {
216 }
217 
// Parsing constructor: builds this node, and recursively its children, from
// WML text.
//
// doc    - document the node belongs to.
// parent - parent node, or nullptr.
// str    - in/out cursor into NUL-terminated text; it is advanced as text is
//          consumed (s below is a reference to *str).
// depth  - current nesting level; 1000 or more throws error.
//
// Parsing stops at the closing "[/...]" tag of this element or at the
// terminating NUL. In both cases output_cache_ is set to the span of text
// consumed for this node's contents. Throws error on malformed input.
218 #ifdef _MSC_VER
219 #pragma warning (push)
220 #pragma warning (disable: 4706)
221 #endif
222 node::node(document& doc, node* parent, const char** str, int depth) :
223  doc_(&doc),
224  attr_(),
225  parent_(parent),
226  children_(),
227  ordered_children_(),
228  output_cache_()
229 {
230  if(depth >= 1000) {
231  throw error("elements nested too deep");
232  }
233 
// Alias the caller's cursor so recursive child constructors continue from
// wherever this one left off.
234  const char*& s = *str;
235 
236  const char* const begin = s;
237  while(*s) {
238  switch(*s) {
239  case '[': {
// "[/" closes this element: cache what was read and skip past the ']'.
240  if(s[1] == '/') {
241  output_cache_ = string_span(begin, s - begin);
242  s = strchr(s, ']');
243  if(s == nullptr) {
244  throw error("end element unterminated");
245  }
246 
247  ++s;
248  return;
249  }
250 
// Otherwise "[name]" opens a child element.
251  ++s;
252  const char* end = strchr(s, ']');
253  if(end == nullptr) {
254  throw error("unterminated element");
255  }
256 
257  const int list_index = get_children(string_span(s, end - s));
259 
260  s = end + 1;
261 
// The child constructor advances s (through str) past its closing tag.
262  children_[list_index].second.push_back(new node(doc, this, str, depth+1));
263  ordered_children_.emplace_back(list_index, children_[list_index].second.size() - 1);
265 
266  break;
267  }
268  case ' ':
269  case '\t':
270  case '\n':
271  ++s;
272  break;
273  case '#':
// Skip to the newline; the whitespace case above then consumes it.
274  s = strchr(s, '\n');
275  if(s == nullptr) {
276  throw error("did not find newline after '#'");
277  }
278  break;
279  default: {
// Anything else is an attribute of the form name=value.
280  const char* end = strchr(s, '=');
281  if(end == nullptr) {
282  ERR_SWML << "attribute: " << s;
283  throw error("did not find '=' after attribute");
284  }
285 
286  string_span name(s, end - s);
287  s = end + 1;
// A leading '_' before the value is skipped up to the opening quote.
288  if(*s == '_') {
289  s = strchr(s, '"');
290  if(s == nullptr) {
291  throw error("did not find '\"' after '_'");
292  }
293  }
294 
// Unquoted value: runs to the end of the line and may not contain quotes.
295  if (*s != '"') {
296  end = strchr(s, '\n');
297  if (!end) {
298  ERR_SWML << "ATTR: '" << name << "' (((" << s << ")))";
299  throw error("did not find end of attribute");
300  }
301  if (memchr(s, '"', end - s))
302  throw error("found stray quotes in unquoted value");
303  goto read_attribute;
304  }
// Quoted value: find the closing quote, following '+' continuations onto
// subsequent lines.
305  end = s;
306  while(true)
307  {
308  // Read until the first single double quote.
309  while((end = strchr(end+1, '"')) && end[1] == '"') {
310 #ifdef _MSC_VER
311 #pragma warning (pop)
312 #endif
313  ++end;
314  }
315  if(end == nullptr)
316  throw error("did not find end of attribute");
317 
318  // Stop if newline.
319  const char *endline = end + 1;
320  while (*endline == ' ') ++endline;
321  if (*endline == '\n') break;
322 
323  // Read concatenation marker.
324  if (*(endline++) != '+')
325  throw error("did not find newline after end of attribute");
326  if (*(endline++) != '\n')
327  throw error("did not find newline after '+'");
328 
329  // Read textdomain marker.
330  if (*endline == '#') {
331  endline = strchr(endline + 1, '\n');
332  if (!endline)
333  throw error("did not find newline after '#'");
334  ++endline;
335  }
336 
337  // Read indentation and start of string.
338  while (*endline == '\t') ++endline;
339  if (*endline == '_') ++endline;
340  if (*endline != '"')
341  throw error("did not find quotes after '+'");
342  end = endline;
343  }
344 
// Step over the opening quote so the stored value excludes it.
345  ++s;
346 
347  read_attribute:
348  string_span value(s, end - s);
// Keys must be strictly increasing; attribute lookups in this file use
// std::equal_range on attr_.
349  if(attr_.empty() == false && !(attr_.back().key < name)) {
350  ERR_SWML << "attributes: '" << attr_.back().key << "' < '" << name << "'";
351  throw error("attributes not in order");
352  }
353 
354  s = end + 1;
355 
356  attr_.emplace_back(name, value);
357  }
358  }
359  }
360 
// Reached the terminating NUL without a closing tag.
361  output_cache_ = string_span(begin, s - begin);
363 }
364 
366 {
// Deletes every child node held in children_.
367  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
368  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
369  debug_delete(*j);
370  }
371  }
372 }
373 
374 namespace {
375 struct string_span_pair_comparer
376 {
377  bool operator()(const string_span& a, const node::attribute& b) const {
378  return a < b.key;
379  }
380 
381  bool operator()(const node::attribute& a, const string_span& b) const {
382  return a.key < b;
383  }
384 
385  bool operator()(const node::attribute& a,
386  const node::attribute& b) const {
387  return a.key < b.key;
388  }
389 };
390 }
391 
392 const string_span& node::operator[](const char* key) const
393 {
394  static string_span empty("");
395  string_span span(key);
396  std::pair<attribute_list::const_iterator,
397  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
398  if(range.first != range.second) {
399  return range.first->value;
400  }
401 
402  return empty;
403 }
404 
405 bool node::has_attr(const char* key) const
406 {
407  string_span span(key);
408  std::pair<attribute_list::const_iterator,
409  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
410  return range.first != range.second;
411 }
412 
413 node& node::set_attr(const char* key, const char* value)
414 {
415  set_dirty();
416 
417  string_span span(key);
418  std::pair<attribute_list::iterator,
419  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
420  if(range.first != range.second) {
421  range.first->value = string_span(value);
422  } else {
423  attr_.insert(range.first, attribute(span, string_span(value)));
424  }
425 
426  return *this;
427 }
428 
429 node& node::set_attr_dup(const char* key, const char* value)
430 {
431  return set_attr(key, doc_->dup_string(value));
432 }
433 
// Overload for a string_span value: makes a NUL-terminated heap copy with
// duplicate() and stores that copy through set_attr(). key is not copied.
434 node& node::set_attr_dup(const char* key, const string_span& value)
435 {
436  char* buf = value.duplicate();
438  return set_attr(key, buf);
439 }
440 
441 node& node::set_attr_int(const char* key, int value)
442 {
443  std::string temp = std::to_string(value);
444  return set_attr_dup(key, temp.c_str());
445 }
446 
// Inserts a new empty child called name at position index among the children
// of that name, and returns it. An index past the end appends.
447 node& node::add_child_at(const char* name, std::size_t index)
448 {
449  set_dirty();
450 
451  const int list_index = get_children(name);
452  child_list& list = children_[list_index].second;
// Clamp so the insert below is always within [begin, end].
453  if(index > list.size()) {
454  index = list.size();
455  }
456 
458  list.insert(list.begin() + index, new node(*doc_, this));
// Keep the document-order bookkeeping in step with the per-name list.
459  insert_ordered_child(list_index, index);
460 
462  return *list[index];
463 }
464 
465 
// Appends a new empty child called name and returns it. The child is recorded
// last in ordered_children_.
466 node& node::add_child(const char* name)
467 {
468  set_dirty();
469 
470  const int list_index = get_children(name);
472  child_list& list = children_[list_index].second;
473  list.push_back(new node(*doc_, this));
474  ordered_children_.emplace_back(list_index, list.size() - 1);
476  return *list.back();
477 }
478 
// Deletes the index-th child called name. Does nothing when no such child
// exists. When the last child of that name goes, its children_ entry is
// removed too.
479 void node::remove_child(const string_span& name, std::size_t index)
480 {
481  set_dirty();
482 
483  //if we don't already have a vector for this item we don't want to add one.
485  if(itor == children_.end()) {
486  return;
487  }
488 
489  child_list& list = itor->second;
490  if(index >= list.size()) {
491  return;
492  }
493 
// Drop the ordering record first, then the node itself.
494  remove_ordered_child(std::distance(children_.begin(), itor), index);
495 
496  debug_delete(list[index]);
497  list.erase(list.begin() + index);
498 
499  if(list.empty()) {
// Renumber the remaining ordering records before erasing the map entry.
500  remove_ordered_child_list(std::distance(children_.begin(), itor));
501  children_.erase(itor);
502  }
503 }
504 
// Records a child newly inserted at (child_map_index, child_list_index) in
// ordered_children_. Entries of the same list at that position or later have
// their list index incremented; the new record goes directly before the entry
// that previously held that position, or at the end if there was none.
505 void node::insert_ordered_child(int child_map_index, int child_list_index)
506 {
507  bool inserted = false;
509  while(i != ordered_children_.end()) {
510  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
511  i->child_list_index++;
512  } else if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
513  inserted = true;
514  i->child_list_index++;
515  i = ordered_children_.insert(i, node_pos(child_map_index, child_list_index));
// insert() returned the new record; step onto the entry it displaced so the
// shared increment below moves past both.
516  ++i;
517  }
518 
519  ++i;
520  }
521 
522  if(!inserted) {
523  ordered_children_.emplace_back(child_map_index, child_list_index);
524  }
525 }
526 
// Erases the ordered_children_ record for (child_map_index, child_list_index)
// and decrements the list index of later entries in the same list. Asserts
// that exactly one record was erased.
527 void node::remove_ordered_child(int child_map_index, int child_list_index)
528 {
529  int erase_count = 0;
531  while(i != ordered_children_.end()) {
532  if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
533  i = ordered_children_.erase(i);
534  ++erase_count;
535  } else {
536  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
537  i->child_list_index--;
538  }
539  ++i;
540  }
541  }
542 
543  assert(erase_count == 1);
544 }
545 
546 void node::insert_ordered_child_list(int child_map_index)
547 {
549  while(i != ordered_children_.end()) {
550  if(i->child_map_index >= child_map_index) {
551  i->child_map_index++;
552  }
553  }
554 }
555 
// Called when the child list at child_map_index is removed: decrements the
// map index of every record that pointed past it. No record may still refer
// to the removed list; one that does trips the assert and is erased.
556 void node::remove_ordered_child_list(int child_map_index)
557 {
559  while(i != ordered_children_.end()) {
560  if(i->child_map_index == child_map_index) {
561  assert(false);
562  i = ordered_children_.erase(i);
563  } else {
564  if(i->child_map_index > child_map_index) {
565  i->child_map_index--;
566  }
567 
568  ++i;
569  }
570  }
571 }
572 
574 {
// Consistency check between children_ and ordered_children_. The body
// compiles to nothing unless CHECK_ORDERED_CHILDREN is defined.
575 // only define this symbol in debug mode to work out child ordering.
576 #ifdef CHECK_ORDERED_CHILDREN
// Every ordering record must point at an existing child...
577  std::vector<node_pos>::const_iterator i = ordered_children_.begin();
578  while(i != ordered_children_.end()) {
579  assert(i->child_map_index < children_.size());
580  assert(i->child_list_index < children_[i->child_map_index].second.size());
581  ++i;
582  }
583 
// ...and every child must have an ordering record.
584  for(child_map::const_iterator j = children_.begin(); j != children_.end(); ++j) {
585  const unsigned short child_map_index = j - children_.begin();
586  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
587  const unsigned short child_list_index = k - j->second.begin();
588  bool found = false;
589  for(int n = 0; n != ordered_children_.size(); ++n) {
590  if(ordered_children_[n].child_map_index == child_map_index &&
591  ordered_children_[n].child_list_index == child_list_index) {
592  found = true;
593  break;
594  }
595  }
596 
597  assert(found);
598  }
599  }
600 #endif // CHECK_ORDERED_CHILDREN
601 }
602 
// Overload of remove_child() that takes the child name as a C string.
603 void node::remove_child(const char* name, std::size_t index)
604 {
606 }
607 
608 node* node::child(const char* name)
609 {
610  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
611  if(i->first == name) {
612  assert(i->second.empty() == false);
613  return i->second.front();
614  }
615  }
616 
617  return nullptr;
618 }
619 
620 const node* node::child(const char* name) const
621 {
622  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
623  if(i->first == name) {
624  if(i->second.empty()) {
625  return nullptr;
626  } else {
627  return i->second.front();
628  }
629  }
630  }
631 
632  return nullptr;
633 }
634 
635 node& node::child_or_add(const char* name)
636 {
637  if(node* res = child(name)) {
638  return *res;
639  }
640  return add_child(name);
641 }
642 
643 const node::child_list& node::children(const char* name) const
644 {
645  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
646  if(i->first == name) {
647  return i->second;
648  }
649  }
650 
651  static const node::child_list empty;
652  return empty;
653 }
654 
655 int node::get_children(const char* name)
656 {
657  return get_children(string_span(name));
658 }
659 
661 {
// Returns the index in children_ of the list called name. If there is none,
// an empty list is appended and its index returned.
662  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
663  if(i->first == name) {
664  return std::distance(children_.begin(), i);
665  }
666  }
667 
668  children_.emplace_back(string_span(name), child_list());
669  return children_.size() - 1;
670 }
671 
672 node::child_map::const_iterator node::find_in_map(const child_map& m, const string_span& attr)
673 {
674  child_map::const_iterator i = m.begin();
675  for(; i != m.end(); ++i) {
676  if(i->first == attr) {
677  break;
678  }
679  }
680 
681  return i;
682 }
683 
685 {
// Mutable counterpart of the const lookup: linear search of m for the entry
// named attr, returning m.end() when there is none.
686  child_map::iterator i = m.begin();
687  for(; i != m.end(); ++i) {
688  if(i->first == attr) {
689  break;
690  }
691  }
692 
693  return i;
694 }
695 
697 {
// Returns the name of the first entry in children_, or a shared empty span
// when the node has no children.
698  if(children_.empty()) {
699  static const string_span empty;
700  return empty;
701  }
702 
703  return children_.begin()->first;
704 }
705 
// Number of bytes output() writes for this node: the cached size when an
// output cache exists, otherwise computed recursively.
706 int node::output_size() const
707 {
709  if(output_cache_.empty() == false) {
710  return output_cache_.size();
711  }
712 
713  int res = 0;
714  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
// key + value plus the four bytes of =" and "\n
715  res += i->key.size() + i->value.size() + 4;
716  }
717 
718  std::size_t count_children = 0;
719  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
720  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
// The name appears twice, plus seven bytes: "[" "]\n" around the open tag
// and "[/" "]\n" around the close tag.
721  res += i->first.size()*2 + 7;
722  res += (*j)->output_size();
723  ++count_children;
724  }
725  }
726 
727  assert(count_children == ordered_children_.size());
728 
729  return res;
730 }
731 
// Rebases the attribute keys and values and the child names of this node, and
// recursively of all its children, by offset bytes. Sizes are unchanged.
732 void node::shift_buffers(ptrdiff_t offset)
733 {
734  if(!output_cache_.empty()) {
736  }
737 
738  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
739  i->key = string_span(i->key.begin() + offset, i->key.size());
740  i->value = string_span(i->value.begin() + offset, i->value.size());
741  }
742 
743  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
744  string_span& key = i->first;
745  key = string_span(key.begin() + offset, key.size());
746  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
747  (*j)->shift_buffers(offset);
748  }
749  }
750 }
751 
// Serializes this node into buf and advances buf past the written bytes. The
// caller must supply at least output_size() bytes.
//
// With cache_status == REFRESH_CACHE, attribute keys and values, child names
// and output_cache_ are re-pointed at the freshly written text.
752 void node::output(char*& buf, CACHE_STATUS cache_status)
753 {
// Fast path: the cached text is still valid, copy it verbatim.
754  if(output_cache_.empty() == false) {
755  memcpy(buf, output_cache_.begin(), output_cache_.size());
756  if(cache_status == REFRESH_CACHE) {
758  }
759  buf += output_cache_.size();
760  return;
761  }
762 
763  char* begin = buf;
764 
// Attributes are written as key="value"\n
765  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
766  memcpy(buf, i->key.begin(), i->key.size());
767  if(cache_status == REFRESH_CACHE) {
768  i->key = string_span(buf, i->key.size());
769  }
770  buf += i->key.size();
771  *buf++ = '=';
772  *buf++ = '"';
773  memcpy(buf, i->value.begin(), i->value.size());
774  if(cache_status == REFRESH_CACHE) {
775  i->value = string_span(buf, i->value.size());
776  }
777  buf += i->value.size();
778  *buf++ = '"';
779  *buf++ = '\n';
780  }
781 
// Children are written in ordered_children_ order as [name]\n...[/name]\n
782  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
783  i != ordered_children_.end(); ++i) {
784  assert(i->child_map_index < children_.size());
785  assert(i->child_list_index < children_[i->child_map_index].second.size());
786  string_span& attr = children_[i->child_map_index].first;
787  *buf++ = '[';
788  memcpy(buf, attr.begin(), attr.size());
789  if(cache_status == REFRESH_CACHE) {
790  attr = string_span(buf, attr.size());
791  }
792  buf += attr.size();
793  *buf++ = ']';
794  *buf++ = '\n';
795  children_[i->child_map_index].second[i->child_list_index]->output(buf, cache_status);
796  *buf++ = '[';
797  *buf++ = '/';
798  memcpy(buf, attr.begin(), attr.size());
799  buf += attr.size();
800  *buf++ = ']';
801  *buf++ = '\n';
802  }
803 
804  if(cache_status == REFRESH_CACHE) {
805  output_cache_ = string_span(begin, buf - begin);
806  }
807 }
808 
809 std::string node_to_string(const node& n)
810 {
811  //calling output with status=DO_NOT_MODIFY_CACHE really doesn't modify the
812  //node, so we can do it safely
813  node& mutable_node = const_cast<node&>(n);
814  std::vector<char> v(mutable_node.output_size());
815  char* ptr = &v[0];
816  mutable_node.output(ptr, node::DO_NOT_MODIFY_CACHE);
817  assert(ptr == &v[0] + v.size());
818  return std::string(v.begin(), v.end());
819 }
820 
821 void node::copy_into(node& n) const
822 {
823  n.set_dirty();
824  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
825  char* key = i->key.duplicate();
826  char* value = i->value.duplicate();
827  n.doc_->take_ownership_of_buffer(key);
828  n.doc_->take_ownership_of_buffer(value);
829  n.set_attr(key, value);
830  }
831 
832  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
833  i != ordered_children_.end(); ++i) {
834  assert(i->child_map_index < children_.size());
835  assert(i->child_list_index < children_[i->child_map_index].second.size());
836  char* buf = children_[i->child_map_index].first.duplicate();
837  n.doc_->take_ownership_of_buffer(buf);
838  children_[i->child_map_index].second[i->child_list_index]->copy_into(n.add_child(buf));
839  }
840 }
841 
// Applies the changes described by diff to this node. The steps run in this
// order:
//   [insert]       - each attribute is set on this node
//   [delete]       - each named attribute is erased
//   [change_child] - the diff is applied recursively to the child at "index"
//   [insert_child] - the contained children are copied in at "index"
//   [delete_child] - the child at "index" is removed
842 void node::apply_diff(const node& diff)
843 {
844  set_dirty();
845  const node* inserts = diff.child("insert");
846  if(inserts != nullptr) {
847  for(attribute_list::const_iterator i = inserts->attr_.begin(); i != inserts->attr_.end(); ++i) {
// Duplicate both strings so the attribute does not point into diff.
848  char* name = i->key.duplicate();
849  char* value = i->value.duplicate();
850  set_attr(name, value);
853  }
854  }
855 
856  const node* deletes = diff.child("delete");
857  if(deletes != nullptr) {
858  for(attribute_list::const_iterator i = deletes->attr_.begin(); i != deletes->attr_.end(); ++i) {
859  std::pair<attribute_list::iterator,
860  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), i->key, string_span_pair_comparer());
861  if(range.first != range.second) {
862  attr_.erase(range.first);
863  }
864  }
865  }
866 
867  const child_list& child_changes = diff.children("change_child");
868  for(child_list::const_iterator i = child_changes.begin(); i != child_changes.end(); ++i) {
869  const std::size_t index = (**i)["index"].to_int();
870  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
871  const string_span& name = j->first;
872  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
// An unknown child name or an out-of-range index is silently ignored.
874  if(itor != children_.end()) {
875  if(index < itor->second.size()) {
876  itor->second[index]->apply_diff(**k);
877  }
878  }
879  }
880  }
881  }
882 
883  const child_list& child_inserts = diff.children("insert_child");
884  for(child_list::const_iterator i = child_inserts.begin(); i != child_inserts.end(); ++i) {
885  const std::size_t index = (**i)["index"].to_int();
886  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
887  const string_span& name = j->first;
888  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
889  char* buf = name.duplicate();
891  (*k)->copy_into(add_child_at(buf, index));
892  }
893  }
894  }
895 
896  const child_list& child_deletes = diff.children("delete_child");
897  for(child_list::const_iterator i = child_deletes.begin(); i != child_deletes.end(); ++i) {
898  const std::size_t index = (**i)["index"].to_int();
899  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
900  if(j->second.empty()) {
901  continue;
902  }
903 
904  const string_span& name = j->first;
905  remove_child(name, index);
906  }
907  }
908 }
909 
911 {
// Points this node and, recursively, every descendant at doc.
912  doc_ = doc;
913 
914  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
915  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
916  (*j)->set_doc(doc);
917  }
918  }
919 }
920 
921 int node::nchildren() const
922 {
923  int res = 0;
924  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
925  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
926  ++res;
927  res += (*j)->nchildren();
928  }
929  }
930 
931  return res;
932 }
933 
935 {
// Sums attr_.capacity() over this node and all descendants. This counts
// allocated attribute slots, not the number of attributes in use.
936  int res = attr_.capacity();
937  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
938  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
939  res += (*j)->nattributes_recursive();
940  }
941  }
942 
943  return res;
944 }
945 
947 {
// Clears output_cache_ on this node and each ancestor in turn, stopping at
// the first node whose cache is already null.
948  for(node* n = this; n != nullptr && n->output_cache_.is_null() == false; n = n->parent_) {
949  n->output_cache_ = string_span();
950  }
951 }
952 
954  compressed_buf_(),
955  output_(nullptr),
956  buffers_(),
957  root_(new node(*this, nullptr)),
958  prev_(nullptr),
959  next_(nullptr)
960 {
// Starts with an empty root node and no text; registers the document in the
// global document list.
961  attach_list();
962 }
963 
965  compressed_buf_(),
966  output_(buf),
967  buffers_(),
968  root_(nullptr),
969  prev_(nullptr),
970  next_(nullptr)
971 {
// Parses the text in buf into the root node. With INIT_TAKE_OWNERSHIP, buf is
// added to buffers_, which the document later releases with delete[].
972  if(control == INIT_TAKE_OWNERSHIP) {
973  buffers_.push_back(buf);
974  }
// The parser advances the pointer it is given, so hand it a copy.
975  const char* cbuf = buf;
976  root_ = new node(*this, nullptr, &cbuf);
977 
978  attach_list();
979 }
980 
// Builds a document from a constant buffer. Unless state is INIT_COMPRESSED,
// buf is parsed as WML text straight away; in the compressed case output_ is
// reset to null and no root node is created here.
981 document::document(const char* buf, INIT_STATE state) :
982  compressed_buf_(),
983  output_(buf),
984  buffers_(),
985  root_(nullptr),
986  prev_(nullptr),
987  next_(nullptr)
988 {
989  if(state == INIT_COMPRESSED) {
991  output_ = nullptr;
992  } else {
993  root_ = new node(*this, nullptr, &buf);
994  }
995 
996  attach_list();
997 }
998 
1000  compressed_buf_(compressed_buf),
1001  output_(nullptr),
1002  buffers_(),
1003  root_(nullptr),
1004  prev_(nullptr),
1005  next_(nullptr)
1006 {
// Decompresses compressed_buf and parses the result into the root node. The
// decompressed buffer is owned through buffers_.
1007  string_span uncompressed_buf;
1008  buffers_.push_back(uncompress_buffer(compressed_buf, &uncompressed_buf));
1009  output_ = uncompressed_buf.begin();
1010  const char* cbuf = output_;
1011  try {
1012  root_ = new node(*this, nullptr, &cbuf);
1013  } catch(...) {
// The destructor does not run for a constructor that throws, so free the
// decompressed buffer by hand before rethrowing.
1014  ERR_SWML << "Caught exception creating a new simple_wml node: " << utils::get_unknown_exception_type();
1015  delete [] buffers_.front();
1016  buffers_.clear();
1017  throw;
1018  }
1019 
1020  attach_list();
1021 }
1022 
1024 {
// Frees every owned buffer and the node tree, then unlinks the document from
// the global document list.
1025  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1026  delete [] *i;
1027  }
1028 
1029  buffers_.clear();
1030  debug_delete(root_);
1031 
1032  detach_list();
1033 }
1034 
1035 const char* document::dup_string(const char* str)
1036 {
1037  const int len = strlen(str);
1038  char* res = new char[len+1];
1039  memcpy(res, str, len + 1);
1040  buffers_.push_back(res);
1041  return res;
1042 }
1043 
// Returns the document as NUL-terminated WML text. The pointer remains owned
// by the document.
//
// An existing text buffer is reused when there is no root node or the root is
// not dirty. A compressed-only document is decompressed. Otherwise the node
// tree is serialized into a new buffer and all previous buffers are freed.
1044 const char* document::output()
1045 {
1046  if(output_ && (!root_ || root_->is_dirty() == false)) {
1047  return output_;
1048  }
1049  if(!root_) {
1050  assert(compressed_buf_.empty() == false);
1051  string_span uncompressed_buf;
1052  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1053  output_ = uncompressed_buf.begin();
1054  return output_;
1055  }
1056 
1057  //we're dirty, so the compressed buf must also be dirty; clear it.
1059 
// Set the old buffers aside: the nodes may still point into them, so they can
// only be freed once the new buffer is in place.
1060  std::vector<char*> bufs;
1061  bufs.swap(buffers_);
1062 
// One extra byte for the terminating NUL.
1063  const int buf_size = root_->output_size() + 1;
1064  char* buf;
1065  try {
1066  buf = new char[buf_size];
1067  } catch (const std::bad_alloc& e) {
1068  ERR_SWML << "ERROR: Trying to allocate " << buf_size << " bytes. "
1069  << e.what();
1070  throw error("Bad allocation request in output().");
1071  }
1072  buffers_.push_back(buf);
1073  output_ = buf;
1074 
1076  *buf++ = 0;
1077  assert(buf == output_ + buf_size);
1078 
1079  for(std::vector<char*>::iterator i = bufs.begin(); i != bufs.end(); ++i) {
1080  delete [] *i;
1081  }
1082 
1083  bufs.clear();
1084 
1085  return output_;
1086 }
1087 
1089 {
// Returns the compressed form of the document. A cached compressed buffer is
// reused when there is no root node or the root is not dirty; otherwise the
// text from output() is compressed and the new buffer added to buffers_.
1090  if(compressed_buf_.empty() == false &&
1091  (root_ == nullptr || root_->is_dirty() == false)) {
1092  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1093  return compressed_buf_;
1094  }
1095 
1096  buffers_.push_back(compress_buffer(output(), &compressed_buf_, bzip2));
1097  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1098 
1099  return compressed_buf_;
1100 }
1101 
1103 {
// Drops the node tree, the text pointer and every buffer other than the one
// holding the compressed data. Asserts that exactly one buffer remains.
1105  debug_delete(root_);
1106  root_ = nullptr;
1107  output_ = nullptr;
1108  std::vector<char*> new_buffers;
1109  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1110  if(*i != compressed_buf_.begin()) {
1111  delete [] *i;
1112  } else {
1113  new_buffers.push_back(*i);
1114  }
1115  }
1116 
1117  buffers_.swap(new_buffers);
1118  assert(buffers_.size() == 1);
1119 }
1120 
1122 {
// Rebuilds the node tree from the document text, decompressing first when no
// text buffer is present. Asserts that no root node exists yet.
1123  if(output_ == nullptr) {
1124  assert(compressed_buf_.empty() == false);
1125  string_span uncompressed_buf;
1126  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1127  output_ = uncompressed_buf.begin();
1128  }
1129 
1130  assert(root_ == nullptr);
1131  const char* cbuf = output_;
1132  root_ = new node(*this, nullptr, &cbuf);
1133 }
1134 
1135 std::unique_ptr<document> document::clone()
1136 {
1137  char* buf = new char[strlen(output())+1];
1138  strcpy(buf, output());
1139  return std::make_unique<document>(buf);
1140 }
1141 
1143 {
// Exchanges buffers and node trees with o, then re-points each tree at the
// document that now holds it.
// NOTE(review): both root_ pointers are dereferenced unconditionally below;
// confirm callers never swap a document that has no root node.
1146  buffers_.swap(o.buffers_);
1147  std::swap(root_, o.root_);
1148 
1149  root_->set_doc(this);
1150  o.root_->set_doc(&o);
1151 }
1152 
1154 {
// Resets the document to a fresh empty root node, freeing the old tree and
// every owned buffer.
1156  output_ = nullptr;
1157  debug_delete(root_);
1158  root_ = new node(*this, nullptr);
1159  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1160  delete [] *i;
1161  }
1162 
1163  buffers_.clear();
1164 }
1165 
1166 namespace {
1167 document* head_doc = nullptr;
1168 }
1169 
1171 {
// Links this document in at the front of the global document list.
1172  prev_ = nullptr;
1173  next_ = head_doc;
1174 
1175  if(next_) {
1176  next_->prev_ = this;
1177  }
1178  head_doc = this;
1179 }
1180 
1182 {
// Unlinks this document from the global document list and clears its links.
1183  if(head_doc == this) {
1184  head_doc = next_;
1185  }
1186 
1187  if(next_) {
1188  next_->prev_ = prev_;
1189  }
1190 
1191  if(prev_) {
1192  prev_->next_ = next_;
1193  }
1194  next_ = prev_ = nullptr;
1195 }
1196 
1197 std::string document::stats()
1198 {
1199  std::ostringstream s;
1200  int ndocs = 0;
1201  int ncompressed = 0;
1202  int compressed_size = 0;
1203  int ntext = 0;
1204  int text_size = 0;
1205  int nbuffers = 0;
1206  int nnodes = 0;
1207  int ndirty = 0;
1208  int nattributes = 0;
1209  for(document* d = head_doc; d != nullptr; d = d->next_) {
1210  ndocs++;
1211  nbuffers += d->buffers_.size();
1212 
1213  if(d->compressed_buf_.is_null() == false) {
1214  ++ncompressed;
1215  compressed_size += d->compressed_buf_.size();
1216  }
1217 
1218  if(d->output_) {
1219  ++ntext;
1220  text_size += strlen(d->output_);
1221  }
1222 
1223  if(d->root_) {
1224  nnodes += 1 + d->root_->nchildren();
1225  nattributes += d->root_->nattributes_recursive();
1226  }
1227 
1228  if(d->root_ && d->root_->is_dirty()) {
1229  ++ndirty;
1230  }
1231  }
1232 
1233  const int nodes_alloc = nnodes*(sizeof(node) + 12);
1234  const int attr_alloc = nattributes*(sizeof(string_span)*2);
1235  const int total_alloc = compressed_size + text_size + nodes_alloc + attr_alloc;
1236 
1237  s << "WML documents: " << ndocs << "\n"
1238  << "Dirty: " << ndirty << "\n"
1239  << "With compression: " << ncompressed << " (" << compressed_size
1240  << " bytes)\n"
1241  << "With text: " << ntext << " (" << text_size
1242  << " bytes)\n"
1243  << "Nodes: " << nnodes << " (" << nodes_alloc << " bytes)\n"
1244  << "Attr: " << nattributes << " (" << attr_alloc << " bytes)\n"
1245  << "Buffers: " << nbuffers << "\n"
1246  << "Total allocation: " << total_alloc << " bytes\n";
1247 
1248  return s.str();
1249 }
1250 
1251 void swap(document& lhs, document& rhs)
1252 {
1253  lhs.swap(rhs);
1254 }
1255 
1256 }
void take_ownership_of_buffer(char *buffer)
Definition: simple_wml.hpp:283
const char * output()
std::unique_ptr< document > clone()
static std::string stats()
static std::size_t document_size_limit
Definition: simple_wml.hpp:292
const char * output_
Definition: simple_wml.hpp:299
void swap(document &o)
std::vector< char * > buffers_
Definition: simple_wml.hpp:300
string_span output_compressed(bool bzip2=false)
const char * dup_string(const char *str)
string_span compressed_buf_
Definition: simple_wml.hpp:298
const string_span & attr(const char *key) const
Definition: simple_wml.hpp:128
int output_size() const
Definition: simple_wml.cpp:706
void set_doc(document *doc)
Definition: simple_wml.cpp:910
void insert_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:505
void insert_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:546
void check_ordered_children() const
Definition: simple_wml.cpp:573
void remove_child(const char *name, std::size_t index)
Definition: simple_wml.cpp:603
int nchildren() const
Definition: simple_wml.cpp:921
bool is_dirty() const
Definition: simple_wml.hpp:163
node(document &doc, node *parent)
Definition: simple_wml.cpp:208
int get_children(const string_span &name)
Definition: simple_wml.cpp:660
document * doc_
Definition: simple_wml.hpp:192
const string_span & operator[](const char *key) const
Definition: simple_wml.cpp:392
const child_list & children(const char *name) const
Definition: simple_wml.cpp:643
void apply_diff(const node &diff)
Definition: simple_wml.cpp:842
attribute_list attr_
Definition: simple_wml.hpp:195
bool has_attr(const char *key) const
Definition: simple_wml.cpp:405
node & set_attr_int(const char *key, int value)
Definition: simple_wml.cpp:441
std::vector< child_pair > child_map
Definition: simple_wml.hpp:200
node * child(const char *name)
Definition: simple_wml.cpp:608
std::vector< node * > child_list
Definition: simple_wml.hpp:125
void remove_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:556
int nattributes_recursive() const
Definition: simple_wml.cpp:934
std::vector< node_pos > ordered_children_
Definition: simple_wml.hpp:218
const string_span & first_child() const
Definition: simple_wml.cpp:696
child_map children_
Definition: simple_wml.hpp:204
void output(char *&buf, CACHE_STATUS status=DO_NOT_MODIFY_CACHE)
Definition: simple_wml.cpp:752
node & add_child(const char *name)
Definition: simple_wml.cpp:466
node & child_or_add(const char *name)
Definition: simple_wml.cpp:635
node & set_attr(const char *key, const char *value)
Definition: simple_wml.cpp:413
string_span output_cache_
Definition: simple_wml.hpp:227
node & add_child_at(const char *name, std::size_t index)
Definition: simple_wml.cpp:447
void copy_into(node &n) const
Definition: simple_wml.cpp:821
node & set_attr_dup(const char *key, const char *value)
Definition: simple_wml.cpp:429
void remove_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:527
void shift_buffers(ptrdiff_t offset)
Definition: simple_wml.cpp:732
static child_map::const_iterator find_in_map(const child_map &m, const string_span &attr)
Definition: simple_wml.cpp:672
bool to_bool(bool default_value=false) const
Definition: simple_wml.cpp:159
std::string to_string() const
Definition: simple_wml.cpp:183
const char * begin() const
Definition: simple_wml.hpp:90
char * duplicate() const
Definition: simple_wml.cpp:188
const char * end() const
Definition: simple_wml.hpp:91
std::size_t i
Definition: function.cpp:1029
unsigned in
If equal to search_counter, the node is off the list.
Standard logging facilities (interface).
void swap(document &lhs, document &rhs)
Implement non-member swap function for std::swap (calls document::swap).
std::ostream & operator<<(std::ostream &o, const string_span &s)
Definition: simple_wml.cpp:202
std::string node_to_string(const node &n)
Definition: simple_wml.cpp:809
@ INIT_TAKE_OWNERSHIP
Definition: simple_wml.hpp:232
std::size_t index(std::string_view str, const std::size_t index)
Codepoint index corresponding to the nth character in a UTF-8 string.
Definition: unicode.cpp:70
std::string get_unknown_exception_type()
Utility function for finding the type of thing caught with catch(...).
Definition: general.cpp:23
std::string::const_iterator iterator
Definition: tokenizer.hpp:25
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
#define ERR_SWML
Definition: simple_wml.cpp:30
static lg::log_domain log_config("config")
static map_location::direction n
static map_location::direction s
#define d
#define e
#define b