The Battle for Wesnoth  1.19.12+dev
simple_wml.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2008 - 2025
3  by David White <dave@whitevine.net>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 #include <sstream>
17 
18 #include <boost/iostreams/copy.hpp>
19 #include <boost/iostreams/filtering_stream.hpp>
20 #include <boost/iostreams/filter/bzip2.hpp>
21 #include <boost/iostreams/filter/counter.hpp>
22 #include <boost/iostreams/filter/gzip.hpp>
23 
25 
26 #include "log.hpp"
28 #include "utils/general.hpp"
29 
30 static lg::log_domain log_config("config");
31 #define ERR_SWML LOG_STREAM(err, log_config)
32 #define LOG_SWML LOG_STREAM(info, log_config)
33 
34 namespace simple_wml {
35 
36 std::size_t document::document_size_limit = 40000000;
37 
38 namespace {
39 
40 void debug_delete(node* n) {
41  delete n;
42 }
43 
44 char* uncompress_buffer(const string_span& input, string_span* span)
45 {
46  int nalloc = input.size();
47  int state = 0;
48  try {
49  std::istringstream stream(std::string(input.begin(), input.end()));
50  state = 1;
51  boost::iostreams::filtering_stream<boost::iostreams::input> filter;
52  state = 2;
53  if (!span->empty() && *span->begin() == 'B') {
54  filter.push(boost::iostreams::bzip2_decompressor());
55  } else {
56  filter.push(boost::iostreams::gzip_decompressor());
57  }
58  filter.push(stream);
59  state = 3;
60 
61  const std::size_t chunk_size = input.size() * 10;
62  nalloc = chunk_size;
63  std::vector<char> buf(chunk_size);
64  state = 4;
65  std::size_t len = 0;
66  std::size_t pos = 0;
67  while(filter.good() && (len = filter.read(&buf[pos], chunk_size).gcount()) == chunk_size) {
68  if(pos + chunk_size > document::document_size_limit) {
69  throw error("WML document exceeded size limit during decompression");
70  }
71 
72  pos += len;
73  buf.resize(pos + chunk_size);
74  len = 0;
75  }
76 
77  if(!filter.eof() && !filter.good()) {
78  throw error("failed to uncompress");
79  }
80 
81  pos += len;
82  state = 5;
83  nalloc = pos;
84 
85  buf.resize(pos);
86  state = 6;
87 
88  char* small_out = new char[pos+1];
89  memcpy(small_out, &buf[0], pos);
90  state = 7;
91 
92  small_out[pos] = 0;
93 
94  *span = string_span(small_out, pos);
95  state = 8;
96  return small_out;
97  } catch (const std::bad_alloc& e) {
98  ERR_SWML << "ERROR: bad_alloc caught in uncompress_buffer() state "
99  << state << " alloc bytes " << nalloc << " with input: '"
100  << input << "' " << e.what();
101  throw error("Bad allocation request in uncompress_buffer().");
102  }
103 }
104 
105 char* compress_buffer(const char* input, string_span* span, bool bzip2)
106 {
107  int nalloc = strlen(input);
108  int state = 0;
109  try {
110  std::string in(input);
111  state = 1;
112  std::istringstream istream(in);
113  state = 2;
114  boost::iostreams::filtering_stream<boost::iostreams::output> filter;
115  state = 3;
116  if (bzip2) {
117  filter.push(boost::iostreams::bzip2_compressor());
118  } else {
119  filter.push(boost::iostreams::gzip_compressor());
120  }
121  state = 4;
122  nalloc = in.size()*2 + 80;
123  std::vector<char> buf(nalloc);
124  boost::iostreams::array_sink out(&buf[0], buf.size());
125  filter.push(boost::iostreams::counter());
126  filter.push(out);
127 
128  state = 5;
129 
130  boost::iostreams::copy(istream, filter, buf.size());
131  const int len = filter.component<boost::iostreams::counter>(1)->characters();
132  assert(len < 128*1024*1024);
133  if((!filter.eof() && !filter.good()) || len == static_cast<int>(buf.size())) {
134  throw error("failed to compress");
135  }
136  state = 6;
137  nalloc = len;
138 
139  buf.resize(len);
140  state = 7;
141 
142  char* small_out = new char[len];
143  memcpy(small_out, &buf[0], len);
144  state = 8;
145 
146  *span = string_span(small_out, len);
147  assert(*small_out == (bzip2 ? 'B' : 31));
148  state = 9;
149  return small_out;
150  } catch (const std::bad_alloc& e) {
151  ERR_SWML << "ERROR: bad_alloc caught in compress_buffer() state "
152  << state << " alloc bytes " << nalloc << " with input: '"
153  << input << "' " << e.what();
154  throw error("Bad allocation request in compress_buffer().");
155  }
156 }
157 
158 } // namespace
159 
160 bool string_span::to_bool(bool default_value) const
161 {
162  if(empty()) {
163  return default_value;
164  }
165 
166  if (operator==("no") || operator==("off") || operator==("false") || operator==("0") || operator==("0.0"))
167  return false;
168 
169  return true;
170 }
171 
173 {
174  const int buf_size = 64;
175  if(size() >= buf_size) {
176  return 0;
177  }
178  char buf[64];
179  memcpy(buf, begin(), size());
180  buf[size()] = 0;
181  return atoi(buf);
182 }
183 
184 std::string string_span::to_string() const
185 {
186  return std::string(begin(), end());
187 }
188 
190 {
191  char* buf = new char[size() + 1];
192  memcpy(buf, begin(), size());
193  buf[size()] = 0;
194  return buf;
195 }
196 
197 error::error(const char* msg)
198  : game::error(msg)
199 {
200  ERR_SWML << "ERROR: '" << msg << "'";
201 }
202 
203 std::ostream& operator<<(std::ostream& o, const string_span& s)
204 {
205  o << std::string(s.begin(), s.end());
206  return o;
207 }
208 
209 node::node(document& doc, node* parent) :
210  doc_(&doc),
211  attr_(),
212  parent_(parent),
213  children_(),
214  ordered_children_(),
215  output_cache_()
216 {
217 }
218 
219 #ifdef _MSC_VER
220 #pragma warning (push)
221 #pragma warning (disable: 4706)
222 #endif
/**
 * Parsing constructor: builds this node, and recursively its children, from
 * WML text.
 *
 * @param doc     Document the node belongs to.
 * @param parent  Parent node, as stored in parent_.
 * @param str     In/out cursor into NUL-terminated text. On return it points
 *                just past this node's closing tag, or at the terminating NUL.
 * @param depth   Nesting depth; 1000 or more is rejected.
 * @throws error  On malformed input (see the individual messages below).
 *
 * Attribute names, values and child names are stored as spans pointing into
 * the parsed text rather than as copies.
 * NOTE(review): source lines 259, 265 and 363 are absent from this listing.
 */
223 node::node(document& doc, node* parent, const char** str, int depth) :
224  doc_(&doc),
225  attr_(),
226  parent_(parent),
227  children_(),
228  ordered_children_(),
229  output_cache_()
230 {
231  if(depth >= 1000) {
232  throw error("elements nested too deep");
233  }
234 
	// s aliases the caller's cursor, so advancing it is visible to the parent parse.
235  const char*& s = *str;
236 
237  const char* const begin = s;
238  while(*s) {
239  switch(*s) {
	// '[' starts either this node's closing tag or a child element.
240  case '[': {
241  if(s[1] == '/') {
	// Closing tag: cache the text consumed so far as this node's output, skip past ']' and finish.
242  output_cache_ = string_span(begin, s - begin);
243  s = strchr(s, ']');
244  if(s == nullptr) {
245  throw error("end element unterminated");
246  }
247 
248  ++s;
249  return;
250  }
251 
252  ++s;
253  const char* end = strchr(s, ']');
254  if(end == nullptr) {
255  throw error("unterminated element");
256  }
257 
	// Find or create the child list for this tag name.
258  const int list_index = get_children(string_span(s, end - s));
260 
261  s = end + 1;
262 
	// Parse the child recursively; it advances the shared cursor past its own closing tag.
263  children_[list_index].second.push_back(new node(doc, this, str, depth+1));
264  ordered_children_.emplace_back(list_index, children_[list_index].second.size() - 1);
266 
267  break;
268  }
	// Whitespace between items is skipped.
269  case ' ':
270  case '\t':
271  case '\n':
272  ++s;
273  break;
	// '#' line: jump to the newline, which the whitespace case then consumes.
274  case '#':
275  s = strchr(s, '\n');
276  if(s == nullptr) {
277  throw error("did not find newline after '#'");
278  }
279  break;
	// Anything else is an attribute of the form name=value.
280  default: {
281  const char* end = strchr(s, '=');
282  if(end == nullptr) {
283  ERR_SWML << "attribute: " << s;
284  throw error("did not find '=' after attribute");
285  }
286 
287  string_span name(s, end - s);
288  s = end + 1;
	// A leading '_' before the value is skipped up to the opening quote.
289  if(*s == '_') {
290  s = strchr(s, '"');
291  if(s == nullptr) {
292  throw error("did not find '\"' after '_'");
293  }
294  }
295 
	// Unquoted value: runs to the end of the line and must not contain quotes.
296  if (*s != '"') {
297  end = strchr(s, '\n');
298  if (!end) {
299  ERR_SWML << "ATTR: '" << name << "' (((" << s << ")))";
300  throw error("did not find end of attribute");
301  }
302  if (memchr(s, '"', end - s))
303  throw error("found stray quotes in unquoted value");
	// Skips the ++s below: there is no opening quote to step over.
304  goto read_attribute;
305  }
	// Quoted value: find the closing quote, following '+' concatenations across lines.
306  end = s;
307  while(true)
308  {
309  // Read until the first single double quote.
	// A doubled quote ("") is stepped over as part of the value.
310  while((end = strchr(end+1, '"')) && end[1] == '"') {
311 #ifdef _MSC_VER
312 #pragma warning (pop)
313 #endif
314  ++end;
315  }
316  if(end == nullptr)
317  throw error("did not find end of attribute");
318 
319  // Stop if newline.
320  const char *endline = end + 1;
321  while (*endline == ' ') ++endline;
322  if (*endline == '\n') break;
323 
324  // Read concatenation marker.
325  if (*(endline++) != '+')
326  throw error("did not find newline after end of attribute");
327  if (*(endline++) != '\n')
328  throw error("did not find newline after '+'");
329 
330  // Read textdomain marker.
331  if (*endline == '#') {
332  endline = strchr(endline + 1, '\n');
333  if (!endline)
334  throw error("did not find newline after '#'");
335  ++endline;
336  }
337 
338  // Read indentation and start of string.
339  while (*endline == '\t') ++endline;
340  if (*endline == '_') ++endline;
341  if (*endline != '"')
342  throw error("did not find quotes after '+'");
	// Continue scanning from the opening quote of the next string piece.
343  end = endline;
344  }
345 
	// Step over the opening quote so the value starts at its first character.
346  ++s;
347 
348  read_attribute:
349  string_span value(s, end - s);
	// Keys must arrive in strictly increasing order; the attribute lookups rely on attr_ being sorted.
350  if(attr_.empty() == false && !(attr_.back().key < name)) {
351  ERR_SWML << "attributes: '" << attr_.back().key << "' < '" << name << "'";
352  throw error("attributes not in order");
353  }
354 
355  s = end + 1;
356 
357  attr_.emplace_back(name, value);
358  }
359  }
360  }
361 
	// Reached the end of the text without a closing tag: cache everything consumed.
362  output_cache_ = string_span(begin, s - begin);
364 }
365 
367 {
368  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
369  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
370  debug_delete(*j);
371  }
372  }
373 }
374 
375 namespace {
376 struct string_span_pair_comparer
377 {
378  bool operator()(const string_span& a, const node::attribute& b) const {
379  return a < b.key;
380  }
381 
382  bool operator()(const node::attribute& a, const string_span& b) const {
383  return a.key < b;
384  }
385 
386  bool operator()(const node::attribute& a,
387  const node::attribute& b) const {
388  return a.key < b.key;
389  }
390 };
391 }
392 
393 const string_span& node::operator[](const char* key) const
394 {
395  static string_span empty("");
396  string_span span(key);
397  auto range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
398  if(range.first != range.second) {
399  return range.first->value;
400  }
401 
402  return empty;
403 }
404 
405 bool node::has_attr(const char* key) const
406 {
407  string_span span(key);
408  auto range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
409  return range.first != range.second;
410 }
411 
412 node& node::set_attr(const char* key, const char* value)
413 {
414  set_dirty();
415 
416  string_span span(key);
417  auto range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
418  if(range.first != range.second) {
419  range.first->value = string_span(value);
420  } else {
421  attr_.insert(range.first, attribute(span, string_span(value)));
422  }
423 
424  return *this;
425 }
426 
427 node& node::set_attr_dup(const char* key, const char* value)
428 {
429  return set_attr(key, doc_->dup_string(value));
430 }
431 
/// Sets attribute @a key to a copy of @a value made with
/// string_span::duplicate().
/// NOTE(review): source line 435 is absent from this listing; presumably it
/// hands buf to the document (cf. take_ownership_of_buffer in copy_into) -
/// confirm against the real source.
432 node& node::set_attr_dup(const char* key, const string_span& value)
433 {
434  char* buf = value.duplicate();
436  return set_attr(key, buf);
437 }
438 
439 node& node::set_attr_esc(const char* key, string_span value)
440 {
441  return set_attr(key, doc_->esc_string(value));
442 }
443 
444 node& node::set_attr_int(const char* key, int value)
445 {
446  std::string temp = std::to_string(value);
447  return set_attr_dup(key, temp.c_str());
448 }
449 
/**
 * Inserts a new, empty child called @a name at position @a index among the
 * children of that name, marking the node dirty.
 *
 * @param name   Child tag name.
 * @param index  Position within the same-named children; values past the end
 *               are clamped, so the child is appended.
 * @return       The newly created child.
 *
 * NOTE(review): source lines 460 and 464 are absent from this listing.
 */
450 node& node::add_child_at(const char* name, std::size_t index)
451 {
452  set_dirty();
453 
454  const int list_index = get_children(name);
455  child_list& list = children_[list_index].second;
456  if(index > list.size()) {
457  index = list.size();
458  }
459 
461  list.insert(list.begin() + index, new node(*doc_, this));
	// Keep ordered_children_ in step with the insertion.
462  insert_ordered_child(list_index, index);
463 
465  return *list[index];
466 }
467 
468 
/**
 * Appends a new, empty child called @a name, marking the node dirty.
 * The child goes at the end of both its same-named list and the overall
 * child order (ordered_children_).
 *
 * @return The newly created child.
 *
 * NOTE(review): source lines 474 and 478 are absent from this listing.
 */
469 node& node::add_child(const char* name)
470 {
471  set_dirty();
472 
473  const int list_index = get_children(name);
475  child_list& list = children_[list_index].second;
476  list.push_back(new node(*doc_, this));
477  ordered_children_.emplace_back(list_index, list.size() - 1);
479  return *list.back();
480 }
481 
/**
 * Removes and deletes the @a index-th child called @a name.
 *
 * Does nothing (beyond marking the node dirty) when there is no child list of
 * that name or @a index is out of range. When the last child of a name is
 * removed, its list is erased from children_ and the indices stored in
 * ordered_children_ are adjusted.
 *
 * NOTE(review): source line 487, which declares itor, is absent from this
 * listing; presumably it is a lookup of name in children_ - confirm.
 */
482 void node::remove_child(const string_span& name, std::size_t index)
483 {
484  set_dirty();
485 
486  //if we don't already have a vector for this item we don't want to add one.
488  if(itor == children_.end()) {
489  return;
490  }
491 
492  child_list& list = itor->second;
493  if(index >= list.size()) {
494  return;
495  }
496 
	// Drop the ordering entry before the child itself is removed.
497  remove_ordered_child(std::distance(children_.begin(), itor), index);
498 
499  debug_delete(list[index]);
500  list.erase(list.begin() + index);
501 
502  if(list.empty()) {
	// Renumber ordering entries that refer to lists after this one, then drop the empty list.
503  remove_ordered_child_list(std::distance(children_.begin(), itor));
504  children_.erase(itor);
505  }
506 }
507 
/**
 * Records in ordered_children_ that a child was inserted at
 * (@a child_map_index, @a child_list_index).
 *
 * Entries of the same list at or after that position have their list index
 * incremented. The new entry is placed immediately before the entry that
 * previously held the position, or appended when there was none.
 *
 * NOTE(review): source line 511, which declares the iterator i, is absent
 * from this listing.
 */
508 void node::insert_ordered_child(int child_map_index, int child_list_index)
509 {
510  bool inserted = false;
512  while(i != ordered_children_.end()) {
513  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
514  i->child_list_index++;
515  } else if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
516  inserted = true;
517  i->child_list_index++;
	// insert() returns an iterator to the new entry; step over it so the shared ++i below skips the displaced one.
518  i = ordered_children_.insert(i, node_pos(child_map_index, child_list_index));
519  ++i;
520  }
521 
522  ++i;
523  }
524 
525  if(!inserted) {
526  ordered_children_.emplace_back(child_map_index, child_list_index);
527  }
528 }
529 
/**
 * Removes the ordered_children_ entry for (@a child_map_index,
 * @a child_list_index) and decrements the list index of later entries of the
 * same list. Asserts that exactly one entry was erased.
 *
 * NOTE(review): source line 533, which declares the iterator i, is absent
 * from this listing.
 */
530 void node::remove_ordered_child(int child_map_index, int child_list_index)
531 {
532  int erase_count = 0;
534  while(i != ordered_children_.end()) {
535  if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
	// erase() returns the next entry, so i is not advanced on this path.
536  i = ordered_children_.erase(i);
537  ++erase_count;
538  } else {
539  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
540  i->child_list_index--;
541  }
542  ++i;
543  }
544  }
545 
546  assert(erase_count == 1);
547 }
548 
/**
 * Adjusts ordered_children_ for the removal of the child list at
 * @a child_map_index: entries referring to later lists have their map index
 * decremented.
 *
 * An entry that still refers to the removed list is treated as a bug
 * (assert(false)); when asserts are compiled out it is erased.
 *
 * NOTE(review): source line 551, which declares the iterator i, is absent
 * from this listing.
 */
549 void node::remove_ordered_child_list(int child_map_index)
550 {
552  while(i != ordered_children_.end()) {
553  if(i->child_map_index == child_map_index) {
554  assert(false);
555  i = ordered_children_.erase(i);
556  } else {
557  if(i->child_map_index > child_map_index) {
558  i->child_map_index--;
559  }
560 
561  ++i;
562  }
563  }
564 }
565 
567 {
568 // only define this symbol in debug mode to work out child ordering.
569 #ifdef CHECK_ORDERED_CHILDREN
570  std::vector<node_pos>::const_iterator i = ordered_children_.begin();
571  while(i != ordered_children_.end()) {
572  assert(i->child_map_index < children_.size());
573  assert(i->child_list_index < children_[i->child_map_index].second.size());
574  ++i;
575  }
576 
577  for(child_map::const_iterator j = children_.begin(); j != children_.end(); ++j) {
578  const unsigned short child_map_index = j - children_.begin();
579  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
580  const unsigned short child_list_index = k - j->second.begin();
581  bool found = false;
582  for(int n = 0; n != ordered_children_.size(); ++n) {
583  if(ordered_children_[n].child_map_index == child_map_index &&
584  ordered_children_[n].child_list_index == child_list_index) {
585  found = true;
586  break;
587  }
588  }
589 
590  assert(found);
591  }
592  }
593 #endif // CHECK_ORDERED_CHILDREN
594 }
595 
/// Overload of remove_child() taking the child name as a C string.
/// NOTE(review): the body (source line 598) is absent from this listing;
/// presumably it forwards to the string_span overload - confirm.
596 void node::remove_child(const char* name, std::size_t index)
597 {
599 }
600 
601 node* node::child(const char* name)
602 {
603  for(auto& [key, node_list] : children_) {
604  if(key == name) {
605  assert(node_list.empty() == false);
606  return node_list.front();
607  }
608  }
609 
610  return nullptr;
611 }
612 
613 const node* node::child(const char* name) const
614 {
615  for(const auto& [key, node_list] : children_) {
616  if(key == name) {
617  if(node_list.empty()) {
618  return nullptr;
619  } else {
620  return node_list.front();
621  }
622  }
623  }
624 
625  return nullptr;
626 }
627 
628 node& node::child_or_add(const char* name)
629 {
630  if(node* res = child(name)) {
631  return *res;
632  }
633  return add_child(name);
634 }
635 
636 const node::child_list& node::children(const char* name) const
637 {
638  for(const auto& [key, node_list] : children_) {
639  if(key == name) {
640  return node_list;
641  }
642  }
643 
644  static const node::child_list empty;
645  return empty;
646 }
647 
648 int node::get_children(const char* name)
649 {
650  return get_children(string_span(name));
651 }
652 
654 {
655  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
656  if(i->first == name) {
657  return std::distance(children_.begin(), i);
658  }
659  }
660 
661  children_.emplace_back(string_span(name), child_list());
662  return children_.size() - 1;
663 }
664 
665 node::child_map::const_iterator node::find_in_map(const child_map& m, const string_span& attr)
666 {
667  child_map::const_iterator i = m.begin();
668  for(; i != m.end(); ++i) {
669  if(i->first == attr) {
670  break;
671  }
672  }
673 
674  return i;
675 }
676 
678 {
679  child_map::iterator i = m.begin();
680  for(; i != m.end(); ++i) {
681  if(i->first == attr) {
682  break;
683  }
684  }
685 
686  return i;
687 }
688 
690 {
691  if(children_.empty()) {
692  static const string_span empty;
693  return empty;
694  }
695 
696  return children_.begin()->first;
697 }
698 
/**
 * Returns the number of bytes output() writes for this node.
 *
 * A non-empty cached serialization determines the size directly. Otherwise
 * the size is summed from the attributes and, recursively, the children.
 *
 * NOTE(review): source line 701 is absent from this listing.
 */
699 int node::output_size() const
700 {
702  if(output_cache_.empty() == false) {
703  return output_cache_.size();
704  }
705 
706  int res = 0;
707  for(const auto& [key, value] : attr_) {
	// +4 covers the '=', the two quotes and the newline that output() writes.
708  res += key.size() + value.size() + 4;
709  }
710 
711  std::size_t count_children = 0;
712  for(const auto& [key, node_list] : children_) {
713  for(const simple_wml::node* child_node : node_list) {
	// Opening "[name]\n" and closing "[/name]\n" tags: the name twice plus 7 fixed characters.
714  res += key.size() * 2 + 7;
715  res += child_node->output_size();
716  ++count_children;
717  }
718  }
719 
	// Every child must also appear exactly once in the ordering vector.
720  assert(count_children == ordered_children_.size());
721 
722  return res;
723 }
724 
/**
 * Moves every attribute key, attribute value and child name span in this
 * subtree by @a offset bytes, keeping each span's length.
 * Presumably used after the text the spans point into has been relocated -
 * confirm against callers.
 *
 * NOTE(review): source line 728 (the body of the output_cache_ branch) is
 * absent from this listing.
 */
725 void node::shift_buffers(ptrdiff_t offset)
726 {
727  if(!output_cache_.empty()) {
729  }
730 
731  for(auto& [key, value] : attr_) {
732  key = string_span(key.begin() + offset, key.size());
733  value = string_span(value.begin() + offset, value.size());
734  }
735 
736  for(auto& [key, node_list] : children_) {
737  key = string_span(key.begin() + offset, key.size());
738  for(simple_wml::node* child_node : node_list) {
739  child_node->shift_buffers(offset);
740  }
741  }
742 }
743 
/**
 * Serializes this node (attributes first, then children in document order)
 * into @a buf and advances @a buf past the bytes written.
 *
 * @param buf           Write cursor; output_size() bytes are written.
 * @param cache_status  With REFRESH_CACHE, the node's spans are re-pointed at
 *                      the freshly written text and output_cache_ is set to
 *                      the region written for this node.
 *
 * NOTE(review): source line 749 (the REFRESH_CACHE branch of the cached path)
 * is absent from this listing.
 */
744 void node::output(char*& buf, CACHE_STATUS cache_status)
745 {
	// Fast path: a cached serialization is copied out verbatim.
746  if(output_cache_.empty() == false) {
747  memcpy(buf, output_cache_.begin(), output_cache_.size());
748  if(cache_status == REFRESH_CACHE) {
750  }
751  buf += output_cache_.size();
752  return;
753  }
754 
755  char* begin = buf;
756 
	// Attributes are written as key="value" followed by a newline.
757  for(auto& [key, value] : attr_) {
758  memcpy(buf, key.begin(), key.size());
759  if(cache_status == REFRESH_CACHE) {
760  key = string_span(buf, key.size());
761  }
762  buf += key.size();
763  *buf++ = '=';
764  *buf++ = '"';
765  memcpy(buf, value.begin(), value.size());
766  if(cache_status == REFRESH_CACHE) {
767  value = string_span(buf, value.size());
768  }
769  buf += value.size();
770  *buf++ = '"';
771  *buf++ = '\n';
772  }
773 
	// Children are written in the order recorded in ordered_children_, each wrapped in [name] ... [/name].
774  for(const node::node_pos& pos : ordered_children_) {
775  assert(pos.child_map_index < children_.size());
776  assert(pos.child_list_index < children_[pos.child_map_index].second.size());
777  string_span& attr = children_[pos.child_map_index].first;
778  *buf++ = '[';
779  memcpy(buf, attr.begin(), attr.size());
780  if(cache_status == REFRESH_CACHE) {
781  attr = string_span(buf, attr.size());
782  }
783  buf += attr.size();
784  *buf++ = ']';
785  *buf++ = '\n';
786  children_[pos.child_map_index].second[pos.child_list_index]->output(buf, cache_status);
787  *buf++ = '[';
788  *buf++ = '/';
789  memcpy(buf, attr.begin(), attr.size());
790  buf += attr.size();
791  *buf++ = ']';
792  *buf++ = '\n';
793  }
794 
795  if(cache_status == REFRESH_CACHE) {
796  output_cache_ = string_span(begin, buf - begin);
797  }
798 }
799 
800 std::string node_to_string(const node& n)
801 {
802  //calling output with status=DO_NOT_MODIFY_CACHE really doesn't modify the
803  //node, so we can do it safely
804  node& mutable_node = const_cast<node&>(n);
805  std::vector<char> v(mutable_node.output_size());
806  char* ptr = &v[0];
807  mutable_node.output(ptr, node::DO_NOT_MODIFY_CACHE);
808  assert(ptr == &v[0] + v.size());
809  return std::string(v.begin(), v.end());
810 }
811 
812 void node::copy_into(node& n) const
813 {
814  n.set_dirty();
815  for(const node::attribute& a : attr_) {
816  char* key = a.key.duplicate();
817  char* value = a.value.duplicate();
818  n.doc_->take_ownership_of_buffer(key);
819  n.doc_->take_ownership_of_buffer(value);
820  n.set_attr(key, value);
821  }
822 
823  for(const node::node_pos& pos : ordered_children_) {
824  assert(pos.child_map_index < children_.size());
825  assert(pos.child_list_index < children_[pos.child_map_index].second.size());
826  char* buf = children_[pos.child_map_index].first.duplicate();
827  n.doc_->take_ownership_of_buffer(buf);
828  children_[pos.child_map_index].second[pos.child_list_index]->copy_into(n.add_child(buf));
829  }
830 }
831 
/**
 * Applies @a diff to this node, marking it dirty.
 *
 * The diff is processed in this order:
 *  - attributes of its [insert] child are set on this node;
 *  - attributes named in its [delete] child are removed;
 *  - each [change_child] applies a nested diff to the child at "index";
 *  - each [insert_child] inserts a copy of its children at "index";
 *  - each [delete_child] removes the child at "index" for every child name it
 *    contains.
 *
 * NOTE(review): source lines 841, 842, 861 and 876 are absent from this
 * listing.
 */
832 void node::apply_diff(const node& diff)
833 {
834  set_dirty();
835  const node* inserts = diff.child("insert");
836  if(inserts != nullptr) {
837  for(const node::attribute& a : inserts->attr_) {
	// The diff's text may not outlive this node, so key and value are duplicated.
	// NOTE(review): presumably the missing lines 841-842 hand both copies to the document - confirm.
838  char* name = a.key.duplicate();
839  char* value = a.value.duplicate();
840  set_attr(name, value);
843  }
844  }
845 
846  const node* deletes = diff.child("delete");
847  if(deletes != nullptr) {
848  for(const node::attribute& a : deletes->attr_) {
849  std::pair<attribute_list::iterator,
850  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), a.key, string_span_pair_comparer());
851  if(range.first != range.second) {
852  attr_.erase(range.first);
853  }
854  }
855  }
856 
857  for(const simple_wml::node* node : diff.children("change_child")) {
858  const std::size_t index = (*node)["index"].to_int();
859  for(const auto& [name, node_list] : node->children_) {
860  for(const simple_wml::node* child_node : node_list) {
	// NOTE(review): itor is declared on the missing line 861, presumably a lookup of name in children_.
	// Unknown names and out-of-range indices are silently ignored.
862  if(itor != children_.end()) {
863  if(index < itor->second.size()) {
864  itor->second[index]->apply_diff(*child_node);
865  }
866  }
867  }
868  }
869  }
870 
871  for(const simple_wml::node* node : diff.children("insert_child")) {
872  const std::size_t index = (*node)["index"].to_int();
873  for(const auto& [name, node_list] : node->children_) {
874  for(const simple_wml::node* child_node : node_list) {
875  char* buf = name.duplicate();
877  child_node->copy_into(add_child_at(buf, index));
878  }
879  }
880  }
881 
882  for(const simple_wml::node* node : diff.children("delete_child")) {
883  const std::size_t index = (*node)["index"].to_int();
884  for(const auto& [name, node_list] : node->children_) {
885  if(node_list.empty()) {
886  continue;
887  }
888 
889  remove_child(name, index);
890  }
891  }
892 }
893 
895 {
896  doc_ = doc;
897 
898  for(const auto& [_, node_list] : children_) {
899  for(simple_wml::node* child_node : node_list) {
900  child_node->set_doc(doc);
901  }
902  }
903 }
904 
905 int node::nchildren() const
906 {
907  int res = 0;
908  for(const auto& [_, node_list] : children_) {
909  for(const simple_wml::node* child_node : node_list) {
910  ++res;
911  res += child_node->nchildren();
912  }
913  }
914 
915  return res;
916 }
917 
919 {
920  int res = attr_.capacity();
921  for(const auto& [_, node_list] : children_) {
922  for(const simple_wml::node* child_node : node_list) {
923  res += child_node->nattributes_recursive();
924  }
925  }
926 
927  return res;
928 }
929 
931 {
932  for(node* n = this; n != nullptr && n->output_cache_.is_null() == false; n = n->parent_) {
933  n->output_cache_ = string_span();
934  }
935 }
936 
938  compressed_buf_(),
939  output_(nullptr),
940  buffers_(),
941  root_(new node(*this, nullptr)),
942  prev_(nullptr),
943  next_(nullptr)
944 {
945  attach_list();
946 }
947 
949  compressed_buf_(),
950  output_(buf),
951  buffers_(),
952  root_(nullptr),
953  prev_(nullptr),
954  next_(nullptr)
955 {
956  if(control == INIT_TAKE_OWNERSHIP) {
957  buffers_.push_back(buf);
958  }
959  const char* cbuf = buf;
960  root_ = new node(*this, nullptr, &cbuf);
961 
962  attach_list();
963 }
964 
/**
 * Constructs a document over the caller's buffer @a buf.
 *
 * With INIT_COMPRESSED no node tree is built and output_ is reset to null.
 * Otherwise @a buf is parsed as WML text into root_ and output_ keeps pointing
 * at it. The visible code does not add @a buf to buffers_. In both cases the
 * document is then registered via attach_list().
 *
 * NOTE(review): source line 974 (first statement of the INIT_COMPRESSED
 * branch) is absent from this listing; presumably it records buf as the
 * compressed buffer - confirm.
 */
965 document::document(const char* buf, INIT_STATE state) :
966  compressed_buf_(),
967  output_(buf),
968  buffers_(),
969  root_(nullptr),
970  prev_(nullptr),
971  next_(nullptr)
972 {
973  if(state == INIT_COMPRESSED) {
975  output_ = nullptr;
976  } else {
977  root_ = new node(*this, nullptr, &buf);
978  }
979 
980  attach_list();
981 }
982 
984  compressed_buf_(compressed_buf),
985  output_(nullptr),
986  buffers_(),
987  root_(nullptr),
988  prev_(nullptr),
989  next_(nullptr)
990 {
991  string_span uncompressed_buf;
992  buffers_.push_back(uncompress_buffer(compressed_buf, &uncompressed_buf));
993  output_ = uncompressed_buf.begin();
994  const char* cbuf = output_;
995  try {
996  root_ = new node(*this, nullptr, &cbuf);
997  } catch(...) {
998  ERR_SWML << "Caught exception creating a new simple_wml node: " << utils::get_unknown_exception_type();
999  delete [] buffers_.front();
1000  buffers_.clear();
1001  throw;
1002  }
1003 
1004  attach_list();
1005 }
1006 
1008 {
1009  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1010  delete [] *i;
1011  }
1012 
1013  buffers_.clear();
1014  debug_delete(root_);
1015 
1016  detach_list();
1017 }
1018 
1019 const char* document::dup_string(const char* str)
1020 {
1021  const int len = strlen(str);
1022  char* res = new char[len+1];
1023  memcpy(res, str, len + 1);
1024  buffers_.push_back(res);
1025  return res;
1026 }
1027 
1029 {
1030  char* res = string_span(utils::wml_escape_string(str)).duplicate();
1031  buffers_.push_back(res);
1032  return res;
1033 }
1034 
/**
 * Returns the document as NUL-terminated WML text.
 *
 * - Existing text is returned when there is no node tree or the tree is not dirty.
 * - Without a node tree, the text is produced by decompressing compressed_buf_.
 * - Otherwise the tree is serialized into a newly allocated buffer, which
 *   replaces all buffers previously held in buffers_.
 *
 * @throws error If the output buffer cannot be allocated, or decompression fails.
 *
 * NOTE(review): source lines 1049 and 1066 are absent from this listing;
 * presumably 1049 clears compressed_buf_ and 1066 serializes root_ into buf -
 * confirm.
 */
1035 const char* document::output()
1036 {
1037  if(output_ && (!root_ || root_->is_dirty() == false)) {
1038  return output_;
1039  }
1040  if(!root_) {
1041  assert(compressed_buf_.empty() == false);
1042  string_span uncompressed_buf;
1043  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1044  output_ = uncompressed_buf.begin();
1045  return output_;
1046  }
1047 
1048  //we're dirty, so the compressed buf must also be dirty; clear it.
1050 
	// Set the old buffers aside; they are freed only after the new text has been written.
1051  std::vector<char*> bufs;
1052  bufs.swap(buffers_);
1053 
	// One extra byte for the terminating NUL.
1054  const int buf_size = root_->output_size() + 1;
1055  char* buf;
1056  try {
1057  buf = new char[buf_size];
1058  } catch (const std::bad_alloc& e) {
1059  ERR_SWML << "ERROR: Trying to allocate " << buf_size << " bytes. "
1060  << e.what();
1061  throw error("Bad allocation request in output().");
1062  }
1063  buffers_.push_back(buf);
1064  output_ = buf;
1065 
1067  *buf++ = 0;
	// The write cursor must land exactly at the end of the allocation.
1068  assert(buf == output_ + buf_size);
1069 
1070  for(std::vector<char*>::iterator i = bufs.begin(); i != bufs.end(); ++i) {
1071  delete [] *i;
1072  }
1073 
1074  bufs.clear();
1075 
1076  return output_;
1077 }
1078 
1080 {
1081  if(compressed_buf_.empty() == false &&
1082  (root_ == nullptr || root_->is_dirty() == false)) {
1083  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1084  return compressed_buf_;
1085  }
1086 
1087  buffers_.push_back(compress_buffer(output(), &compressed_buf_, bzip2));
1088  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1089 
1090  return compressed_buf_;
1091 }
1092 
1094 {
1096  debug_delete(root_);
1097  root_ = nullptr;
1098  output_ = nullptr;
1099  std::vector<char*> new_buffers;
1100  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1101  if(*i != compressed_buf_.begin()) {
1102  delete [] *i;
1103  } else {
1104  new_buffers.push_back(*i);
1105  }
1106  }
1107 
1108  buffers_.swap(new_buffers);
1109  assert(buffers_.size() == 1);
1110 }
1111 
1113 {
1114  if(output_ == nullptr) {
1115  assert(compressed_buf_.empty() == false);
1116  string_span uncompressed_buf;
1117  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1118  output_ = uncompressed_buf.begin();
1119  }
1120 
1121  assert(root_ == nullptr);
1122  const char* cbuf = output_;
1123  root_ = new node(*this, nullptr, &cbuf);
1124 }
1125 
1126 std::unique_ptr<document> document::clone()
1127 {
1128  char* buf = new char[strlen(output())+1];
1129  strcpy(buf, output());
1130  return std::make_unique<document>(buf);
1131 }
1132 
1134 {
1137  buffers_.swap(o.buffers_);
1138  std::swap(root_, o.root_);
1139 
1140  root_->set_doc(this);
1141  o.root_->set_doc(&o);
1142 }
1143 
1145 {
1147  output_ = nullptr;
1148  debug_delete(root_);
1149  root_ = new node(*this, nullptr);
1150  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1151  delete [] *i;
1152  }
1153 
1154  buffers_.clear();
1155 }
1156 
1157 namespace {
// Head of the list of documents linked through document::next_ and prev_;
// document::stats() walks the list starting here.
1158 document* head_doc = nullptr;
1159 }
1160 
1162 {
1163  prev_ = nullptr;
1164  next_ = head_doc;
1165 
1166  if(next_) {
1167  next_->prev_ = this;
1168  }
1169  head_doc = this;
1170 }
1171 
1173 {
1174  if(head_doc == this) {
1175  head_doc = next_;
1176  }
1177 
1178  if(next_) {
1179  next_->prev_ = prev_;
1180  }
1181 
1182  if(prev_) {
1183  prev_->next_ = next_;
1184  }
1185  next_ = prev_ = nullptr;
1186 }
1187 
1188 std::string document::stats()
1189 {
1190  std::ostringstream s;
1191  int ndocs = 0;
1192  int ncompressed = 0;
1193  int compressed_size = 0;
1194  int ntext = 0;
1195  int text_size = 0;
1196  int nbuffers = 0;
1197  int nnodes = 0;
1198  int ndirty = 0;
1199  int nattributes = 0;
1200  for(document* d = head_doc; d != nullptr; d = d->next_) {
1201  ndocs++;
1202  nbuffers += d->buffers_.size();
1203 
1204  if(d->compressed_buf_.is_null() == false) {
1205  ++ncompressed;
1206  compressed_size += d->compressed_buf_.size();
1207  }
1208 
1209  if(d->output_) {
1210  ++ntext;
1211  text_size += strlen(d->output_);
1212  }
1213 
1214  if(d->root_) {
1215  nnodes += 1 + d->root_->nchildren();
1216  nattributes += d->root_->nattributes_recursive();
1217  }
1218 
1219  if(d->root_ && d->root_->is_dirty()) {
1220  ++ndirty;
1221  }
1222  }
1223 
1224  const int nodes_alloc = nnodes*(sizeof(node) + 12);
1225  const int attr_alloc = nattributes*(sizeof(string_span)*2);
1226  const int total_alloc = compressed_size + text_size + nodes_alloc + attr_alloc;
1227 
1228  s << "WML documents: " << ndocs << "\n"
1229  << "Dirty: " << ndirty << "\n"
1230  << "With compression: " << ncompressed << " (" << compressed_size
1231  << " bytes)\n"
1232  << "With text: " << ntext << " (" << text_size
1233  << " bytes)\n"
1234  << "Nodes: " << nnodes << " (" << nodes_alloc << " bytes)\n"
1235  << "Attr: " << nattributes << " (" << attr_alloc << " bytes)\n"
1236  << "Buffers: " << nbuffers << "\n"
1237  << "Total allocation: " << total_alloc << " bytes\n";
1238 
1239  return s.str();
1240 }
1241 
1242 void swap(document& lhs, document& rhs)
1243 {
1244  lhs.swap(rhs);
1245 }
1246 
1247 }
void take_ownership_of_buffer(char *buffer)
Definition: simple_wml.hpp:289
const char * output()
std::unique_ptr< document > clone()
static std::string stats()
static std::size_t document_size_limit
Definition: simple_wml.hpp:298
const char * esc_string(string_span str)
const char * output_
Definition: simple_wml.hpp:305
void swap(document &o)
std::vector< char * > buffers_
Definition: simple_wml.hpp:306
string_span output_compressed(bool bzip2=false)
const char * dup_string(const char *str)
string_span compressed_buf_
Definition: simple_wml.hpp:304
const string_span & attr(const char *key) const
Definition: simple_wml.hpp:132
int output_size() const
Definition: simple_wml.cpp:699
void set_doc(document *doc)
Definition: simple_wml.cpp:894
void insert_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:508
void check_ordered_children() const
Definition: simple_wml.cpp:566
void remove_child(const char *name, std::size_t index)
Definition: simple_wml.cpp:596
int nchildren() const
Definition: simple_wml.cpp:905
bool is_dirty() const
Definition: simple_wml.hpp:169
node(document &doc, node *parent)
Definition: simple_wml.cpp:209
int get_children(const string_span &name)
Definition: simple_wml.cpp:653
document * doc_
Definition: simple_wml.hpp:198
const string_span & operator[](const char *key) const
Definition: simple_wml.cpp:393
const child_list & children(const char *name) const
Definition: simple_wml.cpp:636
void apply_diff(const node &diff)
Definition: simple_wml.cpp:832
attribute_list attr_
Definition: simple_wml.hpp:201
bool has_attr(const char *key) const
Definition: simple_wml.cpp:405
node & set_attr_int(const char *key, int value)
Definition: simple_wml.cpp:444
std::vector< child_pair > child_map
Definition: simple_wml.hpp:206
node * child(const char *name)
Definition: simple_wml.cpp:601
std::vector< node * > child_list
Definition: simple_wml.hpp:129
void remove_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:549
int nattributes_recursive() const
Definition: simple_wml.cpp:918
std::vector< node_pos > ordered_children_
Definition: simple_wml.hpp:224
const string_span & first_child() const
Definition: simple_wml.cpp:689
child_map children_
Definition: simple_wml.hpp:210
void output(char *&buf, CACHE_STATUS status=DO_NOT_MODIFY_CACHE)
Definition: simple_wml.cpp:744
node & add_child(const char *name)
Definition: simple_wml.cpp:469
node & child_or_add(const char *name)
Definition: simple_wml.cpp:628
node & set_attr_esc(const char *key, string_span value)
As above but convert value to a WML value
Definition: simple_wml.cpp:439
node & set_attr(const char *key, const char *value)
Definition: simple_wml.cpp:412
string_span output_cache_
Definition: simple_wml.hpp:232
node & add_child_at(const char *name, std::size_t index)
Definition: simple_wml.cpp:450
void copy_into(node &n) const
Definition: simple_wml.cpp:812
node & set_attr_dup(const char *key, const char *value)
Definition: simple_wml.cpp:427
void remove_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:530
void shift_buffers(ptrdiff_t offset)
Definition: simple_wml.cpp:725
static child_map::const_iterator find_in_map(const child_map &m, const string_span &attr)
Definition: simple_wml.cpp:665
bool to_bool(bool default_value=false) const
Definition: simple_wml.cpp:160
std::string to_string() const
Definition: simple_wml.cpp:184
const char * begin() const
Definition: simple_wml.hpp:94
char * duplicate() const
Definition: simple_wml.cpp:189
const char * end() const
Definition: simple_wml.hpp:95
std::size_t i
Definition: function.cpp:1030
unsigned in
If equal to search_counter, the node is off the list.
static std::string _(const char *str)
Definition: gettext.hpp:97
Standard logging facilities (interface).
void swap(document &lhs, document &rhs)
Implement non-member swap function for std::swap (calls document::swap).
std::ostream & operator<<(std::ostream &o, const string_span &s)
Definition: simple_wml.cpp:203
std::string node_to_string(const node &n)
Definition: simple_wml.cpp:800
@ INIT_TAKE_OWNERSHIP
Definition: simple_wml.hpp:237
std::size_t index(std::string_view str, const std::size_t index)
Codepoint index corresponding to the nth character in a UTF-8 string.
Definition: unicode.cpp:70
constexpr auto filter
Definition: ranges.hpp:38
std::string get_unknown_exception_type()
Utility function for finding the type of thing caught with catch(...).
Definition: general.cpp:23
std::string wml_escape_string(std::string_view str)
Format str as a WML value
std::string::const_iterator iterator
Definition: tokenizer.hpp:25
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
#define ERR_SWML
Definition: simple_wml.cpp:31
static lg::log_domain log_config("config")
static map_location::direction n
static map_location::direction s
#define d
#define e
#define b