The Battle for Wesnoth  1.15.0-dev
simple_wml.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2008 - 2018 by David White <dave@whitevine.net>
3  Part of the Battle for Wesnoth Project https://www.wesnoth.org
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License 2
7  the Free Software Foundation; either version 2 of the License, or
8  (at your option) any later version.
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY.
11 
12  See the COPYING file for more details.
13 */
14 
15 #include <iostream>
16 #include <sstream>
17 
18 #include <boost/iostreams/copy.hpp>
19 #include <boost/iostreams/filtering_stream.hpp>
20 #include <boost/iostreams/filter/bzip2.hpp>
21 #include <boost/iostreams/filter/counter.hpp>
22 #include <boost/iostreams/filter/gzip.hpp>
23 
24 #include "server/simple_wml.hpp"
25 
26 #include "log.hpp"
27 
// Log domain ("config") that every diagnostic in this file is written to.
28 static lg::log_domain log_config("config");
// Shorthand for streaming an error-level message into that log domain.
29 #define ERR_SWML LOG_STREAM(err, log_config)
30 
31 namespace simple_wml {
32 
// Cap, in bytes, on decompressed data: uncompress_buffer() throws once the
// bytes read so far plus one more chunk would exceed this value.
33 std::size_t document::document_size_limit = 40000000;
34 
35 namespace {
36 
// Returns the marker that introduces a textdomain directive line
// (note the trailing space, which is part of the marker).
inline const char* textdomain_str()
{
	static const char marker[] = "#textdomain ";
	return marker;
}
// Returns the length of the "#textdomain " marker, excluding the NUL terminator.
inline size_t textdomain_str_size()
{
	static const char marker[] = "#textdomain ";
	// sizeof counts the terminating NUL of the literal, so drop it.
	return sizeof(marker) - 1;
}
45 
46 //for extra safety this is only used when reading the document not when writing.
47 string_span default_textdomain()
48 {
49  return string_span("wesnoth");
50 }
51 
52 
// Destroys a node. Node deletions in this file go through this helper;
// at present it only forwards to delete.
53 void debug_delete(node* n) {
54  delete n;
55 }
56 
57 char* uncompress_buffer(const string_span& input, string_span* span)
58 {
59  int nalloc = input.size();
60  int state = 0;
61  try {
62  std::istringstream stream(std::string(input.begin(), input.end()));
63  state = 1;
64  boost::iostreams::filtering_stream<boost::iostreams::input> filter;
65  state = 2;
66  if (!span->empty() && *span->begin() == 'B') {
67  filter.push(boost::iostreams::bzip2_decompressor());
68  } else {
69  filter.push(boost::iostreams::gzip_decompressor());
70  }
71  filter.push(stream);
72  state = 3;
73 
74  const std::size_t chunk_size = input.size() * 10;
75  nalloc = chunk_size;
76  std::vector<char> buf(chunk_size);
77  state = 4;
78  std::size_t len = 0;
79  std::size_t pos = 0;
80  while(filter.good() && (len = filter.read(&buf[pos], chunk_size).gcount()) == chunk_size) {
81  if(pos + chunk_size > document::document_size_limit) {
82  throw error("WML document exceeded size limit during decompression");
83  }
84 
85  pos += len;
86  buf.resize(pos + chunk_size);
87  len = 0;
88  }
89 
90  if(!filter.eof() && !filter.good()) {
91  throw error("failed to uncompress");
92  }
93 
94  pos += len;
95  state = 5;
96  nalloc = pos;
97 
98  buf.resize(pos);
99  state = 6;
100 
101  char* small_out = new char[pos+1];
102  memcpy(small_out, &buf[0], pos);
103  state = 7;
104 
105  small_out[pos] = 0;
106 
107  *span = string_span(small_out, pos);
108  state = 8;
109  return small_out;
110  } catch (const std::bad_alloc& e) {
111  ERR_SWML << "ERROR: bad_alloc caught in uncompress_buffer() state "
112  << state << " alloc bytes " << nalloc << " with input: '"
113  << input << "' " << e.what() << std::endl;
114  throw error("Bad allocation request in uncompress_buffer().");
115  }
116 }
117 
// Compresses the NUL-terminated text `input` with bzip2 (bzip2 == true) or
// gzip, stores a view of the result in *span and returns the new[]-allocated
// buffer backing that view. The result is binary data and is NOT
// NUL-terminated (exactly `len` bytes are allocated).
// Throws error when compression fails, when the output fills the scratch
// buffer completely, or when an allocation fails.
118 char* compress_buffer(const char* input, string_span* span, bool bzip2)
119 {
120  int nalloc = strlen(input);
// `state` and `nalloc` are only reported by the bad_alloc handler below.
121  int state = 0;
122  try {
123  std::string in(input);
124  state = 1;
125  std::istringstream istream(in);
126  state = 2;
127  boost::iostreams::filtering_stream<boost::iostreams::output> filter;
128  state = 3;
129  if (bzip2) {
130  filter.push(boost::iostreams::bzip2_compressor());
131  } else {
132  filter.push(boost::iostreams::gzip_compressor());
133  }
134  state = 4;
// Scratch buffer for the compressed output: twice the input size plus 80 bytes.
135  nalloc = in.size()*2 + 80;
136  std::vector<char> buf(nalloc);
137  boost::iostreams::array_sink out(&buf[0], buf.size());
// The counter is pushed after the compressor, so it is component 1 of the
// chain and sees the compressed bytes on their way to the sink.
138  filter.push(boost::iostreams::counter());
139  filter.push(out);
140 
141  state = 5;
142 
143  boost::iostreams::copy(istream, filter, buf.size());
144  const int len = filter.component<boost::iostreams::counter>(1)->characters();
145  assert(len < 128*1024*1024);
// A count equal to the scratch buffer size is treated as a failure.
146  if((!filter.eof() && !filter.good()) || len == static_cast<int>(buf.size())) {
147  throw error("failed to compress");
148  }
149  state = 6;
150  nalloc = len;
151 
152  buf.resize(len);
153  state = 7;
154 
155  char* small_out = new char[len];
156  memcpy(small_out, &buf[0], len);
157  state = 8;
158 
159  *span = string_span(small_out, len);
// Sanity check on the first output byte: 'B' for bzip2, 31 for gzip.
160  assert(*small_out == (bzip2 ? 'B' : 31));
161  state = 9;
162  return small_out;
163  } catch (const std::bad_alloc& e) {
164  ERR_SWML << "ERROR: bad_alloc caught in compress_buffer() state "
165  << state << " alloc bytes " << nalloc << " with input: '"
166  << input << "' " << e.what() << std::endl;
167  throw error("Bad allocation request in compress_buffer().");
168  }
169 }
170 
171 } // namespace
172 
173 bool string_span::to_bool(bool default_value) const
174 {
175  if(empty()) {
176  return default_value;
177  }
178 
179  if (operator==("no") || operator==("off") || operator==("false") || operator==("0") || operator==("0.0"))
180  return false;
181 
182  return true;
183 }
184 
186 {
187  const int buf_size = 64;
188  if(size() >= buf_size) {
189  return 0;
190  }
191  char buf[64];
192  memcpy(buf, begin(), size());
193  buf[size()] = 0;
194  return atoi(buf);
195 }
196 
197 std::string string_span::to_string() const
198 {
199  return std::string(begin(), end());
200 }
201 
203 {
204  char* buf = new char[size() + 1];
205  memcpy(buf, begin(), size());
206  buf[size()] = 0;
207  return buf;
208 }
209 
// Builds the exception from `msg` and also writes the message to the error
// log, so constructing an error always leaves a log line.
210 error::error(const char* msg)
211  : game::error(msg)
212 {
213  ERR_SWML << "ERROR: '" << msg << "'" << std::endl;
214 }
215 
216 std::ostream& operator<<(std::ostream& o, const string_span& s)
217 {
218  o << std::string(s.begin(), s.end());
219  return o;
220 }
221 
// Creates an empty node (no attributes, no children, no cached output)
// belonging to `doc`, with `parent` as its parent node.
222 node::node(document& doc, node* parent) :
223  doc_(&doc),
224  attr_(),
225  parent_(parent),
226  children_(),
227  ordered_children_(),
228  output_cache_()
229 {
230 }
231 
232 static void maybe_change_textdomain(const char* beginline, const char* endline, string_span& textdomain)
233 {
234  size_t size = endline - beginline;
235  if(size < textdomain_str_size()) {
236  return;
237  }
238  if(strncmp(beginline, textdomain_str(), textdomain_str_size()) != 0) {
239  return;
240  }
241  textdomain = string_span(beginline + textdomain_str_size(), endline - beginline - textdomain_str_size());
242 }
243 #ifdef _MSC_VER
244 #pragma warning (push)
245 #pragma warning (disable: 4706)
246 #endif
247 node::node(document& doc, node* parent, const char** str, int depth, string_span& textdomain) :
248  doc_(&doc),
249  attr_(),
250  parent_(parent),
251  children_(),
253  output_cache_()
254 {
255  if(depth >= 1000) {
256  throw error("elements nested too deep");
257  }
258 
259  const char*& s = *str;
260 
261  const char* const begin = s;
262  while(*s) {
263  switch(*s) {
264  case '[': {
265  if(s[1] == '/') {
266  output_cache_ = string_span(begin, s - begin);
267  s = strchr(s, ']');
268  if(s == nullptr) {
269  throw error("end element unterminated");
270  }
271 
272  ++s;
273  return;
274  }
275 
276  ++s;
277  const char* end = strchr(s, ']');
278  if(end == nullptr) {
279  throw error("unterminated element");
280  }
281 
282  const int list_index = get_children(string_span(s, end - s));
284 
285  s = end + 1;
286 
287  children_[list_index].second.push_back(new node(doc, this, str, depth+1, textdomain));
288  ordered_children_.emplace_back(list_index, children_[list_index].second.size() - 1);
290 
291  break;
292  }
293  case ' ':
294  case '\t':
295  case '\n':
296  ++s;
297  break;
298  case '#': {
299  const char * const endline = strchr(s, '\n');
300  if(endline == nullptr) {
301  throw error("did not find newline after '#'");
302  }
303  maybe_change_textdomain(s, endline, textdomain);
304  s = endline;
305  break;
306  }
307  default: {
308  const char* end = strchr(s, '=');
309  bool is_translatable = false;
310  if(end == nullptr) {
311  ERR_SWML << "attribute: " << s << std::endl;
312  throw error("did not find '=' after attribute");
313  }
314 
315  string_span name(s, end - s);
316  s = end + 1;
317  if(*s == '_') {
318  is_translatable = true;
319  s = strchr(s, '"');
320  if(s == nullptr) {
321  throw error("did not find '\"' after '_'");
322  }
323  }
324 
325  if (*s != '"') {
326  end = strchr(s, '\n');
327  if (!end) {
328  ERR_SWML << "ATTR: '" << name << "' (((" << s << ")))" << std::endl;
329  throw error("did not find end of attribute");
330  }
331  if (memchr(s, '"', end - s))
332  throw error("found stray quotes in unquoted value");
333  goto read_attribute;
334  }
335  end = s;
336  for(;;)
337  {
338  // Read until the first single double quote.
339  while((end = strchr(end+1, '"')) && end[1] == '"') {
340 #ifdef _MSC_VER
341 #pragma warning (pop)
342 #endif
343  ++end;
344  }
345  if(end == nullptr)
346  throw error("did not find end of attribute");
347 
348  // Stop if newline.
349  const char *endline = end + 1;
350  while (*endline == ' ') ++endline;
351  if (*endline == '\n') break;
352 
353  // Read concatenation marker.
354  if (*(endline++) != '+')
355  throw error("did not find newline after end of attribute");
356  if (*(endline++) != '\n')
357  throw error("did not find newline after '+'");
358 
359  // Read textdomain marker.
360  if (*endline == '#') {
361  const char* endline2 = strchr(endline + 1, '\n');
362  if (!endline2) {
363  throw error("did not find newline after '#'");
364  }
365  maybe_change_textdomain(endline, endline2, textdomain);
366  endline = endline2;
367  ++endline;
368  }
369 
370  // Read indentation and start of string.
371  while (*endline == '\t') ++endline;
372  if (*endline == '_') ++endline;
373  if (*endline != '"')
374  throw error("did not find quotes after '+'");
375  end = endline;
376  }
377 
378  ++s;
379 
380  read_attribute:
381  string_span value(s, end - s);
382  if(attr_.empty() == false && !(attr_.back().key < name)) {
383  ERR_SWML << "attributes: '" << attr_.back().key << "' < '" << name << "'" << std::endl;
384  throw error("attributes not in order");
385  }
386 
387  s = end + 1;
388  if(is_translatable)
389  attr_.emplace_back(name, value, textdomain);
390  else
391  attr_.emplace_back(name, value);
392  }
393  }
394  }
395 
396  output_cache_ = string_span(begin, s - begin);
398 }
399 
401 {
402  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
403  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
404  debug_delete(*j);
405  }
406  }
407 }
408 
409 namespace {
410 struct string_span_pair_comparer
411 {
412  bool operator()(const string_span& a, const node::attribute& b) const {
413  return a < b.key;
414  }
415 
416  bool operator()(const node::attribute& a, const string_span& b) const {
417  return a.key < b;
418  }
419 
420  bool operator()(const node::attribute& a,
421  const node::attribute& b) const {
422  return a.key < b.key;
423  }
424 };
425 }
426 
427 const string_span& node::operator[](const char* key) const
428 {
429  static string_span empty("");
430  string_span span(key);
431  std::pair<attribute_list::const_iterator,
432  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
433  if(range.first != range.second) {
434  return range.first->value;
435  }
436 
437  return empty;
438 }
439 
440 bool node::has_attr(const char* key) const
441 {
442  string_span span(key);
443  std::pair<attribute_list::const_iterator,
444  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
445  return range.first != range.second;
446 }
447 
448 node& node::set_attr(const char* key, const char* value)
449 {
450  set_dirty();
451 
452  string_span span(key);
453  std::pair<attribute_list::iterator,
454  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
455  if(range.first != range.second) {
456  range.first->value = string_span(value);
457  } else {
458  attr_.insert(range.first, attribute(span, string_span(value)));
459  }
460 
461  return *this;
462 }
463 
464 node& node::set_attr_dup(const char* key, const char* value)
465 {
466  return set_attr(key, doc_->dup_string(value));
467 }
468 
469 node& node::set_attr_dup(const char* key, const string_span& value)
470 {
471  char* buf = value.duplicate();
473  return set_attr(key, buf);
474 }
475 
476 node& node::set_attr_int(const char* key, int value)
477 {
478  std::string temp = std::to_string(value);
479  return set_attr_dup(key, temp.c_str());
480 }
481 
482 node& node::add_child_at(const char* name, std::size_t index)
483 {
484  set_dirty();
485 
486  const int list_index = get_children(name);
487  child_list& list = children_[list_index].second;
488  if(index > list.size()) {
489  index = list.size();
490  }
491 
493  list.insert(list.begin() + index, new node(*doc_, this));
494  insert_ordered_child(list_index, index);
495 
497  return *list[index];
498 }
499 
500 
501 node& node::add_child(const char* name)
502 {
503  set_dirty();
504 
505  const int list_index = get_children(name);
507  child_list& list = children_[list_index].second;
508  list.push_back(new node(*doc_, this));
509  ordered_children_.emplace_back(list_index, list.size() - 1);
511  return *list.back();
512 }
513 
514 void node::remove_child(const string_span& name, std::size_t index)
515 {
516  set_dirty();
517 
518  //if we don't already have a vector for this item we don't want to add one.
520  if(itor == children_.end()) {
521  return;
522  }
523 
524  child_list& list = itor->second;
525  if(index >= list.size()) {
526  return;
527  }
528 
529  remove_ordered_child(std::distance(children_.begin(), itor), index);
530 
531  debug_delete(list[index]);
532  list.erase(list.begin() + index);
533 
534  if(list.empty()) {
535  remove_ordered_child_list(std::distance(children_.begin(), itor));
536  children_.erase(itor);
537  }
538 }
539 
540 void node::insert_ordered_child(int child_map_index, int child_list_index)
541 {
542  bool inserted = false;
544  while(i != ordered_children_.end()) {
545  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
546  i->child_list_index++;
547  } else if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
548  inserted = true;
549  i->child_list_index++;
550  i = ordered_children_.insert(i, node_pos(child_map_index, child_list_index));
551  ++i;
552  }
553 
554  ++i;
555  }
556 
557  if(!inserted) {
558  ordered_children_.emplace_back(child_map_index, child_list_index);
559  }
560 }
561 
562 void node::remove_ordered_child(int child_map_index, int child_list_index)
563 {
564  int erase_count = 0;
566  while(i != ordered_children_.end()) {
567  if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
568  i = ordered_children_.erase(i);
569  ++erase_count;
570  } else {
571  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
572  i->child_list_index--;
573  }
574  ++i;
575  }
576  }
577 
578  assert(erase_count == 1);
579 }
580 
581 void node::insert_ordered_child_list(int child_map_index)
582 {
584  while(i != ordered_children_.end()) {
585  if(i->child_map_index >= child_map_index) {
586  i->child_map_index++;
587  }
588  }
589 }
590 
591 void node::remove_ordered_child_list(int child_map_index)
592 {
594  while(i != ordered_children_.end()) {
595  if(i->child_map_index == child_map_index) {
596  assert(false);
597  i = ordered_children_.erase(i);
598  } else {
599  if(i->child_map_index > child_map_index) {
600  i->child_map_index--;
601  }
602 
603  ++i;
604  }
605  }
606 }
607 
609 {
610 // only define this symbol in debug mode to work out child ordering.
611 #ifdef CHECK_ORDERED_CHILDREN
612  std::vector<node_pos>::const_iterator i = ordered_children_.begin();
613  while(i != ordered_children_.end()) {
614  assert(i->child_map_index < children_.size());
615  assert(i->child_list_index < children_[i->child_map_index].second.size());
616  ++i;
617  }
618 
619  for(child_map::const_iterator j = children_.begin(); j != children_.end(); ++j) {
620  const unsigned short child_map_index = j - children_.begin();
621  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
622  const unsigned short child_list_index = k - j->second.begin();
623  bool found = false;
624  for(int n = 0; n != ordered_children_.size(); ++n) {
625  if(ordered_children_[n].child_map_index == child_map_index &&
626  ordered_children_[n].child_list_index == child_list_index) {
627  found = true;
628  break;
629  }
630  }
631 
632  assert(found);
633  }
634  }
635 #endif // CHECK_ORDERED_CHILDREN
636 }
637 
638 void node::remove_child(const char* name, std::size_t index)
639 {
640  remove_child(string_span(name), index);
641 }
642 
643 node* node::child(const char* name)
644 {
645  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
646  if(i->first == name) {
647  assert(i->second.empty() == false);
648  return i->second.front();
649  }
650  }
651 
652  return nullptr;
653 }
654 
655 const node* node::child(const char* name) const
656 {
657  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
658  if(i->first == name) {
659  if(i->second.empty()) {
660  return nullptr;
661  } else {
662  return i->second.front();
663  }
664  }
665  }
666 
667  return nullptr;
668 }
669 
670 const node::child_list& node::children(const char* name) const
671 {
672  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
673  if(i->first == name) {
674  return i->second;
675  }
676  }
677 
678  static const node::child_list empty;
679  return empty;
680 }
681 
682 int node::get_children(const char* name)
683 {
684  return get_children(string_span(name));
685 }
686 
688 {
689  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
690  if(i->first == name) {
691  return std::distance(children_.begin(), i);
692  }
693  }
694 
695  children_.emplace_back(string_span(name), child_list());
696  return children_.size() - 1;
697 }
698 
699 node::child_map::const_iterator node::find_in_map(const child_map& m, const string_span& attr)
700 {
701  child_map::const_iterator i = m.begin();
702  for(; i != m.end(); ++i) {
703  if(i->first == attr) {
704  break;
705  }
706  }
707 
708  return i;
709 }
710 
712 {
713  child_map::iterator i = m.begin();
714  for(; i != m.end(); ++i) {
715  if(i->first == attr) {
716  break;
717  }
718  }
719 
720  return i;
721 }
722 
724 {
725  if(children_.empty()) {
726  static const string_span empty;
727  return empty;
728  }
729 
730  return children_.begin()->first;
731 }
732 
733 int node::output_size(string_span& textdomain) const
734 {
736  if(output_cache_.empty() == false) {
737  return output_cache_.size();
738  }
739 
740  int res = 0;
741  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
742  res += i->key.size() + i->value.size() + 4;
743  if(i->is_translatable()) {
744  if(i->textdomain != textdomain) {
745  res += textdomain_str_size() + i->textdomain.size() + 1/* \n */;
746  }
747  textdomain = i->textdomain;
748  res += 1; // "_"
749  }
750  }
751 
752  std::size_t count_children = 0;
753  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
754  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
755  res += i->first.size()*2 + 7;
756  res += (*j)->output_size(textdomain);
757  ++count_children;
758  }
759  }
760 
761  assert(count_children == ordered_children_.size());
762 
763  return res;
764 }
765 
766 void node::shift_buffers(ptrdiff_t offset)
767 {
768  if(!output_cache_.empty()) {
770  }
771 
772  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
773  i->key = string_span(i->key.begin() + offset, i->key.size());
774  i->value = string_span(i->value.begin() + offset, i->value.size());
775  i->textdomain = string_span(i->textdomain.begin() + offset, i->textdomain.size());
776  }
777 
778  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
779  string_span& key = i->first;
780  key = string_span(key.begin() + offset, key.size());
781  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
782  (*j)->shift_buffers(offset);
783  }
784  }
785 }
786 
787 void node::output(char*& buf, string_span& textdomain, CACHE_STATUS cache_status)
788 {
789  if(output_cache_.empty() == false) {
790  // fixme: this skips over textdomain changes.
791  memcpy(buf, output_cache_.begin(), output_cache_.size());
792  if(cache_status == REFRESH_CACHE) {
794  }
795  buf += output_cache_.size();
796  return;
797  }
798 
799  char* begin = buf;
800 
801  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
802 
803  if(i->is_translatable()) {
804  if(textdomain != i->textdomain) {
805  memcpy(buf, textdomain_str(), textdomain_str_size());
806  buf += textdomain_str_size();
807 
808  memcpy(buf, i->textdomain.begin(), i->textdomain.size());
809  textdomain = string_span(buf, i->textdomain.size());
810 
811  buf += i->textdomain.size();
812  *buf++ = '\n';
813  }
814  i->textdomain = textdomain;
815  }
816 
817  memcpy(buf, i->key.begin(), i->key.size());
818  i->key = string_span(buf, i->key.size());
819  buf += i->key.size();
820  *buf++ = '=';
821  if(i->is_translatable()) {
822  *buf++ = '_';
823  }
824  *buf++ = '"';
825  memcpy(buf, i->value.begin(), i->value.size());
826  i->value = string_span(buf, i->value.size());
827  buf += i->value.size();
828  *buf++ = '"';
829  *buf++ = '\n';
830  }
831 
832  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
833  i != ordered_children_.end(); ++i) {
834  assert(i->child_map_index < children_.size());
835  assert(i->child_list_index < children_[i->child_map_index].second.size());
836  string_span& attr = children_[i->child_map_index].first;
837  *buf++ = '[';
838  memcpy(buf, attr.begin(), attr.size());
839  attr = string_span(buf, attr.size());
840  buf += attr.size();
841  *buf++ = ']';
842  *buf++ = '\n';
843  children_[i->child_map_index].second[i->child_list_index]->output(buf, textdomain, cache_status);
844  *buf++ = '[';
845  *buf++ = '/';
846  memcpy(buf, attr.begin(), attr.size());
847  buf += attr.size();
848  *buf++ = ']';
849  *buf++ = '\n';
850  }
851 
852  if(cache_status == REFRESH_CACHE) {
853  output_cache_ = string_span(begin, buf - begin);
854  }
855 }
856 
857 std::string node_to_string(const node& n)
858 {
859  //calling output with status=DO_NOT_MODIFY_CACHE really doesn't modify the
860  //node, so we can do it safely
861  string_span textdomain(0, 0);
862  node& mutable_node = const_cast<node&>(n);
863  std::vector<char> v(mutable_node.output_size(textdomain));
864  char* ptr = &v[0];
865  textdomain = string_span(0, 0);
866  mutable_node.output(ptr, textdomain, node::DO_NOT_MODIFY_CACHE);
867  assert(ptr == &v[0] + v.size());
868  return std::string(v.begin(), v.end());
869 }
870 
871 void node::copy_into(node& n) const
872 {
873  n.set_dirty();
874  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
875  char* key = i->key.duplicate();
876  char* value = i->value.duplicate();
878  n.doc_->take_ownership_of_buffer(value);
879  n.set_attr(key, value);
880  }
881 
882  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
883  i != ordered_children_.end(); ++i) {
884  assert(i->child_map_index < children_.size());
885  assert(i->child_list_index < children_[i->child_map_index].second.size());
886  char* buf = children_[i->child_map_index].first.duplicate();
888  children_[i->child_map_index].second[i->child_list_index]->copy_into(n.add_child(buf));
889  }
890 }
891 
892 void node::apply_diff(const node& diff)
893 {
894  set_dirty();
895  const node* inserts = diff.child("insert");
896  if(inserts != nullptr) {
897  for(attribute_list::const_iterator i = inserts->attr_.begin(); i != inserts->attr_.end(); ++i) {
898  char* name = i->key.duplicate();
899  char* value = i->value.duplicate();
900  set_attr(name, value);
903  }
904  }
905 
906  const node* deletes = diff.child("delete");
907  if(deletes != nullptr) {
908  for(attribute_list::const_iterator i = deletes->attr_.begin(); i != deletes->attr_.end(); ++i) {
909  std::pair<attribute_list::iterator,
910  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), i->key, string_span_pair_comparer());
911  if(range.first != range.second) {
912  attr_.erase(range.first);
913  }
914  }
915  }
916 
917  const child_list& child_changes = diff.children("change_child");
918  for(child_list::const_iterator i = child_changes.begin(); i != child_changes.end(); ++i) {
919  const std::size_t index = (**i)["index"].to_int();
920  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
921  const string_span& name = j->first;
922  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
924  if(itor != children_.end()) {
925  if(index < itor->second.size()) {
926  itor->second[index]->apply_diff(**k);
927  }
928  }
929  }
930  }
931  }
932 
933  const child_list& child_inserts = diff.children("insert_child");
934  for(child_list::const_iterator i = child_inserts.begin(); i != child_inserts.end(); ++i) {
935  const std::size_t index = (**i)["index"].to_int();
936  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
937  const string_span& name = j->first;
938  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
939  char* buf = name.duplicate();
941  (*k)->copy_into(add_child_at(buf, index));
942  }
943  }
944  }
945 
946  const child_list& child_deletes = diff.children("delete_child");
947  for(child_list::const_iterator i = child_deletes.begin(); i != child_deletes.end(); ++i) {
948  const std::size_t index = (**i)["index"].to_int();
949  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
950  if(j->second.empty()) {
951  continue;
952  }
953 
954  const string_span& name = j->first;
955  remove_child(name, index);
956  }
957  }
958 }
959 
961 {
962  doc_ = doc;
963 
964  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
965  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
966  (*j)->set_doc(doc);
967  }
968  }
969 }
970 
971 int node::nchildren() const
972 {
973  int res = 0;
974  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
975  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
976  ++res;
977  res += (*j)->nchildren();
978  }
979  }
980 
981  return res;
982 }
983 
985 {
986  int res = attr_.capacity();
987  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
988  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
989  res += (*j)->nattributes_recursive();
990  }
991  }
992 
993  return res;
994 }
995 
997 {
998  for(node* n = this; n != nullptr && n->output_cache_.is_null() == false; n = n->parent_) {
999  n->output_cache_ = string_span();
1000  }
1001 }
1002 
1004  compressed_buf_(),
1005  output_(nullptr),
1006  buffers_(),
1007  root_(new node(*this, nullptr)),
1008  prev_(nullptr),
1009  next_(nullptr)
1010 {
1011  attach_list();
1012 }
1013 
1015  compressed_buf_(),
1016  output_(buf),
1017  buffers_(),
1018  root_(nullptr),
1019  prev_(nullptr),
1020  next_(nullptr)
1021 {
1022  if(control == INIT_TAKE_OWNERSHIP) {
1023  buffers_.push_back(buf);
1024  }
1025  const char* cbuf = buf;
1026  string_span textdomain = default_textdomain();
1027  root_ = new node(*this, nullptr, &cbuf, 0, textdomain);
1028 
1029  attach_list();
1030 }
1031 
// Builds a document over a caller-provided read-only buffer. Nothing is added
// to buffers_ here, so this constructor does not record `buf` for deletion.
// With any state other than INIT_COMPRESSED the buffer is parsed as WML text
// right away, starting from the default textdomain.
1032 document::document(const char* buf, INIT_STATE state) :
1033  compressed_buf_(),
1034  output_(buf),
1035  buffers_(),
1036  root_(nullptr),
1037  prev_(nullptr),
1038  next_(nullptr)
1039 {
1040  if(state == INIT_COMPRESSED) {
// NOTE(review): as shown, this branch only resets output_ and nothing stores
// `buf` in compressed_buf_ - confirm a statement is not missing here.
1042  output_ = nullptr;
1043  } else {
1044  string_span textdomain = default_textdomain();
// The parser advances the pointer it is handed; `buf` is a by-value
// parameter, so only the local copy moves.
1045  root_ = new node(*this, nullptr, &buf, 0, textdomain);
1046  }
1047 
1048  attach_list();
1049 }
1050 
1052  compressed_buf_(compressed_buf),
1053  output_(nullptr),
1054  buffers_(),
1055  root_(nullptr),
1056  prev_(nullptr),
1057  next_(nullptr)
1058 {
1059  string_span uncompressed_buf;
1060  buffers_.push_back(uncompress_buffer(compressed_buf, &uncompressed_buf));
1061  output_ = uncompressed_buf.begin();
1062  const char* cbuf = output_;
1063  try {
1064  string_span textdomain(0, 0);
1065  root_ = new node(*this, nullptr, &cbuf, 0, textdomain);
1066  } catch(...) {
1067  delete [] buffers_.front();
1068  buffers_.clear();
1069  throw;
1070  }
1071 
1072  attach_list();
1073 }
1074 
1076 {
1077  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1078  delete [] *i;
1079  }
1080 
1081  buffers_.clear();
1082  debug_delete(root_);
1083 
1084  detach_list();
1085 }
1086 
1087 const char* document::dup_string(const char* str)
1088 {
1089  const int len = strlen(str);
1090  char* res = new char[len+1];
1091  memcpy(res, str, len + 1);
1092  buffers_.push_back(res);
1093  return res;
1094 }
1095 
1096 const char* document::output()
1097 {
1098  if(output_ && (!root_ || root_->is_dirty() == false)) {
1099  return output_;
1100  }
1101  if(!root_) {
1102  assert(compressed_buf_.empty() == false);
1103  string_span uncompressed_buf;
1104  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1105  output_ = uncompressed_buf.begin();
1106  return output_;
1107  }
1108 
1109  //we're dirty, so the compressed buf must also be dirty; clear it.
1111 
1112  std::vector<char*> bufs;
1113  bufs.swap(buffers_);
1114 
1115  string_span textdomain(0, 0);
1116  const int buf_size = root_->output_size(textdomain) + 1;
1117  char* buf;
1118  try {
1119  buf = new char[buf_size];
1120  } catch (const std::bad_alloc& e) {
1121  ERR_SWML << "ERROR: Trying to allocate " << buf_size << " bytes. "
1122  << e.what() << std::endl;
1123  throw error("Bad allocation request in output().");
1124  }
1125  buffers_.push_back(buf);
1126  output_ = buf;
1127 
1128  textdomain = string_span(0, 0);
1129  root_->output(buf, textdomain, node::REFRESH_CACHE);
1130  *buf++ = 0;
1131  assert(buf == output_ + buf_size);
1132 
1133  for(std::vector<char*>::iterator i = bufs.begin(); i != bufs.end(); ++i) {
1134  delete [] *i;
1135  }
1136 
1137  bufs.clear();
1138 
1139  return output_;
1140 }
1141 
1143 {
1144  if(compressed_buf_.empty() == false &&
1145  (root_ == nullptr || root_->is_dirty() == false)) {
1146  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1147  return compressed_buf_;
1148  }
1149 
1150  buffers_.push_back(compress_buffer(output(), &compressed_buf_, bzip2));
1151  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1152 
1153  return compressed_buf_;
1154 }
1155 
1157 {
1159  debug_delete(root_);
1160  root_ = nullptr;
1161  output_ = nullptr;
1162  std::vector<char*> new_buffers;
1163  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1164  if(*i != compressed_buf_.begin()) {
1165  delete [] *i;
1166  } else {
1167  new_buffers.push_back(*i);
1168  }
1169  }
1170 
1171  buffers_.swap(new_buffers);
1172  assert(buffers_.size() == 1);
1173 }
1174 
1176 {
1177  if(output_ == nullptr) {
1178  assert(compressed_buf_.empty() == false);
1179  string_span uncompressed_buf;
1180  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1181  output_ = uncompressed_buf.begin();
1182  }
1183 
1184  assert(root_ == nullptr);
1185  const char* cbuf = output_;
1186  string_span textdomain(0, 0);
1187  root_ = new node(*this, nullptr, &cbuf, 0, textdomain);
1188 }
1189 
1191 {
1192  char* buf = new char[strlen(output())+1];
1193  strcpy(buf, output());
1194  return new document(buf);
1195 }
1196 
1198 {
1201  buffers_.swap(o.buffers_);
1202  std::swap(root_, o.root_);
1203 
1204  root_->set_doc(this);
1205  o.root_->set_doc(&o);
1206 }
1207 
1209 {
1211  output_ = nullptr;
1212  debug_delete(root_);
1213  root_ = new node(*this, nullptr);
1214  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1215  delete [] *i;
1216  }
1217 
1218  buffers_.clear();
1219 }
1220 
1221 namespace {
// Head of the linked list of documents (chained through prev_/next_) that
// document::stats() walks.
1222 document* head_doc = nullptr;
1223 }
1224 
1226 {
1227  prev_ = nullptr;
1228  next_ = head_doc;
1229 
1230  if(next_) {
1231  next_->prev_ = this;
1232  }
1233  head_doc = this;
1234 }
1235 
1237 {
1238  if(head_doc == this) {
1239  head_doc = next_;
1240  }
1241 
1242  if(next_) {
1243  next_->prev_ = prev_;
1244  }
1245 
1246  if(prev_) {
1247  prev_->next_ = next_;
1248  }
1249  next_ = prev_ = nullptr;
1250 }
1251 
1252 std::string document::stats()
1253 {
1254  std::ostringstream s;
1255  int ndocs = 0;
1256  int ncompressed = 0;
1257  int compressed_size = 0;
1258  int ntext = 0;
1259  int text_size = 0;
1260  int nbuffers = 0;
1261  int nnodes = 0;
1262  int ndirty = 0;
1263  int nattributes = 0;
1264  for(document* d = head_doc; d != nullptr; d = d->next_) {
1265  ndocs++;
1266  nbuffers += d->buffers_.size();
1267 
1268  if(d->compressed_buf_.is_null() == false) {
1269  ++ncompressed;
1270  compressed_size += d->compressed_buf_.size();
1271  }
1272 
1273  if(d->output_) {
1274  ++ntext;
1275  text_size += strlen(d->output_);
1276  }
1277 
1278  if(d->root_) {
1279  nnodes += 1 + d->root_->nchildren();
1280  nattributes += d->root_->nattributes_recursive();
1281  }
1282 
1283  if(d->root_ && d->root_->is_dirty()) {
1284  ++ndirty;
1285  }
1286  }
1287 
1288  const int nodes_alloc = nnodes*(sizeof(node) + 12);
1289  const int attr_alloc = nattributes*(sizeof(string_span)*2);
1290  const int total_alloc = compressed_size + text_size + nodes_alloc + attr_alloc;
1291 
1292  s << "WML documents: " << ndocs << "\n"
1293  << "Dirty: " << ndirty << "\n"
1294  << "With compression: " << ncompressed << " (" << compressed_size
1295  << " bytes)\n"
1296  << "With text: " << ntext << " (" << text_size
1297  << " bytes)\n"
1298  << "Nodes: " << nnodes << " (" << nodes_alloc << " bytes)\n"
1299  << "Attr: " << nattributes << " (" << attr_alloc << " bytes)\n"
1300  << "Buffers: " << nbuffers << "\n"
1301  << "Total allocation: " << total_alloc << " bytes\n";
1302 
1303  return s.str();
1304 }
1305 
// Free-function swap for documents; forwards to the member swap so that
// unqualified swap(a, b) calls pick this up for simple_wml::document.
1306 void swap(document& lhs, document& rhs)
1307 {
1308  lhs.swap(rhs);
1309 }
1310 
1311 }
1312 
// Stand-alone smoke test, compiled only when UNIT_TEST_SIMPLE_WML is defined:
// parses a small document, checks four attribute values, then modifies the
// tree and prints the regenerated text.
1313 #ifdef UNIT_TEST_SIMPLE_WML
1314 
1315 int main(int argc, char** argv)
1316 {
// NOTE(review): doctext comes from strdup() (malloc). Confirm which document
// constructor this call selects and whether it later frees the buffer with
// delete [].
// NOTE(review): attribute d escapes its quote with a backslash, while the
// parser in this file treats a doubled quote ("") as the escape - confirm
// this input parses as the asserts below expect.
1317  char* doctext = strdup(
1318 "[test]\n"
1319 "a=\"blah\"\n"
1320 "b=\"blah\"\n"
1321 "c=\"\\\\\"\n"
1322 "d=\"\\\"\"\n"
1323 "[/test]");
1324  std::cerr << doctext << "\n";
1325  simple_wml::document doc(doctext);
1326 
1327  simple_wml::node& node = doc.root();
1328  simple_wml::node* test_node = node.child("test");
1329  assert(test_node);
1330  assert((*test_node)["a"] == "blah");
1331  assert((*test_node)["b"] == "blah");
1332  assert((*test_node)["c"] == "\\\\");
1333  assert((*test_node)["d"] == "\\\"");
1334 
// Mutating the tree marks it dirty, so output() below has to re-serialize it.
1335  node.set_attr("blah", "blah");
1336  test_node->set_attr("e", "f");
1337  std::cerr << doc.output();
1338 }
1339 
1340 #endif
void remove_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:562
node & add_child(const char *name)
Definition: simple_wml.cpp:501
const string_span & attr(const char *key) const
Definition: simple_wml.hpp:132
string_span compressed_buf_
Definition: simple_wml.hpp:300
string_span output_compressed(bool bzip2=false)
std::string to_string() const
Definition: simple_wml.cpp:197
std::ostream & operator<<(std::ostream &o, const string_span &s)
Definition: simple_wml.cpp:216
void apply_diff(const node &diff)
Definition: simple_wml.cpp:892
void shift_buffers(ptrdiff_t offset)
Definition: simple_wml.cpp:766
bool has_attr(const char *key) const
Definition: simple_wml.cpp:440
node(document &doc, node *parent)
Definition: simple_wml.cpp:222
void output(char *&buf, string_span &textdomain, CACHE_STATUS status=DO_NOT_MODIFY_CACHE)
Definition: simple_wml.cpp:787
void insert_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:540
static l_noret error(LoadState *S, const char *why)
Definition: lundump.cpp:39
#define a
string_span output_cache_
Definition: simple_wml.hpp:229
static child_map::const_iterator find_in_map(const child_map &m, const string_span &attr)
Definition: simple_wml.cpp:699
std::vector< char * > buffers_
Definition: simple_wml.hpp:302
char * duplicate() const
Definition: simple_wml.cpp:202
const char * end() const
Definition: simple_wml.hpp:90
void check_ordered_children() const
Definition: simple_wml.cpp:608
int nchildren() const
Definition: simple_wml.cpp:971
node & set_attr_int(const char *key, int value)
Definition: simple_wml.cpp:476
const char * dup_string(const char *str)
node & set_attr(const char *key, const char *value)
Definition: simple_wml.cpp:448
bool to_bool(bool default_value=false) const
Definition: simple_wml.cpp:173
int get_children(const string_span &name)
Definition: simple_wml.cpp:687
node & add_child_at(const char *name, std::size_t index)
Definition: simple_wml.cpp:482
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:108
attribute_list attr_
Definition: simple_wml.hpp:197
#define d
#define ERR_SWML
Definition: simple_wml.cpp:29
const child_list & children(const char *name) const
Definition: simple_wml.cpp:670
document * doc_
Definition: simple_wml.hpp:194
std::vector< child_pair > child_map
Definition: simple_wml.hpp:202
void insert_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:581
const string_span & operator[](const char *key) const
Definition: simple_wml.cpp:427
#define b
bool is_dirty() const
Definition: simple_wml.hpp:165
unsigned in
If equal to search_counter, the node is off the list.
const char * output()
node * child(const char *name)
Definition: simple_wml.cpp:643
const char * begin() const
Definition: simple_wml.hpp:89
std::size_t size(const std::string &str)
Length in characters of a UTF-8 string.
Definition: unicode.cpp:86
const t_string name
static void maybe_change_textdomain(const char *beginline, const char *endline, string_span &textdomain)
Definition: simple_wml.cpp:232
void remove_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:591
child_map children_
Definition: simple_wml.hpp:206
static std::size_t document_size_limit
Definition: simple_wml.hpp:294
const string_span & first_child() const
Definition: simple_wml.cpp:723
void swap(document &o)
const char * output_
Definition: simple_wml.hpp:301
static std::string stats()
std::size_t i
Definition: function.cpp:933
node & set_attr_dup(const char *key, const char *value)
Definition: simple_wml.cpp:464
int nattributes_recursive() const
Definition: simple_wml.cpp:984
int main()
static map_location::DIRECTION s
void remove_child(const char *name, std::size_t index)
Definition: simple_wml.cpp:638
std::vector< node * > child_list
Definition: simple_wml.hpp:129
std::size_t index(const std::string &str, const std::size_t index)
Codepoint index corresponding to the nth character in a UTF-8 string.
Definition: unicode.cpp:71
Standard logging facilities (interface).
void set_doc(document *doc)
Definition: simple_wml.cpp:960
#define e
std::vector< node_pos > ordered_children_
Definition: simple_wml.hpp:220
void swap(document &lhs, document &rhs)
Implement non-member swap function for std::swap (calls document::swap).
void copy_into(node &n) const
Definition: simple_wml.cpp:871
static map_location::DIRECTION n
void take_ownership_of_buffer(char *buffer)
Definition: simple_wml.hpp:285
std::string::const_iterator iterator
Definition: tokenizer.hpp:24
std::string node_to_string(const node &n)
Definition: simple_wml.cpp:857
static lg::log_domain log_config("config")
int output_size(string_span &textdomain) const
Definition: simple_wml.cpp:733