The Battle for Wesnoth  1.19.7+dev
tstring.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2004 - 2024
3  by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Copyright (C) 2004 by Philippe Plantier <ayin@anathas.org>
5  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
6 
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; either version 2 of the License, or
10  (at your option) any later version.
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY.
13 
14  See the COPYING file for more details.
15 */
16 
17 /**
18  * @file
19  * Routines for translatable strings.
20  */
21 
22 #include "tstring.hpp"
23 
24 #include "gettext.hpp"
25 #include "log.hpp"
26 
27 #include <boost/multi_index/hashed_index.hpp>
28 
29 #include <map>
30 #include <vector>
31 
// Log domain named "config"; both logging macros below write to it.
32 static lg::log_domain log_config("config");
// LOG_CF expands to LOG_STREAM with the `info` argument on log_config.
33 #define LOG_CF LOG_STREAM(info, log_config)
// ERR_CF expands to LOG_STREAM with the `err` argument on log_config.
34 #define ERR_CF LOG_STREAM(err, log_config)
35 
// File-local counter, initialised to zero.
// NOTE(review): presumably a generation stamp compared against
// translation_timestamp_ to invalidate cached translations -- no use of it is
// visible in this view; confirm before relying on that.
36 static unsigned language_counter = 0;
37 
namespace
{
/**
 * Control characters embedded in t_string_base::value_ to delimit its parts.
 * Kept as compile-time constants local to this translation unit.
 */
constexpr char TRANSLATABLE_PART = 0x01;
constexpr char UNTRANSLATABLE_PART = 0x02;
constexpr char TEXTDOMAIN_SEPARATOR = 0x03;
constexpr char ID_TRANSLATABLE_PART = 0x04;
constexpr char PLURAL_PART = 0x05;
} // namespace
46 
47 std::size_t t_string_base::hash_value() const
48 {
49  std::size_t seed = 0;
50  boost::hash_combine(seed, value_);
51  boost::hash_combine(seed, translatable_);
52  boost::hash_combine(seed, last_untranslatable_);
53  return seed;
54 }
55 
57  : string_(string.value_)
58  , begin_(0)
59  , end_(string_.size())
60  , textdomain_()
61  , translatable_(false)
62  , countable_(false)
63  , count_(0)
64 {
65  if(string.translatable_) {
66  update();
67  }
68 }
69 
70 static std::string mark = std::string(TRANSLATABLE_PART, 1) + UNTRANSLATABLE_PART + ID_TRANSLATABLE_PART + PLURAL_PART;
71 
73 {
74  unsigned int id;
75 
76  if(begin_ == string_.size()) {
77  return;
78  }
79 
80  switch(string_[begin_]) {
81  case TRANSLATABLE_PART: {
82  // Format: [TRANSLATABLE_PART]textdomain[TEXTDOMAIN_SEPARATOR]msgid[...]
83  std::string::size_type textdomain_end = string_.find(TEXTDOMAIN_SEPARATOR, begin_ + 1);
84 
85  if(textdomain_end == std::string::npos || textdomain_end >= string_.size() - 1) {
86  ERR_CF << "Error: invalid string: " << string_;
87  begin_ = string_.size();
88  return;
89  }
90 
91  end_ = string_.find_first_of(mark, textdomain_end + 1);
92  if(end_ == std::string::npos) {
93  end_ = string_.size();
94  }
95 
96  textdomain_ = std::string(string_, begin_ + 1, textdomain_end - begin_ - 1);
97  translatable_ = true;
98  begin_ = textdomain_end + 1;
99 
100  break;
101  }
102  case ID_TRANSLATABLE_PART:
103  // Format: [ID_TRANSLATABLE_PART][2-byte textdomain ID]msgid[...]
104  if(begin_ + 3 >= string_.size()) {
105  ERR_CF << "Error: invalid string: " << string_;
106  begin_ = string_.size();
107  return;
108  }
109 
110  end_ = string_.find_first_of(mark, begin_ + 3);
111  if(end_ == std::string::npos) {
112  end_ = string_.size();
113  }
114 
115  id = static_cast<unsigned char>(string_[begin_ + 1]) + static_cast<unsigned char>(string_[begin_ + 2]) * 256;
116  if(id >= id_to_textdomain.size()) {
117  ERR_CF << "Error: invalid string: " << string_;
118  begin_ = string_.size();
119  return;
120  }
121 
122  textdomain_ = id_to_textdomain[id];
123  begin_ += 3;
124  translatable_ = true;
125 
126  break;
127 
128  case UNTRANSLATABLE_PART:
129  end_ = string_.find_first_of(mark, begin_ + 1);
130  if(end_ == std::string::npos) {
131  end_ = string_.size();
132  }
133 
134  if(end_ <= begin_ + 1) {
135  ERR_CF << "Error: invalid string: " << string_;
136  begin_ = string_.size();
137  return;
138  }
139 
140  translatable_ = false;
141  textdomain_ = "";
142  begin_ += 1;
143  break;
144 
145  case PLURAL_PART:
146  begin_ = string_.find_first_of(mark, end_ + 5);
147  if(begin_ == std::string::npos) {
148  begin_ = string_.size();
149  }
150 
151  if(string_[begin_] == PLURAL_PART) {
152  ERR_CF << "Error: invalid string: " << string_;
153  begin_ = string_.size();
154  return;
155  }
156 
157  update();
158  break;
159 
160  default:
161  end_ = string_.size();
162  translatable_ = false;
163  textdomain_ = "";
164  break;
165  }
166 
167  if(translatable_ && string_[end_] == PLURAL_PART) {
168  // Format: [PLURAL_PART][4-byte count]msgid_plural[...]
169  if(end_ + 5 >= string_.size()) {
170  ERR_CF << "Error: invalid string: " << string_;
171  begin_ = string_.size();
172  return;
173  }
174 
175  std::string::size_type real_end = string_.find_first_of(mark, end_ + 6);
176  if(real_end < string_.size() && string_[real_end] == PLURAL_PART) {
177  ERR_CF << "Error: invalid string: " << string_;
178  begin_ = string_.size();
179  return;
180  }
181 
182  countable_ = true;
183 
184  union {
185  int32_t count;
186  char data[4];
187  } cvt;
188 
189  std::copy_n(string_.data() + end_ + 1, 4, cvt.data);
190  count_ = cvt.count;
191  } else {
192  countable_ = false;
193  count_ = 0;
194  }
195 }
196 
197 std::string::const_iterator t_string_base::walker::plural_begin() const
198 {
199  if(!countable_) {
200  return begin();
201  }
202 
203  return end() + 5;
204 }
205 
206 std::string::const_iterator t_string_base::walker::plural_end() const
207 {
208  if(!countable_) {
209  return end();
210  }
211 
212  std::string::size_type pl_end = string_.find_first_of(mark, end_ + 5);
213  if(pl_end == std::string::npos) {
214  pl_end = string_.size();
215  }
216 
217  return string_.begin() + pl_end;
218 }
219 
221  : value_()
224  , translatable_(false)
225  , last_untranslatable_(false)
226 {
227 }
228 
230 {
231 }
232 
234  : value_(string.value_)
235  , translated_value_(string.translated_value_)
236  , translation_timestamp_(string.translation_timestamp_)
237  , translatable_(string.translatable_)
238  , last_untranslatable_(string.last_untranslatable_)
239 {
240 }
241 
242 t_string_base::t_string_base(const std::string& string)
243  : value_(string)
244  , translated_value_()
245  , translation_timestamp_(0)
246  , translatable_(false)
247  , last_untranslatable_(false)
248 {
249 }
250 
251 t_string_base::t_string_base(std::string&& string)
252  : value_(std::move(string))
253  , translated_value_()
254  , translation_timestamp_(0)
255  , translatable_(false)
256  , last_untranslatable_(false)
257 {
258 }
259 
260 t_string_base::t_string_base(const std::string& string, const std::string& textdomain)
261  : value_(1, ID_TRANSLATABLE_PART)
262  , translated_value_()
263  , translation_timestamp_(0)
264  , translatable_(true)
265  , last_untranslatable_(false)
266 {
267  if(string.empty()) {
268  value_.clear();
269  translatable_ = false;
270  return;
271  }
272 
273  std::map<std::string, unsigned int>::const_iterator idi = textdomain_to_id.find(textdomain);
274  unsigned int id;
275 
276  if(idi == textdomain_to_id.end()) {
277  id = id_to_textdomain.size();
278  textdomain_to_id[textdomain] = id;
279  id_to_textdomain.push_back(textdomain);
280  } else {
281  id = idi->second;
282  }
283 
284  value_ += static_cast<char>(id & 0xff);
285  value_ += static_cast<char>(id >> 8);
286  value_ += string;
287 }
288 
289 t_string_base::t_string_base(const std::string& sing, const std::string& pl, int count, const std::string& textdomain)
290  : value_(1, ID_TRANSLATABLE_PART)
291  , translated_value_()
292  , translation_timestamp_(0)
293  , translatable_(true)
294  , last_untranslatable_(false)
295 {
296  if(sing.empty() && pl.empty()) {
297  value_.clear();
298  translatable_ = false;
299  return;
300  }
301 
302  std::map<std::string, unsigned int>::const_iterator idi = textdomain_to_id.find(textdomain);
303  unsigned int id;
304 
305  if(idi == textdomain_to_id.end()) {
306  id = id_to_textdomain.size();
307  textdomain_to_id[textdomain] = id;
308  id_to_textdomain.push_back(textdomain);
309  } else {
310  id = idi->second;
311  }
312 
313  value_ += static_cast<char>(id & 0xff);
314  value_ += static_cast<char>(id >> 8);
315  value_ += sing;
316  value_ += PLURAL_PART;
317 
318  union {
319  int32_t count;
320  char data[4];
321  } cvt;
322 
323  cvt.count = count;
324  for(char c : cvt.data) {
325  value_ += c;
326  }
327 
328  value_ += pl;
329 }
330 
331 t_string_base::t_string_base(const char* string)
332  : value_(string)
333  , translated_value_()
334  , translation_timestamp_(0)
335  , translatable_(false)
336  , last_untranslatable_(false)
337 {
338 }
339 
341 {
342  t_string_base orig(string);
343 
344  if(!string.empty() && (string[0] == TRANSLATABLE_PART || string[0] == UNTRANSLATABLE_PART)) {
345  orig.translatable_ = true;
346  } else {
347  orig.translatable_ = false;
348  }
349 
350  t_string_base res;
351 
352  for(walker w(orig); !w.eos(); w.next()) {
353  std::string substr(w.begin(), w.end());
354 
355  if(w.translatable()) {
356  res += t_string_base(substr, w.textdomain());
357  } else {
358  res += substr;
359  }
360  }
361 
362  return res;
363 }
364 
365 std::string t_string_base::base_str() const
366 {
367  std::string res;
368  for(walker w(*this); !w.eos(); w.next()) {
369  res += std::string(w.begin(), w.end());
370  }
371 
372  return res;
373 }
374 
375 std::string t_string_base::to_serialized() const
376 {
377  t_string_base res;
378 
379  for(walker w(*this); !w.eos(); w.next()) {
380  t_string_base chunk;
381 
382  std::string substr(w.begin(), w.end());
383  if(w.translatable()) {
384  chunk.translatable_ = true;
385  chunk.last_untranslatable_ = false;
386  chunk.value_ = TRANSLATABLE_PART + w.textdomain() + TEXTDOMAIN_SEPARATOR + substr;
387  } else {
388  chunk.translatable_ = false;
389  chunk.value_ = substr;
390  }
391 
392  res += chunk;
393  }
394 
395  return res.value();
396 }
397 
399 {
400  value_ = string.value_;
401  translated_value_ = string.translated_value_;
402  translation_timestamp_ = string.translation_timestamp_;
403  translatable_ = string.translatable_;
404  last_untranslatable_ = string.last_untranslatable_;
405 
406  return *this;
407 }
408 
409 t_string_base& t_string_base::operator=(const std::string& string)
410 {
411  value_ = string;
412  translated_value_ = "";
414  translatable_ = false;
415  last_untranslatable_ = false;
416 
417  return *this;
418 }
419 
421 {
422  value_ = string;
423  translated_value_ = "";
425  translatable_ = false;
426  last_untranslatable_ = false;
427 
428  return *this;
429 }
430 
432 {
433  t_string_base res(*this);
434  res += string;
435  return res;
436 }
437 
438 t_string_base t_string_base::operator+(const std::string& string) const
439 {
440  t_string_base res(*this);
441  res += string;
442  return res;
443 }
444 
445 t_string_base t_string_base::operator+(const char* string) const
446 {
447  t_string_base res(*this);
448  res += string;
449  return res;
450 }
451 
453 {
454  if(string.value_.empty()) {
455  return *this;
456  }
457 
458  if(value_.empty()) {
459  *this = string;
460  return *this;
461  }
462 
463  if(translatable_ || string.translatable_) {
464  if(!translatable_) {
465  value_ = UNTRANSLATABLE_PART + value_;
466  translatable_ = true;
467  last_untranslatable_ = true;
468  } else {
469  translated_value_ = "";
470  }
471 
472  if(string.translatable_) {
473  if(last_untranslatable_ && string.value_[0] == UNTRANSLATABLE_PART) {
474  value_.append(string.value_.begin() + 1, string.value_.end());
475  } else {
476  value_ += string.value_;
477  }
478 
479  last_untranslatable_ = string.last_untranslatable_;
480  } else {
481  if(!last_untranslatable_) {
482  value_ += UNTRANSLATABLE_PART;
483  last_untranslatable_ = true;
484  }
485 
486  value_ += string.value_;
487  }
488  } else {
489  value_ += string.value_;
490  }
491 
492  return *this;
493 }
494 
495 t_string_base& t_string_base::operator+=(const std::string& string)
496 {
497  if(string.empty()) {
498  return *this;
499  }
500 
501  if(value_.empty()) {
502  *this = string;
503  return *this;
504  }
505 
506  if(translatable_) {
507  if(!last_untranslatable_) {
508  value_ += UNTRANSLATABLE_PART;
509  last_untranslatable_ = true;
510  }
511 
512  value_ += string;
513  translated_value_ = "";
514  } else {
515  value_ += string;
516  }
517 
518  return *this;
519 }
520 
522 {
523  if(string[0] == 0) {
524  return *this;
525  }
526 
527  if(value_.empty()) {
528  *this = string;
529  return *this;
530  }
531 
532  if(translatable_) {
533  if(!last_untranslatable_) {
534  value_ += UNTRANSLATABLE_PART;
535  last_untranslatable_ = true;
536  }
537 
538  value_ += string;
539  translated_value_ = "";
540  } else {
541  value_ += string;
542  }
543 
544  return *this;
545 }
546 
548 {
549  return that.translatable_ == translatable_ && that.value_ == value_;
550 }
551 
552 bool t_string_base::operator==(const std::string& that) const
553 {
554  return !translatable_ && value_ == that;
555 }
556 
557 bool t_string_base::operator==(const char* that) const
558 {
559  return !translatable_ && value_ == that;
560 }
561 
563 {
564  return value_ < that.value_;
565 }
566 
567 const std::string& t_string_base::str() const
568 {
569  if(!translatable_) {
570  return value_;
571  }
572 
574  return translated_value_;
575  }
576 
577  translated_value_.clear();
578 
579  for(walker w(*this); !w.eos(); w.next()) {
580  std::string part(w.begin(), w.end());
581 
582  if(w.translatable()) {
583  if(w.countable()) {
584  std::string plural(w.plural_begin(), w.plural_end());
586  translation::dsngettext(w.textdomain().c_str(), part.c_str(), plural.c_str(), w.count());
587  } else {
589  translation::dsgettext(w.textdomain().c_str(), part.c_str());
590  }
591  } else {
592  translated_value_ += part;
593  }
594  }
595 
597  return translated_value_;
598 }
599 
601  : val_(new base())
602 {
603 }
604 
606 {
607 }
608 
610  : val_(o.val_)
611 {
612 }
613 
615  : val_(new base(o))
616 {
617 }
618 
619 t_string::t_string(const char* o)
620  : val_(new base(o))
621 {
622 }
623 
624 t_string::t_string(const std::string& o)
625  : val_(new base(o))
626 {
627 }
628 
629 t_string::t_string(std::string&& o)
630  : val_(new base(std::move(o)))
631 {
632 }
633 
634 t_string::t_string(const std::string& o, const std::string& textdomain)
635  : val_(new base(o, textdomain))
636 {
637 }
638 
639 t_string::t_string(const std::string& s, const std::string& pl, int c, const std::string& textdomain)
640  : val_(new base(s, pl, c, textdomain))
641 {
642 }
643 
645 {
646  val_ = o.val_;
647  return *this;
648 }
649 
651 {
652  t_string o2(o);
653  swap(o2);
654  return *this;
655 }
656 
657 void t_string::add_textdomain(const std::string& name, const std::string& path)
658 {
659  LOG_CF << "Binding textdomain " << name << " to path " << path;
660 
661  // Register and (re-)bind this textdomain
662  translation::bind_textdomain(name.c_str(), path.c_str(), "UTF-8");
663 }
664 
666 {
668 }
669 
670 void swap(t_string& lhs, t_string& rhs)
671 {
672  lhs.swap(rhs);
673 }
674 
675 std::ostream& operator<<(std::ostream& stream, const t_string_base& string)
676 {
677  stream << string.str();
678  return stream;
679 }
const route_iterator begin_
Definition: move.cpp:387
std::string::const_iterator plural_end() const
Definition: tstring.cpp:206
walker(const t_string_base &string)
Definition: tstring.cpp:56
std::string::const_iterator plural_begin() const
Definition: tstring.cpp:197
Helper class for translatable strings.
Definition: tstring.hpp:27
t_string_base & operator=(const t_string_base &)
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:398
bool last_untranslatable_
Definition: tstring.hpp:122
static std::map< std::string, unsigned int > textdomain_to_id
Definition: tstring.hpp:124
std::string translated_value_
Definition: tstring.hpp:120
t_string_base & operator+=(const t_string_base &)
Definition: tstring.cpp:452
std::string to_serialized() const
Definition: tstring.cpp:375
bool operator==(const t_string_base &) const
Definition: tstring.cpp:547
std::string value_
Definition: tstring.hpp:119
std::string base_str() const
Definition: tstring.cpp:365
const std::string & value() const
Definition: tstring.hpp:113
bool empty() const
Definition: tstring.hpp:103
bool operator<(const t_string_base &string) const
Definition: tstring.cpp:562
static std::vector< std::string > id_to_textdomain
Definition: tstring.hpp:123
~t_string_base()
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:229
unsigned translation_timestamp_
Definition: tstring.hpp:121
bool translatable_
Definition: tstring.hpp:122
std::size_t hash_value() const
Definition: tstring.cpp:47
const std::string & str() const
Definition: tstring.cpp:567
static t_string_base from_serialized(const std::string &string)
Definition: tstring.cpp:340
t_string_base operator+(const t_string_base &) const
Definition: tstring.cpp:431
static void reset_translations()
Definition: tstring.cpp:665
static void add_textdomain(const std::string &name, const std::string &path)
Definition: tstring.cpp:657
void swap(t_string &other)
Definition: tstring.hpp:208
~t_string()
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:605
std::shared_ptr< const t_string_base > val_
Definition: tstring.hpp:212
t_string & operator=(const t_string &)
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:644
t_string()
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:600
int w
std::string id
Text to match against addon_info.tags()
Definition: manager.cpp:198
Standard logging facilities (interface).
static void update()
std::string path
Definition: filesystem.cpp:91
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:479
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:434
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:464
std::size_t size(std::string_view str)
Length in characters of a UTF-8 string.
Definition: unicode.cpp:85
std::string_view data
Definition: picture.cpp:178
mock_char c
static map_location::direction s
static std::string mark
Definition: tstring.cpp:70
static unsigned language_counter
Definition: tstring.cpp:36
void swap(t_string &lhs, t_string &rhs)
Implement non-member swap function for std::swap (calls t_string::swap).
Definition: tstring.cpp:670
std::ostream & operator<<(std::ostream &stream, const t_string_base &string)
Definition: tstring.cpp:675
#define LOG_CF
Definition: tstring.cpp:33
#define ERR_CF
Definition: tstring.cpp:34
static lg::log_domain log_config("config")