The Battle for Wesnoth  1.17.21+dev
tstring.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2004 - 2023
3  by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Copyright (C) 2004 by Philippe Plantier <ayin@anathas.org>
5  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
6 
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; either version 2 of the License, or
10  (at your option) any later version.
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY.
13 
14  See the COPYING file for more details.
15 */
16 
17 /**
18  * @file
19  * Routines for translatable strings.
20  */
21 
22 #include "tstring.hpp"
23 
24 #include "gettext.hpp"
25 #include "log.hpp"
26 
27 #include <boost/multi_index/hashed_index.hpp>
28 
29 #include <map>
30 #include <mutex>
31 #include <vector>
32 
33 static lg::log_domain log_config("config");
34 #define LOG_CF LOG_STREAM(info, log_config)
35 #define ERR_CF LOG_STREAM(err, log_config)
36 
37 static unsigned language_counter = 0;
38 
namespace
{
/**
 * Control bytes used inside an encoded t_string_base value.
 *
 * Layouts, as parsed by the walker:
 *  - [TRANSLATABLE_PART]textdomain[TEXTDOMAIN_SEPARATOR]msgid
 *  - [ID_TRANSLATABLE_PART][2-byte textdomain ID]msgid
 *  - [UNTRANSLATABLE_PART]text
 *  - [PLURAL_PART][4-byte count]msgid_plural (follows a translatable msgid)
 */
constexpr char TRANSLATABLE_PART = '\x01';
constexpr char UNTRANSLATABLE_PART = '\x02';
constexpr char TEXTDOMAIN_SEPARATOR = '\x03';
constexpr char ID_TRANSLATABLE_PART = '\x04';
constexpr char PLURAL_PART = '\x05';
} // end anonymous namespace
47 
48 std::size_t t_string_base::hash_value() const
49 {
50  std::size_t seed = 0;
51  boost::hash_combine(seed, value_);
52  boost::hash_combine(seed, translatable_);
53  boost::hash_combine(seed, last_untranslatable_);
54  return seed;
55 }
56 
58  : string_(string.value_)
59  , begin_(0)
60  , end_(string_.size())
61  , textdomain_()
62  , translatable_(false)
63  , countable_(false)
64  , count_(0)
65 {
66  if(string.translatable_) {
67  update();
68  }
69 }
70 
71 static std::string mark = std::string(TRANSLATABLE_PART, 1) + UNTRANSLATABLE_PART + ID_TRANSLATABLE_PART + PLURAL_PART;
72 
74 {
75  unsigned int id;
76 
77  if(begin_ == string_.size()) {
78  return;
79  }
80 
81  switch(string_[begin_]) {
82  case TRANSLATABLE_PART: {
83  // Format: [TRANSLATABLE_PART]textdomain[TEXTDOMAIN_SEPARATOR]msgid[...]
84  std::string::size_type textdomain_end = string_.find(TEXTDOMAIN_SEPARATOR, begin_ + 1);
85 
86  if(textdomain_end == std::string::npos || textdomain_end >= string_.size() - 1) {
87  ERR_CF << "Error: invalid string: " << string_;
88  begin_ = string_.size();
89  return;
90  }
91 
92  end_ = string_.find_first_of(mark, textdomain_end + 1);
93  if(end_ == std::string::npos) {
94  end_ = string_.size();
95  }
96 
97  textdomain_ = std::string(string_, begin_ + 1, textdomain_end - begin_ - 1);
98  translatable_ = true;
99  begin_ = textdomain_end + 1;
100 
101  break;
102  }
103  case ID_TRANSLATABLE_PART:
104  // Format: [ID_TRANSLATABLE_PART][2-byte textdomain ID]msgid[...]
105  if(begin_ + 3 >= string_.size()) {
106  ERR_CF << "Error: invalid string: " << string_;
107  begin_ = string_.size();
108  return;
109  }
110 
111  end_ = string_.find_first_of(mark, begin_ + 3);
112  if(end_ == std::string::npos) {
113  end_ = string_.size();
114  }
115 
116  id = static_cast<unsigned char>(string_[begin_ + 1]) + static_cast<unsigned char>(string_[begin_ + 2]) * 256;
117  if(id >= id_to_textdomain.size()) {
118  ERR_CF << "Error: invalid string: " << string_;
119  begin_ = string_.size();
120  return;
121  }
122 
123  textdomain_ = id_to_textdomain[id];
124  begin_ += 3;
125  translatable_ = true;
126 
127  break;
128 
129  case UNTRANSLATABLE_PART:
130  end_ = string_.find_first_of(mark, begin_ + 1);
131  if(end_ == std::string::npos) {
132  end_ = string_.size();
133  }
134 
135  if(end_ <= begin_ + 1) {
136  ERR_CF << "Error: invalid string: " << string_;
137  begin_ = string_.size();
138  return;
139  }
140 
141  translatable_ = false;
142  textdomain_ = "";
143  begin_ += 1;
144  break;
145 
146  case PLURAL_PART:
147  begin_ = string_.find_first_of(mark, end_ + 5);
148  if(begin_ == std::string::npos) {
149  begin_ = string_.size();
150  }
151 
152  if(string_[begin_] == PLURAL_PART) {
153  ERR_CF << "Error: invalid string: " << string_;
154  begin_ = string_.size();
155  return;
156  }
157 
158  update();
159  break;
160 
161  default:
162  end_ = string_.size();
163  translatable_ = false;
164  textdomain_ = "";
165  break;
166  }
167 
168  if(translatable_ && string_[end_] == PLURAL_PART) {
169  // Format: [PLURAL_PART][4-byte count]msgid_plural[...]
170  if(end_ + 5 >= string_.size()) {
171  ERR_CF << "Error: invalid string: " << string_;
172  begin_ = string_.size();
173  return;
174  }
175 
176  std::string::size_type real_end = string_.find_first_of(mark, end_ + 6);
177  if(real_end < string_.size() && string_[real_end] == PLURAL_PART) {
178  ERR_CF << "Error: invalid string: " << string_;
179  begin_ = string_.size();
180  return;
181  }
182 
183  countable_ = true;
184 
185  union {
186  int32_t count;
187  char data[4];
188  } cvt;
189 
190  std::copy_n(string_.data() + end_ + 1, 4, cvt.data);
191  count_ = cvt.count;
192  } else {
193  countable_ = false;
194  count_ = 0;
195  }
196 }
197 
198 std::string::const_iterator t_string_base::walker::plural_begin() const
199 {
200  if(!countable_) {
201  return begin();
202  }
203 
204  return end() + 5;
205 }
206 
207 std::string::const_iterator t_string_base::walker::plural_end() const
208 {
209  if(!countable_) {
210  return end();
211  }
212 
213  std::string::size_type pl_end = string_.find_first_of(mark, end_ + 5);
214  if(pl_end == std::string::npos) {
215  pl_end = string_.size();
216  }
217 
218  return string_.begin() + pl_end;
219 }
220 
222  : value_()
225  , translatable_(false)
226  , last_untranslatable_(false)
227 {
228 }
229 
231 {
232 }
233 
235  : value_(string.value_)
236  , translated_value_(string.translated_value_)
237  , translation_timestamp_(string.translation_timestamp_)
238  , translatable_(string.translatable_)
239  , last_untranslatable_(string.last_untranslatable_)
240 {
241 }
242 
243 t_string_base::t_string_base(const std::string& string)
244  : value_(string)
245  , translated_value_()
246  , translation_timestamp_(0)
247  , translatable_(false)
248  , last_untranslatable_(false)
249 {
250 }
251 
252 t_string_base::t_string_base(const std::string& string, const std::string& textdomain)
253  : value_(1, ID_TRANSLATABLE_PART)
254  , translated_value_()
255  , translation_timestamp_(0)
256  , translatable_(true)
257  , last_untranslatable_(false)
258 {
259  if(string.empty()) {
260  value_.clear();
261  translatable_ = false;
262  return;
263  }
264 
265  std::map<std::string, unsigned int>::const_iterator idi = textdomain_to_id.find(textdomain);
266  unsigned int id;
267 
268  if(idi == textdomain_to_id.end()) {
269  id = id_to_textdomain.size();
270  textdomain_to_id[textdomain] = id;
271  id_to_textdomain.push_back(textdomain);
272  } else {
273  id = idi->second;
274  }
275 
276  value_ += static_cast<char>(id & 0xff);
277  value_ += static_cast<char>(id >> 8);
278  value_ += string;
279 }
280 
281 t_string_base::t_string_base(const std::string& sing, const std::string& pl, int count, const std::string& textdomain)
282  : value_(1, ID_TRANSLATABLE_PART)
283  , translated_value_()
284  , translation_timestamp_(0)
285  , translatable_(true)
286  , last_untranslatable_(false)
287 {
288  if(sing.empty() && pl.empty()) {
289  value_.clear();
290  translatable_ = false;
291  return;
292  }
293 
294  std::map<std::string, unsigned int>::const_iterator idi = textdomain_to_id.find(textdomain);
295  unsigned int id;
296 
297  if(idi == textdomain_to_id.end()) {
298  id = id_to_textdomain.size();
299  textdomain_to_id[textdomain] = id;
300  id_to_textdomain.push_back(textdomain);
301  } else {
302  id = idi->second;
303  }
304 
305  value_ += static_cast<char>(id & 0xff);
306  value_ += static_cast<char>(id >> 8);
307  value_ += sing;
308  value_ += PLURAL_PART;
309 
310  union {
311  int32_t count;
312  char data[4];
313  } cvt;
314 
315  cvt.count = count;
316  for(char c : cvt.data) {
317  value_ += c;
318  }
319 
320  value_ += pl;
321 }
322 
323 t_string_base::t_string_base(const char* string)
324  : value_(string)
325  , translated_value_()
326  , translation_timestamp_(0)
327  , translatable_(false)
328  , last_untranslatable_(false)
329 {
330 }
331 
333 {
334  t_string_base orig(string);
335 
336  if(!string.empty() && (string[0] == TRANSLATABLE_PART || string[0] == UNTRANSLATABLE_PART)) {
337  orig.translatable_ = true;
338  } else {
339  orig.translatable_ = false;
340  }
341 
342  t_string_base res;
343 
344  for(walker w(orig); !w.eos(); w.next()) {
345  std::string substr(w.begin(), w.end());
346 
347  if(w.translatable()) {
348  res += t_string_base(substr, w.textdomain());
349  } else {
350  res += substr;
351  }
352  }
353 
354  return res;
355 }
356 
357 std::string t_string_base::base_str() const
358 {
359  std::string res;
360  for(walker w(*this); !w.eos(); w.next()) {
361  res += std::string(w.begin(), w.end());
362  }
363 
364  return res;
365 }
366 
367 std::string t_string_base::to_serialized() const
368 {
369  t_string_base res;
370 
371  for(walker w(*this); !w.eos(); w.next()) {
372  t_string_base chunk;
373 
374  std::string substr(w.begin(), w.end());
375  if(w.translatable()) {
376  chunk.translatable_ = true;
377  chunk.last_untranslatable_ = false;
378  chunk.value_ = TRANSLATABLE_PART + w.textdomain() + TEXTDOMAIN_SEPARATOR + substr;
379  } else {
380  chunk.translatable_ = false;
381  chunk.value_ = substr;
382  }
383 
384  res += chunk;
385  }
386 
387  return res.value();
388 }
389 
391 {
392  value_ = string.value_;
393  translated_value_ = string.translated_value_;
394  translation_timestamp_ = string.translation_timestamp_;
395  translatable_ = string.translatable_;
396  last_untranslatable_ = string.last_untranslatable_;
397 
398  return *this;
399 }
400 
401 t_string_base& t_string_base::operator=(const std::string& string)
402 {
403  value_ = string;
404  translated_value_ = "";
406  translatable_ = false;
407  last_untranslatable_ = false;
408 
409  return *this;
410 }
411 
413 {
414  value_ = string;
415  translated_value_ = "";
417  translatable_ = false;
418  last_untranslatable_ = false;
419 
420  return *this;
421 }
422 
424 {
425  t_string_base res(*this);
426  res += string;
427  return res;
428 }
429 
430 t_string_base t_string_base::operator+(const std::string& string) const
431 {
432  t_string_base res(*this);
433  res += string;
434  return res;
435 }
436 
437 t_string_base t_string_base::operator+(const char* string) const
438 {
439  t_string_base res(*this);
440  res += string;
441  return res;
442 }
443 
445 {
446  if(string.value_.empty()) {
447  return *this;
448  }
449 
450  if(value_.empty()) {
451  *this = string;
452  return *this;
453  }
454 
455  if(translatable_ || string.translatable_) {
456  if(!translatable_) {
457  value_ = UNTRANSLATABLE_PART + value_;
458  translatable_ = true;
459  last_untranslatable_ = true;
460  } else {
461  translated_value_ = "";
462  }
463 
464  if(string.translatable_) {
465  if(last_untranslatable_ && string.value_[0] == UNTRANSLATABLE_PART) {
466  value_.append(string.value_.begin() + 1, string.value_.end());
467  } else {
468  value_ += string.value_;
469  }
470 
471  last_untranslatable_ = string.last_untranslatable_;
472  } else {
473  if(!last_untranslatable_) {
474  value_ += UNTRANSLATABLE_PART;
475  last_untranslatable_ = true;
476  }
477 
478  value_ += string.value_;
479  }
480  } else {
481  value_ += string.value_;
482  }
483 
484  return *this;
485 }
486 
487 t_string_base& t_string_base::operator+=(const std::string& string)
488 {
489  if(string.empty()) {
490  return *this;
491  }
492 
493  if(value_.empty()) {
494  *this = string;
495  return *this;
496  }
497 
498  if(translatable_) {
499  if(!last_untranslatable_) {
500  value_ += UNTRANSLATABLE_PART;
501  last_untranslatable_ = true;
502  }
503 
504  value_ += string;
505  translated_value_ = "";
506  } else {
507  value_ += string;
508  }
509 
510  return *this;
511 }
512 
514 {
515  if(string[0] == 0) {
516  return *this;
517  }
518 
519  if(value_.empty()) {
520  *this = string;
521  return *this;
522  }
523 
524  if(translatable_) {
525  if(!last_untranslatable_) {
526  value_ += UNTRANSLATABLE_PART;
527  last_untranslatable_ = true;
528  }
529 
530  value_ += string;
531  translated_value_ = "";
532  } else {
533  value_ += string;
534  }
535 
536  return *this;
537 }
538 
540 {
541  return that.translatable_ == translatable_ && that.value_ == value_;
542 }
543 
544 bool t_string_base::operator==(const std::string& that) const
545 {
546  return !translatable_ && value_ == that;
547 }
548 
549 bool t_string_base::operator==(const char* that) const
550 {
551  return !translatable_ && value_ == that;
552 }
553 
555 {
556  return value_ < that.value_;
557 }
558 
559 const std::string& t_string_base::str() const
560 {
561  if(!translatable_) {
562  return value_;
563  }
564 
566  return translated_value_;
567  }
568 
569  translated_value_.clear();
570 
571  for(walker w(*this); !w.eos(); w.next()) {
572  std::string part(w.begin(), w.end());
573 
574  if(w.translatable()) {
575  if(w.countable()) {
576  std::string plural(w.plural_begin(), w.plural_end());
578  translation::dsngettext(w.textdomain().c_str(), part.c_str(), plural.c_str(), w.count());
579  } else {
581  translation::dsgettext(w.textdomain().c_str(), part.c_str());
582  }
583  } else {
584  translated_value_ += part;
585  }
586  }
587 
589  return translated_value_;
590 }
591 
593  : val_(new base())
594 {
595 }
596 
598 {
599 }
600 
602  : val_(o.val_)
603 {
604 }
605 
607  : val_(new base(o))
608 {
609 }
610 
611 t_string::t_string(const char* o)
612  : val_(new base(o))
613 {
614 }
615 
616 t_string::t_string(const std::string& o)
617  : val_(new base(o))
618 {
619 }
620 
621 t_string::t_string(const std::string& o, const std::string& textdomain)
622  : val_(new base(o, textdomain))
623 {
624 }
625 
626 t_string::t_string(const std::string& s, const std::string& pl, int c, const std::string& textdomain)
627  : val_(new base(s, pl, c, textdomain))
628 {
629 }
630 
632 {
633  val_ = o.val_;
634  return *this;
635 }
636 
638 {
639  t_string o2(o);
640  swap(o2);
641  return *this;
642 }
643 
644 void t_string::add_textdomain(const std::string& name, const std::string& path)
645 {
646  LOG_CF << "Binding textdomain " << name << " to path " << path;
647 
648  // Register and (re-)bind this textdomain
649  translation::bind_textdomain(name.c_str(), path.c_str(), "UTF-8");
650 }
651 
653 {
655 }
656 
657 void swap(t_string& lhs, t_string& rhs)
658 {
659  lhs.swap(rhs);
660 }
661 
662 std::ostream& operator<<(std::ostream& stream, const t_string_base& string)
663 {
664  stream << string.str();
665  return stream;
666 }
const route_iterator begin_
Definition: move.cpp:298
std::string::const_iterator plural_end() const
Definition: tstring.cpp:207
walker(const t_string_base &string)
Definition: tstring.cpp:57
std::string::const_iterator plural_begin() const
Definition: tstring.cpp:198
t_string_base & operator=(const t_string_base &)
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:390
bool last_untranslatable_
Definition: tstring.hpp:120
static std::map< std::string, unsigned int > textdomain_to_id
Definition: tstring.hpp:122
std::string translated_value_
Definition: tstring.hpp:118
t_string_base & operator+=(const t_string_base &)
Definition: tstring.cpp:444
std::string to_serialized() const
Definition: tstring.cpp:367
bool operator==(const t_string_base &) const
Definition: tstring.cpp:539
std::string value_
Definition: tstring.hpp:117
std::string base_str() const
Definition: tstring.cpp:357
const std::string & value() const
Definition: tstring.hpp:111
bool empty() const
Definition: tstring.hpp:101
bool operator<(const t_string_base &string) const
Definition: tstring.cpp:554
static std::vector< std::string > id_to_textdomain
Definition: tstring.hpp:121
~t_string_base()
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:230
unsigned translation_timestamp_
Definition: tstring.hpp:119
bool translatable_
Definition: tstring.hpp:120
std::size_t hash_value() const
Definition: tstring.cpp:48
const std::string & str() const
Definition: tstring.cpp:559
static t_string_base from_serialized(const std::string &string)
Definition: tstring.cpp:332
t_string_base operator+(const t_string_base &) const
Definition: tstring.cpp:423
static void reset_translations()
Definition: tstring.cpp:652
static void add_textdomain(const std::string &name, const std::string &path)
Definition: tstring.cpp:644
void swap(t_string &other)
Definition: tstring.hpp:201
~t_string()
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:597
std::shared_ptr< const t_string_base > val_
Definition: tstring.hpp:205
t_string & operator=(const t_string &)
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:631
t_string()
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:592
int w
std::string id
Text to match against addon_info.tags()
Definition: manager.cpp:215
Standard logging facilities (interface).
static void update()
std::string path
Definition: filesystem.cpp:86
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:481
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:436
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:466
std::size_t size(const std::string &str)
Length in characters of a UTF-8 string.
Definition: unicode.cpp:87
std::string_view data
Definition: picture.cpp:199
mock_char c
static map_location::DIRECTION s
static std::string mark
Definition: tstring.cpp:71
static unsigned language_counter
Definition: tstring.cpp:37
void swap(t_string &lhs, t_string &rhs)
Implement non-member swap function for std::swap (calls t_string::swap).
Definition: tstring.cpp:657
std::ostream & operator<<(std::ostream &stream, const t_string_base &string)
Definition: tstring.cpp:662
#define LOG_CF
Definition: tstring.cpp:34
#define ERR_CF
Definition: tstring.cpp:35
static lg::log_domain log_config("config")