The Battle for Wesnoth  1.17.10+dev
gettext.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 - 2022
3  by David White <dave@whitevine.net>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 #include "gettext.hpp"
16 #include "log.hpp"
17 #include "filesystem.hpp"
18 
19 #include <algorithm>
20 #include <iomanip>
21 #include <iterator>
22 #include <fstream>
23 #include <locale>
24 #include <map>
25 #include <mutex>
26 #include <boost/locale.hpp>
27 #include <set>
28 #include <type_traits>
29 
30 #if defined(__GNUC__)
31 #pragma GCC diagnostic push
32 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
33 #endif
34 #include "spirit_po/spirit_po.hpp"
35 #if defined(__GNUC__)
36 #pragma GCC diagnostic pop
37 #endif
38 
39 #define DBG_G LOG_STREAM(debug, lg::general())
40 #define LOG_G LOG_STREAM(info, lg::general())
41 #define WRN_G LOG_STREAM(warn, lg::general())
42 #define ERR_G LOG_STREAM(err, lg::general())
43 
44 namespace bl = boost::locale;
45 namespace
46 {
/**
 * Returns the single mutex shared by the functions in this file.
 *
 * The mutex is allocated on first use and never deleted.
 */
std::mutex& get_mutex()
{
	static std::mutex* const instance = new std::mutex();
	return *instance;
}
48 
49  class default_utf8_locale_name
50  {
51  public:
52  static const std::string& name()
53  {
54  //Use pointers because we don't want it to be destructed at program end.
55  static default_utf8_locale_name* lname = new default_utf8_locale_name();
56  return lname->name_;
57  }
58  private:
59  default_utf8_locale_name()
60  : name_()
61  {
62  LOG_G << "Generating default locale";
63  try
64  {
65  //NOTE: the default_locale objects needs to live as least as long as the locale_info object. Otherwise the program will segfault.
66  std::locale default_locale = bl::generator().generate("");
67  const bl::info& locale_info = std::use_facet<bl::info>(default_locale);
68  name_ += locale_info.language();
69  if(!locale_info.country().empty())
70  name_ += "_" + locale_info.country();
71  name_ += ".UTF-8";
72  if(!locale_info.variant().empty())
73  name_ += "@" + locale_info.variant();
74  }
75  catch(const std::exception& e)
76  {
77  ERR_G << "Failed to generate default locale string. message:" << e.what();
78  }
79  LOG_G << "Finished generating default locale, default is now '" << name_ << "'";
80  }
81 
82  std::string name_;
83  };
84  class wesnoth_message_format : public bl::message_format<char>
85  {
86  public:
87  wesnoth_message_format(std::locale base, const std::set<std::string>& domains, const std::set<std::string>& paths)
88  : base_loc_(base)
89  {
90  const bl::info& inf = std::use_facet<bl::info>(base);
91  if(inf.language() == "c") {
92  return;
93  }
94  std::string lang_name_short = inf.language();
95  std::string lang_name_long = lang_name_short;
96  if(!inf.country().empty()) {
97  lang_name_long += '_';
98  lang_name_long += inf.country();
99  }
100  if(!inf.variant().empty()) {
101  lang_name_long += '@';
102  lang_name_long += inf.variant();
103  lang_name_short += '@';
104  lang_name_short += inf.variant();
105  }
106  DBG_G << "Loading po files for language " << lang_name_long;
107  for(auto& domain : domains) {
108  DBG_G << "Searching for po files for domain " << domain;
109  std::string path;
110  for(auto base_path : paths) {
111  DBG_G << "Searching in dir " << base_path;
112  if(base_path[base_path.length()-1] != '/') {
113  base_path += '/';
114  }
115  base_path += domain;
116  base_path += '/';
117  path = base_path + lang_name_long + ".po";
118  DBG_G << " Trying path " << path;
119  if(filesystem::file_exists(path)) {
120  break;
121  }
122  path = base_path + lang_name_short + ".po";
123  DBG_G << " Trying path " << path;
124  if(filesystem::file_exists(path)) {
125  break;
126  }
127  }
128  if(!filesystem::file_exists(path)) {
129  continue;
130  }
131  LOG_G << "Loading language file from " << path;
132  try {
134  po_file->exceptions(std::ios::badbit);
135  const auto& cat = spirit_po::default_catalog::from_istream(*po_file);
136  extra_messages_.emplace(get_base().domain(domain), cat);
137  } catch(const spirit_po::catalog_exception& e) {
138  // Treat any parsing error in the same way as the file not existing - just leave
139  // this domain untranslated but continue to load other domains.
140  log_po_error(lang_name_long, domain, e.what());
141  } catch(const std::ios::failure&) {
142  log_po_error(lang_name_long, domain, strerror(errno));
143  }
144  }
145  }
146 
147  static void log_po_error(const std::string& lang, const std::string& dom, const std::string& detail) {
148  ERR_G << "Error opening language file for " << lang << ", textdomain " << dom
149  << ":\n " << detail;
150  }
151 
152  const char* get(int domain_id, const char* ctx, const char* msg_id) const override
153  {
154  auto& base = get_base();
155  const char* msg = base.get(domain_id, ctx, msg_id);
156  if(msg == nullptr) {
157  auto iter = extra_messages_.find(domain_id);
158  if(iter == extra_messages_.end()) {
159  return nullptr;
160  }
161  auto& catalog = iter->second;
162  const char* lookup = ctx ? catalog.pgettext(ctx, msg_id) : catalog.gettext(msg_id);
163  if(lookup != msg_id) {
164  // (p)gettext returns the input pointer if the string was not found
165  msg = lookup;
166  }
167  }
168  return msg;
169  }
170 
171  const char* get(int domain_id, const char* ctx, const char* sid, int n) const override
172  {
173  auto& base = get_base();
174  const char* msg = base.get(domain_id, ctx, sid, n);
175  if(msg == nullptr) {
176  auto iter = extra_messages_.find(domain_id);
177  if(iter == extra_messages_.end()) {
178  return nullptr;
179  }
180  auto& catalog = iter->second;
181  const char* lookup = ctx ? catalog.npgettext(ctx, sid, sid, n) : catalog.ngettext(sid, sid, n);
182  if(lookup != sid) {
183  // n(p)gettext returns one of the input pointers if the string was not found
184  msg = lookup;
185  }
186  }
187  return msg;
188  }
189 
190  int domain(const std::string& domain) const override
191  {
192  auto& base = get_base();
193  return base.domain(domain);
194  }
195 
196  const char* convert(const char* msg, std::string& buffer) const override
197  {
198  auto& base = get_base();
199  return base.convert(msg, buffer);
200  }
201  private:
202  const bl::message_format<char>& get_base() const
203  {
204  return std::use_facet<bl::message_format<char>>(base_loc_);
205  }
206 
207  std::locale base_loc_;
208  std::map<int, spirit_po::default_catalog> extra_messages_;
209  };
210  struct translation_manager
211  {
212  translation_manager()
213  : loaded_paths_()
214  , loaded_domains_()
215  , current_language_(default_utf8_locale_name::name())
216  , generator_()
217  , current_locale_()
218  , is_dirty_(true)
219  {
220  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
221  for(const std::string& name : g_mgr.get_all_backends())
222  {
223  LOG_G << "Found boost locale backend: '" << name << "'";
224  }
225 
226  generator_.use_ansi_encoding(false);
227 #if BOOST_VERSION < 108100
228  generator_.categories(bl::message_facet | bl::information_facet | bl::collation_facet | bl::formatting_facet | bl::convert_facet);
229  generator_.characters(bl::char_facet);
230 #else
231  generator_.categories(bl::category_t::message | bl::category_t::information | bl::category_t::collation | bl::category_t::formatting | bl::category_t::convert);
232  generator_.characters(bl::char_facet_t::char_f);
233 #endif
234  // We cannot have current_locale_ be a non boost-generated locale since it might not supply
235  // the bl::info facet. As soon as we add message paths, update_locale_internal might fail,
236  // for example because of invalid .mo files. So make sure we call it at least once before adding paths/domains
237  update_locale_internal();
238  }
239 
240  void add_messages_domain(const std::string& domain)
241  {
242  if(loaded_domains_.find(domain) != loaded_domains_.end())
243  {
244  return;
245  }
246 
247  if(domain.find('/') != std::string::npos)
248  {
249  // Forward slash has a specific meaning in Boost.Locale domain
250  // names, specifying the encoding. We use UTF-8 for everything
251  // so we can't possibly support that, and odds are it's a user
252  // mistake (as in bug #23839).
253  ERR_G << "illegal textdomain name '" << domain
254  << "', skipping textdomain";
255  return;
256  }
257 
258  generator_.add_messages_domain(domain);
259  loaded_domains_.insert(domain);
260  }
261 
262  void add_messages_path(const std::string& path)
263  {
264  if(loaded_paths_.find(path) != loaded_paths_.end())
265  {
266  return;
267  }
268  generator_.add_messages_path(path);
269  loaded_paths_.insert(path);
270  }
271 
272  void set_default_messages_domain(const std::string& domain)
273  {
274  generator_.set_default_messages_domain(domain);
275  update_locale();
276  }
277 
278  void set_language(const std::string& language)
279  {
280  std::string::size_type at_pos = language.rfind('@');
281  if(language.empty())
282  {
283  current_language_ = default_utf8_locale_name::name();
284  }
285  else if(at_pos != std::string::npos)
286  {
287  current_language_ = language.substr(0, at_pos) + ".UTF-8" + language.substr(at_pos);
288  }
289  else
290  {
291  current_language_ = language + ".UTF-8";
292  }
293  update_locale();
294  }
295 
296  void update_locale()
297  {
298  is_dirty_ = true;
299  }
300 
301  /* This is called three times: once during the constructor, before any .mo files' paths have
302  * been added to the generator, once after adding the mainline .mo files, and once more
303  * after adding all add-ons. Corrupt .mo files might make the called functions throw, and so
304  * this might fail as soon as we've added message paths.
305  *
306  * Throwing exceptions from here is (in 1.15.18) going to end up in wesnoth.cpp's "Caught
307  * general ... exception" handler, so the effect of letting an exception escape this
308  * function is an immediate exit. Given that, it doesn't seem useful to change the assert
309  * to a throw, at least not within the 1.16 branch.
310  *
311  * Postcondition: current_locale_ is a valid boost-generated locale, supplying the bl::info
312  * facet. If there are corrupt .mo files, the locale might have no translations loaded.
313  */
314  void update_locale_internal()
315  {
316  try
317  {
318  LOG_G << "attempting to generate locale by name '" << current_language_ << "'";
319  current_locale_ = generator_.generate(current_language_);
320  current_locale_ = std::locale(current_locale_, new wesnoth_message_format(current_locale_, loaded_domains_, loaded_paths_));
321  const bl::info& info = std::use_facet<bl::info>(current_locale_);
322  LOG_G << "updated locale to '" << current_language_ << "' locale is now '" << current_locale_.name() << "' ( "
323  << "name='" << info.name()
324  << "' country='" << info.country()
325  << "' language='" << info.language()
326  << "' encoding='" << info.encoding()
327  << "' variant='" << info.variant() << "')";
328  }
329  catch(const bl::conv::conversion_error&)
330  {
331  assert(std::has_facet<bl::info>(current_locale_));
332  const bl::info& info = std::use_facet<bl::info>(current_locale_);
333  ERR_G << "Failed to update locale due to conversion error, locale is now: "
334  << "name='" << info.name()
335  << "' country='" << info.country()
336  << "' language='" << info.language()
337  << "' encoding='" << info.encoding()
338  << "' variant='" << info.variant()
339  << "'";
340  }
341  catch(const std::runtime_error&)
342  {
343  assert(std::has_facet<bl::info>(current_locale_));
344  const bl::info& info = std::use_facet<bl::info>(current_locale_);
345  ERR_G << "Failed to update locale due to runtime error, locale is now: "
346  << "name='" << info.name()
347  << "' country='" << info.country()
348  << "' language='" << info.language()
349  << "' encoding='" << info.encoding()
350  << "' variant='" << info.variant()
351  << "'";
352  }
353  is_dirty_ = false;
354  }
355 
356  std::string debug_description()
357  {
358  std::stringstream res;
359  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
360  for(const std::string& name : g_mgr.get_all_backends())
361  {
362  res << "has backend: '" << name << "',";
363  }
364  if(std::has_facet<bl::info>(current_locale_)) {
365  const bl::info& info = std::use_facet<bl::info>(current_locale_);
366  res << " locale: (name='" << info.name()
367  << "' country='" << info.country()
368  << "' language='" << info.language()
369  << "' encoding='" << info.encoding()
370  << "' variant='" << info.variant()
371  << "'),";
372  }
373  if(std::has_facet<bl::collator<char>>(current_locale_)) {
374  res << "has bl::collator<char> facet, ";
375  }
376 #if BOOST_VERSION < 108100
377  res << "generator categories='" << generator_.categories() << "'";
378 #else
379  res << "generator categories='" <<
380  static_cast<std::underlying_type<bl::category_t>::type>(generator_.categories()) << "'";
381 #endif
382  return res.str();
383  }
384 
385  const std::locale& get_locale()
386  {
387  if(is_dirty_)
388  {
389  update_locale_internal();
390  }
391  return current_locale_;
392  }
393 
394  private:
395  std::set<std::string> loaded_paths_;
396  std::set<std::string> loaded_domains_;
397  std::string current_language_;
398  bl::generator generator_;
399  std::locale current_locale_;
400  bool is_dirty_;
401  };
402 
403  translation_manager& get_manager()
404  {
405  static translation_manager* mng = new translation_manager();
406  return *mng;
407  }
408 
// Returns a copy of str in which the ASCII letters 'A'-'Z' are replaced by
// their lowercase counterparts. Every other char is copied unchanged, so
// non-ASCII (Unicode) letters are not converted.
std::string ascii_to_lowercase(const std::string& str)
{
	std::string lowered(str);
	for(char& ch : lowered) {
		if(ch >= 'A' && ch <= 'Z') {
			// Setting bit 0x20 maps an uppercase ASCII letter to its lowercase form.
			ch = static_cast<char>(ch | 0x20);
		}
	}
	return lowered;
}
420 }
421 
422 namespace translation
423 {
424 
425 std::string dgettext(const char* domain, const char* msgid)
426 {
427  std::scoped_lock lock(get_mutex());
428  return bl::dgettext(domain, msgid, get_manager().get_locale());
429 }
430 std::string egettext(char const *msgid)
431 {
432  std::scoped_lock lock(get_mutex());
433  return msgid[0] == '\0' ? msgid : bl::gettext(msgid, get_manager().get_locale());
434 }
435 
436 std::string dsgettext (const char * domainname, const char *msgid)
437 {
438  std::string msgval = dgettext (domainname, msgid);
439  if (msgval == msgid) {
440  const char* firsthat = std::strchr (msgid, '^');
441  if (firsthat == nullptr)
442  msgval = msgid;
443  else
444  msgval = firsthat + 1;
445  }
446  return msgval;
447 }
448 
449 namespace {
450 
/**
 * Tells whether str is equal to one of the two source strings.
 *
 * @return singular if str equals it, else plural if str equals that, else nullptr.
 */
inline const char* is_unlocalized_string2(const std::string& str, const char* singular, const char* plural)
{
	if(str.compare(singular) == 0) {
		return singular;
	}
	return str.compare(plural) == 0 ? plural : nullptr;
}
463 
464 }
465 
466 std::string dsngettext (const char * domainname, const char *singular, const char *plural, int n)
467 {
468  //TODO: only the next line needs to be in the lock.
469  std::scoped_lock lock(get_mutex());
470  std::string msgval = bl::dngettext(domainname, singular, plural, n, get_manager().get_locale());
471  auto original = is_unlocalized_string2(msgval, singular, plural);
472  if (original) {
473  const char* firsthat = std::strchr (original, '^');
474  if (firsthat == nullptr)
475  msgval = original;
476  else
477  msgval = firsthat + 1;
478  }
479  return msgval;
480 }
481 
482 void bind_textdomain(const char* domain, const char* directory, const char* /*encoding*/)
483 {
484  LOG_G << "adding textdomain '" << domain << "' in directory '" << directory << "'";
485  std::scoped_lock lock(get_mutex());
486  get_manager().add_messages_domain(domain);
487  get_manager().add_messages_path(directory);
488  get_manager().update_locale();
489 }
490 
491 void set_default_textdomain(const char* domain)
492 {
493  LOG_G << "set_default_textdomain: '" << domain << "'";
494  std::scoped_lock lock(get_mutex());
495  get_manager().set_default_messages_domain(domain);
496 }
497 
498 
499 void set_language(const std::string& language, const std::vector<std::string>* /*alternates*/)
500 {
501  // why should we need alternates? which languages we support should only be related
502  // to which languages we ship with and not which the os supports
503  LOG_G << "setting language to '" << language << "'";
504  std::scoped_lock lock(get_mutex());
505  get_manager().set_language(language);
506 }
507 
508 int compare(const std::string& s1, const std::string& s2)
509 {
510  std::scoped_lock lock(get_mutex());
511 
512  try {
513  return std::use_facet<std::collate<char>>(get_manager().get_locale()).compare(s1.c_str(), s1.c_str() + s1.size(), s2.c_str(), s2.c_str() + s2.size());
514  } catch(const std::bad_cast&) {
515  static bool bad_cast_once = false;
516 
517  if(!bad_cast_once) {
518  ERR_G << "locale set-up for compare() is broken, falling back to std::string::compare()";
519  bad_cast_once = true;
520  }
521 
522  return s1.compare(s2);
523  }
524 }
525 
526 int icompare(const std::string& s1, const std::string& s2)
527 {
528  // todo: maybe we should replace this preprocessor check with a std::has_facet<bl::collator<char>> check?
529 #ifdef __APPLE__
530  // https://github.com/wesnoth/wesnoth/issues/2094
531  return compare(ascii_to_lowercase(s1), ascii_to_lowercase(s2));
532 #else
533  std::scoped_lock lock(get_mutex());
534 
535  try {
536  return std::use_facet<bl::collator<char>>(get_manager().get_locale()).compare(
537  bl::collator_base::secondary, s1, s2);
538  } catch(const std::bad_cast&) {
539  static bool bad_cast_once = false;
540 
541  if(!bad_cast_once) {
542  ERR_G << "locale set-up for icompare() is broken, falling back to std::string::compare()";
543 
544  try { //just to be safe.
545  ERR_G << get_manager().debug_description();
546  } catch (const std::exception& e) {
547  ERR_G << e.what();
548  }
549  bad_cast_once = true;
550  }
551 
552  // Let's convert at least ASCII letters to lowercase to get a somewhat case-insensitive comparison.
553  return ascii_to_lowercase(s1).compare(ascii_to_lowercase(s2));
554  }
555 #endif
556 }
557 
558 std::string strftime(const std::string& format, const std::tm* time)
559 {
560  std::basic_ostringstream<char> dummy;
561  std::scoped_lock lock(get_mutex());
562  dummy.imbue(get_manager().get_locale()); // TODO: Calling imbue() with hard-coded locale appears to work with put_time in glibc, but not with get_locale()...
563  // Revert to use of boost (from 1.14) instead of std::put_time() because the latter does not appear to handle locale properly in Linux
564  dummy << bl::as::ftime(format) << mktime(const_cast<std::tm*>(time));
565 
566  return dummy.str();
567 }
568 
569 bool ci_search(const std::string& s1, const std::string& s2)
570 {
571  std::scoped_lock lock(get_mutex());
572  const std::locale& locale = get_manager().get_locale();
573 
574  std::string ls1 = bl::to_lower(s1, locale);
575  std::string ls2 = bl::to_lower(s2, locale);
576 
577  return std::search(ls1.begin(), ls1.end(),
578  ls2.begin(), ls2.end()) != ls1.end();
579 }
580 
582 {
583  std::scoped_lock lock(get_mutex());
584  return std::use_facet<boost::locale::info>(get_manager().get_locale());
585 }
586 }
static log_domain dom("general")
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:482
static domain_map * domains
Definition: log.cpp:165
logger & info()
Definition: log.cpp:182
int compare(const std::string &s1, const std::string &s2)
Case-sensitive lexicographical comparison.
Definition: gettext.cpp:508
static bool file_exists(const bfs::path &fpath)
Definition: filesystem.cpp:264
const language_def & get_locale()
Definition: language.cpp:329
filesystem::scoped_istream istream_file(const std::string &fname, bool treat_failure_as_error)
void set_language(const std::string &language, const std::vector< std::string > *)
Definition: gettext.cpp:499
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:466
std::string dgettext(const char *domain, const char *msgid)
Definition: gettext.cpp:425
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:110
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:436
std::string strftime(const std::string &format, const std::tm *time)
Definition: gettext.cpp:558
#define LOG_G
Definition: gettext.cpp:40
std::unique_ptr< std::istream > scoped_istream
Definition: filesystem.hpp:39
std::string path
Definition: game_config.cpp:39
void set_default_textdomain(const char *domain)
Definition: gettext.cpp:491
std::string egettext(char const *msgid)
Definition: gettext.cpp:430
bool ci_search(const std::string &s1, const std::string &s2)
Definition: gettext.cpp:569
static std::string gettext(const char *str)
Definition: gettext.hpp:60
#define ERR_G
Definition: gettext.cpp:42
std::string language()
Definition: general.cpp:538
Declarations for File-IO.
static catalog from_istream(std::istream &is, warning_channel_type w=warning_channel_type())
Definition: catalog.hpp:269
rng * generator
This generator is automatically synced during synced context.
Definition: random.cpp:61
int icompare(const std::string &s1, const std::string &s2)
Case-insensitive lexicographical comparison.
Definition: gettext.cpp:526
Standard logging facilities (interface).
#define DBG_G
Definition: gettext.cpp:39
#define e
mock_char c
static map_location::DIRECTION n
const boost::locale::info & get_effective_locale_info()
A facet that holds general information about the effective locale.
Definition: gettext.cpp:581