The Battle for Wesnoth  1.17.0-dev
gettext.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 - 2021
3  by David White <dave@whitevine.net>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 #include "gettext.hpp"
16 #include "log.hpp"
17 #include "filesystem.hpp"
18 
19 #include <algorithm>
20 #include <iomanip>
21 #include <iostream>
22 #include <iterator>
23 #include <fstream>
24 #include <locale>
25 #include <map>
26 #include <mutex>
27 #include <boost/locale.hpp>
28 #include <set>
29 
30 #if defined(__GNUC__)
31 #pragma GCC diagnostic push
32 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
33 #endif
34 #include "spirit_po/spirit_po.hpp"
35 #if defined(__GNUC__)
36 #pragma GCC diagnostic pop
37 #endif
38 
39 #define DBG_G LOG_STREAM(debug, lg::general())
40 #define LOG_G LOG_STREAM(info, lg::general())
41 #define WRN_G LOG_STREAM(warn, lg::general())
42 #define ERR_G LOG_STREAM(err, lg::general())
43 
44 namespace bl = boost::locale;
45 namespace
46 {
/**
 * Returns the single mutex guarding the translation manager.
 *
 * The mutex is heap-allocated on first use and never deleted, so it is never
 * destroyed during static destruction at program exit.
 */
std::mutex& get_mutex()
{
	static std::mutex* const instance = new std::mutex;
	return *instance;
}
48 
49  class default_utf8_locale_name
50  {
51  public:
52  static const std::string& name()
53  {
54  //Use pointers because we don't want it to be destructed at program end.
55  static default_utf8_locale_name* lname = new default_utf8_locale_name();
56  return lname->name_;
57  }
58  private:
59  default_utf8_locale_name()
60  : name_()
61  {
62  LOG_G << "Generating default locale\n";
63  try
64  {
65  //NOTE: the default_locale objects needs to live as least as long as the locale_info object. Otherwise the program will segfault.
66  std::locale default_locale = bl::generator().generate("");
67  const bl::info& locale_info = std::use_facet<bl::info>(default_locale);
68  name_ += locale_info.language();
69  if(!locale_info.country().empty())
70  name_ += "_" + locale_info.country();
71  name_ += ".UTF-8";
72  if(!locale_info.variant().empty())
73  name_ += "@" + locale_info.variant();
74  }
75  catch(const std::exception& e)
76  {
77  ERR_G << "Failed to generate default locale string. message:" << e.what() << std::endl;
78  }
79  LOG_G << "Finished generating default locale, default is now '" << name_ << "'\n";
80  }
81 
82  std::string name_;
83  };
84  class wesnoth_message_format : public bl::message_format<char>
85  {
86  public:
87  wesnoth_message_format(std::locale base, const std::set<std::string>& domains, const std::set<std::string>& paths)
88  : base_loc_(base)
89  {
90  const bl::info& inf = std::use_facet<bl::info>(base);
91  if(inf.language() == "c") {
92  return;
93  }
94  std::string lang_name_short = inf.language();
95  std::string lang_name_long = lang_name_short;
96  if(!inf.country().empty()) {
97  lang_name_long += '_';
98  lang_name_long += inf.country();
99  }
100  if(!inf.variant().empty()) {
101  lang_name_long += '@';
102  lang_name_long += inf.variant();
103  lang_name_short += '@';
104  lang_name_short += inf.variant();
105  }
106  DBG_G << "Loading po files for language " << lang_name_long << '\n';
107  for(auto& domain : domains) {
108  DBG_G << "Searching for po files for domain " << domain << '\n';
109  std::string path;
110  for(auto base_path : paths) {
111  DBG_G << "Searching in dir " << base_path << '\n';
112  if(base_path[base_path.length()-1] != '/') {
113  base_path += '/';
114  }
115  base_path += domain;
116  base_path += '/';
117  path = base_path + lang_name_long + ".po";
118  DBG_G << " Trying path " << path << '\n';
119  if(filesystem::file_exists(path)) {
120  break;
121  }
122  path = base_path + lang_name_short + ".po";
123  DBG_G << " Trying path " << path << '\n';
124  if(filesystem::file_exists(path)) {
125  break;
126  }
127  }
128  if(!filesystem::file_exists(path)) {
129  continue;
130  }
131  LOG_G << "Loading language file from " << path << '\n';
132  try {
134  po_file->exceptions(std::ios::badbit);
135  const auto& cat = spirit_po::default_catalog::from_istream(*po_file);
136  extra_messages_.emplace(get_base().domain(domain), cat);
137  } catch(const spirit_po::catalog_exception& e) {
138  // Treat any parsing error in the same way as the file not existing - just leave
139  // this domain untranslated but continue to load other domains.
140  log_po_error(lang_name_long, domain, e.what());
141  } catch(const std::ios::failure&) {
142  log_po_error(lang_name_long, domain, strerror(errno));
143  }
144  }
145  }
146 
147  static void log_po_error(const std::string& lang, const std::string& dom, const std::string& detail) {
148  ERR_G << "Error opening language file for " << lang << ", textdomain " << dom
149  << ":\n " << detail << '\n' << std::flush;
150  }
151 
152  const char* get(int domain_id, const char* ctx, const char* msg_id) const override
153  {
154  auto& base = get_base();
155  const char* msg = base.get(domain_id, ctx, msg_id);
156  if(msg == nullptr) {
157  auto iter = extra_messages_.find(domain_id);
158  if(iter == extra_messages_.end()) {
159  return nullptr;
160  }
161  auto& catalog = iter->second;
162  const char* lookup = ctx ? catalog.pgettext(ctx, msg_id) : catalog.gettext(msg_id);
163  if(lookup != msg_id) {
164  // (p)gettext returns the input pointer if the string was not found
165  msg = lookup;
166  }
167  }
168  return msg;
169  }
170 
171  const char* get(int domain_id, const char* ctx, const char* sid, int n) const override
172  {
173  auto& base = get_base();
174  const char* msg = base.get(domain_id, ctx, sid, n);
175  if(msg == nullptr) {
176  auto iter = extra_messages_.find(domain_id);
177  if(iter == extra_messages_.end()) {
178  return nullptr;
179  }
180  auto& catalog = iter->second;
181  const char* lookup = ctx ? catalog.npgettext(ctx, sid, sid, n) : catalog.ngettext(sid, sid, n);
182  if(lookup != sid) {
183  // n(p)gettext returns one of the input pointers if the string was not found
184  msg = lookup;
185  }
186  }
187  return msg;
188  }
189 
190  int domain(const std::string& domain) const override
191  {
192  auto& base = get_base();
193  return base.domain(domain);
194  }
195 
196  const char* convert(const char* msg, std::string& buffer) const override
197  {
198  auto& base = get_base();
199  return base.convert(msg, buffer);
200  }
201  private:
202  const bl::message_format<char>& get_base() const
203  {
204  return std::use_facet<bl::message_format<char>>(base_loc_);
205  }
206 
207  std::locale base_loc_;
208  std::map<int, spirit_po::default_catalog> extra_messages_;
209  };
210  struct translation_manager
211  {
212  translation_manager()
213  : loaded_paths_()
214  , loaded_domains_()
215  , current_language_(default_utf8_locale_name::name())
216  , generator_()
217  , current_locale_()
218  , is_dirty_(true)
219  {
220  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
221  for(const std::string& name : g_mgr.get_all_backends())
222  {
223  LOG_G << "Found boost locale backend: '" << name << "'\n";
224  }
225 
226  generator_.use_ansi_encoding(false);
227  generator_.categories(bl::message_facet | bl::information_facet | bl::collation_facet | bl::formatting_facet | bl::convert_facet);
228  generator_.characters(bl::char_facet);
229  // We cannot have current_locale_ be a non boost-generated locale since it might not supply
230  // the bl::info facet. As soon as we add message paths, update_locale_internal might fail,
231  // for example because of invalid .mo files. So make sure we call it at least once before adding paths/domains
232  update_locale_internal();
233  }
234 
235  void add_messages_domain(const std::string& domain)
236  {
237  if(loaded_domains_.find(domain) != loaded_domains_.end())
238  {
239  return;
240  }
241 
242  if(domain.find('/') != std::string::npos)
243  {
244  // Forward slash has a specific meaning in Boost.Locale domain
245  // names, specifying the encoding. We use UTF-8 for everything
246  // so we can't possibly support that, and odds are it's a user
247  // mistake (as in bug #23839).
248  ERR_G << "illegal textdomain name '" << domain
249  << "', skipping textdomain\n";
250  return;
251  }
252 
253  generator_.add_messages_domain(domain);
254  loaded_domains_.insert(domain);
255  }
256 
257  void add_messages_path(const std::string& path)
258  {
259  if(loaded_paths_.find(path) != loaded_paths_.end())
260  {
261  return;
262  }
263  generator_.add_messages_path(path);
264  loaded_paths_.insert(path);
265  }
266 
267  void set_default_messages_domain(const std::string& domain)
268  {
269  generator_.set_default_messages_domain(domain);
270  update_locale();
271  }
272 
273  void set_language(const std::string& language)
274  {
275  std::string::size_type at_pos = language.rfind('@');
276  if(language.empty())
277  {
278  current_language_ = default_utf8_locale_name::name();
279  }
280  else if(at_pos != std::string::npos)
281  {
282  current_language_ = language.substr(0, at_pos) + ".UTF-8" + language.substr(at_pos);
283  }
284  else
285  {
286  current_language_ = language + ".UTF-8";
287  }
288  update_locale();
289  }
290 
291  void update_locale()
292  {
293  is_dirty_ = true;
294  }
295 
296  /* This is called three times: once during the constructor, before any .mo files' paths have
297  * been added to the generator, once after adding the mainline .mo files, and once more
298  * after adding all add-ons. Corrupt .mo files might make the called functions throw, and so
299  * this might fail as soon as we've added message paths.
300  *
301  * Throwing exceptions from here is (in 1.15.18) going to end up in wesnoth.cpp's "Caught
302  * general ... exception" handler, so the effect of letting an exception escape this
303  * function is an immediate exit. Given that, it doesn't seem useful to change the assert
304  * to a throw, at least not within the 1.16 branch.
305  *
306  * Postcondition: current_locale_ is a valid boost-generated locale, supplying the bl::info
307  * facet. If there are corrupt .mo files, the locale might have no translations loaded.
308  */
309  void update_locale_internal()
310  {
311  try
312  {
313  LOG_G << "attempting to generate locale by name '" << current_language_ << "'\n";
314  current_locale_ = generator_.generate(current_language_);
315  current_locale_ = std::locale(current_locale_, new wesnoth_message_format(current_locale_, loaded_domains_, loaded_paths_));
316  const bl::info& info = std::use_facet<bl::info>(current_locale_);
317  LOG_G << "updated locale to '" << current_language_ << "' locale is now '" << current_locale_.name() << "' ( "
318  << "name='" << info.name()
319  << "' country='" << info.country()
320  << "' language='" << info.language()
321  << "' encoding='" << info.encoding()
322  << "' variant='" << info.variant() << "')\n";
323  }
324  catch(const bl::conv::conversion_error&)
325  {
326  assert(std::has_facet<bl::info>(current_locale_));
327  const bl::info& info = std::use_facet<bl::info>(current_locale_);
328  ERR_G << "Failed to update locale due to conversion error, locale is now: "
329  << "name='" << info.name()
330  << "' country='" << info.country()
331  << "' language='" << info.language()
332  << "' encoding='" << info.encoding()
333  << "' variant='" << info.variant()
334  << "'" << std::endl;
335  }
336  catch(const std::runtime_error&)
337  {
338  assert(std::has_facet<bl::info>(current_locale_));
339  const bl::info& info = std::use_facet<bl::info>(current_locale_);
340  ERR_G << "Failed to update locale due to runtime error, locale is now: "
341  << "name='" << info.name()
342  << "' country='" << info.country()
343  << "' language='" << info.language()
344  << "' encoding='" << info.encoding()
345  << "' variant='" << info.variant()
346  << "'" << std::endl;
347  }
348  is_dirty_ = false;
349  }
350 
351  std::string debug_description()
352  {
353  std::stringstream res;
354  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
355  for(const std::string& name : g_mgr.get_all_backends())
356  {
357  res << "has backend: '" << name << "',";
358  }
359  if(std::has_facet<bl::info>(current_locale_)) {
360  const bl::info& info = std::use_facet<bl::info>(current_locale_);
361  res << " locale: (name='" << info.name()
362  << "' country='" << info.country()
363  << "' language='" << info.language()
364  << "' encoding='" << info.encoding()
365  << "' variant='" << info.variant()
366  << "'),";
367  }
368  if(std::has_facet<bl::collator<char>>(current_locale_)) {
369  res << "has bl::collator<char> facet, ";
370  }
371  res << "generator categories='" << generator_.categories() << "'";
372  return res.str();
373  }
374 
375  const std::locale& get_locale()
376  {
377  if(is_dirty_)
378  {
379  update_locale_internal();
380  }
381  return current_locale_;
382  }
383 
384  private:
385  std::set<std::string> loaded_paths_;
386  std::set<std::string> loaded_domains_;
387  std::string current_language_;
388  bl::generator generator_;
389  std::locale current_locale_;
390  bool is_dirty_;
391  };
392 
393  translation_manager& get_manager()
394  {
395  static translation_manager* mng = new translation_manager();
396  return *mng;
397  }
398 
// Returns a copy of str in which the ASCII letters 'A'..'Z' are lowercased.
// Every other byte, including the bytes of non-ASCII characters, is left as it is.
std::string ascii_to_lowercase(const std::string& str)
{
	std::string lowered(str);
	for(char& ch : lowered) {
		if('A' <= ch && ch <= 'Z') {
			// Setting bit 0x20 maps an ASCII uppercase letter to its lowercase form.
			ch = static_cast<char>(ch | 0x20);
		}
	}
	return lowered;
}
410 }
411 
412 namespace translation
413 {
414 
415 std::string dgettext(const char* domain, const char* msgid)
416 {
417  std::scoped_lock lock(get_mutex());
418  return bl::dgettext(domain, msgid, get_manager().get_locale());
419 }
420 std::string egettext(char const *msgid)
421 {
422  std::scoped_lock lock(get_mutex());
423  return msgid[0] == '\0' ? msgid : bl::gettext(msgid, get_manager().get_locale());
424 }
425 
426 std::string dsgettext (const char * domainname, const char *msgid)
427 {
428  std::string msgval = dgettext (domainname, msgid);
429  if (msgval == msgid) {
430  const char* firsthat = std::strchr (msgid, '^');
431  if (firsthat == nullptr)
432  msgval = msgid;
433  else
434  msgval = firsthat + 1;
435  }
436  return msgval;
437 }
438 
namespace {

/**
 * Reports which of the two source strings, if any, @a str is equal to.
 *
 * @return @a singular if @a str equals it, otherwise @a plural if @a str
 *         equals that, otherwise nullptr. @a singular wins when both match.
 */
inline const char* is_unlocalized_string2(const std::string& str, const char* singular, const char* plural)
{
	return (str == singular) ? singular
		: (str == plural) ? plural
		: nullptr;
}

}
455 
456 std::string dsngettext (const char * domainname, const char *singular, const char *plural, int n)
457 {
458  //TODO: only the next line needs to be in the lock.
459  std::scoped_lock lock(get_mutex());
460  std::string msgval = bl::dngettext(domainname, singular, plural, n, get_manager().get_locale());
461  auto original = is_unlocalized_string2(msgval, singular, plural);
462  if (original) {
463  const char* firsthat = std::strchr (original, '^');
464  if (firsthat == nullptr)
465  msgval = original;
466  else
467  msgval = firsthat + 1;
468  }
469  return msgval;
470 }
471 
472 void bind_textdomain(const char* domain, const char* directory, const char* /*encoding*/)
473 {
474  LOG_G << "adding textdomain '" << domain << "' in directory '" << directory << "'\n";
475  std::scoped_lock lock(get_mutex());
476  get_manager().add_messages_domain(domain);
477  get_manager().add_messages_path(directory);
478  get_manager().update_locale();
479 }
480 
481 void set_default_textdomain(const char* domain)
482 {
483  LOG_G << "set_default_textdomain: '" << domain << "'\n";
484  std::scoped_lock lock(get_mutex());
485  get_manager().set_default_messages_domain(domain);
486 }
487 
488 
489 void set_language(const std::string& language, const std::vector<std::string>* /*alternates*/)
490 {
491  // why should we need alternates? which languages we support should only be related
492  // to which languages we ship with and not which the os supports
493  LOG_G << "setting language to '" << language << "' \n";
494  std::scoped_lock lock(get_mutex());
495  get_manager().set_language(language);
496 }
497 
498 int compare(const std::string& s1, const std::string& s2)
499 {
500  std::scoped_lock lock(get_mutex());
501 
502  try {
503  return std::use_facet<std::collate<char>>(get_manager().get_locale()).compare(s1.c_str(), s1.c_str() + s1.size(), s2.c_str(), s2.c_str() + s2.size());
504  } catch(const std::bad_cast&) {
505  static bool bad_cast_once = false;
506 
507  if(!bad_cast_once) {
508  ERR_G << "locale set-up for compare() is broken, falling back to std::string::compare()\n";
509  bad_cast_once = true;
510  }
511 
512  return s1.compare(s2);
513  }
514 }
515 
516 int icompare(const std::string& s1, const std::string& s2)
517 {
518  // todo: maybe we should replace this preprocessor check with a std::has_facet<bl::collator<char>> check?
519 #ifdef __APPLE__
520  // https://github.com/wesnoth/wesnoth/issues/2094
521  return compare(ascii_to_lowercase(s1), ascii_to_lowercase(s2));
522 #else
523  std::scoped_lock lock(get_mutex());
524 
525  try {
526  return std::use_facet<bl::collator<char>>(get_manager().get_locale()).compare(
527  bl::collator_base::secondary, s1, s2);
528  } catch(const std::bad_cast&) {
529  static bool bad_cast_once = false;
530 
531  if(!bad_cast_once) {
532  ERR_G << "locale set-up for icompare() is broken, falling back to std::string::compare()\n";
533 
534  try { //just to be safe.
535  ERR_G << get_manager().debug_description() << "\n";
536  } catch (const std::exception& e) {
537  ERR_G << e.what() << "\n";
538  }
539  bad_cast_once = true;
540  }
541 
542  // Let's convert at least ASCII letters to lowercase to get a somewhat case-insensitive comparison.
543  return ascii_to_lowercase(s1).compare(ascii_to_lowercase(s2));
544  }
545 #endif
546 }
547 
548 std::string strftime(const std::string& format, const std::tm* time)
549 {
550  std::basic_ostringstream<char> dummy;
551  std::scoped_lock lock(get_mutex());
552  dummy.imbue(get_manager().get_locale()); // TODO: Calling imbue() with hard-coded locale appears to work with put_time in glibc, but not with get_locale()...
553  // Revert to use of boost (from 1.14) instead of std::put_time() because the latter does not appear to handle locale properly in Linux
554  dummy << bl::as::ftime(format) << mktime(const_cast<std::tm*>(time));
555 
556  return dummy.str();
557 }
558 
559 bool ci_search(const std::string& s1, const std::string& s2)
560 {
561  std::scoped_lock lock(get_mutex());
562  const std::locale& locale = get_manager().get_locale();
563 
564  std::string ls1 = bl::to_lower(s1, locale);
565  std::string ls2 = bl::to_lower(s2, locale);
566 
567  return std::search(ls1.begin(), ls1.end(),
568  ls2.begin(), ls2.end()) != ls1.end();
569 }
570 
572 {
573  std::scoped_lock lock(get_mutex());
574  return std::use_facet<boost::locale::info>(get_manager().get_locale());
575 }
576 }
static log_domain dom("general")
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:472
static domain_map * domains
Definition: log.cpp:72
int dummy
Definition: lstrlib.cpp:1347
logger & info()
Definition: log.cpp:89
int compare(const std::string &s1, const std::string &s2)
Case-sensitive lexicographical comparison.
Definition: gettext.cpp:498
static bool file_exists(const bfs::path &fpath)
Definition: filesystem.cpp:263
const language_def & get_locale()
Definition: language.cpp:329
filesystem::scoped_istream istream_file(const std::string &fname, bool treat_failure_as_error)
void set_language(const std::string &language, const std::vector< std::string > *)
Definition: gettext.cpp:489
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:456
std::string dgettext(const char *domain, const char *msgid)
Definition: gettext.cpp:415
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:110
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:426
std::string strftime(const std::string &format, const std::tm *time)
Definition: gettext.cpp:548
#define LOG_G
Definition: gettext.cpp:40
std::unique_ptr< std::istream > scoped_istream
Definition: filesystem.hpp:39
std::string path
Definition: game_config.cpp:39
void set_default_textdomain(const char *domain)
Definition: gettext.cpp:481
std::string egettext(char const *msgid)
Definition: gettext.cpp:420
bool ci_search(const std::string &s1, const std::string &s2)
Definition: gettext.cpp:559
static std::string gettext(const char *str)
Definition: gettext.hpp:60
#define ERR_G
Definition: gettext.cpp:42
static std::string flush(std::ostringstream &s)
Definition: reports.cpp:92
std::string language()
Definition: general.cpp:505
Declarations for File-IO.
static catalog from_istream(std::istream &is, warning_channel_type w=warning_channel_type())
Definition: catalog.hpp:269
rng * generator
This generator is automatically synced during synced context.
Definition: random.cpp:61
int icompare(const std::string &s1, const std::string &s2)
Case-insensitive lexicographical comparison.
Definition: gettext.cpp:516
Standard logging facilities (interface).
#define DBG_G
Definition: gettext.cpp:39
#define e
mock_char c
static map_location::DIRECTION n
const boost::locale::info & get_effective_locale_info()
A facet that holds general information about the effective locale.
Definition: gettext.cpp:571