The Battle for Wesnoth  1.19.5+dev
gettext.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 - 2024
3  by David White <dave@whitevine.net>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
#include "gettext.hpp"
#include "log.hpp"
#include "filesystem.hpp"

#include <algorithm>
#include <cassert>
#include <cstring>
#include <ctime>
#include <iterator>
#include <locale>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <type_traits>

#include <boost/locale.hpp>
26 
27 #if defined(__GNUC__)
28 #pragma GCC diagnostic push
29 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
30 #endif
31 #include "spirit_po/spirit_po.hpp"
32 #if defined(__GNUC__)
33 #pragma GCC diagnostic pop
34 #endif
35 
// Logging shorthands used throughout this file: each one expands to LOG_STREAM with the
// named severity (debug / info / warn / err) and the lg::general() log domain.
36 #define DBG_G LOG_STREAM(debug, lg::general())
37 #define LOG_G LOG_STREAM(info, lg::general())
38 #define WRN_G LOG_STREAM(warn, lg::general())
39 #define ERR_G LOG_STREAM(err, lg::general())
40 
41 namespace bl = boost::locale;
42 namespace
43 {
44 
45  class default_utf8_locale_name
46  {
47  public:
48  static const std::string& name()
49  {
50  //Use pointers because we don't want it to be destructed at program end.
51  static default_utf8_locale_name* lname = new default_utf8_locale_name();
52  return lname->name_;
53  }
54  private:
55  default_utf8_locale_name()
56  : name_()
57  {
58  LOG_G << "Generating default locale";
59  try
60  {
61  //NOTE: the default_locale objects needs to live as least as long as the locale_info object. Otherwise the program will segfault.
62  std::locale default_locale = bl::generator().generate("");
63  const bl::info& locale_info = std::use_facet<bl::info>(default_locale);
64  name_ += locale_info.language();
65  if(!locale_info.country().empty())
66  name_ += "_" + locale_info.country();
67  name_ += ".UTF-8";
68  if(!locale_info.variant().empty())
69  name_ += "@" + locale_info.variant();
70  }
71  catch(const std::exception& e)
72  {
73  ERR_G << "Failed to generate default locale string. message:" << e.what();
74  }
75  LOG_G << "Finished generating default locale, default is now '" << name_ << "'";
76  }
77 
78  std::string name_;
79  };
80  class wesnoth_message_format : public bl::message_format<char>
81  {
82  public:
// Builds the fallback .po catalogs for this facet.
//
// `base` is the locale whose bl::info facet names the language and whose message_format facet
// is the one get_base() forwards to. For every textdomain in `domains`, each directory in
// `paths` is tried for <dir>/<domain>/<language>.po; a file that is chosen is parsed with
// spirit_po and stored in extra_messages_ under the id the base facet assigns to that domain.
// Nothing is loaded when the language is "c".
//
// NOTE(review): this listing has lost the original lines 115, 120, 124 and 129 - the conditions
// guarding the two `break`s and the `continue`, and the declaration of po_file. Presumably they
// are file-existence checks and the opening of the stream; confirm against the upstream file.
83  wesnoth_message_format(std::locale base, const std::set<std::string>& domains, const std::set<std::string>& paths)
84  : base_loc_(base)
85  {
86  const bl::info& inf = std::use_facet<bl::info>(base);
87  if(inf.language() == "c") {
88  return;
89  }
// Two candidate file names: long = language[_country][@variant], short = language[@variant].
90  std::string lang_name_short = inf.language();
91  std::string lang_name_long = lang_name_short;
92  if(!inf.country().empty()) {
93  lang_name_long += '_';
94  lang_name_long += inf.country();
95  }
96  if(!inf.variant().empty()) {
97  lang_name_long += '@';
98  lang_name_long += inf.variant();
99  lang_name_short += '@';
100  lang_name_short += inf.variant();
101  }
102  DBG_G << "Loading po files for language " << lang_name_long;
103  for(auto& domain : domains) {
104  DBG_G << "Searching for po files for domain " << domain;
105  std::string path;
// base_path is taken by value on purpose: the loop body appends to its own copy.
106  for(auto base_path : paths) {
107  DBG_G << "Searching in dir " << base_path;
// NOTE(review): an empty string in `paths` would make length()-1 wrap around and index out of range.
108  if(base_path[base_path.length()-1] != '/') {
109  base_path += '/';
110  }
111  base_path += domain;
112  base_path += '/';
// The long name is tried before the short one.
113  path = base_path + lang_name_long + ".po";
114  DBG_G << " Trying path " << path;
116  break;
117  }
118  path = base_path + lang_name_short + ".po";
119  DBG_G << " Trying path " << path;
121  break;
122  }
123  }
125  continue;
126  }
127  LOG_G << "Loading language file from " << path;
128  try {
// Make the stream throw std::ios::failure when badbit gets set, so the handler below sees it.
130  po_file->exceptions(std::ios::badbit);
131  const auto& cat = spirit_po::default_catalog::from_istream(*po_file);
132  extra_messages_.emplace(get_base().domain(domain), cat);
133  } catch(const spirit_po::catalog_exception& e) {
134  // Treat any parsing error in the same way as the file not existing - just leave
135  // this domain untranslated but continue to load other domains.
136  log_po_error(lang_name_long, domain, e.what());
137  } catch(const std::ios::failure&) {
// Stream failure: report the text for the current errno value instead of the exception message.
138  log_po_error(lang_name_long, domain, strerror(errno));
139  }
140  }
141  }
142 
143  static void log_po_error(const std::string& lang, const std::string& dom, const std::string& detail) {
144  ERR_G << "Error opening language file for " << lang << ", textdomain " << dom
145  << ":\n " << detail;
146  }
147 
148  const char* get(int domain_id, const char* ctx, const char* msg_id) const override
149  {
150  auto& base = get_base();
151  const char* msg = base.get(domain_id, ctx, msg_id);
152  if(msg == nullptr) {
153  auto iter = extra_messages_.find(domain_id);
154  if(iter == extra_messages_.end()) {
155  return nullptr;
156  }
157  auto& catalog = iter->second;
158  const char* lookup = ctx ? catalog.pgettext(ctx, msg_id) : catalog.gettext(msg_id);
159  if(lookup != msg_id) {
160  // (p)gettext returns the input pointer if the string was not found
161  msg = lookup;
162  }
163  }
164  return msg;
165  }
166 
167 #if BOOST_VERSION < 108300
168  const char* get(int domain_id, const char* ctx, const char* sid, int n) const override
169 #else
170  const char* get(int domain_id, const char* ctx, const char* sid, bl::count_type n) const override
171 #endif
172  {
173  auto& base = get_base();
174  const char* msg = base.get(domain_id, ctx, sid, n);
175  if(msg == nullptr) {
176  auto iter = extra_messages_.find(domain_id);
177  if(iter == extra_messages_.end()) {
178  return nullptr;
179  }
180  auto& catalog = iter->second;
181  const char* lookup = ctx ? catalog.npgettext(ctx, sid, sid, n) : catalog.ngettext(sid, sid, n);
182  if(lookup != sid) {
183  // n(p)gettext returns one of the input pointers if the string was not found
184  msg = lookup;
185  }
186  }
187  return msg;
188  }
189 
190  int domain(const std::string& domain) const override
191  {
192  auto& base = get_base();
193  return base.domain(domain);
194  }
195 
196  const char* convert(const char* msg, std::string& buffer) const override
197  {
198  auto& base = get_base();
199  return base.convert(msg, buffer);
200  }
201  private:
202  const bl::message_format<char>& get_base() const
203  {
204  return std::use_facet<bl::message_format<char>>(base_loc_);
205  }
206 
207  std::locale base_loc_;
208  std::map<int, spirit_po::default_catalog> extra_messages_;
209  };
210  struct translation_manager
211  {
212  translation_manager()
213  : loaded_paths_()
214  , loaded_domains_()
215  , current_language_(default_utf8_locale_name::name())
216  , generator_()
217  , current_locale_()
218  , is_dirty_(true)
219  {
220  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
221  for(const std::string& name : g_mgr.get_all_backends())
222  {
223  LOG_G << "Found boost locale backend: '" << name << "'";
224  }
225 
226  generator_.use_ansi_encoding(false);
227 #if BOOST_VERSION < 108100
228  generator_.categories(bl::message_facet | bl::information_facet | bl::collation_facet | bl::formatting_facet | bl::convert_facet);
229  generator_.characters(bl::char_facet);
230 #else
231  generator_.categories(bl::category_t::message | bl::category_t::information | bl::category_t::collation | bl::category_t::formatting | bl::category_t::convert);
232  generator_.characters(bl::char_facet_t::char_f);
233 #endif
234  // We cannot have current_locale_ be a non boost-generated locale since it might not supply
235  // the bl::info facet. As soon as we add message paths, update_locale_internal might fail,
236  // for example because of invalid .mo files. So make sure we call it at least once before adding paths/domains
237  update_locale_internal();
238  }
239 
240  void add_messages_domain(const std::string& domain)
241  {
242  if(loaded_domains_.find(domain) != loaded_domains_.end())
243  {
244  return;
245  }
246 
247  if(domain.find('/') != std::string::npos)
248  {
249  // Forward slash has a specific meaning in Boost.Locale domain
250  // names, specifying the encoding. We use UTF-8 for everything
251  // so we can't possibly support that, and odds are it's a user
252  // mistake (as in bug #23839).
253  ERR_G << "illegal textdomain name '" << domain
254  << "', skipping textdomain";
255  return;
256  }
257 
258  generator_.add_messages_domain(domain);
259  loaded_domains_.insert(domain);
260  }
261 
262  void add_messages_path(const std::string& path)
263  {
264  if(loaded_paths_.find(path) != loaded_paths_.end())
265  {
266  return;
267  }
268  generator_.add_messages_path(path);
269  loaded_paths_.insert(path);
270  }
271 
272  void set_default_messages_domain(const std::string& domain)
273  {
274  generator_.set_default_messages_domain(domain);
275  update_locale();
276  }
277 
278  void set_language(const std::string& language)
279  {
280  std::string::size_type at_pos = language.rfind('@');
281  if(language.empty())
282  {
283  current_language_ = default_utf8_locale_name::name();
284  }
285  else if(at_pos != std::string::npos)
286  {
287  current_language_ = language.substr(0, at_pos) + ".UTF-8" + language.substr(at_pos);
288  }
289  else
290  {
291  current_language_ = language + ".UTF-8";
292  }
293  update_locale();
294  }
295 
296  void update_locale()
297  {
298  is_dirty_ = true;
299  }
300 
301  /* This is called three times: once during the constructor, before any .mo files' paths have
302  * been added to the generator, once after adding the mainline .mo files, and once more
303  * after adding all add-ons. Corrupt .mo files might make the called functions throw, and so
304  * this might fail as soon as we've added message paths.
305  *
306  * Throwing exceptions from here is (in 1.15.18) going to end up in wesnoth.cpp's "Caught
307  * general ... exception" handler, so the effect of letting an exception escape this
308  * function is an immediate exit. Given that, it doesn't seem useful to change the assert
309  * to a throw, at least not within the 1.16 branch.
310  *
311  * Postcondition: current_locale_ is a valid boost-generated locale, supplying the bl::info
312  * facet. If there are corrupt .mo files, the locale might have no translations loaded.
313  */
314  void update_locale_internal()
315  {
316  try
317  {
318  LOG_G << "attempting to generate locale by name '" << current_language_ << "'";
319  current_locale_ = generator_.generate(current_language_);
320  current_locale_ = std::locale(current_locale_, new wesnoth_message_format(current_locale_, loaded_domains_, loaded_paths_));
321  const bl::info& info = std::use_facet<bl::info>(current_locale_);
322  LOG_G << "updated locale to '" << current_language_ << "' locale is now '" << current_locale_.name() << "' ( "
323  << "name='" << info.name()
324  << "' country='" << info.country()
325  << "' language='" << info.language()
326  << "' encoding='" << info.encoding()
327  << "' variant='" << info.variant() << "')";
328  }
329  catch(const bl::conv::conversion_error& e)
330  {
331  assert(std::has_facet<bl::info>(current_locale_));
332  const bl::info& info = std::use_facet<bl::info>(current_locale_);
333  ERR_G << "Failed to update locale due to conversion error (" << e.what() << ") locale is now: "
334  << "name='" << info.name()
335  << "' country='" << info.country()
336  << "' language='" << info.language()
337  << "' encoding='" << info.encoding()
338  << "' variant='" << info.variant()
339  << "'";
340  }
341  catch(const std::runtime_error& e)
342  {
343  assert(std::has_facet<bl::info>(current_locale_));
344  const bl::info& info = std::use_facet<bl::info>(current_locale_);
345  ERR_G << "Failed to update locale due to runtime error (" << e.what() << ") locale is now: "
346  << "name='" << info.name()
347  << "' country='" << info.country()
348  << "' language='" << info.language()
349  << "' encoding='" << info.encoding()
350  << "' variant='" << info.variant()
351  << "'";
352  }
353  is_dirty_ = false;
354  }
355 
356  std::string debug_description()
357  {
358  std::stringstream res;
359  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
360  for(const std::string& name : g_mgr.get_all_backends())
361  {
362  res << "has backend: '" << name << "',";
363  }
364  if(std::has_facet<bl::info>(current_locale_)) {
365  const bl::info& info = std::use_facet<bl::info>(current_locale_);
366  res << " locale: (name='" << info.name()
367  << "' country='" << info.country()
368  << "' language='" << info.language()
369  << "' encoding='" << info.encoding()
370  << "' variant='" << info.variant()
371  << "'),";
372  }
373  if(std::has_facet<bl::collator<char>>(current_locale_)) {
374  res << "has bl::collator<char> facet, ";
375  }
376 #if BOOST_VERSION < 108100
377  res << "generator categories='" << generator_.categories() << "'";
378 #else
379  res << "generator categories='" <<
380  static_cast<std::underlying_type<bl::category_t>::type>(generator_.categories()) << "'";
381 #endif
382  return res.str();
383  }
384 
385  const std::locale& get_locale()
386  {
387  if(is_dirty_)
388  {
389  update_locale_internal();
390  }
391  return current_locale_;
392  }
393 
394  private:
395  std::set<std::string> loaded_paths_;
396  std::set<std::string> loaded_domains_;
397  std::string current_language_;
398  bl::generator generator_;
399  std::locale current_locale_;
400  bool is_dirty_;
401  };
402 
403  translation_manager& get_manager()
404  {
405  static translation_manager* mng = new translation_manager();
406  return *mng;
407  }
408 
/**
 * Converts the ASCII letters 'A'..'Z' to lowercase and leaves every other byte untouched,
 * so the bytes of multi-byte UTF-8 sequences pass through unchanged.
 *
 * @param str Input text.
 * @returns A copy of @a str with ASCII uppercase letters lowered.
 */
std::string ascii_to_lowercase(const std::string& str)
{
	std::string result = str;
	for(char& c : result) {
		if(c >= 'A' && c <= 'Z') {
			// c | 0x20 is computed as int; convert back explicitly rather than relying on
			// an implicit narrowing conversion.
			c = static_cast<char>(c | 0x20);
		}
	}
	return result;
}
420 }
421 
422 namespace translation
423 {
424 
425 std::string dgettext(const char* domain, const char* msgid)
426 {
427  return bl::dgettext(domain, msgid, get_manager().get_locale());
428 }
429 std::string egettext(char const *msgid)
430 {
431  return msgid[0] == '\0' ? msgid : bl::gettext(msgid, get_manager().get_locale());
432 }
433 
434 std::string dsgettext (const char * domainname, const char *msgid)
435 {
436  std::string msgval = dgettext (domainname, msgid);
437  if (msgval == msgid) {
438  const char* firsthat = std::strchr (msgid, '^');
439  if (firsthat == nullptr)
440  msgval = msgid;
441  else
442  msgval = firsthat + 1;
443  }
444  return msgval;
445 }
446 
namespace {

/**
 * Tells whether @a str is one of the two given source strings.
 *
 * @returns @a singular if @a str equals it, otherwise @a plural if @a str equals that,
 *          otherwise nullptr.
 */
inline const char* is_unlocalized_string2(const std::string& str, const char* singular, const char* plural)
{
	return str == singular ? singular
	     : str == plural   ? plural
	                       : nullptr;
}

}
463 
464 std::string dsngettext (const char * domainname, const char *singular, const char *plural, int n)
465 {
466  std::string msgval = bl::dngettext(domainname, singular, plural, n, get_manager().get_locale());
467 
468  auto original = is_unlocalized_string2(msgval, singular, plural);
469  if (original) {
470  const char* firsthat = std::strchr (original, '^');
471  if (firsthat == nullptr)
472  msgval = original;
473  else
474  msgval = firsthat + 1;
475  }
476  return msgval;
477 }
478 
479 void bind_textdomain(const char* domain, const char* directory, const char* /*encoding*/)
480 {
481  LOG_G << "adding textdomain '" << domain << "' in directory '" << directory << "'";
482  get_manager().add_messages_domain(domain);
483  get_manager().add_messages_path(directory);
484  get_manager().update_locale();
485 }
486 
487 void set_default_textdomain(const char* domain)
488 {
489  LOG_G << "set_default_textdomain: '" << domain << "'";
490  get_manager().set_default_messages_domain(domain);
491 }
492 
493 
494 void set_language(const std::string& language, const std::vector<std::string>* /*alternates*/)
495 {
496  // why should we need alternates? which languages we support should only be related
497  // to which languages we ship with and not which the os supports
498  LOG_G << "setting language to '" << language << "'";
499  get_manager().set_language(language);
500 }
501 
502 int compare(const std::string& s1, const std::string& s2)
503 {
504 
505  try {
506  return std::use_facet<std::collate<char>>(get_manager().get_locale()).compare(s1.c_str(), s1.c_str() + s1.size(), s2.c_str(), s2.c_str() + s2.size());
507  } catch(const std::bad_cast&) {
508  static bool bad_cast_once = false;
509 
510  if(!bad_cast_once) {
511  ERR_G << "locale set-up for compare() is broken, falling back to std::string::compare()";
512  bad_cast_once = true;
513  }
514 
515  return s1.compare(s2);
516  }
517 }
518 
519 int icompare(const std::string& s1, const std::string& s2)
520 {
521  // todo: maybe we should replace this preprocessor check with a std::has_facet<bl::collator<char>> check?
522 #ifdef __APPLE__
523  // https://github.com/wesnoth/wesnoth/issues/2094
524  return compare(ascii_to_lowercase(s1), ascii_to_lowercase(s2));
525 #else
526 
527  try {
528 #if BOOST_VERSION < 108100
529  return std::use_facet<bl::collator<char>>(get_manager().get_locale()).compare(
530  bl::collator_base::secondary, s1, s2);
531 #else
532  return std::use_facet<bl::collator<char>>(get_manager().get_locale()).compare(
533  bl::collate_level::secondary, s1, s2);
534 #endif
535  } catch(const std::bad_cast&) {
536  static bool bad_cast_once = false;
537 
538  if(!bad_cast_once) {
539  ERR_G << "locale set-up for icompare() is broken, falling back to std::string::compare()";
540 
541  try { //just to be safe.
542  ERR_G << get_manager().debug_description();
543  } catch (const std::exception& e) {
544  ERR_G << e.what();
545  }
546  bad_cast_once = true;
547  }
548 
549  // Let's convert at least ASCII letters to lowercase to get a somewhat case-insensitive comparison.
550  return ascii_to_lowercase(s1).compare(ascii_to_lowercase(s2));
551  }
552 #endif
553 }
554 
555 std::string strftime(const std::string& format, const std::tm* time)
556 {
557  std::basic_ostringstream<char> dummy;
558  dummy.imbue(get_manager().get_locale()); // TODO: Calling imbue() with hard-coded locale appears to work with put_time in glibc, but not with get_locale()...
559  // Revert to use of boost (from 1.14) instead of std::put_time() because the latter does not appear to handle locale properly in Linux
560  dummy << bl::as::ftime(format) << mktime(const_cast<std::tm*>(time));
561 
562  return dummy.str();
563 }
564 
565 bool ci_search(const std::string& s1, const std::string& s2)
566 {
567  const std::locale& locale = get_manager().get_locale();
568 
569  std::string ls1 = bl::to_lower(s1, locale);
570  std::string ls2 = bl::to_lower(s2, locale);
571 
572  return std::search(ls1.begin(), ls1.end(),
573  ls2.begin(), ls2.end()) != ls1.end();
574 }
575 
577 {
578  return std::use_facet<boost::locale::info>(get_manager().get_locale());
579 }
580 }
static catalog from_istream(std::istream &is, warning_channel_type w=warning_channel_type())
Definition: catalog.hpp:269
Declarations for File-IO.
static auto & dummy
#define ERR_G
Definition: gettext.cpp:39
#define LOG_G
Definition: gettext.cpp:37
#define DBG_G
Definition: gettext.cpp:36
const language_def & get_locale()
Definition: language.cpp:327
Standard logging facilities (interface).
CURSOR_TYPE get()
Definition: cursor.cpp:216
filesystem::scoped_istream istream_file(const std::string &fname, bool treat_failure_as_error)
static bool file_exists(const bfs::path &fpath)
Definition: filesystem.cpp:325
std::unique_ptr< std::istream > scoped_istream
Definition: filesystem.hpp:53
std::string path
Definition: filesystem.cpp:90
static log_domain dom("general")
static domain_map * domains
Definition: log.cpp:302
logger & info()
Definition: log.cpp:319
rng * generator
This generator is automatically synced during synced context.
Definition: random.cpp:60
void set_language(const std::string &language, const std::vector< std::string > *)
Definition: gettext.cpp:494
std::string egettext(char const *msgid)
Definition: gettext.cpp:429
static std::string gettext(const char *str)
Definition: gettext.hpp:60
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:479
void set_default_textdomain(const char *domain)
Definition: gettext.cpp:487
int compare(const std::string &s1, const std::string &s2)
Case-sensitive lexicographical comparison.
Definition: gettext.cpp:502
int icompare(const std::string &s1, const std::string &s2)
Case-insensitive lexicographical comparison.
Definition: gettext.cpp:519
const boost::locale::info & get_effective_locale_info()
A facet that holds general information about the effective locale.
Definition: gettext.cpp:576
std::string strftime(const std::string &format, const std::tm *time)
Definition: gettext.cpp:555
std::string dgettext(const char *domain, const char *msgid)
Definition: gettext.cpp:425
bool ci_search(const std::string &s1, const std::string &s2)
Definition: gettext.cpp:565
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:434
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:464
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
mock_char c
static map_location::direction n
#define e