The Battle for Wesnoth  1.15.5+dev
gettext.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 - 2018 by David White <dave@whitevine.net>
3  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation; either version 2 of the License, or
8  (at your option) any later version.
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY.
11 
12  See the COPYING file for more details.
13 */
14 
15 #include "global.hpp"
16 #include "gettext.hpp"
17 #include "log.hpp"
18 #include "filesystem.hpp"
19 
20 #include <algorithm>
21 #include <iomanip>
22 #include <iostream>
23 #include <iterator>
24 #include <fstream>
25 #include <locale>
26 #include <mutex>
27 #include <boost/locale.hpp>
28 #include <set>
29 
30 #if defined(__GNUC__)
31 #pragma GCC diagnostic push
32 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
33 #endif
34 #include "spirit_po/spirit_po.hpp"
35 #if defined(__GNUC__)
36 #pragma GCC diagnostic pop
37 #endif
38 
39 #define DBG_G LOG_STREAM(debug, lg::general())
40 #define LOG_G LOG_STREAM(info, lg::general())
41 #define WRN_G LOG_STREAM(warn, lg::general())
42 #define ERR_G LOG_STREAM(err, lg::general())
43 
44 namespace bl = boost::locale;
45 namespace
46 {
47  std::mutex& get_mutex() { static std::mutex* m = new std::mutex(); return *m; }
48 
49  class default_utf8_locale_name
50  {
51  public:
52  static const std::string& name()
53  {
54  //Use pointers because we don't want it to be destructed at program end.
55  static default_utf8_locale_name* lname = new default_utf8_locale_name();
56  return lname->name_;
57  }
58  private:
59  default_utf8_locale_name()
60  : name_()
61  {
62  LOG_G << "Generating default locale\n";
63  try
64  {
65  //NOTE: the default_locale objects needs to live as least as long as the locale_info object. Otherwise the program will segfault.
66  std::locale default_locale = bl::generator().generate("");
67  const bl::info& locale_info = std::use_facet<bl::info>(default_locale);
68  name_ += locale_info.language();
69  if(!locale_info.country().empty())
70  name_ += "_" + locale_info.country();
71  name_ += ".UTF-8";
72  if(!locale_info.variant().empty())
73  name_ += "@" + locale_info.variant();
74  }
75  catch(const std::exception& e)
76  {
77  ERR_G << "Failed to generate default locale string. message:" << e.what() << std::endl;
78  }
79  LOG_G << "Finished generating default locale, default is now '" << name_ << "'\n";
80  }
81 
82  std::string name_;
83  };
84  class wesnoth_message_format : public bl::message_format<char>
85  {
86  using po_catalog = spirit_po::catalog<>;
87  public:
88  wesnoth_message_format(std::locale base, const std::set<std::string>& domains, const std::set<std::string>& paths)
89  : base_loc_(base)
90  {
91  const bl::info& inf = std::use_facet<bl::info>(base);
92  if(inf.language() == "c") {
93  return;
94  }
95  std::string lang_name_short = inf.language();
96  std::string lang_name_long = lang_name_short;
97  if(!inf.country().empty()) {
98  lang_name_long += '_';
99  lang_name_long += inf.country();
100  }
101  if(!inf.variant().empty()) {
102  lang_name_long += '@';
103  lang_name_long += inf.variant();
104  lang_name_short += '@';
105  lang_name_short += inf.variant();
106  }
107  DBG_G << "Loading po files for language " << lang_name_long << '\n';
108  for(auto& domain : domains) {
109  DBG_G << "Searching for po files for domain " << domain << '\n';
111  for(auto base_path : paths) {
112  DBG_G << "Searching in dir " << base_path << '\n';
113  if(base_path[base_path.length()-1] != '/') {
114  base_path += '/';
115  }
116  base_path += domain;
117  base_path += '/';
118  path = base_path + lang_name_long + ".po";
119  DBG_G << " Trying path " << path << '\n';
120  if(filesystem::file_exists(path)) {
121  break;
122  }
123  path = base_path + lang_name_short + ".po";
124  DBG_G << " Trying path " << path << '\n';
125  if(filesystem::file_exists(path)) {
126  break;
127  }
128  }
129  if(!filesystem::file_exists(path)) {
130  continue;
131  }
132  LOG_G << "Loading language file from " << path << '\n';
133  try {
135  po_file->exceptions(std::ios::badbit);
136  const po_catalog& cat = po_catalog::from_istream(*po_file);
137  extra_messages_.emplace(get_base().domain(domain), cat);
138  } catch(const spirit_po::catalog_exception& e) {
139  throw_po_error(lang_name_long, domain, e.what());
140  } catch(const std::ios::failure&) {
141  throw_po_error(lang_name_long, domain, strerror(errno));
142  }
143  }
144  }
145 
146  [[noreturn]] static void throw_po_error(const std::string& lang, const std::string& dom, const std::string& detail) {
147  std::ostringstream err;
148  err << "Error opening language file for " << lang << ", textdomain " << dom
149  << ":\n " << detail << '\n';
150  ERR_G << err.rdbuf() << std::flush;
151  throw game::error(err.str());
152  }
153 
154  const char* get(int domain_id, const char* ctx, const char* msg_id) const override
155  {
156  auto& base = get_base();
157  const char* msg = base.get(domain_id, ctx, msg_id);
158  if(msg == nullptr) {
159  auto iter = extra_messages_.find(domain_id);
160  if(iter == extra_messages_.end()) {
161  return nullptr;
162  }
163  auto& catalog = iter->second;
164  const char* lookup = ctx ? catalog.pgettext(ctx, msg_id) : catalog.gettext(msg_id);
165  if(lookup != msg_id) {
166  // (p)gettext returns the input pointer if the string was not found
167  msg = lookup;
168  }
169  }
170  return msg;
171  }
172 
173  const char* get(int domain_id, const char* ctx, const char* sid, int n) const override
174  {
175  auto& base = get_base();
176  const char* msg = base.get(domain_id, ctx, sid, n);
177  if(msg == nullptr) {
178  auto iter = extra_messages_.find(domain_id);
179  if(iter == extra_messages_.end()) {
180  return nullptr;
181  }
182  auto& catalog = iter->second;
183  const char* lookup = ctx ? catalog.npgettext(ctx, sid, sid, n) : catalog.ngettext(sid, sid, n);
184  if(lookup != sid) {
185  // n(p)gettext returns one of the input pointers if the string was not found
186  msg = lookup;
187  }
188  }
189  return msg;
190  }
191 
192  int domain(const std::string& domain) const override
193  {
194  auto& base = get_base();
195  return base.domain(domain);
196  }
197 
198  const char* convert(const char* msg, std::string& buffer) const override
199  {
200  auto& base = get_base();
201  return base.convert(msg, buffer);
202  }
203  private:
204  const bl::message_format<char>& get_base() const
205  {
206  return std::use_facet<bl::message_format<char>>(base_loc_);
207  }
208 
209  std::locale base_loc_;
210  std::map<int, po_catalog> extra_messages_;
211  };
212  struct translation_manager
213  {
214  translation_manager()
215  : loaded_paths_()
216  , loaded_domains_()
217  , current_language_(default_utf8_locale_name::name())
218  , generator_()
219  , current_locale_()
220  , is_dirty_(true)
221  {
222  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
223  for(const std::string& name : g_mgr.get_all_backends())
224  {
225  LOG_G << "Found boost locale backend: '" << name << "'\n";
226  }
227 
228  generator_.use_ansi_encoding(false);
229  generator_.categories(bl::message_facet | bl::information_facet | bl::collation_facet | bl::formatting_facet | bl::convert_facet);
230  generator_.characters(bl::char_facet);
231  // We cannot have current_locale_ be a non boost-generated locale since it might not supply
232  // the bl::info facet. As soon as we add message paths, update_locale_internal might fail,
233  // for example because of invalid .mo files. So make sure we call it at least once before adding paths/domains
234  update_locale_internal();
235  }
236 
237  void add_messages_domain(const std::string& domain)
238  {
239  if(loaded_domains_.find(domain) != loaded_domains_.end())
240  {
241  return;
242  }
243 
244  if(domain.find('/') != std::string::npos)
245  {
246  // Forward slash has a specific meaning in Boost.Locale domain
247  // names, specifying the encoding. We use UTF-8 for everything
248  // so we can't possibly support that, and odds are it's a user
249  // mistake (as in bug #23839).
250  ERR_G << "illegal textdomain name '" << domain
251  << "', skipping textdomain\n";
252  return;
253  }
254 
255  generator_.add_messages_domain(domain);
256  loaded_domains_.insert(domain);
257  }
258 
259  void add_messages_path(const std::string& path)
260  {
261  if(loaded_paths_.find(path) != loaded_paths_.end())
262  {
263  return;
264  }
265  generator_.add_messages_path(path);
266  loaded_paths_.insert(path);
267  }
268 
269  void set_default_messages_domain(const std::string& domain)
270  {
271  generator_.set_default_messages_domain(domain);
272  update_locale();
273  }
274 
275  void set_language(const std::string& language)
276  {
277  std::string::size_type at_pos = language.rfind('@');
278  if(language.empty())
279  {
280  current_language_ = default_utf8_locale_name::name();
281  }
282  else if(at_pos != std::string::npos)
283  {
284  current_language_ = language.substr(0, at_pos) + ".UTF-8" + language.substr(at_pos);
285  }
286  else
287  {
288  current_language_ = language + ".UTF-8";
289  }
290  update_locale();
291  }
292 
293  void update_locale()
294  {
295  is_dirty_ = true;
296  }
297 
298  void update_locale_internal()
299  {
300  try
301  {
302  LOG_G << "attempting to generate locale by name '" << current_language_ << "'\n";
303  current_locale_ = generator_.generate(current_language_);
304  current_locale_ = std::locale(current_locale_, new wesnoth_message_format(current_locale_, loaded_domains_, loaded_paths_));
305  const bl::info& info = std::use_facet<bl::info>(current_locale_);
306  LOG_G << "updated locale to '" << current_language_ << "' locale is now '" << current_locale_.name() << "' ( "
307  << "name='" << info.name()
308  << "' country='" << info.country()
309  << "' language='" << info.language()
310  << "' encoding='" << info.encoding()
311  << "' variant='" << info.variant() << "')\n";
312  }
313  catch(const bl::conv::conversion_error&)
314  {
315  assert(std::has_facet<bl::info>(current_locale_));
316  const bl::info& info = std::use_facet<bl::info>(current_locale_);
317  ERR_G << "Failed to update locale due to conversion error, locale is now: "
318  << "name='" << info.name()
319  << "' country='" << info.country()
320  << "' language='" << info.language()
321  << "' encoding='" << info.encoding()
322  << "' variant='" << info.variant()
323  << "'" << std::endl;
324  }
325  is_dirty_ = false;
326  }
327 
328  std::string debug_description()
329  {
330  std::stringstream res;
331  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
332  for(const std::string& name : g_mgr.get_all_backends())
333  {
334  res << "has backend: '" << name << "',";
335  }
336  if(std::has_facet<bl::info>(current_locale_)) {
337  const bl::info& info = std::use_facet<bl::info>(current_locale_);
338  res << " locale: (name='" << info.name()
339  << "' country='" << info.country()
340  << "' language='" << info.language()
341  << "' encoding='" << info.encoding()
342  << "' variant='" << info.variant()
343  << "'),";
344  }
345  if(std::has_facet<bl::collator<char>>(current_locale_)) {
346  res << "has bl::collator<char> facet, ";
347  }
348  res << "generator categories='" << generator_.categories() << "'";
349  return res.str();
350  }
351 
352  const std::locale& get_locale()
353  {
354  if(is_dirty_)
355  {
356  update_locale_internal();
357  }
358  return current_locale_;
359  }
360 
361  private:
362  std::set<std::string> loaded_paths_;
363  std::set<std::string> loaded_domains_;
364  std::string current_language_;
365  bl::generator generator_;
366  std::locale current_locale_;
367  bool is_dirty_;
368  };
369 
370  translation_manager& get_manager()
371  {
372  static translation_manager* mng = new translation_manager();
373  return *mng;
374  }
375 
376  // Converts ASCII letters to lowercase. Ignores Unicode letters.
377  std::string ascii_to_lowercase(const std::string& str)
378  {
379  std::string result;
380  result.reserve(str.length());
381  std::transform(str.begin(), str.end(), std::back_inserter(result), [](char c)
382  {
383  return c >= 'A' && c <= 'Z' ? c | 0x20 : c;
384  });
385  return result;
386  }
387 }
388 
389 namespace translation
390 {
391 
392 std::string dgettext(const char* domain, const char* msgid)
393 {
394  std::lock_guard<std::mutex> lock(get_mutex());
395  return bl::dgettext(domain, msgid, get_manager().get_locale());
396 }
397 std::string egettext(char const *msgid)
398 {
399  std::lock_guard<std::mutex> lock(get_mutex());
400  return msgid[0] == '\0' ? msgid : bl::gettext(msgid, get_manager().get_locale());
401 }
402 
403 std::string dsgettext (const char * domainname, const char *msgid)
404 {
405  std::string msgval = dgettext (domainname, msgid);
406  if (msgval == msgid) {
407  const char* firsthat = std::strchr (msgid, '^');
408  if (firsthat == nullptr)
409  msgval = msgid;
410  else
411  msgval = firsthat + 1;
412  }
413  return msgval;
414 }
415 
416 std::string dsngettext (const char * domainname, const char *singular, const char *plural, int n)
417 {
418  //TODO: only the next line needs to be in the lock.
419  std::lock_guard<std::mutex> lock(get_mutex());
420  std::string msgval = bl::dngettext(domainname, singular, plural, n, get_manager().get_locale());
421  if (msgval == singular) {
422  const char* firsthat = std::strchr (singular, '^');
423  if (firsthat == nullptr)
424  msgval = singular;
425  else
426  msgval = firsthat + 1;
427  }
428  return msgval;
429 }
430 
431 void bind_textdomain(const char* domain, const char* directory, const char* /*encoding*/)
432 {
433  LOG_G << "adding textdomain '" << domain << "' in directory '" << directory << "'\n";
434  std::lock_guard<std::mutex> lock(get_mutex());
435  get_manager().add_messages_domain(domain);
436  get_manager().add_messages_path(directory);
437  get_manager().update_locale();
438 }
439 
440 void set_default_textdomain(const char* domain)
441 {
442  LOG_G << "set_default_textdomain: '" << domain << "'\n";
443  std::lock_guard<std::mutex> lock(get_mutex());
444  get_manager().set_default_messages_domain(domain);
445 }
446 
447 
448 void set_language(const std::string& language, const std::vector<std::string>* /*alternates*/)
449 {
450  // why should we need alternates? which languages we support should only be related
451  // to which languages we ship with and not which the os supports
452  LOG_G << "setting language to '" << language << "' \n";
453  std::lock_guard<std::mutex> lock(get_mutex());
454  get_manager().set_language(language);
455 }
456 
457 int compare(const std::string& s1, const std::string& s2)
458 {
459  std::lock_guard<std::mutex> lock(get_mutex());
460 
461  try {
462  return std::use_facet<std::collate<char>>(get_manager().get_locale()).compare(s1.c_str(), s1.c_str() + s1.size(), s2.c_str(), s2.c_str() + s2.size());
463  } catch(const std::bad_cast&) {
464  static bool bad_cast_once = false;
465 
466  if(!bad_cast_once) {
467  ERR_G << "locale set-up for compare() is broken, falling back to std::string::compare()\n";
468  bad_cast_once = true;
469  }
470 
471  return s1.compare(s2);
472  }
473 }
474 
475 int icompare(const std::string& s1, const std::string& s2)
476 {
477  // todo: maybe we should replace this preprocessor check with a std::has_facet<bl::collator<char>> check?
478 #ifdef __APPLE__
479  // https://github.com/wesnoth/wesnoth/issues/2094
480  return compare(ascii_to_lowercase(s1), ascii_to_lowercase(s2));
481 #else
482  std::lock_guard<std::mutex> lock(get_mutex());
483 
484  try {
485  return std::use_facet<bl::collator<char>>(get_manager().get_locale()).compare(
486  bl::collator_base::secondary, s1, s2);
487  } catch(const std::bad_cast&) {
488  static bool bad_cast_once = false;
489 
490  if(!bad_cast_once) {
491  ERR_G << "locale set-up for icompare() is broken, falling back to std::string::compare()\n";
492 
493  try { //just to be safe.
494  ERR_G << get_manager().debug_description() << "\n";
495  } catch (const std::exception& e) {
496  ERR_G << e.what() << "\n";
497  }
498  bad_cast_once = true;
499  }
500 
501  // Let's convert at least ASCII letters to lowercase to get a somewhat case-insensitive comparison.
502  return ascii_to_lowercase(s1).compare(ascii_to_lowercase(s2));
503  }
504 #endif
505 }
506 
507 std::string strftime(const std::string& format, const std::tm* time)
508 {
509  std::basic_ostringstream<char> dummy;
510  std::lock_guard<std::mutex> lock(get_mutex());
511  dummy.imbue(get_manager().get_locale()); // TODO: Calling imbue() with hard-coded locale appears to work with put_time in glibc, but not with get_locale()...
512  // Revert to use of boost (from 1.14) instead of std::put_time() because the latter does not appear to handle locale properly in Linux
513  dummy << bl::as::ftime(format) << mktime(const_cast<std::tm*>(time));
514 
515  return dummy.str();
516 }
517 
518 bool ci_search(const std::string& s1, const std::string& s2)
519 {
520  std::lock_guard<std::mutex> lock(get_mutex());
521  const std::locale& locale = get_manager().get_locale();
522 
523  std::string ls1 = bl::to_lower(s1, locale);
524  std::string ls2 = bl::to_lower(s2, locale);
525 
526  return std::search(ls1.begin(), ls1.end(),
527  ls2.begin(), ls2.end()) != ls1.end();
528 }
529 
531 {
532  std::lock_guard<std::mutex> lock(get_mutex());
533  return std::use_facet<boost::locale::info>(get_manager().get_locale());
534 }
535 }
static log_domain dom("general")
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:431
static domain_map * domains
Definition: log.cpp:73
int dummy
Definition: lstrlib.cpp:1125
static l_noret error(LoadState *S, const char *why)
Definition: lundump.cpp:39
logger & info()
Definition: log.cpp:90
int compare(const std::string &s1, const std::string &s2)
Case-sensitive lexicographical comparison.
Definition: gettext.cpp:457
static bool file_exists(const bfs::path &fpath)
Definition: filesystem.cpp:267
const language_def & get_locale()
Definition: language.cpp:318
filesystem::scoped_istream istream_file(const std::string &fname, bool treat_failure_as_error)
Definition: filesystem.cpp:993
void set_language(const std::string &language, const std::vector< std::string > *)
Definition: gettext.cpp:448
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:416
std::string dgettext(const char *domain, const char *msgid)
Definition: gettext.cpp:392
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
std::string str
Definition: statement.cpp:110
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:403
std::string strftime(const std::string &format, const std::tm *time)
Definition: gettext.cpp:507
#define LOG_G
Definition: gettext.cpp:40
std::unique_ptr< std::istream > scoped_istream
Definition: filesystem.hpp:39
std::string path
Definition: game_config.cpp:39
void set_default_textdomain(const char *domain)
Definition: gettext.cpp:440
static UNUSEDNOWARN std::string gettext(const char *str)
Definition: gettext.hpp:67
std::string egettext(char const *msgid)
Definition: gettext.cpp:397
bool ci_search(const std::string &s1, const std::string &s2)
Definition: gettext.cpp:518
logger & err()
Definition: log.cpp:78
#define ERR_G
Definition: gettext.cpp:42
static std::string flush(std::ostringstream &s)
Definition: reports.cpp:91
std::string language()
Definition: general.cpp:476
Declarations for File-IO.
rng * generator
This generator is automatically synced during synced context.
Definition: random.cpp:60
int icompare(const std::string &s1, const std::string &s2)
Case-insensitive lexicographical comparison.
Definition: gettext.cpp:475
Standard logging facilities (interface).
#define DBG_G
Definition: gettext.cpp:39
#define e
mock_char c
static map_location::DIRECTION n
const boost::locale::info & get_effective_locale_info()
A facet that holds general information about the effective locale.
Definition: gettext.cpp:530