The Battle for Wesnoth  1.17.0-dev
gettext.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 - 2018 by David White <dave@whitevine.net>
3  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation; either version 2 of the License, or
8  (at your option) any later version.
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY.
11 
12  See the COPYING file for more details.
13 */
14 
15 #include "gettext.hpp"
16 #include "log.hpp"
17 #include "filesystem.hpp"
18 
19 #include <algorithm>
20 #include <iomanip>
21 #include <iostream>
22 #include <iterator>
23 #include <fstream>
24 #include <locale>
25 #include <map>
26 #include <mutex>
27 #include <boost/locale.hpp>
28 #include <set>
29 
30 #if defined(__GNUC__)
31 #pragma GCC diagnostic push
32 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
33 #endif
34 #include "spirit_po/spirit_po.hpp"
35 #if defined(__GNUC__)
36 #pragma GCC diagnostic pop
37 #endif
38 
39 #define DBG_G LOG_STREAM(debug, lg::general())
40 #define LOG_G LOG_STREAM(info, lg::general())
41 #define WRN_G LOG_STREAM(warn, lg::general())
42 #define ERR_G LOG_STREAM(err, lg::general())
43 
44 namespace bl = boost::locale;
45 namespace
46 {
/**
 * Returns the single mutex that serializes access to the translation state.
 *
 * The mutex is allocated on first use and is never deleted here, so it is not
 * destroyed during static destruction at program exit.
 */
std::mutex& get_mutex()
{
	static std::mutex& instance = *new std::mutex();
	return instance;
}
48 
49  class default_utf8_locale_name
50  {
51  public:
52  static const std::string& name()
53  {
54  //Use pointers because we don't want it to be destructed at program end.
55  static default_utf8_locale_name* lname = new default_utf8_locale_name();
56  return lname->name_;
57  }
58  private:
59  default_utf8_locale_name()
60  : name_()
61  {
62  LOG_G << "Generating default locale\n";
63  try
64  {
65  //NOTE: the default_locale objects needs to live as least as long as the locale_info object. Otherwise the program will segfault.
66  std::locale default_locale = bl::generator().generate("");
67  const bl::info& locale_info = std::use_facet<bl::info>(default_locale);
68  name_ += locale_info.language();
69  if(!locale_info.country().empty())
70  name_ += "_" + locale_info.country();
71  name_ += ".UTF-8";
72  if(!locale_info.variant().empty())
73  name_ += "@" + locale_info.variant();
74  }
75  catch(const std::exception& e)
76  {
77  ERR_G << "Failed to generate default locale string. message:" << e.what() << std::endl;
78  }
79  LOG_G << "Finished generating default locale, default is now '" << name_ << "'\n";
80  }
81 
82  std::string name_;
83  };
84  class wesnoth_message_format : public bl::message_format<char>
85  {
86  public:
87  wesnoth_message_format(std::locale base, const std::set<std::string>& domains, const std::set<std::string>& paths)
88  : base_loc_(base)
89  {
90  const bl::info& inf = std::use_facet<bl::info>(base);
91  if(inf.language() == "c") {
92  return;
93  }
94  std::string lang_name_short = inf.language();
95  std::string lang_name_long = lang_name_short;
96  if(!inf.country().empty()) {
97  lang_name_long += '_';
98  lang_name_long += inf.country();
99  }
100  if(!inf.variant().empty()) {
101  lang_name_long += '@';
102  lang_name_long += inf.variant();
103  lang_name_short += '@';
104  lang_name_short += inf.variant();
105  }
106  DBG_G << "Loading po files for language " << lang_name_long << '\n';
107  for(auto& domain : domains) {
108  DBG_G << "Searching for po files for domain " << domain << '\n';
109  std::string path;
110  for(auto base_path : paths) {
111  DBG_G << "Searching in dir " << base_path << '\n';
112  if(base_path[base_path.length()-1] != '/') {
113  base_path += '/';
114  }
115  base_path += domain;
116  base_path += '/';
117  path = base_path + lang_name_long + ".po";
118  DBG_G << " Trying path " << path << '\n';
119  if(filesystem::file_exists(path)) {
120  break;
121  }
122  path = base_path + lang_name_short + ".po";
123  DBG_G << " Trying path " << path << '\n';
124  if(filesystem::file_exists(path)) {
125  break;
126  }
127  }
128  if(!filesystem::file_exists(path)) {
129  continue;
130  }
131  LOG_G << "Loading language file from " << path << '\n';
132  try {
134  po_file->exceptions(std::ios::badbit);
135  const auto& cat = spirit_po::default_catalog::from_istream(*po_file);
136  extra_messages_.emplace(get_base().domain(domain), cat);
137  } catch(const spirit_po::catalog_exception& e) {
138  // Treat any parsing error in the same way as the file not existing - just leave
139  // this domain untranslated but continue to load other domains.
140  log_po_error(lang_name_long, domain, e.what());
141  } catch(const std::ios::failure&) {
142  log_po_error(lang_name_long, domain, strerror(errno));
143  }
144  }
145  }
146 
147  static void log_po_error(const std::string& lang, const std::string& dom, const std::string& detail) {
148  ERR_G << "Error opening language file for " << lang << ", textdomain " << dom
149  << ":\n " << detail << '\n' << std::flush;
150  }
151 
152  const char* get(int domain_id, const char* ctx, const char* msg_id) const override
153  {
154  auto& base = get_base();
155  const char* msg = base.get(domain_id, ctx, msg_id);
156  if(msg == nullptr) {
157  auto iter = extra_messages_.find(domain_id);
158  if(iter == extra_messages_.end()) {
159  return nullptr;
160  }
161  auto& catalog = iter->second;
162  const char* lookup = ctx ? catalog.pgettext(ctx, msg_id) : catalog.gettext(msg_id);
163  if(lookup != msg_id) {
164  // (p)gettext returns the input pointer if the string was not found
165  msg = lookup;
166  }
167  }
168  return msg;
169  }
170 
171  const char* get(int domain_id, const char* ctx, const char* sid, int n) const override
172  {
173  auto& base = get_base();
174  const char* msg = base.get(domain_id, ctx, sid, n);
175  if(msg == nullptr) {
176  auto iter = extra_messages_.find(domain_id);
177  if(iter == extra_messages_.end()) {
178  return nullptr;
179  }
180  auto& catalog = iter->second;
181  const char* lookup = ctx ? catalog.npgettext(ctx, sid, sid, n) : catalog.ngettext(sid, sid, n);
182  if(lookup != sid) {
183  // n(p)gettext returns one of the input pointers if the string was not found
184  msg = lookup;
185  }
186  }
187  return msg;
188  }
189 
190  int domain(const std::string& domain) const override
191  {
192  auto& base = get_base();
193  return base.domain(domain);
194  }
195 
196  const char* convert(const char* msg, std::string& buffer) const override
197  {
198  auto& base = get_base();
199  return base.convert(msg, buffer);
200  }
201  private:
202  const bl::message_format<char>& get_base() const
203  {
204  return std::use_facet<bl::message_format<char>>(base_loc_);
205  }
206 
207  std::locale base_loc_;
208  std::map<int, spirit_po::default_catalog> extra_messages_;
209  };
210  struct translation_manager
211  {
212  translation_manager()
213  : loaded_paths_()
214  , loaded_domains_()
215  , current_language_(default_utf8_locale_name::name())
216  , generator_()
217  , current_locale_()
218  , is_dirty_(true)
219  {
220  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
221  for(const std::string& name : g_mgr.get_all_backends())
222  {
223  LOG_G << "Found boost locale backend: '" << name << "'\n";
224  }
225 
226  generator_.use_ansi_encoding(false);
227  generator_.categories(bl::message_facet | bl::information_facet | bl::collation_facet | bl::formatting_facet | bl::convert_facet);
228  generator_.characters(bl::char_facet);
229  // We cannot have current_locale_ be a non boost-generated locale since it might not supply
230  // the bl::info facet. As soon as we add message paths, update_locale_internal might fail,
231  // for example because of invalid .mo files. So make sure we call it at least once before adding paths/domains
232  update_locale_internal();
233  }
234 
235  void add_messages_domain(const std::string& domain)
236  {
237  if(loaded_domains_.find(domain) != loaded_domains_.end())
238  {
239  return;
240  }
241 
242  if(domain.find('/') != std::string::npos)
243  {
244  // Forward slash has a specific meaning in Boost.Locale domain
245  // names, specifying the encoding. We use UTF-8 for everything
246  // so we can't possibly support that, and odds are it's a user
247  // mistake (as in bug #23839).
248  ERR_G << "illegal textdomain name '" << domain
249  << "', skipping textdomain\n";
250  return;
251  }
252 
253  generator_.add_messages_domain(domain);
254  loaded_domains_.insert(domain);
255  }
256 
257  void add_messages_path(const std::string& path)
258  {
259  if(loaded_paths_.find(path) != loaded_paths_.end())
260  {
261  return;
262  }
263  generator_.add_messages_path(path);
264  loaded_paths_.insert(path);
265  }
266 
267  void set_default_messages_domain(const std::string& domain)
268  {
269  generator_.set_default_messages_domain(domain);
270  update_locale();
271  }
272 
273  void set_language(const std::string& language)
274  {
275  std::string::size_type at_pos = language.rfind('@');
276  if(language.empty())
277  {
278  current_language_ = default_utf8_locale_name::name();
279  }
280  else if(at_pos != std::string::npos)
281  {
282  current_language_ = language.substr(0, at_pos) + ".UTF-8" + language.substr(at_pos);
283  }
284  else
285  {
286  current_language_ = language + ".UTF-8";
287  }
288  update_locale();
289  }
290 
291  void update_locale()
292  {
293  is_dirty_ = true;
294  }
295 
296  void update_locale_internal()
297  {
298  try
299  {
300  LOG_G << "attempting to generate locale by name '" << current_language_ << "'\n";
301  current_locale_ = generator_.generate(current_language_);
302  current_locale_ = std::locale(current_locale_, new wesnoth_message_format(current_locale_, loaded_domains_, loaded_paths_));
303  const bl::info& info = std::use_facet<bl::info>(current_locale_);
304  LOG_G << "updated locale to '" << current_language_ << "' locale is now '" << current_locale_.name() << "' ( "
305  << "name='" << info.name()
306  << "' country='" << info.country()
307  << "' language='" << info.language()
308  << "' encoding='" << info.encoding()
309  << "' variant='" << info.variant() << "')\n";
310  }
311  catch(const bl::conv::conversion_error&)
312  {
313  assert(std::has_facet<bl::info>(current_locale_));
314  const bl::info& info = std::use_facet<bl::info>(current_locale_);
315  ERR_G << "Failed to update locale due to conversion error, locale is now: "
316  << "name='" << info.name()
317  << "' country='" << info.country()
318  << "' language='" << info.language()
319  << "' encoding='" << info.encoding()
320  << "' variant='" << info.variant()
321  << "'" << std::endl;
322  }
323  is_dirty_ = false;
324  }
325 
326  std::string debug_description()
327  {
328  std::stringstream res;
329  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
330  for(const std::string& name : g_mgr.get_all_backends())
331  {
332  res << "has backend: '" << name << "',";
333  }
334  if(std::has_facet<bl::info>(current_locale_)) {
335  const bl::info& info = std::use_facet<bl::info>(current_locale_);
336  res << " locale: (name='" << info.name()
337  << "' country='" << info.country()
338  << "' language='" << info.language()
339  << "' encoding='" << info.encoding()
340  << "' variant='" << info.variant()
341  << "'),";
342  }
343  if(std::has_facet<bl::collator<char>>(current_locale_)) {
344  res << "has bl::collator<char> facet, ";
345  }
346  res << "generator categories='" << generator_.categories() << "'";
347  return res.str();
348  }
349 
350  const std::locale& get_locale()
351  {
352  if(is_dirty_)
353  {
354  update_locale_internal();
355  }
356  return current_locale_;
357  }
358 
359  private:
360  std::set<std::string> loaded_paths_;
361  std::set<std::string> loaded_domains_;
362  std::string current_language_;
363  bl::generator generator_;
364  std::locale current_locale_;
365  bool is_dirty_;
366  };
367 
368  translation_manager& get_manager()
369  {
370  static translation_manager* mng = new translation_manager();
371  return *mng;
372  }
373 
/**
 * Converts ASCII letters to lowercase. Ignores Unicode letters.
 *
 * Only bytes in the range 'A'..'Z' are changed; every other byte is copied unchanged.
 *
 * @param str Input string.
 * @return A copy of @a str with ASCII uppercase letters lowered.
 */
std::string ascii_to_lowercase(const std::string& str)
{
	std::string lowered(str);
	for(char& ch : lowered) {
		if(ch >= 'A' && ch <= 'Z') {
			// Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z'.
			ch = static_cast<char>(ch | 0x20);
		}
	}
	return lowered;
}
385 }
386 
387 namespace translation
388 {
389 
390 std::string dgettext(const char* domain, const char* msgid)
391 {
392  std::scoped_lock lock(get_mutex());
393  return bl::dgettext(domain, msgid, get_manager().get_locale());
394 }
395 std::string egettext(char const *msgid)
396 {
397  std::scoped_lock lock(get_mutex());
398  return msgid[0] == '\0' ? msgid : bl::gettext(msgid, get_manager().get_locale());
399 }
400 
401 std::string dsgettext (const char * domainname, const char *msgid)
402 {
403  std::string msgval = dgettext (domainname, msgid);
404  if (msgval == msgid) {
405  const char* firsthat = std::strchr (msgid, '^');
406  if (firsthat == nullptr)
407  msgval = msgid;
408  else
409  msgval = firsthat + 1;
410  }
411  return msgval;
412 }
413 
414 std::string dsngettext (const char * domainname, const char *singular, const char *plural, int n)
415 {
416  //TODO: only the next line needs to be in the lock.
417  std::scoped_lock lock(get_mutex());
418  std::string msgval = bl::dngettext(domainname, singular, plural, n, get_manager().get_locale());
419  if (msgval == singular) {
420  const char* firsthat = std::strchr (singular, '^');
421  if (firsthat == nullptr)
422  msgval = singular;
423  else
424  msgval = firsthat + 1;
425  }
426  return msgval;
427 }
428 
429 void bind_textdomain(const char* domain, const char* directory, const char* /*encoding*/)
430 {
431  LOG_G << "adding textdomain '" << domain << "' in directory '" << directory << "'\n";
432  std::scoped_lock lock(get_mutex());
433  get_manager().add_messages_domain(domain);
434  get_manager().add_messages_path(directory);
435  get_manager().update_locale();
436 }
437 
438 void set_default_textdomain(const char* domain)
439 {
440  LOG_G << "set_default_textdomain: '" << domain << "'\n";
441  std::scoped_lock lock(get_mutex());
442  get_manager().set_default_messages_domain(domain);
443 }
444 
445 
446 void set_language(const std::string& language, const std::vector<std::string>* /*alternates*/)
447 {
448  // why should we need alternates? which languages we support should only be related
449  // to which languages we ship with and not which the os supports
450  LOG_G << "setting language to '" << language << "' \n";
451  std::scoped_lock lock(get_mutex());
452  get_manager().set_language(language);
453 }
454 
455 int compare(const std::string& s1, const std::string& s2)
456 {
457  std::scoped_lock lock(get_mutex());
458 
459  try {
460  return std::use_facet<std::collate<char>>(get_manager().get_locale()).compare(s1.c_str(), s1.c_str() + s1.size(), s2.c_str(), s2.c_str() + s2.size());
461  } catch(const std::bad_cast&) {
462  static bool bad_cast_once = false;
463 
464  if(!bad_cast_once) {
465  ERR_G << "locale set-up for compare() is broken, falling back to std::string::compare()\n";
466  bad_cast_once = true;
467  }
468 
469  return s1.compare(s2);
470  }
471 }
472 
473 int icompare(const std::string& s1, const std::string& s2)
474 {
475  // todo: maybe we should replace this preprocessor check with a std::has_facet<bl::collator<char>> check?
476 #ifdef __APPLE__
477  // https://github.com/wesnoth/wesnoth/issues/2094
478  return compare(ascii_to_lowercase(s1), ascii_to_lowercase(s2));
479 #else
480  std::scoped_lock lock(get_mutex());
481 
482  try {
483  return std::use_facet<bl::collator<char>>(get_manager().get_locale()).compare(
484  bl::collator_base::secondary, s1, s2);
485  } catch(const std::bad_cast&) {
486  static bool bad_cast_once = false;
487 
488  if(!bad_cast_once) {
489  ERR_G << "locale set-up for icompare() is broken, falling back to std::string::compare()\n";
490 
491  try { //just to be safe.
492  ERR_G << get_manager().debug_description() << "\n";
493  } catch (const std::exception& e) {
494  ERR_G << e.what() << "\n";
495  }
496  bad_cast_once = true;
497  }
498 
499  // Let's convert at least ASCII letters to lowercase to get a somewhat case-insensitive comparison.
500  return ascii_to_lowercase(s1).compare(ascii_to_lowercase(s2));
501  }
502 #endif
503 }
504 
505 std::string strftime(const std::string& format, const std::tm* time)
506 {
507  std::basic_ostringstream<char> dummy;
508  std::scoped_lock lock(get_mutex());
509  dummy.imbue(get_manager().get_locale()); // TODO: Calling imbue() with hard-coded locale appears to work with put_time in glibc, but not with get_locale()...
510  // Revert to use of boost (from 1.14) instead of std::put_time() because the latter does not appear to handle locale properly in Linux
511  dummy << bl::as::ftime(format) << mktime(const_cast<std::tm*>(time));
512 
513  return dummy.str();
514 }
515 
516 bool ci_search(const std::string& s1, const std::string& s2)
517 {
518  std::scoped_lock lock(get_mutex());
519  const std::locale& locale = get_manager().get_locale();
520 
521  std::string ls1 = bl::to_lower(s1, locale);
522  std::string ls2 = bl::to_lower(s2, locale);
523 
524  return std::search(ls1.begin(), ls1.end(),
525  ls2.begin(), ls2.end()) != ls1.end();
526 }
527 
529 {
530  std::scoped_lock lock(get_mutex());
531  return std::use_facet<boost::locale::info>(get_manager().get_locale());
532 }
533 }
static log_domain dom("general")
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:429
static domain_map * domains
Definition: log.cpp:71
int dummy
Definition: lstrlib.cpp:1347
logger & info()
Definition: log.cpp:88
int compare(const std::string &s1, const std::string &s2)
Case-sensitive lexicographical comparison.
Definition: gettext.cpp:455
static bool file_exists(const bfs::path &fpath)
Definition: filesystem.cpp:262
const language_def & get_locale()
Definition: language.cpp:328
filesystem::scoped_istream istream_file(const std::string &fname, bool treat_failure_as_error)
void set_language(const std::string &language, const std::vector< std::string > *)
Definition: gettext.cpp:446
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:414
std::string dgettext(const char *domain, const char *msgid)
Definition: gettext.cpp:390
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:401
std::string strftime(const std::string &format, const std::tm *time)
Definition: gettext.cpp:505
#define LOG_G
Definition: gettext.cpp:40
std::unique_ptr< std::istream > scoped_istream
Definition: filesystem.hpp:38
std::string path
Definition: game_config.cpp:38
void set_default_textdomain(const char *domain)
Definition: gettext.cpp:438
std::string egettext(char const *msgid)
Definition: gettext.cpp:395
bool ci_search(const std::string &s1, const std::string &s2)
Definition: gettext.cpp:516
static std::string gettext(const char *str)
Definition: gettext.hpp:59
#define ERR_G
Definition: gettext.cpp:42
static std::string flush(std::ostringstream &s)
Definition: reports.cpp:91
std::string language()
Definition: general.cpp:494
Declarations for File-IO.
static catalog from_istream(std::istream &is, warning_channel_type w=warning_channel_type())
Definition: catalog.hpp:269
rng * generator
This generator is automatically synced during synced context.
Definition: random.cpp:60
int icompare(const std::string &s1, const std::string &s2)
Case-insensitive lexicographical comparison.
Definition: gettext.cpp:473
Standard logging facilities (interface).
#define DBG_G
Definition: gettext.cpp:39
#define e
mock_char c
static map_location::DIRECTION n
const boost::locale::info & get_effective_locale_info()
A facet that holds general information about the effective locale.
Definition: gettext.cpp:528