The Battle for Wesnoth  1.15.11+dev
gettext.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 - 2018 by David White <dave@whitevine.net>
3  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation; either version 2 of the License, or
8  (at your option) any later version.
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY.
11 
12  See the COPYING file for more details.
13 */
14 
15 #include "gettext.hpp"
16 #include "log.hpp"
17 #include "filesystem.hpp"
18 
19 #include <algorithm>
20 #include <iomanip>
21 #include <iostream>
22 #include <iterator>
23 #include <fstream>
24 #include <locale>
25 #include <map>
26 #include <mutex>
27 #include <boost/locale.hpp>
28 #include <set>
29 
30 #if defined(__GNUC__)
31 #pragma GCC diagnostic push
32 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
33 #endif
34 #include "spirit_po/spirit_po.hpp"
35 #if defined(__GNUC__)
36 #pragma GCC diagnostic pop
37 #endif
38 
// Logging shortcuts used throughout this file. Each one forwards to LOG_STREAM
// with a fixed severity and the log domain returned by lg::general().
#define DBG_G LOG_STREAM(debug, lg::general())
#define LOG_G LOG_STREAM(info, lg::general())
#define WRN_G LOG_STREAM(warn, lg::general())
#define ERR_G LOG_STREAM(err, lg::general())
43 
44 namespace bl = boost::locale;
45 namespace
46 {
/**
 * Returns the single mutex that the translation functions in this file lock
 * before they use the translation manager.
 *
 * The mutex is allocated on first use and never deleted, so it is not
 * destroyed when the program exits.
 */
std::mutex& get_mutex()
{
	static std::mutex& instance = *new std::mutex();
	return instance;
}
48 
49  class default_utf8_locale_name
50  {
51  public:
52  static const std::string& name()
53  {
54  //Use pointers because we don't want it to be destructed at program end.
55  static default_utf8_locale_name* lname = new default_utf8_locale_name();
56  return lname->name_;
57  }
58  private:
59  default_utf8_locale_name()
60  : name_()
61  {
62  LOG_G << "Generating default locale\n";
63  try
64  {
65  //NOTE: the default_locale objects needs to live as least as long as the locale_info object. Otherwise the program will segfault.
66  std::locale default_locale = bl::generator().generate("");
67  const bl::info& locale_info = std::use_facet<bl::info>(default_locale);
68  name_ += locale_info.language();
69  if(!locale_info.country().empty())
70  name_ += "_" + locale_info.country();
71  name_ += ".UTF-8";
72  if(!locale_info.variant().empty())
73  name_ += "@" + locale_info.variant();
74  }
75  catch(const std::exception& e)
76  {
77  ERR_G << "Failed to generate default locale string. message:" << e.what() << std::endl;
78  }
79  LOG_G << "Finished generating default locale, default is now '" << name_ << "'\n";
80  }
81 
82  std::string name_;
83  };
84  class wesnoth_message_format : public bl::message_format<char>
85  {
86  public:
87  wesnoth_message_format(std::locale base, const std::set<std::string>& domains, const std::set<std::string>& paths)
88  : base_loc_(base)
89  {
90  const bl::info& inf = std::use_facet<bl::info>(base);
91  if(inf.language() == "c") {
92  return;
93  }
94  std::string lang_name_short = inf.language();
95  std::string lang_name_long = lang_name_short;
96  if(!inf.country().empty()) {
97  lang_name_long += '_';
98  lang_name_long += inf.country();
99  }
100  if(!inf.variant().empty()) {
101  lang_name_long += '@';
102  lang_name_long += inf.variant();
103  lang_name_short += '@';
104  lang_name_short += inf.variant();
105  }
106  DBG_G << "Loading po files for language " << lang_name_long << '\n';
107  for(auto& domain : domains) {
108  DBG_G << "Searching for po files for domain " << domain << '\n';
109  std::string path;
110  for(auto base_path : paths) {
111  DBG_G << "Searching in dir " << base_path << '\n';
112  if(base_path[base_path.length()-1] != '/') {
113  base_path += '/';
114  }
115  base_path += domain;
116  base_path += '/';
117  path = base_path + lang_name_long + ".po";
118  DBG_G << " Trying path " << path << '\n';
119  if(filesystem::file_exists(path)) {
120  break;
121  }
122  path = base_path + lang_name_short + ".po";
123  DBG_G << " Trying path " << path << '\n';
124  if(filesystem::file_exists(path)) {
125  break;
126  }
127  }
128  if(!filesystem::file_exists(path)) {
129  continue;
130  }
131  LOG_G << "Loading language file from " << path << '\n';
132  try {
134  po_file->exceptions(std::ios::badbit);
135  const auto& cat = spirit_po::default_catalog::from_istream(*po_file);
136  extra_messages_.emplace(get_base().domain(domain), cat);
137  } catch(const spirit_po::catalog_exception& e) {
138  throw_po_error(lang_name_long, domain, e.what());
139  } catch(const std::ios::failure&) {
140  throw_po_error(lang_name_long, domain, strerror(errno));
141  }
142  }
143  }
144 
145  [[noreturn]] static void throw_po_error(const std::string& lang, const std::string& dom, const std::string& detail) {
146  std::ostringstream err;
147  err << "Error opening language file for " << lang << ", textdomain " << dom
148  << ":\n " << detail << '\n';
149  ERR_G << err.rdbuf() << std::flush;
150  throw game::error(err.str());
151  }
152 
153  const char* get(int domain_id, const char* ctx, const char* msg_id) const override
154  {
155  auto& base = get_base();
156  const char* msg = base.get(domain_id, ctx, msg_id);
157  if(msg == nullptr) {
158  auto iter = extra_messages_.find(domain_id);
159  if(iter == extra_messages_.end()) {
160  return nullptr;
161  }
162  auto& catalog = iter->second;
163  const char* lookup = ctx ? catalog.pgettext(ctx, msg_id) : catalog.gettext(msg_id);
164  if(lookup != msg_id) {
165  // (p)gettext returns the input pointer if the string was not found
166  msg = lookup;
167  }
168  }
169  return msg;
170  }
171 
172  const char* get(int domain_id, const char* ctx, const char* sid, int n) const override
173  {
174  auto& base = get_base();
175  const char* msg = base.get(domain_id, ctx, sid, n);
176  if(msg == nullptr) {
177  auto iter = extra_messages_.find(domain_id);
178  if(iter == extra_messages_.end()) {
179  return nullptr;
180  }
181  auto& catalog = iter->second;
182  const char* lookup = ctx ? catalog.npgettext(ctx, sid, sid, n) : catalog.ngettext(sid, sid, n);
183  if(lookup != sid) {
184  // n(p)gettext returns one of the input pointers if the string was not found
185  msg = lookup;
186  }
187  }
188  return msg;
189  }
190 
191  int domain(const std::string& domain) const override
192  {
193  auto& base = get_base();
194  return base.domain(domain);
195  }
196 
197  const char* convert(const char* msg, std::string& buffer) const override
198  {
199  auto& base = get_base();
200  return base.convert(msg, buffer);
201  }
202  private:
203  const bl::message_format<char>& get_base() const
204  {
205  return std::use_facet<bl::message_format<char>>(base_loc_);
206  }
207 
208  std::locale base_loc_;
209  std::map<int, spirit_po::default_catalog> extra_messages_;
210  };
211  struct translation_manager
212  {
213  translation_manager()
214  : loaded_paths_()
215  , loaded_domains_()
216  , current_language_(default_utf8_locale_name::name())
217  , generator_()
218  , current_locale_()
219  , is_dirty_(true)
220  {
221  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
222  for(const std::string& name : g_mgr.get_all_backends())
223  {
224  LOG_G << "Found boost locale backend: '" << name << "'\n";
225  }
226 
227  generator_.use_ansi_encoding(false);
228  generator_.categories(bl::message_facet | bl::information_facet | bl::collation_facet | bl::formatting_facet | bl::convert_facet);
229  generator_.characters(bl::char_facet);
230  // We cannot have current_locale_ be a non boost-generated locale since it might not supply
231  // the bl::info facet. As soon as we add message paths, update_locale_internal might fail,
232  // for example because of invalid .mo files. So make sure we call it at least once before adding paths/domains
233  update_locale_internal();
234  }
235 
236  void add_messages_domain(const std::string& domain)
237  {
238  if(loaded_domains_.find(domain) != loaded_domains_.end())
239  {
240  return;
241  }
242 
243  if(domain.find('/') != std::string::npos)
244  {
245  // Forward slash has a specific meaning in Boost.Locale domain
246  // names, specifying the encoding. We use UTF-8 for everything
247  // so we can't possibly support that, and odds are it's a user
248  // mistake (as in bug #23839).
249  ERR_G << "illegal textdomain name '" << domain
250  << "', skipping textdomain\n";
251  return;
252  }
253 
254  generator_.add_messages_domain(domain);
255  loaded_domains_.insert(domain);
256  }
257 
258  void add_messages_path(const std::string& path)
259  {
260  if(loaded_paths_.find(path) != loaded_paths_.end())
261  {
262  return;
263  }
264  generator_.add_messages_path(path);
265  loaded_paths_.insert(path);
266  }
267 
268  void set_default_messages_domain(const std::string& domain)
269  {
270  generator_.set_default_messages_domain(domain);
271  update_locale();
272  }
273 
274  void set_language(const std::string& language)
275  {
276  std::string::size_type at_pos = language.rfind('@');
277  if(language.empty())
278  {
279  current_language_ = default_utf8_locale_name::name();
280  }
281  else if(at_pos != std::string::npos)
282  {
283  current_language_ = language.substr(0, at_pos) + ".UTF-8" + language.substr(at_pos);
284  }
285  else
286  {
287  current_language_ = language + ".UTF-8";
288  }
289  update_locale();
290  }
291 
292  void update_locale()
293  {
294  is_dirty_ = true;
295  }
296 
297  void update_locale_internal()
298  {
299  try
300  {
301  LOG_G << "attempting to generate locale by name '" << current_language_ << "'\n";
302  current_locale_ = generator_.generate(current_language_);
303  current_locale_ = std::locale(current_locale_, new wesnoth_message_format(current_locale_, loaded_domains_, loaded_paths_));
304  const bl::info& info = std::use_facet<bl::info>(current_locale_);
305  LOG_G << "updated locale to '" << current_language_ << "' locale is now '" << current_locale_.name() << "' ( "
306  << "name='" << info.name()
307  << "' country='" << info.country()
308  << "' language='" << info.language()
309  << "' encoding='" << info.encoding()
310  << "' variant='" << info.variant() << "')\n";
311  }
312  catch(const bl::conv::conversion_error&)
313  {
314  assert(std::has_facet<bl::info>(current_locale_));
315  const bl::info& info = std::use_facet<bl::info>(current_locale_);
316  ERR_G << "Failed to update locale due to conversion error, locale is now: "
317  << "name='" << info.name()
318  << "' country='" << info.country()
319  << "' language='" << info.language()
320  << "' encoding='" << info.encoding()
321  << "' variant='" << info.variant()
322  << "'" << std::endl;
323  }
324  is_dirty_ = false;
325  }
326 
327  std::string debug_description()
328  {
329  std::stringstream res;
330  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
331  for(const std::string& name : g_mgr.get_all_backends())
332  {
333  res << "has backend: '" << name << "',";
334  }
335  if(std::has_facet<bl::info>(current_locale_)) {
336  const bl::info& info = std::use_facet<bl::info>(current_locale_);
337  res << " locale: (name='" << info.name()
338  << "' country='" << info.country()
339  << "' language='" << info.language()
340  << "' encoding='" << info.encoding()
341  << "' variant='" << info.variant()
342  << "'),";
343  }
344  if(std::has_facet<bl::collator<char>>(current_locale_)) {
345  res << "has bl::collator<char> facet, ";
346  }
347  res << "generator categories='" << generator_.categories() << "'";
348  return res.str();
349  }
350 
351  const std::locale& get_locale()
352  {
353  if(is_dirty_)
354  {
355  update_locale_internal();
356  }
357  return current_locale_;
358  }
359 
360  private:
361  std::set<std::string> loaded_paths_;
362  std::set<std::string> loaded_domains_;
363  std::string current_language_;
364  bl::generator generator_;
365  std::locale current_locale_;
366  bool is_dirty_;
367  };
368 
369  translation_manager& get_manager()
370  {
371  static translation_manager* mng = new translation_manager();
372  return *mng;
373  }
374 
/**
 * Returns a copy of @a str in which the ASCII letters 'A'-'Z' are replaced by
 * their lowercase counterparts. Every other character, including every
 * non-ASCII one, is copied through unchanged.
 */
std::string ascii_to_lowercase(const std::string& str)
{
	std::string lowered(str);
	for(char& ch : lowered) {
		if(ch >= 'A' && ch <= 'Z') {
			// In ASCII a lowercase letter is its uppercase letter with bit 0x20 set.
			ch = static_cast<char>(ch | 0x20);
		}
	}
	return lowered;
}
386 }
387 
388 namespace translation
389 {
390 
391 std::string dgettext(const char* domain, const char* msgid)
392 {
393  std::lock_guard lock(get_mutex());
394  return bl::dgettext(domain, msgid, get_manager().get_locale());
395 }
396 std::string egettext(char const *msgid)
397 {
398  std::lock_guard lock(get_mutex());
399  return msgid[0] == '\0' ? msgid : bl::gettext(msgid, get_manager().get_locale());
400 }
401 
402 std::string dsgettext (const char * domainname, const char *msgid)
403 {
404  std::string msgval = dgettext (domainname, msgid);
405  if (msgval == msgid) {
406  const char* firsthat = std::strchr (msgid, '^');
407  if (firsthat == nullptr)
408  msgval = msgid;
409  else
410  msgval = firsthat + 1;
411  }
412  return msgval;
413 }
414 
415 std::string dsngettext (const char * domainname, const char *singular, const char *plural, int n)
416 {
417  //TODO: only the next line needs to be in the lock.
418  std::lock_guard lock(get_mutex());
419  std::string msgval = bl::dngettext(domainname, singular, plural, n, get_manager().get_locale());
420  if (msgval == singular) {
421  const char* firsthat = std::strchr (singular, '^');
422  if (firsthat == nullptr)
423  msgval = singular;
424  else
425  msgval = firsthat + 1;
426  }
427  return msgval;
428 }
429 
430 void bind_textdomain(const char* domain, const char* directory, const char* /*encoding*/)
431 {
432  LOG_G << "adding textdomain '" << domain << "' in directory '" << directory << "'\n";
433  std::lock_guard lock(get_mutex());
434  get_manager().add_messages_domain(domain);
435  get_manager().add_messages_path(directory);
436  get_manager().update_locale();
437 }
438 
439 void set_default_textdomain(const char* domain)
440 {
441  LOG_G << "set_default_textdomain: '" << domain << "'\n";
442  std::lock_guard lock(get_mutex());
443  get_manager().set_default_messages_domain(domain);
444 }
445 
446 
447 void set_language(const std::string& language, const std::vector<std::string>* /*alternates*/)
448 {
449  // why should we need alternates? which languages we support should only be related
450  // to which languages we ship with and not which the os supports
451  LOG_G << "setting language to '" << language << "' \n";
452  std::lock_guard lock(get_mutex());
453  get_manager().set_language(language);
454 }
455 
456 int compare(const std::string& s1, const std::string& s2)
457 {
458  std::lock_guard lock(get_mutex());
459 
460  try {
461  return std::use_facet<std::collate<char>>(get_manager().get_locale()).compare(s1.c_str(), s1.c_str() + s1.size(), s2.c_str(), s2.c_str() + s2.size());
462  } catch(const std::bad_cast&) {
463  static bool bad_cast_once = false;
464 
465  if(!bad_cast_once) {
466  ERR_G << "locale set-up for compare() is broken, falling back to std::string::compare()\n";
467  bad_cast_once = true;
468  }
469 
470  return s1.compare(s2);
471  }
472 }
473 
474 int icompare(const std::string& s1, const std::string& s2)
475 {
476  // todo: maybe we should replace this preprocessor check with a std::has_facet<bl::collator<char>> check?
477 #ifdef __APPLE__
478  // https://github.com/wesnoth/wesnoth/issues/2094
479  return compare(ascii_to_lowercase(s1), ascii_to_lowercase(s2));
480 #else
481  std::lock_guard lock(get_mutex());
482 
483  try {
484  return std::use_facet<bl::collator<char>>(get_manager().get_locale()).compare(
485  bl::collator_base::secondary, s1, s2);
486  } catch(const std::bad_cast&) {
487  static bool bad_cast_once = false;
488 
489  if(!bad_cast_once) {
490  ERR_G << "locale set-up for icompare() is broken, falling back to std::string::compare()\n";
491 
492  try { //just to be safe.
493  ERR_G << get_manager().debug_description() << "\n";
494  } catch (const std::exception& e) {
495  ERR_G << e.what() << "\n";
496  }
497  bad_cast_once = true;
498  }
499 
500  // Let's convert at least ASCII letters to lowercase to get a somewhat case-insensitive comparison.
501  return ascii_to_lowercase(s1).compare(ascii_to_lowercase(s2));
502  }
503 #endif
504 }
505 
506 std::string strftime(const std::string& format, const std::tm* time)
507 {
508  std::basic_ostringstream<char> dummy;
509  std::lock_guard lock(get_mutex());
510  dummy.imbue(get_manager().get_locale()); // TODO: Calling imbue() with hard-coded locale appears to work with put_time in glibc, but not with get_locale()...
511  // Revert to use of boost (from 1.14) instead of std::put_time() because the latter does not appear to handle locale properly in Linux
512  dummy << bl::as::ftime(format) << mktime(const_cast<std::tm*>(time));
513 
514  return dummy.str();
515 }
516 
517 bool ci_search(const std::string& s1, const std::string& s2)
518 {
519  std::lock_guard lock(get_mutex());
520  const std::locale& locale = get_manager().get_locale();
521 
522  std::string ls1 = bl::to_lower(s1, locale);
523  std::string ls2 = bl::to_lower(s2, locale);
524 
525  return std::search(ls1.begin(), ls1.end(),
526  ls2.begin(), ls2.end()) != ls1.end();
527 }
528 
530 {
531  std::lock_guard lock(get_mutex());
532  return std::use_facet<boost::locale::info>(get_manager().get_locale());
533 }
534 }
static log_domain dom("general")
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:430
static domain_map * domains
Definition: log.cpp:71
int dummy
Definition: lstrlib.cpp:1347
static l_noret error(LoadState *S, const char *why)
Definition: lundump.cpp:40
logger & info()
Definition: log.cpp:88
int compare(const std::string &s1, const std::string &s2)
Case-sensitive lexicographical comparison.
Definition: gettext.cpp:456
static bool file_exists(const bfs::path &fpath)
Definition: filesystem.cpp:263
const language_def & get_locale()
Definition: language.cpp:318
filesystem::scoped_istream istream_file(const std::string &fname, bool treat_failure_as_error)
void set_language(const std::string &language, const std::vector< std::string > *)
Definition: gettext.cpp:447
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:415
std::string dgettext(const char *domain, const char *msgid)
Definition: gettext.cpp:391
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:402
std::string strftime(const std::string &format, const std::tm *time)
Definition: gettext.cpp:506
#define LOG_G
Definition: gettext.cpp:40
std::unique_ptr< std::istream > scoped_istream
Definition: filesystem.hpp:36
std::string path
Definition: game_config.cpp:38
void set_default_textdomain(const char *domain)
Definition: gettext.cpp:439
std::string egettext(char const *msgid)
Definition: gettext.cpp:396
bool ci_search(const std::string &s1, const std::string &s2)
Definition: gettext.cpp:517
static std::string gettext(const char *str)
Definition: gettext.hpp:59
logger & err()
Definition: log.cpp:76
#define ERR_G
Definition: gettext.cpp:42
static std::string flush(std::ostringstream &s)
Definition: reports.cpp:91
std::string language()
Definition: general.cpp:489
Declarations for File-IO.
static catalog from_istream(std::istream &is, warning_channel_type w=warning_channel_type())
Definition: catalog.hpp:269
rng * generator
This generator is automatically synced during synced context.
Definition: random.cpp:60
int icompare(const std::string &s1, const std::string &s2)
Case-insensitive lexicographical comparison.
Definition: gettext.cpp:474
Standard logging facilities (interface).
#define DBG_G
Definition: gettext.cpp:39
#define e
mock_char c
static map_location::DIRECTION n
const boost::locale::info & get_effective_locale_info()
A facet that holds general information about the effective locale.
Definition: gettext.cpp:529