The Battle for Wesnoth  1.15.1+dev
gettext.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 - 2018 by David White <dave@whitevine.net>
3  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation; either version 2 of the License, or
8  (at your option) any later version.
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY.
11 
12  See the COPYING file for more details.
13 */
14 
15 #include "global.hpp"
16 #include "gettext.hpp"
17 #include "log.hpp"
18 #include "filesystem.hpp"
19 
20 #include <algorithm>
21 #include <iomanip>
22 #include <iostream>
23 #include <iterator>
24 #include <fstream>
25 #include <locale>
26 #include <mutex>
27 #include <boost/locale.hpp>
28 #include <set>
29 
30 #if defined(__GNUC__)
31 #pragma GCC diagnostic push
32 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
33 #endif
34 #include "spirit_po/spirit_po.hpp"
35 #if defined(__GNUC__)
36 #pragma GCC diagnostic pop
37 #endif
38 
39 #define DBG_G LOG_STREAM(debug, lg::general())
40 #define LOG_G LOG_STREAM(info, lg::general())
41 #define WRN_G LOG_STREAM(warn, lg::general())
42 #define ERR_G LOG_STREAM(err, lg::general())
43 
44 namespace bl = boost::locale;
45 namespace
46 {
/**
 * Returns the single mutex shared by the functions in this file.
 *
 * The mutex is heap-allocated and never deleted, so it is not destroyed at program exit.
 */
std::mutex& get_mutex()
{
	static std::mutex* const instance = new std::mutex();
	return *instance;
}
48 
49  class default_utf8_locale_name
50  {
51  public:
52  static const std::string& name()
53  {
54  //Use pointers because we don't want it to be destructed at program end.
55  static default_utf8_locale_name* lname = new default_utf8_locale_name();
56  return lname->name_;
57  }
58  private:
59  default_utf8_locale_name()
60  : name_()
61  {
62  LOG_G << "Generating default locale\n";
63  try
64  {
65  //NOTE: the default_locale objects needs to live as least as long as the locale_info object. Otherwise the program will segfault.
66  std::locale default_locale = bl::generator().generate("");
67  const bl::info& locale_info = std::use_facet<bl::info>(default_locale);
68  name_ += locale_info.language();
69  if(!locale_info.country().empty())
70  name_ += "_" + locale_info.country();
71  name_ += ".UTF-8";
72  if(!locale_info.variant().empty())
73  name_ += "@" + locale_info.variant();
74  }
75  catch(const std::exception& e)
76  {
77  ERR_G << "Failed to generate default locale string. message:" << e.what() << std::endl;
78  }
79  LOG_G << "Finished generating default locale, default is now '" << name_ << "'\n";
80  }
81 
82  std::string name_;
83  };
84  class wesnoth_message_format : public bl::message_format<char>
85  {
86  using po_catalog = spirit_po::catalog<>;
87  public:
88  wesnoth_message_format(std::locale base, const std::set<std::string>& domains, const std::set<std::string>& paths)
89  : base_loc_(base)
90  {
91  const bl::info& inf = std::use_facet<bl::info>(base);
92  if(inf.language() == "c") {
93  return;
94  }
95  std::string lang_name_short = inf.language();
96  std::string lang_name_long = lang_name_short;
97  if(!inf.country().empty()) {
98  lang_name_long += '_';
99  lang_name_long += inf.country();
100  }
101  if(!inf.variant().empty()) {
102  lang_name_long += '@';
103  lang_name_long += inf.variant();
104  lang_name_short += '@';
105  lang_name_short += inf.variant();
106  }
107  DBG_G << "Loading po files for language " << lang_name_long << '\n';
108  for(auto& domain : domains) {
109  DBG_G << "Searching for po files for domain " << domain << '\n';
110  std::string path;
111  for(auto base_path : paths) {
112  DBG_G << "Searching in dir " << base_path << '\n';
113  if(base_path[base_path.length()-1] != '/') {
114  base_path += '/';
115  }
116  base_path += domain;
117  base_path += '/';
118  path = base_path + lang_name_long + ".po";
119  DBG_G << " Trying path " << path << '\n';
120  if(filesystem::file_exists(path)) {
121  break;
122  }
123  path = base_path + lang_name_short + ".po";
124  DBG_G << " Trying path " << path << '\n';
125  if(filesystem::file_exists(path)) {
126  break;
127  }
128  }
129  if(!filesystem::file_exists(path)) {
130  continue;
131  }
132  std::ifstream po_file;
133  po_file.exceptions(std::ios::badbit);
134  LOG_G << "Loading language file from " << path << '\n';
135  try {
136  po_file.open(path);
137  const po_catalog& cat = po_catalog::from_istream(po_file);
138  extra_messages_.emplace(get_base().domain(domain), cat);
139  } catch(const spirit_po::catalog_exception& e) {
140  throw_po_error(lang_name_long, domain, e.what());
141  } catch(const std::ios::failure&) {
142  throw_po_error(lang_name_long, domain, strerror(errno));
143  }
144  }
145  }
146 
147  [[noreturn]] static void throw_po_error(const std::string& lang, const std::string& dom, const std::string& detail) {
148  std::ostringstream err;
149  err << "Error opening language file for " << lang << ", textdomain " << dom
150  << ":\n " << detail << '\n';
151  ERR_G << err.rdbuf() << std::flush;
152  throw game::error(err.str());
153  }
154 
155  const char* get(int domain_id, const char* ctx, const char* msg_id) const override
156  {
157  auto& base = get_base();
158  const char* msg = base.get(domain_id, ctx, msg_id);
159  if(msg == nullptr) {
160  auto iter = extra_messages_.find(domain_id);
161  if(iter == extra_messages_.end()) {
162  return nullptr;
163  }
164  auto& catalog = iter->second;
165  const char* lookup = ctx ? catalog.pgettext(ctx, msg_id) : catalog.gettext(msg_id);
166  if(lookup != msg_id) {
167  // (p)gettext returns the input pointer if the string was not found
168  msg = lookup;
169  }
170  }
171  return msg;
172  }
173 
174  const char* get(int domain_id, const char* ctx, const char* sid, int n) const override
175  {
176  auto& base = get_base();
177  const char* msg = base.get(domain_id, ctx, sid, n);
178  if(msg == nullptr) {
179  auto iter = extra_messages_.find(domain_id);
180  if(iter == extra_messages_.end()) {
181  return nullptr;
182  }
183  auto& catalog = iter->second;
184  const char* lookup = ctx ? catalog.npgettext(ctx, sid, sid, n) : catalog.ngettext(sid, sid, n);
185  if(lookup != sid) {
186  // n(p)gettext returns one of the input pointers if the string was not found
187  msg = lookup;
188  }
189  }
190  return msg;
191  }
192 
193  int domain(const std::string& domain) const override
194  {
195  auto& base = get_base();
196  return base.domain(domain);
197  }
198 
199  const char* convert(const char* msg, std::string& buffer) const override
200  {
201  auto& base = get_base();
202  return base.convert(msg, buffer);
203  }
204  private:
205  const bl::message_format<char>& get_base() const
206  {
207  return std::use_facet<bl::message_format<char>>(base_loc_);
208  }
209 
210  std::locale base_loc_;
211  std::map<int, po_catalog> extra_messages_;
212  };
213  struct translation_manager
214  {
215  translation_manager()
216  : loaded_paths_()
217  , loaded_domains_()
218  , current_language_(default_utf8_locale_name::name())
219  , generator_()
220  , current_locale_()
221  , is_dirty_(true)
222  {
223  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
224  for(const std::string& name : g_mgr.get_all_backends())
225  {
226  LOG_G << "Found boost locale backend: '" << name << "'\n";
227  }
228 
229  generator_.use_ansi_encoding(false);
230  generator_.categories(bl::message_facet | bl::information_facet | bl::collation_facet | bl::formatting_facet | bl::convert_facet);
231  generator_.characters(bl::char_facet);
232  // We cannot have current_locale_ be a non boost-generated locale since it might not supply
233  // the bl::info facet. As soon as we add message paths, update_locale_internal might fail,
234  // for example because of invalid .mo files. So make sure we call it at least once before adding paths/domains
235  update_locale_internal();
236  }
237 
238  void add_messages_domain(const std::string& domain)
239  {
240  if(loaded_domains_.find(domain) != loaded_domains_.end())
241  {
242  return;
243  }
244 
245  if(domain.find('/') != std::string::npos)
246  {
247  // Forward slash has a specific meaning in Boost.Locale domain
248  // names, specifying the encoding. We use UTF-8 for everything
249  // so we can't possibly support that, and odds are it's a user
250  // mistake (as in bug #23839).
251  ERR_G << "illegal textdomain name '" << domain
252  << "', skipping textdomain\n";
253  return;
254  }
255 
256  generator_.add_messages_domain(domain);
257  loaded_domains_.insert(domain);
258  }
259 
260  void add_messages_path(const std::string& path)
261  {
262  if(loaded_paths_.find(path) != loaded_paths_.end())
263  {
264  return;
265  }
266  generator_.add_messages_path(path);
267  loaded_paths_.insert(path);
268  }
269 
270  void set_default_messages_domain(const std::string& domain)
271  {
272  generator_.set_default_messages_domain(domain);
273  update_locale();
274  }
275 
276  void set_language(const std::string& language)
277  {
278  std::string::size_type at_pos = language.rfind('@');
279  if(language.empty())
280  {
281  current_language_ = default_utf8_locale_name::name();
282  }
283  else if(at_pos != std::string::npos)
284  {
285  current_language_ = language.substr(0, at_pos) + ".UTF-8" + language.substr(at_pos);
286  }
287  else
288  {
289  current_language_ = language + ".UTF-8";
290  }
291  update_locale();
292  }
293 
294  void update_locale()
295  {
296  is_dirty_ = true;
297  }
298 
299  void update_locale_internal()
300  {
301  try
302  {
303  LOG_G << "attempting to generate locale by name '" << current_language_ << "'\n";
304  current_locale_ = generator_.generate(current_language_);
305  current_locale_ = std::locale(current_locale_, new wesnoth_message_format(current_locale_, loaded_domains_, loaded_paths_));
306  const bl::info& info = std::use_facet<bl::info>(current_locale_);
307  LOG_G << "updated locale to '" << current_language_ << "' locale is now '" << current_locale_.name() << "' ( "
308  << "name='" << info.name()
309  << "' country='" << info.country()
310  << "' language='" << info.language()
311  << "' encoding='" << info.encoding()
312  << "' variant='" << info.variant() << "')\n";
313  }
314  catch(const bl::conv::conversion_error&)
315  {
316  assert(std::has_facet<bl::info>(current_locale_));
317  const bl::info& info = std::use_facet<bl::info>(current_locale_);
318  ERR_G << "Failed to update locale due to conversion error, locale is now: "
319  << "name='" << info.name()
320  << "' country='" << info.country()
321  << "' language='" << info.language()
322  << "' encoding='" << info.encoding()
323  << "' variant='" << info.variant()
324  << "'" << std::endl;
325  }
326  is_dirty_ = false;
327  }
328 
329  std::string debug_description()
330  {
331  std::stringstream res;
332  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
333  for(const std::string& name : g_mgr.get_all_backends())
334  {
335  res << "has backend: '" << name << "',";
336  }
337  if(std::has_facet<bl::info>(current_locale_)) {
338  const bl::info& info = std::use_facet<bl::info>(current_locale_);
339  res << " locale: (name='" << info.name()
340  << "' country='" << info.country()
341  << "' language='" << info.language()
342  << "' encoding='" << info.encoding()
343  << "' variant='" << info.variant()
344  << "'),";
345  }
346  if(std::has_facet<bl::collator<char>>(current_locale_)) {
347  res << "has bl::collator<char> facet, ";
348  }
349  res << "generator categories='" << generator_.categories() << "'";
350  return res.str();
351  }
352 
353  const std::locale& get_locale()
354  {
355  if(is_dirty_)
356  {
357  update_locale_internal();
358  }
359  return current_locale_;
360  }
361 
362  private:
363  std::set<std::string> loaded_paths_;
364  std::set<std::string> loaded_domains_;
365  std::string current_language_;
366  bl::generator generator_;
367  std::locale current_locale_;
368  bool is_dirty_;
369  };
370 
371  translation_manager& get_manager()
372  {
373  static translation_manager* mng = new translation_manager();
374  return *mng;
375  }
376 
/**
 * Returns a copy of @a str in which the ASCII letters 'A'-'Z' are replaced by
 * their lowercase counterparts. All other bytes, including those of non-ASCII
 * Unicode letters, are left unchanged.
 */
std::string ascii_to_lowercase(const std::string& str)
{
	std::string lowered(str);
	for(char& ch : lowered) {
		if(ch >= 'A' && ch <= 'Z') {
			// Setting bit 0x20 maps an uppercase ASCII letter onto its lowercase form.
			ch = static_cast<char>(ch | 0x20);
		}
	}
	return lowered;
}
388 }
389 
390 namespace translation
391 {
392 
393 std::string dgettext(const char* domain, const char* msgid)
394 {
395  std::lock_guard<std::mutex> lock(get_mutex());
396  return bl::dgettext(domain, msgid, get_manager().get_locale());
397 }
398 std::string egettext(char const *msgid)
399 {
400  std::lock_guard<std::mutex> lock(get_mutex());
401  return msgid[0] == '\0' ? msgid : bl::gettext(msgid, get_manager().get_locale());
402 }
403 
404 std::string dsgettext (const char * domainname, const char *msgid)
405 {
406  std::string msgval = dgettext (domainname, msgid);
407  if (msgval == msgid) {
408  const char* firsthat = std::strchr (msgid, '^');
409  if (firsthat == nullptr)
410  msgval = msgid;
411  else
412  msgval = firsthat + 1;
413  }
414  return msgval;
415 }
416 
417 std::string dsngettext (const char * domainname, const char *singular, const char *plural, int n)
418 {
419  //TODO: only the next line needs to be in the lock.
420  std::lock_guard<std::mutex> lock(get_mutex());
421  std::string msgval = bl::dngettext(domainname, singular, plural, n, get_manager().get_locale());
422  if (msgval == singular) {
423  const char* firsthat = std::strchr (singular, '^');
424  if (firsthat == nullptr)
425  msgval = singular;
426  else
427  msgval = firsthat + 1;
428  }
429  return msgval;
430 }
431 
432 void bind_textdomain(const char* domain, const char* directory, const char* /*encoding*/)
433 {
434  LOG_G << "adding textdomain '" << domain << "' in directory '" << directory << "'\n";
435  std::lock_guard<std::mutex> lock(get_mutex());
436  get_manager().add_messages_domain(domain);
437  get_manager().add_messages_path(directory);
438  get_manager().update_locale();
439 }
440 
441 void set_default_textdomain(const char* domain)
442 {
443  LOG_G << "set_default_textdomain: '" << domain << "'\n";
444  std::lock_guard<std::mutex> lock(get_mutex());
445  get_manager().set_default_messages_domain(domain);
446 }
447 
448 
449 void set_language(const std::string& language, const std::vector<std::string>* /*alternates*/)
450 {
451  // why should we need alternates? which languages we support should only be related
452  // to which languages we ship with and not which the os supports
453  LOG_G << "setting language to '" << language << "' \n";
454  std::lock_guard<std::mutex> lock(get_mutex());
455  get_manager().set_language(language);
456 }
457 
458 int compare(const std::string& s1, const std::string& s2)
459 {
460  std::lock_guard<std::mutex> lock(get_mutex());
461 
462  try {
463  return std::use_facet<std::collate<char>>(get_manager().get_locale()).compare(s1.c_str(), s1.c_str() + s1.size(), s2.c_str(), s2.c_str() + s2.size());
464  } catch(const std::bad_cast&) {
465  static bool bad_cast_once = false;
466 
467  if(!bad_cast_once) {
468  ERR_G << "locale set-up for compare() is broken, falling back to std::string::compare()\n";
469  bad_cast_once = true;
470  }
471 
472  return s1.compare(s2);
473  }
474 }
475 
476 int icompare(const std::string& s1, const std::string& s2)
477 {
478  // todo: maybe we should replace this preprocessor check with a std::has_facet<bl::collator<char>> check?
479 #ifdef __APPLE__
480  // https://github.com/wesnoth/wesnoth/issues/2094
481  return compare(ascii_to_lowercase(s1), ascii_to_lowercase(s2));
482 #else
483  std::lock_guard<std::mutex> lock(get_mutex());
484 
485  try {
486  return std::use_facet<bl::collator<char>>(get_manager().get_locale()).compare(
487  bl::collator_base::secondary, s1, s2);
488  } catch(const std::bad_cast&) {
489  static bool bad_cast_once = false;
490 
491  if(!bad_cast_once) {
492  ERR_G << "locale set-up for icompare() is broken, falling back to std::string::compare()\n";
493 
494  try { //just to be safe.
495  ERR_G << get_manager().debug_description() << "\n";
496  } catch (const std::exception& e) {
497  ERR_G << e.what() << "\n";
498  }
499  bad_cast_once = true;
500  }
501 
502  // Let's convert at least ASCII letters to lowercase to get a somewhat case-insensitive comparison.
503  return ascii_to_lowercase(s1).compare(ascii_to_lowercase(s2));
504  }
505 #endif
506 }
507 
508 std::string strftime(const std::string& format, const std::tm* time)
509 {
510  std::basic_ostringstream<char> dummy;
511  std::lock_guard<std::mutex> lock(get_mutex());
512  dummy.imbue(get_manager().get_locale()); // TODO: Calling imbue() with hard-coded locale appears to work with put_time in glibc, but not with get_locale()...
513  // Revert to use of boost (from 1.14) instead of std::put_time() because the latter does not appear to handle locale properly in Linux
514  dummy << bl::as::ftime(format) << mktime(const_cast<std::tm*>(time));
515 
516  return dummy.str();
517 }
518 
519 bool ci_search(const std::string& s1, const std::string& s2)
520 {
521  std::lock_guard<std::mutex> lock(get_mutex());
522  const std::locale& locale = get_manager().get_locale();
523 
524  std::string ls1 = bl::to_lower(s1, locale);
525  std::string ls2 = bl::to_lower(s2, locale);
526 
527  return std::search(ls1.begin(), ls1.end(),
528  ls2.begin(), ls2.end()) != ls1.end();
529 }
530 
531 }
static log_domain dom("general")
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:432
static domain_map * domains
Definition: log.cpp:73
int dummy
Definition: lstrlib.cpp:1125
static l_noret error(LoadState *S, const char *why)
Definition: lundump.cpp:39
logger & info()
Definition: log.cpp:90
int compare(const std::string &s1, const std::string &s2)
Case-sensitive lexicographical comparison.
Definition: gettext.cpp:458
static bool file_exists(const bfs::path &fpath)
Definition: filesystem.cpp:266
const language_def & get_locale()
Definition: language.cpp:263
void set_language(const std::string &language, const std::vector< std::string > *)
Definition: gettext.cpp:449
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:417
std::string dgettext(const char *domain, const char *msgid)
Definition: gettext.cpp:393
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:404
std::string strftime(const std::string &format, const std::tm *time)
Definition: gettext.cpp:508
#define LOG_G
Definition: gettext.cpp:40
std::string path
Definition: game_config.cpp:39
void set_default_textdomain(const char *domain)
Definition: gettext.cpp:441
static UNUSEDNOWARN std::string gettext(const char *str)
Definition: gettext.hpp:66
std::string egettext(char const *msgid)
Definition: gettext.cpp:398
bool ci_search(const std::string &s1, const std::string &s2)
Definition: gettext.cpp:519
logger & err()
Definition: log.cpp:78
#define ERR_G
Definition: gettext.cpp:42
static std::string flush(std::ostringstream &s)
Definition: reports.cpp:90
std::string language()
Definition: general.cpp:475
Declarations for File-IO.
rng * generator
This generator is automatically synced during synced context.
Definition: random.cpp:60
int icompare(const std::string &s1, const std::string &s2)
Case-insensitive lexicographical comparison.
Definition: gettext.cpp:476
Standard logging facilities (interface).
#define DBG_G
Definition: gettext.cpp:39
#define e
mock_char c
static map_location::DIRECTION n