The Battle for Wesnoth  1.15.2+dev
catalog.hpp
Go to the documentation of this file.
1 // (C) Copyright 2015 - 2017 Christopher Beck
2 
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #ifndef SPIRIT_PO_CATALOG_HPP_INCLUDED
7 #define SPIRIT_PO_CATALOG_HPP_INCLUDED
8 
9 // This isn't necessary after boost 1.57 I think but we'll leave it here for compat
10 
11 #ifndef BOOST_SPIRIT_USE_PHOENIX_V3
12 #define BOOST_SPIRIT_USE_PHOENIX_V3
13 #endif
14 
15 // SPIRIT_PO_NO_EXCEPTIONS used to be named SPIRIT_PO_NOEXCEPT, but we leave this
16 // here to avoid breakage.
17 
18 #if (!defined SPIRIT_PO_NO_EXCEPTIONS) && (defined SPIRIT_PO_NOEXCEPT)
19 #define SPIRIT_PO_NO_EXCEPTIONS
20 #endif
21 
24 #include <spirit_po/exceptions.hpp>
25 #include <spirit_po/po_grammar.hpp>
26 #include <spirit_po/po_message.hpp>
27 
28 #include <boost/spirit/include/qi.hpp>
29 #include <functional>
30 #include <istream>
31 #include <string>
32 #include <unordered_map>
33 #include <vector>
34 
35 namespace spirit_po {
36 
37 namespace spirit = boost::spirit;
38 namespace qi = spirit::qi;
39 typedef unsigned int uint;
40 
41 typedef std::function<void(const std::string &)> warning_channel_type;
42 typedef std::unordered_map<std::string, po_message> default_hashmap_type;
43 
44 template <typename hashmap_type = default_hashmap_type, typename pf_compiler = default_plural_forms::compiler>
45 class catalog {
47 
48  typename pf_compiler::result_type pf_function_object_;
49  uint singular_index_; // cached result of pf_function_object(1)
50 
51 #ifdef SPIRIT_PO_NO_EXCEPTIONS
52  boost::optional<std::string> error_message_;
53  // if loading failed, error_message_ contains an error
54  // (rather than throwing an exception)
55 #endif // SPIRIT_PO_NO_EXCEPTIONS
56  warning_channel_type warning_channel_;
57 
58  hashmap_type hashmap_;
59 
60 public:
/***
 * Build the hashmap key of a message that has a context.
 *
 * The key is msgctxt, followed by ASCII 4 (the EOT character), followed by the msgid.
 * EOT separates context from msgid in MO files; the same layout is used here for consistency.
 * c.f. https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
 */
static std::string form_context_index(const std::string & msgctxt, const std::string & id) {
  const char separator = static_cast<char>(4);

  std::string key;
  key.reserve(msgctxt.size() + 1 + id.size());
  key.append(msgctxt);
  key.push_back(separator);
  key.append(id);
  return key;
}
70 
71  static std::string form_index(const po_message & msg) {
72  return msg.context ? form_context_index(*msg.context, msg.id) : msg.id;
73  }
74 
75 private:
76  /***
77  * Helper for interacting with hashmap results
78  * get(msg) gets the *singular* string from the message. if it's a plural message, look in singular_index_.
79  * if it's not a plural message, then there is only one string. also, the po header is never a plural message
80  */
81  const std::string & get(const po_message & msg) const {
82  if (msg.strings().size() == 1) { return msg.strings()[0]; }
83  return msg.strings()[singular_index_];
84  }
85 
86  const std::string & get(const po_message & msg, uint plural) const {
87  uint idx = (plural == 1 ? singular_index_ : pf_function_object_(plural));
88  if (idx >= msg.strings().size()) { idx = 0; }
89  return msg.strings()[idx];
90  }
91 
92  /***
93  * Emplace a message into the hashmap
94  */
95  void insert_message(po_message && msg) {
96  if (!msg.strings().size()) { return; }
97  // don't allow messages with ZERO translations into the catalog, this will cause segfaults later.
98  // should perhaps throw an exception here
99 
100  if (!msg.strings()[0].size()) { return; }
101  // if the (first) translated string is "", it is untranslated and message does not enter catalog
102 
103  if (msg.strings().size() > 1 && msg.strings().size() != metadata_.num_plural_forms) {
104  if (warning_channel_) {
105  warning_channel_("Ignoring a message with an incorrect number of plural forms: plural = " + std::to_string(msg.strings().size()) + " msgid = '" + msg.id + "'");
106  }
107  return;
108  }
109 
110  std::string index = form_index(msg);
111  // adjust the id based on context if necessary
112 
113  auto result = hashmap_.emplace(std::move(index), std::move(msg));
114 
115  // Issue a warning if emplace failed, rather than silently overwrite.
116  if (!result.second) {
117  if (warning_channel_) {
118  std::string warning = "Overwriting a message: msgid = <<<" + msg.id + ">>>";
119  if (msg.context) { warning += " msgctxt = <<<" + *msg.context + ">>>"; }
120  warning_channel_(warning);
121  }
122  result.first->second = std::move(msg);
123  }
124  }
125 
126 public:
#ifdef SPIRIT_PO_NO_EXCEPTIONS
/***
 * Error checking (this is done so we don't have to throw exceptions from the ctor).
 *
 * The catalog converts to true when no error message has been recorded.
 */
explicit operator bool() const {
  return !error_message_;
}

/***
 * Return the recorded error message, or an empty string when there is none
 * (dereferencing the empty optional would be undefined behavior).
 */
std::string error() const {
  if (!error_message_) { return std::string(); }
  return *error_message_;
}
#endif // SPIRIT_PO_NO_EXCEPTIONS
139 
140  /***
141  * Ctors
142  */
143  template <typename Iterator>
144  catalog(spirit::line_pos_iterator<Iterator> & it, spirit::line_pos_iterator<Iterator> & end, warning_channel_type warn_channel = warning_channel_type(), pf_compiler compiler = pf_compiler())
145  : metadata_()
146  , pf_function_object_()
147  , singular_index_(0)
148  , warning_channel_(warn_channel)
149  , hashmap_()
150  {
151  typedef spirit::line_pos_iterator<Iterator> iterator_type;
153 
154  po_message msg;
155  std::size_t line_no = 0;
156 
157  // Parse header first
158  {
159  // must be able to parse first message
160  qi::parse(it, end, grammar.skipped_block); // first parse any comments
161  if (!qi::parse(it, end, grammar, msg)) { // now parse the main grammar target
162  int err_line = it.position();
163  SPIRIT_PO_CATALOG_FAIL("Failed to parse po header, stopped at line " + std::to_string(err_line) + ": " + iterator_context(it, end));
164  }
165 
166  // first message must have empty MSGID (po format says so)
167  if (msg.id.size()) {
168  SPIRIT_PO_CATALOG_FAIL("Failed to parse po header, first msgid must be empty string \"\", found: " + msg.id);
169  }
170 
171  // Now parse the header string itself
172  if (msg.strings().size()) {
173  std::string maybe_error = metadata_.parse_header(msg.strings()[0]);
174  if (maybe_error.size()) {
175  SPIRIT_PO_CATALOG_FAIL("Failed to parse po header: " + maybe_error);
176  }
177  }
178 
179  if (!metadata_.num_plural_forms) {
180  SPIRIT_PO_CATALOG_FAIL("Invalid metadata in po header, found num_plurals = 0");
181  }
182 
183  // Try to compile the plural forms function string
184  pf_function_object_ = compiler(metadata_.plural_forms_function_string);
185  if (!pf_function_object_) {
186  SPIRIT_PO_CATALOG_FAIL(("Failed to read plural forms function. "
187  "Input: '" + metadata_.plural_forms_function_string + "', "
188  "error message: " + pf_function_object_.error()));
189  }
190 
191  // Cache the 'singular' form index since it is most common
192  singular_index_ = pf_function_object_(1);
193  if (singular_index_ >= metadata_.num_plural_forms) {
194  SPIRIT_PO_CATALOG_FAIL(("Invalid plural forms function. "
195  "On input n = 1, returned plural = " + std::to_string(singular_index_) + ", "
196  "while num_plurals = " + std::to_string(metadata_.num_plural_forms)));
197  }
198 
199  msg.line_no = line_no;
200  this->insert_message(std::move(msg)); // for compatibility, need to insert the header message at msgid ""
201  }
202 
203  // Now parse non-fuzzy messages
204  while (it != end) {
205  // this parse rule cannot fail, it can be a zero length match
206  qi::parse(it, end, grammar.ignored_comments);
207 
208  bool fuzzy = false;
209  // this parse rule cannot fail, it can be a zero length match
210  qi::parse(it, end, grammar.message_preamble, fuzzy);
211 
212  // check if we exhausted the file by comments
213  if (it != end) {
214  msg = po_message{};
215  msg.strings().reserve(metadata_.num_plural_forms); // try to prevent frequent vector reallocations
216  line_no = it.position();
217  // actually parse a message
218  if (!qi::parse(it, end, grammar, msg)) {
219  int err_line = it.position();
220  SPIRIT_PO_CATALOG_FAIL(("Failed to parse po file, "
221  "started at " + std::to_string(line_no) + ": , stopped at " + std::to_string(err_line) + ":\n"
222  + iterator_context(it, end)));
223  }
224  // cannot overwrite header
225  if (!msg.id.size()) {
226  int err_line = it.position();
227  SPIRIT_PO_CATALOG_FAIL(("Malformed po file: Cannot overwrite the header entry later in the po file."
228  "Started at " + std::to_string(line_no) + ": , stopped at " + std::to_string(err_line) + ":\n"
229  + iterator_context(it, end)));
230  }
231  msg.line_no = line_no;
232  // only insert it if it wasn't marked fuzzy
233  if (!fuzzy) { this->insert_message(std::move(msg)); }
234  }
235  }
236 
237 #ifdef SPIRIT_PO_DEBUG
238  // validate resulting hashmap
239  for (const auto & p : hashmap_) {
240  if (!p.second.strings().size()) { SPIRIT_PO_CATALOG_FAIL(("Internal catalog error: found a message id with no strings, msgid='" + p.first + "'")); }
241  if (p.second.strings().size() != 1 && p.second.strings().size() != metadata_.num_plural_forms) {
242  SPIRIT_PO_CATALOG_FAIL(("Internal catalog error: found a message id with wrong number of strings, msgid='" + p.first + "' num msgstr = " + std::to_string(p.second.strings().size()) + ", catalog num_plural_forms = " + std::to_string(metadata_.num_plural_forms) + "\nWhole message: " + debug_string(p.second) ));
243  }
244  }
245 #endif // SPIRIT_PO_DEBUG
246  }
247 
248  // Upgrade an iterator pair to spirit::line_pos_iterators
249  template <typename Iterator>
250  static catalog from_iterators(Iterator & b, Iterator & e, warning_channel_type w = warning_channel_type()) {
251  spirit::line_pos_iterator<Iterator> it{b};
252  spirit::line_pos_iterator<Iterator> end{e};
253  return catalog(it, end, w);
254  }
255 
256  template <typename Iterator>
257  static catalog from_iterators(spirit::line_pos_iterator<Iterator> & b, spirit::line_pos_iterator<Iterator> & e, warning_channel_type w = warning_channel_type()) {
258  return catalog(b, e, w);
259  }
260 
261  // Construct a catalog from a range using one expression
262  template <typename Range>
263  static catalog from_range(const Range & range, warning_channel_type w = warning_channel_type()) {
264  auto it = boost::begin(range);
265  auto end = boost::end(range);
266  return from_iterators(it, end, w);
267  }
268 
269  static catalog from_istream(std::istream & is, warning_channel_type w = warning_channel_type()) {
270  // no white space skipping in the stream!
271  is.unsetf(std::ios::skipws);
272  spirit::istream_iterator it(is);
273  spirit::istream_iterator end;
274  return from_iterators(it, end, w);
275  }
276 
277  ///////////////
278  // ACCESSORS //
279  ///////////////
280 
281  /***
282  * Lookup strings from the catalog
283  *
284  * When using string literals as the parameters, these versions are safe and
285  * are maximally efficient.
286  * (The returned pointer is either the input pointer, having static storage
287  * duration, or has lifetime as long as the catalog.)
288  *
289  * Chosen to behave in the same manner as corresponding gettext functions.
290  */
291  const char * gettext(const char * msgid) const {
292  auto it = hashmap_.find(msgid);
293  if (it != hashmap_.end()) {
294  return this->get(it->second).c_str();
295  } else {
296  return msgid;
297  }
298  }
299 
300  const char * ngettext(const char * msgid, const char * msgid_plural, uint plural) const {
301  auto it = hashmap_.find(msgid);
302  if (it != hashmap_.end() && it->second.is_plural()) {
303  return this->get(it->second, plural).c_str();
304  } else {
305  return (plural == 1 ? msgid : msgid_plural);
306  }
307  }
308 
309  const char * pgettext(const char * context, const char * msgid) const {
310  auto it = hashmap_.find(form_context_index(context, msgid));
311  if (it != hashmap_.end()) {
312  return this->get(it->second).c_str();
313  } else {
314  return msgid;
315  }
316  }
317 
318  const char * npgettext(const char * context, const char * msgid, const char * msgid_plural, uint plural) const {
319  auto it = hashmap_.find(form_context_index(context, msgid));
320  if (it != hashmap_.end() && it->second.is_plural()) {
321  return this->get(it->second, plural).c_str();
322  } else {
323  return (plural == 1 ? msgid : msgid_plural);
324  }
325  }
326 
327  /***
328  * Lookup strings from catalog, return std::string.
329  *
330  * When, for whatever reason, it is more comfortable to use idiomatic C++.
331  *
332  * Template arguments here should always be `std::string &&` or `const std::string &`
333  */
334 
335 private:
336  template <typename S>
337  std::string gettext_str_impl(S && msgid) const {
338  auto it = hashmap_.find(msgid);
339  if (it != hashmap_.end()) {
340  return this->get(it->second);
341  } else {
342  return std::forward<S>(msgid);
343  }
344  }
345 
346  template <typename S1, typename S2>
347  std::string ngettext_str_impl(S1 && msgid, S2 && msgid_plural, uint plural) const {
348  auto it = hashmap_.find(msgid);
349  if (it != hashmap_.end() && it->second.is_plural()) {
350  return this->get(it->second, plural);
351  } else {
352  if (plural == 1) {
353  return std::forward<S1>(msgid);
354  } else {
355  return std::forward<S2>(msgid_plural);
356  }
357  }
358  }
359 
360  template <typename S>
361  std::string pgettext_str_impl(const std::string & context, S && msgid) const {
362  auto it = hashmap_.find(form_context_index(context, msgid));
363  if (it != hashmap_.end()) {
364  return this->get(it->second);
365  } else {
366  return std::forward<S>(msgid);
367  }
368  }
369 
370  template <typename S1, typename S2>
371  std::string npgettext_str_impl(const std::string & context, S1 && msgid, S2 && msgid_plural, uint plural) const {
372  auto it = hashmap_.find(form_context_index(context, msgid));
373  if (it != hashmap_.end() && it->second.is_plural()) {
374  return this->get(it->second, plural);
375  } else {
376  if (plural == 1) {
377  return std::forward<S1>(msgid);
378  } else {
379  return std::forward<S2>(msgid_plural);
380  }
381  }
382  }
383 
384 public:
385  // Interface to implementations above, enforcing that arguments are `std::string`.
386 
387  std::string gettext_str(const std::string & msgid) const { return this->gettext_str_impl(msgid); }
388  std::string gettext_str(std::string && msgid) const { return this->gettext_str_impl(std::move(msgid)); }
389 
390  std::string ngettext_str(const std::string & msgid, const std::string & msgid_plural, uint plural) const { return this->ngettext_str_impl(msgid, msgid_plural, plural); }
391  std::string ngettext_str(std::string && msgid, const std::string & msgid_plural, uint plural) const { return this->ngettext_str_impl(std::move(msgid), msgid_plural, plural); }
392  std::string ngettext_str(const std::string & msgid, std::string && msgid_plural, uint plural) const { return this->ngettext_str_impl(msgid, std::move(msgid_plural), plural); }
393  std::string ngettext_str(std::string && msgid, std::string && msgid_plural, uint plural) const { return this->ngettext_str_impl(std::move(msgid), std::move(msgid_plural), plural); }
394 
395  std::string pgettext_str(const std::string & context, const std::string & msgid) const { return this->pgettext_str_impl(context, msgid); }
396  std::string pgettext_str(const std::string & context, std::string && msgid) const { return this->pgettext_str_impl(context, std::move(msgid)); }
397 
398  std::string npgettext_str(const std::string & context, const std::string & msgid, const std::string & msgid_plural, uint plural) const { return this->npgettext_str_impl(context, msgid, msgid_plural, plural); }
399  std::string npgettext_str(const std::string & context, std::string && msgid, const std::string & msgid_plural, uint plural) const { return this->npgettext_str_impl(context, std::move(msgid), msgid_plural, plural); }
400  std::string npgettext_str(const std::string & context, const std::string & msgid, std::string && msgid_plural, uint plural) const { return this->npgettext_str_impl(context, msgid, std::move(msgid_plural), plural); }
401  std::string npgettext_str(const std::string & context, std::string && msgid, std::string && msgid_plural, uint plural) const { return this->npgettext_str_impl(context, std::move(msgid), std::move(msgid_plural), plural); }
402 
403  /***
404  * Get line numbers of messages
405  */
406  std::size_t gettext_line_no(const std::string & msgid) const {
407  auto it = hashmap_.find(msgid);
408  if (it != hashmap_.end()) {
409  return it->second.line_no;
410  } else {
411  return 0;
412  }
413  }
414 
415  std::size_t pgettext_line_no(const std::string & context, const std::string & msgid) const {
416  auto it = hashmap_.find(form_context_index(context, msgid));
417  if (it != hashmap_.end()) {
418  return it->second.line_no;
419  } else {
420  return 0;
421  }
422  }
423 
424  /***
425  * Access metadata
426  */
427  const catalog_metadata & get_metadata() const { return metadata_; }
428 
429  /***
430  * Catalog size
431  */
432  uint size() const {
433  // exclude po header from the count, this is how msgfmt reports size also
434  return hashmap_.size() - hashmap_.count("");
435  }
436 
437  /***
438  * Debugging output
439  */
440  const hashmap_type & get_hashmap() const { return hashmap_; }
441 
442  /***
443  * Set warning channel (for msgid overwrites)
444  */
445  void set_warning_channel(const warning_channel_type & w) { warning_channel_ = w; }
446 
447  /***
448  * Merge a different catalog into this one
449  */
450  template <typename H, typename P>
451  void merge(catalog<H, P> && other) {
452  std::string maybe_error = metadata_.check_compatibility(other.metadata_);
453  if (maybe_error.size()) {
454  SPIRIT_PO_CATALOG_FAIL(("Cannot merge catalogs: " + maybe_error));
455  }
456  for (auto & p : other.hashmap_) {
457  if (p.first.size()) { // don't copy over the header, keep our original header
458  this->insert_message(std::move(p.second));
459  }
460  }
461  }
462 };
463 
464 } // end namespace spirit_po
465 
466 #endif // SPIRIT_PO_CATALOG_HPP_INCLUDED
void merge(catalog< H, P > &&other)
Definition: catalog.hpp:451
catalog_metadata metadata_
Definition: catalog.hpp:46
const char * pgettext(const char *context, const char *msgid) const
Definition: catalog.hpp:309
std::string npgettext_str(const std::string &context, const std::string &msgid, const std::string &msgid_plural, uint plural) const
Definition: catalog.hpp:398
std::string gettext_str(std::string &&msgid) const
Definition: catalog.hpp:388
std::function< void(const std::string &)> warning_channel_type
Definition: catalog.hpp:41
std::string ngettext_str(const std::string &msgid, const std::string &msgid_plural, uint plural) const
Definition: catalog.hpp:390
std::string gettext_str(const std::string &msgid) const
Definition: catalog.hpp:387
std::string npgettext_str_impl(const std::string &context, S1 &&msgid, S2 &&msgid_plural, uint plural) const
Definition: catalog.hpp:371
static l_noret error(LoadState *S, const char *why)
Definition: lundump.cpp:39
std::string ngettext_str(std::string &&msgid, const std::string &msgid_plural, uint plural) const
Definition: catalog.hpp:391
std::string parse_header(const std::string &header)
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
std::string pgettext_str(const std::string &context, std::string &&msgid) const
Definition: catalog.hpp:396
qi::rule< Iterator, bool()> message_preamble
consume any number of non-white comment line (using #). bool result represents if we saw #...
Definition: po_grammar.hpp:65
pf_compiler::result_type pf_function_object_
Definition: catalog.hpp:48
#define b
const hashmap_type & get_hashmap() const
Definition: catalog.hpp:440
std::string npgettext_str(const std::string &context, const std::string &msgid, std::string &&msgid_plural, uint plural) const
Definition: catalog.hpp:400
std::string ngettext_str(std::string &&msgid, std::string &&msgid_plural, uint plural) const
Definition: catalog.hpp:393
boost::optional< std::string > context
Definition: po_message.hpp:19
catalog(spirit::line_pos_iterator< Iterator > &it, spirit::line_pos_iterator< Iterator > &end, warning_channel_type warn_channel=warning_channel_type(), pf_compiler compiler=pf_compiler())
Definition: catalog.hpp:144
const char * npgettext(const char *context, const char *msgid, const char *msgid_plural, uint plural) const
Definition: catalog.hpp:318
std::unordered_map< std::string, po_message > default_hashmap_type
Definition: catalog.hpp:42
std::vector< std::string > & strings()
Definition: po_message.hpp:30
std::string check_compatibility(const catalog_metadata &other) const
std::string npgettext_str(const std::string &context, std::string &&msgid, std::string &&msgid_plural, uint plural) const
Definition: catalog.hpp:401
static catalog from_iterators(Iterator &b, Iterator &e, warning_channel_type w=warning_channel_type())
Definition: catalog.hpp:250
uint size() const
Definition: catalog.hpp:432
qi::rule< Iterator > skipped_block
Definition: po_grammar.hpp:37
std::string iterator_context(Iterator &it, Iterator &end)
Definition: exceptions.hpp:18
static catalog from_iterators(spirit::line_pos_iterator< Iterator > &b, spirit::line_pos_iterator< Iterator > &e, warning_channel_type w=warning_channel_type())
Definition: catalog.hpp:257
std::size_t pgettext_line_no(const std::string &context, const std::string &msgid) const
Definition: catalog.hpp:415
std::size_t gettext_line_no(const std::string &msgid) const
Definition: catalog.hpp:406
mock_party p
static catalog from_range(const Range &range, warning_channel_type w=warning_channel_type())
Definition: catalog.hpp:263
static std::string form_index(const po_message &msg)
Definition: catalog.hpp:71
void set_warning_channel(const warning_channel_type &w)
Definition: catalog.hpp:445
std::string pgettext_str_impl(const std::string &context, S &&msgid) const
Definition: catalog.hpp:361
const char * gettext(const char *msgid) const
Definition: catalog.hpp:291
hashmap_type hashmap_
Definition: catalog.hpp:58
void insert_message(po_message &&msg)
Definition: catalog.hpp:95
std::string npgettext_str(const std::string &context, std::string &&msgid, const std::string &msgid_plural, uint plural) const
Definition: catalog.hpp:399
int w
std::size_t index(const std::string &str, const std::size_t index)
Codepoint index corresponding to the nth character in a UTF-8 string.
Definition: unicode.cpp:71
static catalog from_istream(std::istream &is, warning_channel_type w=warning_channel_type())
Definition: catalog.hpp:269
const catalog_metadata & get_metadata() const
Definition: catalog.hpp:427
unsigned int uint
Definition: catalog.hpp:39
std::string pgettext_str(const std::string &context, const std::string &msgid) const
Definition: catalog.hpp:395
qi::rule< Iterator > ignored_comments
consume any number of blocks, consisting of any number of comments followed by a white line ...
Definition: po_grammar.hpp:63
std::string ngettext_str_impl(S1 &&msgid, S2 &&msgid_plural, uint plural) const
Definition: catalog.hpp:347
warning_channel_type warning_channel_
Definition: catalog.hpp:56
#define e
const char * ngettext(const char *msgid, const char *msgid_plural, uint plural) const
Definition: catalog.hpp:300
std::string ngettext_str(const std::string &msgid, std::string &&msgid_plural, uint plural) const
Definition: catalog.hpp:392
#define SPIRIT_PO_CATALOG_FAIL(Message)
Definition: exceptions.hpp:72
std::string gettext_str_impl(S &&msgid) const
Definition: catalog.hpp:337
static std::string form_context_index(const std::string &msgctxt, const std::string &id)
Definition: catalog.hpp:61