The Battle for Wesnoth  1.19.8+dev
default_plural_forms_expressions.hpp
Go to the documentation of this file.
1 // (C) Copyright 2015 - 2017 Christopher Beck
2 
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #ifndef SPIRIT_PO_DEFAULT_PLURAL_FORMS_EXPRESSIONS_HPP_INCLUDED
7 #define SPIRIT_PO_DEFAULT_PLURAL_FORMS_EXPRESSIONS_HPP_INCLUDED
8 
9 /***
10  * The namespace default_plural_forms contains all the details to implement
11  * the subset of the C grammar used by standard GNU gettext po headers.
12  *
13  * Boolean expressions return uint 0 or 1.
14  *
15  * The 'compiler' is a spirit grammar which parses a string into an expression
16  * object. The expressions are evaluated by a simple stack machine.
17  */
18 
19 #ifndef BOOST_SPIRIT_USE_PHOENIX_V3
20 #define BOOST_SPIRIT_USE_PHOENIX_V3
21 #endif
22 
23 #include <algorithm>
24 #include <vector>
25 #include <boost/spirit/include/qi.hpp>
26 #include <boost/phoenix/core.hpp>
27 #include <boost/phoenix/operator.hpp>
28 #include <boost/fusion/include/adapt_struct.hpp>
29 #include <boost/variant/variant.hpp>
30 #include <boost/variant/recursive_wrapper.hpp>
31 
32 #ifdef SPIRIT_PO_DEBUG
33 #include <cassert>
34 #include <string>
35 #endif
36 
37 namespace spirit_po {
38 
39 namespace qi = boost::spirit::qi;
40 typedef unsigned int uint;
41 
42 namespace default_plural_forms {
43 
44 // X Macro for repetitive binary ops declarations
45 
46 #define FOREACH_SPIRIT_PO_BINARY_OP(X_) \
47  X_(eq_op, ==) X_(neq_op, !=) X_(ge_op, >=) X_(le_op, <=) X_(gt_op, >) X_(lt_op, <) X_(mod_op, %)
48 
49 // && and || are treated slightly differently from other binary ops
50 
51 #define FOREACH_SPIRIT_PO_CONJUNCTION(X_) \
52  X_(and_op, &&) X_(or_op, ||)
53 
54 /***
55  * Declare / forward declare expr struct types
56  */
57 
58 struct constant { uint value; };
59 struct n_var { n_var() = default; explicit n_var(char) {}}; // work around a quirk in spirit
60 struct not_op;
61 struct ternary_op;
62 
63 #define FWD_DECL_(name, op) \
64 struct name ; \
65 
68 
69 #undef FWD_DECL_
70 
71 /***
72  * Define expr variant type
73  */
74 
75 #define WRAP_(name, op) boost::recursive_wrapper< name >, \
76 
77 typedef boost::variant<constant, n_var, boost::recursive_wrapper<not_op>,
80 boost::recursive_wrapper<ternary_op>> expr;
81 
82 #undef WRAP_
83 
84 /***
85  * Define structs
86  */
87 
88 struct not_op { expr e1; };
89 struct ternary_op { expr e1, e2, e3; };
90 
91 #define DECL_(name, op) \
92 struct name { expr e1, e2; }; \
93 
96 
97 #undef DECL_
98 
99 /***
100  * Visitor that naively evaluates expressions
101  */
102 struct evaluator : public boost::static_visitor<uint> {
104  explicit evaluator(uint n) : n_value_(n) {}
105 
106  uint operator()(const constant & c) const { return c.value; }
107  uint operator()(n_var) const { return n_value_; }
108  uint operator()(const not_op & op) const { return !boost::apply_visitor(*this, op.e1); }
109 
110 #define EVAL_OP_(name, OPERATOR) \
111  uint operator()(const name & op) const { return (boost::apply_visitor(*this, op.e1)) OPERATOR (boost::apply_visitor(*this, op.e2)); } \
112 
115 #undef EVAL_OP_
116 
117  uint operator()(const ternary_op & op) const { return boost::apply_visitor(*this, op.e1) ? boost::apply_visitor(*this, op.e2) : boost::apply_visitor(*this, op.e3); }
118 };
119 
120 } // end namespace default_plural_forms
121 
122 } // end namespace spirit_po
123 
124 /***
125  * Adapt structs for fusion / qi
126  */
127 
128 BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms::constant,
129  (unsigned int, value))
130 BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms::not_op,
132 BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms::ternary_op,
136 
137 #define ADAPT_STRUCT_(name, op) \
138 BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms:: name, \
139  (spirit_po::default_plural_forms::expr, e1) \
140  (spirit_po::default_plural_forms::expr, e2)) \
141 
144 
145 #undef ADAPT_STRUCT_
146 
147 namespace spirit_po {
148 
149 namespace default_plural_forms {
150 
151 /***
152  * Pseudo-C Grammar
153  *
154  * Note that the grammar has been somewhat optimized by using local variables
155  * and inherited attributes, in order to avoid exponential backtracking overhead.
156  * This makes it a little harder to read than if we got rid of all local variables,
157  * but then it is too slow to parse the expressions for certain languages.
158  *
159  * The main idea is that instead of parsing things like
160  *
161  * BINARY_OP = LOWER_PRECENDENCE >> BINARY_OP_LITERAL >> CURRENT_PRECEDENCE
162  * CURRENT_PRECEDENCE = BINARY_OP | OTHER_OP | YET_ANOTHER_OP | LOWER_PRECEDENCE
163  *
164  * (which is bad because if the binary op literal is not there then we have to
165  * backtrack through an entire subexpression)
166  *
167  * we make BINARY_OP take the subexpression as a parameter, and in each
168  * precedence level, we capture the subexpression first and store it in a local
169  * variable, so that it does not get reparsed when we backtrack.
170  *
171  * BINARY_OP = BINARY_OP_LITERAL >> qi::attr(parameter) >> CURRENT_PRECEDENCE
172  *
173  * CURRENT_PRECEDENCE = LOWER_PRECEDENCE[local_var = result] >>
174  * (BINARY_OP(local_var) | OTHER_OP(local_var) | YET_ANOTHER_OP(local_var) | qi::attr(local_var)
175  *
176  */
177 
178 template <typename Iterator>
179 struct op_grammar : qi::grammar<Iterator, expr(), qi::space_type> {
180  qi::rule<Iterator, constant(), qi::space_type> constant_;
181  qi::rule<Iterator, n_var(), qi::space_type> n_;
182  qi::rule<Iterator, not_op(), qi::space_type> not_;
183  qi::rule<Iterator, and_op(expr), qi::space_type> and_;
184  qi::rule<Iterator, or_op(expr), qi::space_type> or_;
185  qi::rule<Iterator, eq_op(expr), qi::space_type> eq_;
186  qi::rule<Iterator, neq_op(expr), qi::space_type> neq_;
187  qi::rule<Iterator, ge_op(expr), qi::space_type> ge_;
188  qi::rule<Iterator, le_op(expr), qi::space_type> le_;
189  qi::rule<Iterator, gt_op(expr), qi::space_type> gt_;
190  qi::rule<Iterator, lt_op(expr), qi::space_type> lt_;
191  qi::rule<Iterator, mod_op(expr), qi::space_type> mod_;
192  qi::rule<Iterator, ternary_op(expr), qi::space_type> ternary_;
193  qi::rule<Iterator, expr(), qi::space_type> paren_expr_;
194 
195  // expression precedence levels
196  qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> ternary_level_;
197  qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> or_level_;
198  qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> and_level_;
199  qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> eq_level_;
200  qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> rel_level_;
201  qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> mod_level_;
202  qi::rule<Iterator, expr(), qi::space_type> atom_level_;
203  qi::rule<Iterator, expr(), qi::space_type> expr_;
204 
205  // handle optional ';' at end
206  qi::rule<Iterator, expr(), qi::space_type> main_;
207 
208  op_grammar() : op_grammar::base_type(main_) {
209  using qi::attr;
210  using qi::lit;
211 
212  constant_ = qi::uint_;
213  n_ = qi::char_('n');
214  paren_expr_ = lit('(') >> expr_ >> lit(')');
215  not_ = lit('!') >> atom_level_;
216  atom_level_ = paren_expr_ | not_ | n_ | constant_;
217 
218  mod_ = lit('%') >> attr(qi::_r1) >> atom_level_;
219  mod_level_ = qi::omit[atom_level_[qi::_a = qi::_1]] >> (mod_(qi::_a) | attr(qi::_a));
220 
221  ge_ = lit(">=") >> attr(qi::_r1) >> mod_level_;
222  le_ = lit("<=") >> attr(qi::_r1) >> mod_level_;
223  gt_ = lit('>') >> attr(qi::_r1) >> mod_level_;
224  lt_ = lit('<') >> attr(qi::_r1) >> mod_level_;
225  rel_level_ = qi::omit[mod_level_[qi::_a = qi::_1]] >> (ge_(qi::_a) | le_(qi::_a) | gt_(qi::_a) | lt_(qi::_a) | attr(qi::_a));
226 
227  eq_ = lit("==") >> attr(qi::_r1) >> rel_level_;
228  neq_ = lit("!=") >> attr(qi::_r1) >> rel_level_;
229  eq_level_ = qi::omit[rel_level_[qi::_a = qi::_1]] >> (eq_(qi::_a) | neq_(qi::_a) | attr(qi::_a));
230 
231  and_ = lit("&&") >> attr(qi::_r1) >> and_level_;
232  and_level_ = qi::omit[eq_level_[qi::_a = qi::_1]] >> (and_(qi::_a) | attr(qi::_a));
233 
234  or_ = lit("||") >> attr(qi::_r1) >> or_level_;
235  or_level_ = qi::omit[and_level_[qi::_a = qi::_1]] >> (or_(qi::_a) | attr(qi::_a));
236 
237  ternary_ = lit('?') >> attr(qi::_r1) >> ternary_level_ >> lit(':') >> ternary_level_;
238  ternary_level_ = qi::omit[or_level_[qi::_a = qi::_1]] >> (ternary_(qi::_a) | attr(qi::_a));
239 
240  expr_ = ternary_level_;
241 
242  main_ = expr_ >> -lit(';');
243  }
244 };
245 
246 /***
247  * Now define a simple stack machine to evaluate the expressions efficiently.
248  *
249  * First define op_codes
250  */
251 
252 #define ENUMERATE(X, Y) X,
253 
254 enum class op_code { n_var, FOREACH_SPIRIT_PO_BINARY_OP(ENUMERATE) not_op };
255 
256 #undef ENUMERATE
257 
258 /** Instruction that causes us to skip upcoming instructions */
259 struct skip {
260  uint distance;
261 };
262 
263 /** Instructions that conditionally cause us to skip upcoming instructions */
264 struct skip_if {
265  uint distance;
266 };
267 
268 struct skip_if_not {
269  uint distance;
270 };
271 
272 /***
273  * Instruction is a variant type that represents either a push_constant, branch, jump, or arithmetic op.
274  */
275 typedef boost::variant<constant, skip, skip_if, skip_if_not, op_code> instruction;
276 
277 /***
278  * Debug strings for instruction set
279  */
280 #ifdef SPIRIT_PO_DEBUG
281 inline std::string op_code_string(op_code oc) {
282  std::string result = "[ ";
283  switch (oc) {
284  case op_code::n_var: {
285  result += "n ";
286  break;
287  }
288  case op_code::not_op: {
289  result += "! ";
290  break;
291  }
292 #define OP_CODE_STR_CASE_(X, Y) \
293  case op_code::X: { \
294  result += #Y; \
295  break; \
296  }
297 
298 FOREACH_SPIRIT_PO_BINARY_OP(OP_CODE_STR_CASE_)
299 
300 #undef OP_CODE_STR_CASE_
301  }
302 
303  if (result.size() < 5) { result += ' '; } \
304  result += " : ]";
305  return result;
306 }
307 
308 struct instruction_debug_string_maker : boost::static_visitor<std::string> {
309  std::string operator()(const constant & c) const {
310  return "[ push : " + std::to_string(c.value) + " ]";
311  }
312  std::string operator()(const skip & s) const {
313  return "[ skip : " + std::to_string(s.distance) + " ]";
314  }
315  std::string operator()(const skip_if & s) const {
316  return "[ sif : " + std::to_string(s.distance) + " ]";
317  }
318  std::string operator()(const skip_if_not & s) const {
319  return "[ sifn : " + std::to_string(s.distance) + " ]";
320  }
321  std::string operator()(const op_code & oc) const {
322  return op_code_string(oc);
323  }
324 };
325 
326 inline std::string debug_string(const instruction & i) {
327  return boost::apply_visitor(instruction_debug_string_maker{}, i);
328 }
329 
330 #endif // SPIRIT_PO_DEBUG
331 
332 /***
333  * Helper: Check if an expression obviously is zero-one valued
334  */
335 struct is_boolean : public boost::static_visitor<bool> {
336  bool operator()(const and_op &) const { return true; }
337  bool operator()(const or_op &) const { return true; }
338  bool operator()(const not_op &) const { return true; }
339  bool operator()(const eq_op &) const { return true; }
340  bool operator()(const neq_op &) const { return true; }
341  bool operator()(const ge_op &) const { return true; }
342  bool operator()(const le_op &) const { return true; }
343  bool operator()(const gt_op &) const { return true; }
344  bool operator()(const lt_op &) const { return true; }
345  bool operator()(const n_var &) const { return false; }
346  bool operator()(const constant & c) const { return (c.value == 0 || c.value == 1); }
347  bool operator()(const mod_op & m) const { return boost::apply_visitor(*this, m.e1); }
348  bool operator()(const ternary_op & t) const { return boost::apply_visitor(*this, t.e2) && boost::apply_visitor(*this, t.e3); }
349 };
350 
351 
352 /***
353  * Visitor that maps expressions to instruction sequences
354  */
355 struct emitter : public boost::static_visitor<std::vector<instruction>> {
356  std::vector<instruction> operator()(const constant & c) const {
357  return std::vector<instruction>{instruction{c}};
358  }
359  std::vector<instruction> operator()(const n_var &) const {
360  return std::vector<instruction>{instruction{op_code::n_var}};
361  }
362  std::vector<instruction> operator()(const not_op & o) const {
363  auto result = boost::apply_visitor(*this, o.e1);
364  result.emplace_back(op_code::not_op);
365  return result;
366  }
367 #define EMIT_OP_(name, op) \
368  std::vector<instruction> operator()(const name & o) const { \
369  auto result = boost::apply_visitor(*this, o.e1); \
370  auto temp = boost::apply_visitor(*this, o.e2); \
371  std::move(temp.begin(), temp.end(), std::back_inserter(result)); \
372  result.emplace_back(op_code::name); \
373  return result; \
374  }
375 
377 
378 #undef EMIT_OP_
379 
380  /***
381  * We make &&, ||, and ? shortcut
382  */
383  std::vector<instruction> operator()(const and_op & o) const {
384  auto result = boost::apply_visitor(*this, o.e1);
385  auto second = boost::apply_visitor(*this, o.e2);
386  bool second_is_boolean = boost::apply_visitor(is_boolean(), o.e2);
387 
388  uint sec_size = static_cast<uint>(second.size());
389  if (!second_is_boolean) { sec_size += 2; }
390 
391  result.emplace_back(skip_if{2});
392  result.emplace_back(constant{0});
393  result.emplace_back(skip{sec_size});
394 
395  std::move(second.begin(), second.end(), std::back_inserter(result));
396  if (!second_is_boolean) {
397  result.emplace_back(op_code::not_op);
398  result.emplace_back(op_code::not_op);
399  }
400 
401  return result;
402  }
403 
404  std::vector<instruction> operator()(const or_op & o) const {
405  auto result = boost::apply_visitor(*this, o.e1);
406  auto second = boost::apply_visitor(*this, o.e2);
407  bool second_is_boolean = boost::apply_visitor(is_boolean(), o.e2);
408 
409  uint sec_size = static_cast<uint>(second.size());
410  if (!second_is_boolean) { sec_size += 2; }
411 
412  result.emplace_back(skip_if_not{2});
413  result.emplace_back(constant{1});
414  result.emplace_back(skip{sec_size});
415 
416  std::move(second.begin(), second.end(), std::back_inserter(result));
417  if (!second_is_boolean) {
418  result.emplace_back(op_code::not_op);
419  result.emplace_back(op_code::not_op);
420  }
421 
422  return result;
423  }
424 
425  std::vector<instruction> operator()(const ternary_op & o) const {
426  auto result = boost::apply_visitor(*this, o.e1);
427  auto tbranch = boost::apply_visitor(*this, o.e2);
428  auto fbranch = boost::apply_visitor(*this, o.e3);
429 
430  uint tsize = static_cast<uint>(tbranch.size());
431  uint fsize = static_cast<uint>(fbranch.size());
432 
433  // We use jump if / jump if not in the way that will let us put the shorter branch first.
434  if (tbranch.size() > fbranch.size()) {
435  // + 1 to size because we have to put a jump at end of this branch also
436  result.emplace_back(skip_if{fsize + 1});
437  std::move(fbranch.begin(), fbranch.end(), std::back_inserter(result));
438  result.emplace_back(skip{tsize});
439  std::move(tbranch.begin(), tbranch.end(), std::back_inserter(result));
440  } else {
441  result.emplace_back(skip_if_not{tsize + 1});
442  std::move(tbranch.begin(), tbranch.end(), std::back_inserter(result));
443  result.emplace_back(skip{fsize});
444  std::move(fbranch.begin(), fbranch.end(), std::back_inserter(result));
445  }
446  return result;
447  }
448 };
449 
450 /***
451  * Actual stack machine
452  */
453 
454 class stack_machine : public boost::static_visitor<uint> {
455  std::vector<instruction> instruction_seq_;
456  std::vector<uint> stack_;
457  uint n_value_;
458 
459 #ifdef SPIRIT_PO_DEBUG
460 public:
461  void debug_print_instructions() const {
462  std::cerr << "Instruction sequence:\n";
463  for (const auto & i : instruction_seq_) {
464  std::cerr << debug_string(i) << std::endl;
465  }
466  }
467 private:
468 
469 #define MACHINE_ASSERT(X) \
470  do { \
471  if (!(X)) { \
472  std::cerr << "Stack machine failure:\n"; \
473  debug_print_instructions(); \
474  assert(false && #X); \
475  } \
476  } while(0)
477 
478 #else // SPIRIT_PO_DEBUG
479 
480 #define MACHINE_ASSERT(...) do {} while(0)
481 
482 #endif // SPIRIT_PO_DEBUG
483 
484  uint pop_one() {
485  MACHINE_ASSERT(stack_.size());
486 
487  uint result = stack_.back();
488  stack_.resize(stack_.size() - 1);
489  return result;
490  }
491 
492 public:
493  explicit stack_machine(const expr & e)
494  : instruction_seq_(boost::apply_visitor(emitter(), e))
495  , stack_()
496  , n_value_()
497  {}
498 
499  /***
500  * operator() takes the instruction that we should execute
501  * It should perform the operation adjusting the stack
502  * It returns the amount by which we should increment the
503  * program counter.
504  */
505  uint operator()(const constant & c) {
506  stack_.emplace_back(c.value);
507  return 1;
508  }
509 
510  uint operator()(const skip & s) {
511  return 1 + s.distance;
512  }
513 
514  uint operator()(const skip_if & s) {
515  return 1 + (pop_one() ? s.distance : 0);
516  }
517 
518  uint operator()(const skip_if_not & s) {
519  return 1 + (pop_one() ? 0 : s.distance);
520  }
521 
522  uint operator()(op_code oc) {
523  switch (oc) {
524  case op_code::n_var: {
525  stack_.emplace_back(n_value_);
526  return 1;
527  }
528  case op_code::not_op: {
529  MACHINE_ASSERT(stack_.size());
530  stack_.back() = !stack_.back();
531  return 1;
532  }
533 #define STACK_MACHINE_CASE_(name, op) \
534  case op_code::name: { \
535  MACHINE_ASSERT(stack_.size() >= 2); \
536  uint parm2 = pop_one(); \
537  \
538  if (op_code::name == op_code::mod_op) { \
539  MACHINE_ASSERT(parm2 && "Division by zero when evaluating gettext plural form expression"); \
540  } \
541  \
542  stack_.back() = (stack_.back() op parm2); \
543  return 1; \
544  }
545 
547 
548 #undef STACK_MACHINE_CASE_
549  }
550  MACHINE_ASSERT(false);
551  return 1;
552  }
553 
554  uint compute(uint arg) {
555  n_value_ = arg;
556  stack_.resize(0);
557  uint pc = 0;
558  while (pc < instruction_seq_.size()) {
559  pc += boost::apply_visitor(*this, instruction_seq_[pc]);
560  }
561  MACHINE_ASSERT(pc == instruction_seq_.size());
562  MACHINE_ASSERT(stack_.size() == 1);
563  return stack_[0];
564  }
565 };
566 
567 #undef MACHINE_ASSERT
568 
569 // X macros not used anymore
570 #undef FOREACH_SPIRIT_PO_BINARY_OP
571 #undef FOREACH_SPIRIT_PO_CONJUNCTION
572 
573 } // end namespace default_plural_forms
574 
575 } // end namespace spirit_po
576 
577 #endif // SPIRIT_PO_DEFAULT_PLURAL_FORMS_EXPRESSIONS_HPP_INCLUDED
double t
Definition: astarsearch.cpp:63
#define EVAL_OP_(name, OPERATOR)
#define EMIT_OP_(name, op)
#define ENUMERATE(X, Y)
#define WRAP_(name, op)
#define DECL_(name, op)
#define FOREACH_SPIRIT_PO_CONJUNCTION(X_)
#define STACK_MACHINE_CASE_(name, op)
#define ADAPT_STRUCT_(name, op)
#define MACHINE_ASSERT(...)
#define FOREACH_SPIRIT_PO_BINARY_OP(X_)
#define FWD_DECL_(name, op)
std::size_t i
Definition: function.cpp:1029
expression_ptr expr_
Definition: function.cpp:816
boost::variant< constant, n_var, boost::recursive_wrapper< not_op >, boost::recursive_wrapper< ternary_op > > expr
unsigned int uint
Definition: catalog.hpp:39
V::result_t apply_visitor(typename V::param_t state, T &&... args)
Helper function to apply the result of a specified visitor to a variable_info object.
mock_char c
static map_location::direction n
static map_location::direction s
static std::size_t compute(std::string expr, std::size_t ref1, std::size_t ref2=0)
Definition: theme.cpp:44
#define e