The Battle for Wesnoth  1.19.7+dev
po_grammar.hpp
Go to the documentation of this file.
1 // (C) Copyright 2015 - 2017 Christopher Beck
2 
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #ifndef SPIRIT_PO_PO_GRAMMAR_HPP_INCLUDED
7 #define SPIRIT_PO_PO_GRAMMAR_HPP_INCLUDED
8 
9 #ifndef BOOST_SPIRIT_USE_PHOENIX_V3
10 #define BOOST_SPIRIT_USE_PHOENIX_V3
11 #endif
12 
13 #include <boost/spirit/include/qi.hpp>
14 #include <boost/phoenix/core.hpp>
15 #include <boost/phoenix/operator.hpp>
16 #include <boost/fusion/include/std_pair.hpp>
17 #include <boost/fusion/include/define_struct.hpp>
18 
20 
21 #include <boost/optional/optional.hpp>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 
26 namespace spirit_po {
27 
// Unsigned index type; used below as the inherited-attribute type of the plural-string rules.
28 using uint = unsigned int;
// Short aliases for the Boost.Spirit Qi and Boost.Phoenix namespaces.
29 namespace qi = boost::spirit::qi;
30 namespace phoenix = boost::phoenix;
31 
/**
 * Boost.Spirit Qi grammar for a single po-file message.
 *
 * The grammar's start rule is `message` and its synthesized attribute is a
 * po_message. No skipper is declared on any rule, so all whitespace and
 * comment handling is spelled out explicitly in the rules themselves.
 *
 * @tparam Iterator  iterator type handed to every qi::rule / qi::grammar here.
 *
 * NOTE(review): in this listing the rules `message`, `preamble_comment_block`
 * and `message_preamble` are declared but never assigned in the constructor,
 * and the embedded source line numbers skip 52, 99-100, 119 and 121. Lines
 * appear to be missing from this view -- confirm against the real header.
 */
32 template <typename Iterator>
33 struct po_grammar : qi::grammar<Iterator, po_message()> {
 // Line-level helper rules; none of them exposes an attribute.
34  qi::rule<Iterator> white_line;
35  qi::rule<Iterator> comment_line;
36  qi::rule<Iterator> skipped_line;
37  qi::rule<Iterator> skipped_block;
38 
 // Quoted-string rules: one escape sequence, one "..." literal, and a run of literals.
39  qi::rule<Iterator, char()> escaped_character;
40  qi::rule<Iterator, std::string()> single_line_string;
41  qi::rule<Iterator, std::string()> multiline_string;
42 
 // One rule per po keyword; each yields the string that follows the keyword.
 // message_str_plural takes the expected msgstr[N] index as an inherited attribute.
43  qi::rule<Iterator, std::string()> message_id;
44  qi::rule<Iterator, std::string()> message_id_plural;
45  qi::rule<Iterator, std::string()> message_context;
46  qi::rule<Iterator, std::string()> message_str;
47  qi::rule<Iterator, std::string(uint)> message_str_plural;
48 
 // Translated strings collected into a vector: exactly one msgstr, or a run of
 // msgstr[N] entries starting at the inherited index.
49  qi::rule<Iterator, std::vector<std::string>()> message_single_str;
50  qi::rule<Iterator, std::vector<std::string>(uint)> message_strs;
51 
 // Plural form: msgid_plural text paired with its msgstr[N] strings.
53  qi::rule<Iterator, plural_and_strings_type()> message_plural;
54 
 // Start rule of the grammar (passed to base_type in the constructor).
55  qi::rule<Iterator, po_message()> message;
56 
57  // Related to parsing "fuzzy" po comment
58  qi::rule<Iterator, qi::locals<bool>> fuzzy;
59  qi::rule<Iterator> preamble_comment_line;
60  qi::rule<Iterator> preamble_comment_block;
61 
62  /** consume any number of blocks, consisting of any number of comments followed by a white line */
63  qi::rule<Iterator> ignored_comments;
64  /** consume any number of non-white comment line (using #). bool result represents if we saw #, fuzzy comment */
65  qi::rule<Iterator, bool()> message_preamble;
66 
 /** Assigns the parser expressions to the rules above and registers `message` as the start rule. */
67  po_grammar() : po_grammar::base_type(message) {
68  using qi::attr;
69  using qi::char_;
70  using qi::eoi;
71  using qi::lit;
72  using qi::omit;
73  using qi::uint_;
74 
 // white_line: any run of space / tab / CR. comment_line: '#' then everything up to (not including) '\n'.
 // skipped_line requires the terminating '\n', so a skipped_block only ever consumes whole lines.
75  white_line = *char_(" \t\r"); // nullable
76  comment_line = char_('#') >> *(char_ - '\n'); // not nullable
77  skipped_line = (comment_line | white_line) >> lit('\n'); // not nullable
78  skipped_block = *skipped_line; // nullable
79 
80  // TODO: Do we need to handle other escaped characters?
 // Backslash followed by ', " or \ yields that character; \n and \t yield newline and tab via attr().
81  escaped_character = lit('\\') >> (char_("\'\"\\") | (lit('n') >> attr('\n')) | (lit('t') >> attr('\t')));
 // A double-quoted literal: each element is an escape sequence or any character other than backslash and quote.
82  single_line_string = lit('"') >> *(escaped_character | (char_ - '\\' - '"')) >> lit('"');
83  multiline_string = single_line_string % skipped_block; // ^ the `- '\\'` exclusion in single_line_string above is important: without it a backslash would not have to be escaped in a po string and could simply form an illegal escape code
84 
 // Each keyword rule first skips any comment / blank lines, then expects the keyword
 // followed by exactly one space, then the (possibly multi-line) quoted string.
85  message_context = skipped_block >> lit("msgctxt ") >> multiline_string;
86  message_id = skipped_block >> lit("msgid ") >> multiline_string;
87  message_str = skipped_block >> lit("msgstr ") >> multiline_string;
88  message_id_plural = skipped_block >> lit("msgid_plural ") >> multiline_string;
89  message_str_plural = skipped_block >> lit("msgstr[") >> omit[ uint_(qi::_r1) ] >> lit("] ") >> multiline_string;
90  // ^ the index in the po file must match what we expect
91 
92  // qi::repeat converts it from a std::string, to a singleton vector, as required
93  message_single_str = qi::repeat(1)[message_str];
 // Recursive: parse msgstr[_r1], then optionally continue with index _r1 + 1.
94  message_strs = message_str_plural(qi::_r1) >> -message_strs(qi::_r1 + 1);
95  // ^ enforces that indices must count up
96 
97  // Detect whether we should read multiple messages or a single message by presence of `msgid_plural`
98  message_plural = message_id_plural >> message_strs(0); // first line should be msgstr[0]
 // NOTE(review): no assignment to `message` (the start rule) is visible here; embedded line
 // numbers 99-100 are absent from this listing -- confirm against the real header.
101 
102  /***
103  * The remaining rules are not contributing to message -- their job is to consume comments leading up to the message,
104  * keep track of if we saw a fuzzy marker, and to consume the entire file if only whitespace lines remain, whether or
105  * not it ends in new-line.
106  *
107  * First, parse "ignored_comments",
108  * message_preamble is the main rule of this section
109  */
110 
111  /**
112  * Fuzzy: Expect comment of the form #, with literal `, fuzzy` in the list somewhere.
113  * We use a qi local to keep track of if we saw it, this avoids excessive backtracking
114  */
 // The local _a is reset to false once the lookahead sees ',' after '#', set to true when
 // " fuzzy" directly follows a comma, and the trailing eps(_a) makes the rule fail unless it was set.
115  fuzzy = lit('#') >> (&lit(','))[qi::_a = false] >> *(lit(',') >> -(lit(" fuzzy")[qi::_a = true]) >> *(char_ - '\n' - ',')) >> lit('\n') >> qi::eps(qi::_a);
116  preamble_comment_line = comment_line >> lit('\n');
117 
118  ignored_comments = *(*preamble_comment_line >> white_line >> lit('\n'));
 // NOTE(review): the caret comments below annotate definitions of `preamble_comment_block` and
 // `message_preamble` that are not present in this listing (embedded line numbers 119 and 121 are absent).
120  // ^ if po-file ends in a comment without eol we should still consume it
122  // ^ if we find fuzzy, short cut out of this test ^ consume one comment line and repeat ^ didn't find fuzzy, return false
123  // ^ note: no backtrack after fuzzy... ^ note: no backtrack after comment line... and consume trailing comment
124  // preamble_comment_block is nullable message_preamble is nullable
125  }
126 };
127 
128 } // end namespace spirit_po
129 
130 #endif // SPIRIT_PO_PO_GRAMMAR_HPP_INCLUDED
unsigned int uint
Definition: catalog.hpp:39
std::pair< std::string, std::vector< std::string > > plural_and_strings_type
Definition: po_message.hpp:16
qi::rule< Iterator, std::string()> multiline_string
Definition: po_grammar.hpp:41
qi::rule< Iterator > skipped_block
Definition: po_grammar.hpp:37
qi::rule< Iterator, std::vector< std::string >()> message_single_str
Definition: po_grammar.hpp:49
qi::rule< Iterator > preamble_comment_line
Definition: po_grammar.hpp:59
qi::rule< Iterator, po_message()> message
Definition: po_grammar.hpp:55
qi::rule< Iterator, std::string()> message_id_plural
Definition: po_grammar.hpp:44
qi::rule< Iterator, bool()> message_preamble
consume any number of non-white comment line (using #).
Definition: po_grammar.hpp:65
qi::rule< Iterator, plural_and_strings_type()> message_plural
Definition: po_grammar.hpp:53
qi::rule< Iterator, std::string()> single_line_string
Definition: po_grammar.hpp:40
qi::rule< Iterator, std::string()> message_context
Definition: po_grammar.hpp:45
qi::rule< Iterator, std::vector< std::string >(uint)> message_strs
Definition: po_grammar.hpp:50
qi::rule< Iterator, std::string(uint)> message_str_plural
Definition: po_grammar.hpp:47
qi::rule< Iterator, plural_and_strings_type()> message_singular
Definition: po_grammar.hpp:52
qi::rule< Iterator, qi::locals< bool > > fuzzy
Definition: po_grammar.hpp:58
qi::rule< Iterator > white_line
Definition: po_grammar.hpp:34
qi::rule< Iterator, char()> escaped_character
Definition: po_grammar.hpp:39
qi::rule< Iterator > ignored_comments
consume any number of blocks, consisting of any number of comments followed by a white line
Definition: po_grammar.hpp:63
qi::rule< Iterator, std::string()> message_str
Definition: po_grammar.hpp:46
qi::rule< Iterator > comment_line
Definition: po_grammar.hpp:35
qi::rule< Iterator > preamble_comment_block
Definition: po_grammar.hpp:60
qi::rule< Iterator > skipped_line
Definition: po_grammar.hpp:36
qi::rule< Iterator, std::string()> message_id
Definition: po_grammar.hpp:43