The Battle for Wesnoth  1.19.0-dev
tokenizer.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2010 - 2024
3  by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Copyright (C) 2004 - 2009 by Philippe Plantier <ayin@anathas.org>
5  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
6 
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; either version 2 of the License, or
10  (at your option) any later version.
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY.
13 
14  See the COPYING file for more details.
15 */
16 
17 #pragma once
18 
19 //#define DEBUG_TOKENIZER
20 
21 #include "buffered_istream.hpp"
22 
23 #include <istream>
24 #include <string>
25 
/**
 * A single token: a kind tag stored in `type` and a std::string stored in
 * `value`.
 *
 * NOTE(review): this listing is incomplete. The embedded line numbering
 * skips 33, 35-38, 46 and 51; the cross-reference index for this file
 * places token_type, STRING, QSTRING, UNTERMINATED_QSTRING, MISC,
 * CLOSE_BRACKET and the `token_type type` member on those lines.
 */
26 struct token
27 {
 /** Default-constructs a token whose type is END and whose value is an empty string. */
28  token() :
29  type(END),
30  value()
31  {}
32 
 // Body of the token kind enumeration (its header line is not part of this listing).
34  {
39 
 // Each of the enumerators below with an explicit value is set to the
 // character literal it is named after.
40  LF = '\n',
41  EQUALS = '=',
42  COMMA = ',',
43  PLUS = '+',
44  SLASH = '/',
45  OPEN_BRACKET = '[',
47  UNDERSCORE = '_',
 // END has no explicit value; it is the kind stored by the default constructor.
48  END
49  };
50 
 /** String carried by the token; default-constructed (empty) by token(). */
52  std::string value;
53 };
54 
55 /**
 * Abstract baseclass for the tokenizer.
 *
 * Constructed from a std::istream. Characters are pulled one at a time from
 * the in_ member into current_, and the most recent token is exposed through
 * current_token().
 *
 * NOTE(review): no virtual member is visible in this listing, so the
 * "abstract baseclass" wording may be outdated; confirm against the full
 * header.
 * NOTE(review): this listing is incomplete. The embedded line numbering
 * skips 92, 95, 104, 136-137, 171 and 175; the cross-reference index for
 * this file places startlineno_ (95), next_char_fast() (104), token_ (171)
 * and `buffered_istream in_` (175) on some of those lines.
 */
56 class tokenizer
57 {
58 public:
 /** Builds a tokenizer over the stream @a in. Only declared in this header. */
59  tokenizer(std::istream& in);
60  ~tokenizer();
61 
 /**
  * Only declared here; the cross-reference index places the definition in
  * tokenizer.cpp. Presumably advances to the next token and returns it;
  * confirm against the definition.
  */
62  const token &next_token();
63 
 /** Returns the token_ member by const reference. */
64  const token &current_token() const
65  {
66  return token_;
67  }
68 
 // Only compiled when DEBUG_TOKENIZER is defined (the define near the top
 // of the file is commented out).
69 #ifdef DEBUG_TOKENIZER
 /** Returns the previous_token_ member by const reference. */
70  const token &previous_token() const
71  {
72  return previous_token_;
73  }
74 #endif
75 
 /** Returns the textdomain_ member by const reference. */
76  const std::string &textdomain() const
77  {
78  return textdomain_;
79  }
80 
 /** Returns the file_ member by const reference. */
81  const std::string &get_file() const
82  {
83  return file_;
84  }
85 
 /** Returns the startlineno_ member. */
86  int get_start_line() const
87  {
88  return startlineno_;
89  }
90 
91 private:
 // Last value obtained from in_.get(); written by next_char_fast().
93  int current_;
 // Line counter; next_char() increments it each time it moves past a '\n'.
94  int lineno_;
96 
 /**
  * Advances to the next character. The line counter is bumped first when
  * the character being left behind is a newline.
  */
97  void next_char()
98  {
99  if (current_ == '\n')
100  ++lineno_;
101  next_char_fast();
102  }
103 
 // Body of next_char_fast() (its header line, 104, is not part of this
 // listing). Reads from in_ until the value obtained is not '\r', leaving
 // that value in current_. Unlike next_char(), lineno_ is not updated.
105  {
106  do {
107  current_ = in_.get();
108  } while (current_ == '\r');
 // The alternative below is compiled out by the `#if 0`.
109 #if 0
110  // TODO: disabled until the campaign server is fixed
111  if(in_.good()) {
112  current_ = in_.get();
113  if (current_ == '\r')
114  {
115  // we assume that there is only one '\r'
116  if(in_.good()) {
117  current_ = in_.get();
118  } else {
119  current_ = EOF;
120  }
121  }
122  } else {
123  current_ = EOF;
124  }
125 #endif
126  }
127 
 /** Returns in_.peek(); current_ is left unchanged. */
128  int peek_char()
129  {
130  return in_.peek();
131  }
132 
 // Bit flags stored in char_types_ and tested by is_space(), is_num() and
 // is_alnum().
 // NOTE(review): TOK_SPACE and TOK_NUMERIC are used below, but their
 // enumerator lines (136-137) are missing from this listing.
133  enum
134  {
135  TOK_NONE = 0,
138  TOK_ALPHA = 4
139  };
140 
 /**
  * Looks up the flags for character code @a c in char_types_.
  * Codes of 128 and above yield 0. Because the parameter is unsigned, a
  * negative int argument converts to a large value and also yields 0.
  */
141  int char_type(unsigned c) const
142  {
143  return c < 128 ? char_types_[c] : 0;
144  }
145 
 /** True when the TOK_SPACE flag is set for @a c. */
146  bool is_space(int c) const
147  {
148  return (char_type(c) & TOK_SPACE) == TOK_SPACE;
149  }
150 
 /** True when the TOK_NUMERIC flag is set for @a c. */
151  bool is_num(int c) const
152  {
153  return (char_type(c) & TOK_NUMERIC) == TOK_NUMERIC;
154  }
155 
 /** True when at least one of the TOK_ALPHA or TOK_NUMERIC flags is set for @a c. */
156  bool is_alnum(int c) const
157  {
158  return (char_type(c) & (TOK_ALPHA | TOK_NUMERIC)) != TOK_NONE;
159  }
160 
 // Only declared here; the cross-reference index places the definition in
 // tokenizer.cpp.
161  void skip_comment();
162 
163  /**
164  * Returns true if the next characters are the ones from @a cmd,
165  * followed by a space. Skips all the matching characters.
166  */
167  bool skip_command(char const *cmd);
168 
 // Returned by textdomain() and get_file() respectively.
169  std::string textdomain_;
170  std::string file_;
172 #ifdef DEBUG_TOKENIZER
 // Returned by previous_token(); only present in DEBUG_TOKENIZER builds.
173  token previous_token_;
174 #endif
 // Flag table indexed by character code (0-127) in char_type().
 // NOTE(review): where it is filled in is not visible in this header.
176  char char_types_[128];
177 };
Helper class for buffering a std::istream.
Helper class for buffering a std::istream.
int get()
Gets and consumes a character from the buffer.
int peek()
Gets a character from the buffer.
Abstract baseclass for the tokenizer.
Definition: tokenizer.hpp:57
bool is_space(int c) const
Definition: tokenizer.hpp:146
std::string file_
Definition: tokenizer.hpp:170
int peek_char()
Definition: tokenizer.hpp:128
buffered_istream in_
Definition: tokenizer.hpp:175
bool is_num(int c) const
Definition: tokenizer.hpp:151
void skip_comment()
Definition: tokenizer.cpp:174
const std::string & textdomain() const
Definition: tokenizer.hpp:76
void next_char_fast()
Definition: tokenizer.hpp:104
const token & next_token()
Definition: tokenizer.cpp:51
int lineno_
Definition: tokenizer.hpp:94
int char_type(unsigned c) const
Definition: tokenizer.hpp:141
bool skip_command(char const *cmd)
Returns true if the next characters are the ones from cmd, followed by a space.
Definition: tokenizer.cpp:162
int current_
Definition: tokenizer.hpp:93
bool is_alnum(int c) const
Definition: tokenizer.hpp:156
const token & current_token() const
Definition: tokenizer.hpp:64
std::string textdomain_
Definition: tokenizer.hpp:169
void next_char()
Definition: tokenizer.hpp:97
char char_types_[128]
Definition: tokenizer.hpp:176
token token_
Definition: tokenizer.hpp:171
int startlineno_
Definition: tokenizer.hpp:95
const std::string & get_file() const
Definition: tokenizer.hpp:81
int get_start_line() const
Definition: tokenizer.hpp:86
unsigned in
If equal to search_counter, the node is off the list.
token()
Definition: tokenizer.hpp:28
token_type
Definition: tokenizer.hpp:34
@ SLASH
Definition: tokenizer.hpp:44
@ QSTRING
Definition: tokenizer.hpp:36
@ COMMA
Definition: tokenizer.hpp:42
@ PLUS
Definition: tokenizer.hpp:43
@ MISC
Definition: tokenizer.hpp:38
@ CLOSE_BRACKET
Definition: tokenizer.hpp:46
@ UNTERMINATED_QSTRING
Definition: tokenizer.hpp:37
@ EQUALS
Definition: tokenizer.hpp:41
@ UNDERSCORE
Definition: tokenizer.hpp:47
@ OPEN_BRACKET
Definition: tokenizer.hpp:45
@ STRING
Definition: tokenizer.hpp:35
token_type type
Definition: tokenizer.hpp:51
std::string value
Definition: tokenizer.hpp:52
mock_char c