The Battle for Wesnoth  1.17.0-dev
tokenizer.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2010 - 2021
3  by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Copyright (C) 2004 - 2009 by Philippe Plantier <ayin@anathas.org>
5  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
6 
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; either version 2 of the License, or
10  (at your option) any later version.
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY.
13 
14  See the COPYING file for more details.
15 */
16 
17 #pragma once
18 
19 //#define DEBUG_TOKENIZER
20 
21 #include "buffered_istream.hpp"
22 
23 #include <istream>
24 #include <string>
25 
/**
 * A single lexical token: its category plus the text it carries.
 *
 * A default-constructed token has type END and an empty value.
 */
struct token
{
	token() :
		type(END),
		value()
	{}

	/**
	 * Token categories.
	 *
	 * The punctuation enumerators are given the value of the character
	 * they stand for, so they can be compared directly against a char.
	 */
	enum token_type
	{
		// Categories without a fixed character value.
		// NOTE(review): these four names were restored after a listing
		// extraction dropped them — confirm against the upstream header.
		STRING,
		QSTRING,
		UNQUOTED_STRING,
		MISC,

		// Single-character tokens, valued as the character itself.
		LF = '\n',
		EQUALS = '=',
		COMMA = ',',
		PLUS = '+',
		SLASH = '/',
		OPEN_BRACKET = '[',
		CLOSE_BRACKET = ']',
		UNDERSCORE = '_',

		// Default type of a freshly constructed token (see constructor).
		END
	};

	/** Category of this token. */
	token_type type;

	/** Text associated with this token. */
	std::string value;
};
54 
55 /** Abstract baseclass for the tokenizer. */
56 class tokenizer
57 {
58 public:
59  tokenizer(std::istream& in);
60  ~tokenizer();
61 
62  const token &next_token();
63 
64  const token &current_token() const
65  {
66  return token_;
67  }
68 
69 #ifdef DEBUG_TOKENIZER
70  const token &previous_token() const
71  {
72  return previous_token_;
73  }
74 #endif
75 
76  const std::string &textdomain() const
77  {
78  return textdomain_;
79  }
80 
81  const std::string &get_file() const
82  {
83  return file_;
84  }
85 
86  int get_start_line() const
87  {
88  return startlineno_;
89  }
90 
91 private:
92  tokenizer();
93  int current_;
94  int lineno_;
96 
97  void next_char()
98  {
99  if (current_ == '\n')
100  ++lineno_;
101  next_char_fast();
102  }
103 
105  {
106  do {
107  current_ = in_.get();
108  } while (current_ == '\r');
109 #if 0
110  // TODO: disabled until the campaign server is fixed
111  if(in_.good()) {
112  current_ = in_.get();
113  if (current_ == '\r')
114  {
115  // we assume that there is only one '\r'
116  if(in_.good()) {
117  current_ = in_.get();
118  } else {
119  current_ = EOF;
120  }
121  }
122  } else {
123  current_ = EOF;
124  }
125 #endif
126  }
127 
128  int peek_char()
129  {
130  return in_.peek();
131  }
132 
133  enum
134  {
135  TOK_NONE = 0,
136  TOK_SPACE = 1,
137  TOK_NUMERIC = 2,
138  TOK_ALPHA = 4
139  };
140 
141  int char_type(unsigned c) const
142  {
143  return c < 128 ? char_types_[c] : 0;
144  }
145 
146  bool is_space(int c) const
147  {
148  return (char_type(c) & TOK_SPACE) == TOK_SPACE;
149  }
150 
151  bool is_num(int c) const
152  {
153  return (char_type(c) & TOK_NUMERIC) == TOK_NUMERIC;
154  }
155 
156  bool is_alnum(int c) const
157  {
158  return (char_type(c) & (TOK_ALPHA | TOK_NUMERIC)) != TOK_NONE;
159  }
160 
161  void skip_comment();
162 
163  /**
164  * Returns true if the next characters are the one from @a cmd
165  * followed by a space. Skips all the matching characters.
166  */
167  bool skip_command(char const *cmd);
168 
169  std::string textdomain_;
170  std::string file_;
172 #ifdef DEBUG_TOKENIZER
173  token previous_token_;
174 #endif
176  char char_types_[128];
177 };
void next_char()
Definition: tokenizer.hpp:97
const std::string & textdomain() const
Definition: tokenizer.hpp:76
token()
Definition: tokenizer.hpp:28
bool is_space(int c) const
Definition: tokenizer.hpp:146
int startlineno_
Definition: tokenizer.hpp:95
Abstract baseclass for the tokenizer.
Definition: tokenizer.hpp:56
Helper class for buffering a std::istream.
void next_char_fast()
Definition: tokenizer.hpp:104
unsigned in
If equal to search_counter, the node is off the list.
int get_start_line() const
Definition: tokenizer.hpp:86
token_type
Definition: tokenizer.hpp:33
std::string file_
Definition: tokenizer.hpp:170
int lineno_
Definition: tokenizer.hpp:94
bool is_alnum(int c) const
Definition: tokenizer.hpp:156
Helper class for buffering a std::istream.
const token & current_token() const
Definition: tokenizer.hpp:64
int peek_char()
Definition: tokenizer.hpp:128
int char_type(unsigned c) const
Definition: tokenizer.hpp:141
std::string textdomain_
Definition: tokenizer.hpp:169
int current_
Definition: tokenizer.hpp:93
bool is_num(int c) const
Definition: tokenizer.hpp:151
buffered_istream in_
Definition: tokenizer.hpp:175
token token_
Definition: tokenizer.hpp:171
mock_char c
const std::string & get_file() const
Definition: tokenizer.hpp:81
std::string value
Definition: tokenizer.hpp:52
token_type type
Definition: tokenizer.hpp:51