The Battle for Wesnoth  1.17.0-dev
tokenizer.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2010 - 2021
3  by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Copyright (C) 2004 - 2009 by Philippe Plantier <ayin@anathas.org>
5  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
6 
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; either version 2 of the License, or
10  (at your option) any later version.
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY.
13 
14  See the COPYING file for more details.
15 */
16 
18 #include "wesconfig.h"
19 
// Constructs a tokenizer that reads from the input stream `in`.
//
// Initial state: current_ is EOF, lineno_ is 1, startlineno_ is 0, the text
// domain is PACKAGE, and file_ / token_ are default-constructed.
// The body fills the char_types_ classification table and enables
// exceptions for badbit on the underlying stream.
//
// NOTE(review): listing line 42 is absent from this rendering, so any
// statement on that line is not visible here.
20 tokenizer::tokenizer(std::istream& in) :
21  current_(EOF),
22  lineno_(1),
23  startlineno_(0),
24  textdomain_(PACKAGE),
25  file_(),
26  token_(),
27  in_(in)
28 {
    // Classify each of the 128 ASCII codes once, up front:
    //   letters and '_'  -> TOK_ALPHA
    //   decimal digits   -> TOK_NUMERIC
    //   space and tab    -> TOK_SPACE
    //   everything else  -> 0
29  for (int c = 0; c < 128; ++c)
30  {
31  int t = 0;
32  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
33  t = TOK_ALPHA;
34  } else if (c >= '0' && c <= '9') {
35  t = TOK_NUMERIC;
36  } else if (c == ' ' || c == '\t') {
37  t = TOK_SPACE;
38  }
39  char_types_[c] = t;
40  }
    // From here on, a badbit condition on the stream raises an exception
    // instead of only setting the stream's state flag.
41  in_.stream().exceptions(std::ios_base::badbit);
43 }
44 
// NOTE(review): the signature (listing line 45) is absent from this rendering;
// presumably this is the tokenizer destructor body -- confirm against the
// real file.
//
// Resets the underlying stream's state flags to goodbit and then sets an empty
// exception mask, reversing the badbit exception mask installed by the
// constructor.
46 {
47  in_.stream().clear(std::ios_base::goodbit);
48  in_.stream().exceptions(std::ios_base::goodbit);
49 }
50 
// Body of next_token(): the signature (listing line 51) is absent from this
// rendering; the symbol index at the end of this page places
// `const token & next_token()` at tokenizer.cpp:51.
//
// Reads the next token from the input into token_ and returns a reference
// to it.
//
// NOTE(review): several listing lines inside this body are absent from this
// rendering (62, 68, 74, 78, 83-84, 87-88, 92, 96, 99, 104, 108, 125-126,
// 131-132, 139, 149). The statements on those lines are not visible, so the
// comments below describe only what the visible lines do.
52 {
53 #ifdef DEBUG_TOKENIZER
    // Debug builds keep a copy of the previously returned token.
54  previous_token_ = token_;
55 #endif
    // Start the new token with an empty value.
56  token_.value.clear();
57 
58  // Dump spaces and inlined comments
59  while(true)
60  {
    // NOTE(review): the loop body (listing line 62) is not visible here.
61  while (is_space(current_)) {
63  }
    // Character code 254 is treated as the start of an inlined comment,
    // which skip_comment() consumes; any other character ends the skipping.
64  if (current_ != 254)
65  break;
66  skip_comment();
67  // skip the line end
69  }
70 
    // A '#' at this point starts a comment that is skipped as well.
71  if (current_ == '#')
72  skip_comment();
73 
75 
76  switch(current_) {
77  case EOF:
79  break;
80 
81  case '<':
    // A '<' not followed by a second '<' leaves the switch after the
    // (hidden) statements on listing lines 83-84.
82  if (peek_char() != '<') {
85  break;
86  }
    // Otherwise scan forward until end of input or a ">>" pair is reached.
89  for (;;) {
90  next_char();
91  if (current_ == EOF) {
93  break;
94  }
95  if (current_ == '>' && peek_char() == '>') {
97  break;
98  }
100  }
101  break;
102 
103  case '"':
    // Quoted string: collect characters into token_.value until the
    // closing quote or end of input.
105  for (;;) {
106  next_char();
107  if (current_ == EOF) {
109  break;
110  }
111  if (current_ == '"') {
    // A quote not followed by another quote ends the string. For a
    // doubled quote, one of the pair is skipped here and (presumably,
    // depending on next_char_fast()) the other is appended below.
112  if (peek_char() != '"') break;
113  next_char_fast();
114  }
    // An inlined comment (code 254) inside the string is skipped and
    // contributes nothing to the token value.
115  if (current_ == 254) {
116  skip_comment();
    // NOTE(review): presumably offsets a line-count increment done by
    // the next_char() call that follows `continue` -- confirm in
    // tokenizer.hpp.
117  --lineno_;
118  continue;
119  }
120  token_.value += current_;
121  }
122  break;
123 
    // These single characters share one branch; its statements
    // (listing lines 125-126) are not visible here.
124  case '[': case ']': case '/': case '\n': case '=': case ',': case '+':
127  break;
128 
129  case '_':
    // An underscore that is not followed by an alphanumeric character
    // is handled on its own; otherwise it falls through and is read by
    // the default branch below.
130  if (!is_alnum(peek_char())) {
133  break;
134  }
135  [[fallthrough]];
136 
137  default:
138  if (is_alnum(current_) || current_ == '$') {
    // Accumulate a run of alphanumeric characters and '$' into the token
    // value. Inlined comments (code 254) met along the way are skipped
    // without ending the run.
140  do {
141  token_.value += current_;
142  next_char_fast();
143  while (current_ == 254) {
144  skip_comment();
145  next_char_fast();
146  }
147  } while (is_alnum(current_) || current_ == '$');
148  } else {
    // Any other character becomes a one-character token value.
150  token_.value += current_;
151  next_char();
152  }
    // This branch has already advanced past the token, so it returns
    // directly and bypasses the common advance after the switch.
153  return token_;
154  }
155 
    // Common exit for the branches that `break`: advance by one character
    // unless the input is already at EOF.
156  if (current_ != EOF)
157  next_char();
158 
159  return token_;
160 }
161 
// Tries to consume the characters of `cmd` followed by one space character.
//
// On entry current_ is the character *before* the expected text: each
// iteration first advances and then compares. skip_comment() therefore passes
// the command name without its first letter (e.g. "extdomain" after having
// seen 't').
//
// @param cmd  NUL-terminated text expected next in the input.
// @return true if every character of cmd matched and the following character
//         satisfied is_space(); in that case one further character has been
//         read, so current_ is the first character after the space.
//         Returns false on the first mismatch; the mismatching character has
//         already been read into current_.
162 bool tokenizer::skip_command(char const *cmd)
163 {
164  for (; *cmd; ++cmd) {
165  next_char_fast();
166  if (current_ != *cmd) return false;
167  }
    // The command name must be followed by a space character.
168  next_char_fast();
169  if (!is_space(current_)) return false;
    // Step past that space so the caller sees the command's argument.
170  next_char_fast();
171  return true;
172 }
173 
// Body of skip_comment(): the signature (listing line 174) is absent from
// this rendering; the symbol index at the end of this page places
// `void skip_comment()` at tokenizer.cpp:174.
//
// Consumes the rest of the current line. Two forms update tokenizer state
// instead of being discarded:
//   "textdomain <text>"   stores <text> in textdomain_
//   "line <digits> <text>" stores the number in lineno_ and <text> in file_
// Anything else is skipped. On return current_ is '\n' or EOF; the line end
// itself is not consumed here.
175 {
176  next_char_fast();
    // Empty comment: nothing to parse.
177  if (current_ == '\n' || current_ == EOF) return;
    // Destination for the remainder of the line, set by a recognized command.
178  std::string *dst = nullptr;
179 
180  if (current_ == 't')
181  {
182  if (!skip_command("extdomain")) goto fail;
183  dst = &textdomain_;
184  }
185  else if (current_ == 'l')
186  {
187  if (!skip_command("ine")) goto fail;
    // Parse the decimal line number that follows the command.
    // NOTE(review): no guard against overflow for very long digit runs.
188  lineno_ = 0;
189  while (is_num(current_)) {
190  lineno_ = lineno_ * 10 + (current_ - '0');
191  next_char_fast();
192  }
    // The number must be followed by a space; the rest of the line goes
    // into file_.
193  if (!is_space(current_)) goto fail;
194  next_char_fast();
195  dst = &file_;
196  }
197  else
198  {
    // Shared path for unrecognized comments and for commands that failed to
    // match (reached via goto): discard everything up to the line end.
199  fail:
200  while (current_ != '\n' && current_ != EOF) {
201  next_char_fast();
202  }
203  return;
204  }
205 
    // Replace the destination string with the remaining characters of the line.
206  dst->clear();
207  while (current_ != '\n' && current_ != EOF) {
208  *dst += current_;
209  next_char_fast();
210  }
211 }
void skip_comment()
Definition: tokenizer.cpp:174
void next_char()
Definition: tokenizer.hpp:97
bool is_space(int c) const
Definition: tokenizer.hpp:146
int startlineno_
Definition: tokenizer.hpp:95
void next_char_fast()
Definition: tokenizer.hpp:104
const token & next_token()
Definition: tokenizer.cpp:51
unsigned in
If equal to search_counter, the node is off the list.
token_type
Definition: tokenizer.hpp:33
std::string file_
Definition: tokenizer.hpp:170
int lineno_
Definition: tokenizer.hpp:94
bool is_alnum(int c) const
Definition: tokenizer.hpp:156
#define PACKAGE
Definition: wesconfig.h:23
Some defines: VERSION, PACKAGE, MIN_SAVEGAME_VERSION.
char char_types_[128]
Definition: tokenizer.hpp:176
int peek_char()
Definition: tokenizer.hpp:128
double t
Definition: astarsearch.cpp:65
std::string textdomain_
Definition: tokenizer.hpp:169
int current_
Definition: tokenizer.hpp:93
bool is_num(int c) const
Definition: tokenizer.hpp:151
buffered_istream in_
Definition: tokenizer.hpp:175
std::istream & stream()
Returns the owned stream.
token token_
Definition: tokenizer.hpp:171
mock_char c
bool skip_command(char const *cmd)
Returns true if the next characters are the ones from cmd, followed by a space.
Definition: tokenizer.cpp:162
std::string value
Definition: tokenizer.hpp:52
token_type type
Definition: tokenizer.hpp:51