The Battle for Wesnoth  1.15.2+dev
tokenizer.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2004 - 2009 by Philippe Plantier <ayin@anathas.org>
3  Copyright (C) 2010 - 2018 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 /** @file */
17 
18 #include "global.hpp"
20 #include "wesconfig.h"
21 
/**
 * Builds a tokenizer that reads from @a in.
 *
 * current_ starts as EOF, lineno_ as 1, startlineno_ as 0 and textdomain_ as
 * PACKAGE. The body fills char_types_ with one class per 7-bit character
 * code and sets the exception mask of the underlying stream to badbit, so
 * the stream throws when badbit gets set.
 *
 * NOTE(review): the listing numbering jumps from 43 to 45 right before the
 * closing brace, so one statement of this body may not be shown here.
 */
22 tokenizer::tokenizer(std::istream& in) :
23  current_(EOF),
24  lineno_(1),
25  startlineno_(0),
26  textdomain_(PACKAGE),
27  file_(),
28  token_(),
29  in_(in)
30 {
 // Classify codes 0..127: letters and '_' are TOK_ALPHA, digits are
 // TOK_NUMERIC, space and tab are TOK_SPACE; every other code keeps 0.
31  for (int c = 0; c < 128; ++c)
32  {
33  int t = 0;
34  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
35  t = TOK_ALPHA;
36  } else if (c >= '0' && c <= '9') {
37  t = TOK_NUMERIC;
38  } else if (c == ' ' || c == '\t') {
39  t = TOK_SPACE;
40  }
41  char_types_[c] = t;
42  }
 // Ask the stream to throw when badbit is set.
43  in_.stream().exceptions(std::ios_base::badbit);
45 }
46 
48 {
 // NOTE(review): the signature line (listing line 47) is not shown. Since
 // this body undoes the exception mask set in the constructor, it is
 // presumably tokenizer::~tokenizer() -- confirm.
 // Reset the stream's error state to goodbit.
49  in_.stream().clear(std::ios_base::goodbit);
 // An exception mask of goodbit means no stream state throws any more.
50  in_.stream().exceptions(std::ios_base::goodbit);
51 }
52 
54 {
 // Reads the next token from the input into token_ and returns it.
 //
 // NOTE(review): the signature line (listing line 53) is not shown; the
 // cross-reference at the end of this listing gives it as
 // "const token & next_token()". Several numbered lines inside this body
 // are also absent from the listing (64, 70, 76, 80, 85-86, 89-90, 94, 98,
 // 106, 110, 127-128, 133-134, 141, 151), so some branches below look
 // empty or incomplete. The comments describe only the visible statements.
55 #ifdef DEBUG_TOKENIZER
56  previous_token_ = token_;
57 #endif
58  token_.value.clear();
59 
60  // Dump spaces and inlined comments
61  for(;;)
62  {
63  while (is_space(current_)) {
65  }
 // Character code 254 is handed to skip_comment() just like '#' below.
 // NOTE(review): presumably a marker inserted by an earlier processing
 // stage -- confirm where it comes from.
66  if (current_ != 254)
67  break;
68  skip_comment();
69  // skip the line end
71  }
72 
73  if (current_ == '#')
74  skip_comment();
75 
77 
 // Dispatch on the first character of the token.
78  switch(current_) {
79  case EOF:
81  break;
82 
83  case '<':
 // A '<' that is not followed by a second '<' is handled on its own.
84  if (peek_char() != '<') {
87  break;
88  }
 // Otherwise collect characters until ">>" or end of input.
91  for (;;) {
92  next_char();
93  if (current_ == EOF) {
95  break;
96  }
97  if (current_ == '>' && peek_char() == '>') {
99  break;
100  }
101  token_.value += current_;
102  }
103  break;
104 
105  case '"':
 // Collect characters until the closing quote or end of input.
107  for (;;) {
108  next_char();
109  if (current_ == EOF) {
111  break;
112  }
113  if (current_ == '"') {
 // A quote not followed by another quote ends the string. For a
 // doubled quote, step onto the second one so that a single '"' is
 // appended below.
114  if (peek_char() != '"') break;
115  next_char_fast();
116  }
117  if (current_ == 254) {
 // A 254 marker inside the string is skipped, not stored.
 // NOTE(review): the decrement presumably compensates for the
 // line-end being counted by the next_char() at the top of the loop
 // -- confirm against next_char().
118  skip_comment();
119  --lineno_;
120  continue;
121  }
122  token_.value += current_;
123  }
124  break;
125 
126  case '[': case ']': case '/': case '\n': case '=': case ',': case '+':
129  break;
130 
131  case '_':
 // An underscore followed by an alphanumeric character falls through
 // to the default branch; otherwise it is handled here on its own.
132  if (!is_alnum(peek_char())) {
135  break;
136  }
137  FALLTHROUGH;
138 
139  default:
140  if (is_alnum(current_) || current_ == '$') {
 // Accumulate a run of alphanumeric / '$' characters, skipping any
 // 254 markers that appear in the middle of the run.
142  do {
143  token_.value += current_;
144  next_char_fast();
145  while (current_ == 254) {
146  skip_comment();
147  next_char_fast();
148  }
149  } while (is_alnum(current_) || current_ == '$');
150  } else {
 // Any other character becomes a one-character value.
152  token_.value += current_;
153  next_char();
154  }
 // This branch has already advanced past the token, so it returns
 // directly and does not run the trailing next_char() below.
155  return token_;
156  }
157 
 // Every branch that left the switch via break still has the last
 // character of its token in current_; step past it unless at EOF.
158  if (current_ != EOF)
159  next_char();
160 
161  return token_;
162 }
163 
164 bool tokenizer::skip_command(char const *cmd)
165 {
166  for (; *cmd; ++cmd) {
167  next_char_fast();
168  if (current_ != *cmd) return false;
169  }
170  next_char_fast();
171  if (!is_space(current_)) return false;
172  next_char_fast();
173  return true;
174 }
175 
177 {
 // Consumes the rest of a comment line, stopping with current_ on the
 // terminating '\n' or on EOF. Two directive forms are recognised:
 //   "textdomain <text>"      -> <text> is stored in textdomain_
 //   "line <digits> <text>"   -> <digits> become lineno_, <text> goes to file_
 // Anything else is discarded up to the end of the line.
 //
 // NOTE(review): the signature line (listing line 176) is not shown; the
 // cross-reference at the end of this listing gives it as
 // "void skip_comment()".

 // Step past the character that introduced the comment.
178  next_char_fast();
179  if (current_ == '\n' || current_ == EOF) return;
 // Destination for the directive argument; set by the branches below.
180  std::string *dst = nullptr;
181 
182  if (current_ == 't')
183  {
 // skip_command() also consumes the space that follows the keyword.
184  if (!skip_command("extdomain")) goto fail;
185  dst = &textdomain_;
186  }
187  else if (current_ == 'l')
188  {
189  if (!skip_command("ine")) goto fail;
 // Parse a decimal line number.
 // NOTE(review): lineno_ is an int and the accumulation below has no
 // upper bound, so a very long digit run would overflow it.
190  lineno_ = 0;
191  while (is_num(current_)) {
192  lineno_ = lineno_ * 10 + (current_ - '0');
193  next_char_fast();
194  }
 // The number must be followed by a space before the remaining text.
195  if (!is_space(current_)) goto fail;
196  next_char_fast();
197  dst = &file_;
198  }
199  else
200  {
 // Shared exit for plain comments and for directives that failed to
 // match: drop everything up to the end of the line.
201  fail:
202  while (current_ != '\n' && current_ != EOF) {
203  next_char_fast();
204  }
205  return;
206  }
207 
 // Replace the selected member with the remainder of the line.
208  dst->clear();
209  while (current_ != '\n' && current_ != EOF) {
210  *dst += current_;
211  next_char_fast();
212  }
213 }
void skip_comment()
Definition: tokenizer.cpp:176
void next_char()
Definition: tokenizer.hpp:96
bool is_space(int c) const
Definition: tokenizer.hpp:145
int startlineno_
Definition: tokenizer.hpp:94
void next_char_fast()
Definition: tokenizer.hpp:103
const token & next_token()
Definition: tokenizer.cpp:53
unsigned in
If equal to search_counter, the node is off the list.
token_type
Definition: tokenizer.hpp:32
std::string file_
Definition: tokenizer.hpp:169
int lineno_
Definition: tokenizer.hpp:93
bool is_alnum(int c) const
Definition: tokenizer.hpp:155
#define PACKAGE
Definition: wesconfig.h:23
Some defines: VERSION, PACKAGE, MIN_SAVEGAME_VERSION.
char char_types_[128]
Definition: tokenizer.hpp:175
int peek_char()
Definition: tokenizer.hpp:127
double t
Definition: astarsearch.cpp:64
std::string textdomain_
Definition: tokenizer.hpp:168
int current_
Definition: tokenizer.hpp:92
bool is_num(int c) const
Definition: tokenizer.hpp:150
buffered_istream in_
Definition: tokenizer.hpp:174
std::istream & stream()
Returns the owned stream.
token token_
Definition: tokenizer.hpp:170
mock_char c
bool skip_command(char const *cmd)
Returns true if the next characters are the ones from cmd followed by a space.
Definition: tokenizer.cpp:164
std::string value
Definition: tokenizer.hpp:51
token_type type
Definition: tokenizer.hpp:50