The Battle for Wesnoth  1.19.7+dev
string_utils.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2005 - 2024
3  by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Copyright (C) 2003 by David White <dave@whitevine.net>
5  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
6 
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; either version 2 of the License, or
10  (at your option) any later version.
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY.
13 
14  See the COPYING file for more details.
15 */
16 
17 #define GETTEXT_DOMAIN "wesnoth-lib"
18 
19 #include "formula/string_utils.hpp"
20 #include "variable.hpp"
21 
22 #include "config.hpp"
23 #include "log.hpp"
24 #include "gettext.hpp"
25 
26 #include <algorithm>
27 #include <array>
28 #include <utility>
29 
// Log domain for this file's diagnostics; it is registered under the name "engine".
30 static lg::log_domain log_engine("engine");
// Shorthand streams into that domain at error and warning severity.
31 #define ERR_NG LOG_STREAM(err, log_engine)
32 #define WRN_NG LOG_STREAM(warn, log_engine)
33 
/** Predicate for std::adjacent_find: true only when both neighbouring characters are '.'. */
static bool two_dots(char a, char b)
{
	if(a != '.') {
		return false;
	}
	return b == '.';
}
35 
36 namespace utils {
37 
38  namespace detail {
    // Hook that evaluates the text of a "$( ... )" formula and returns its result as a string.
    // It is null by default; while it is null, do_interpolation() removes formulas and logs a warning.
    // NOTE(review): presumably assigned elsewhere by the formula engine - not visible in this file.
39  std::string(* evaluate_formula)(const std::string& formula) = nullptr;
40  }
41 
42 template <typename T>
44 {
45 public:
46  string_map_variable_set(const std::map<std::string,T>& map) : map_(map) {}
47 
48  virtual config::attribute_value get_variable_const(const std::string &key) const
49  {
51  const auto itor = map_.find(key);
52  if (itor != map_.end())
53  val = itor->second;
54  return val;
55  }
56  virtual variable_access_const get_variable_access_read(const std::string& varname) const
57  {
58  temp_.reset(new config);
59  for(const auto& p : map_) {
60  temp_->insert(p.first, p.second);
61  }
62  return variable_access_const(varname, *temp_);
63  }
64 private:
65  const std::map<std::string,T>& map_;
66  mutable std::shared_ptr<config> temp_; // only used if get_variable_access_read called
67 };
68 }
69 
/**
 * Expands '$' references in a copy of @a str and returns the expanded copy.
 *
 * The string is scanned for '$' from the end towards the start, so that inner
 * references (e.g. the "$i" in "$creatures[$i].user_description") are already
 * substituted by the time the outer reference is examined.
 *
 * Forms handled for each '$' found:
 *  - '$' as the very last character: left untouched.
 *  - "$( ... )": the text between the outer parentheses is passed to
 *    utils::detail::evaluate_formula and the whole span is replaced by the
 *    result. If that hook is null, or the parentheses never close, the span
 *    is removed and a warning / error is logged.
 *  - "$name": the name extends over ASCII alphanumerics, '.', '_' and
 *    balanced '[' ']'; it is cut at the first "..", and one trailing '.' is
 *    dropped unless it directly follows ']'. The span is replaced by
 *    set.get_variable_const(name).
 *  - "$name|": as above, and the '|' terminator is consumed as well.
 *  - "$|" (empty name): replaced by a literal "$".
 *  - "$name?default|": replaced by the variable's value when that value is
 *    not empty, otherwise by the default text.
 *
 * @param str The text to expand; it is not modified.
 * @param set Source of variable values.
 * @return The expanded string.
 */
70 static std::string do_interpolation(const std::string &str, const variable_set& set)
71 {
72  std::string res = str;
73  // This needs to be able to store negative numbers to check for the while's condition
74  // (which is only false when the previous '$' was at index 0)
75  int rfind_dollars_sign_from = res.size();
76  while(rfind_dollars_sign_from >= 0) {
77  // Going in a backwards order allows nested variable-retrieval, e.g. in arrays.
78  // For example, "I am $creatures[$i].user_description!"
79  const std::string::size_type var_begin_loc = res.rfind('$', rfind_dollars_sign_from);
80 
81  // If there are no '$' left then we're done.
82  if(var_begin_loc == std::string::npos) {
83  break;
84  }
85 
86  // For the next iteration of the loop, search for more '$'
87  // (not from the same place because sometimes the '$' is not replaced)
88  rfind_dollars_sign_from = static_cast<int>(var_begin_loc) - 1;
89 
90 
91  const std::string::iterator var_begin = res.begin() + var_begin_loc;
92 
93  // The '$' is not part of the variable name.
94  const std::string::iterator var_name_begin = var_begin + 1;
95  std::string::iterator var_end = var_name_begin;
96 
97  if(var_name_begin == res.end()) {
98  // Any '$' at the end of a string is just a '$'
99  continue;
100  } else if(*var_name_begin == '(') {
101  // The $( ... ) syntax invokes a formula
102  int paren_nesting_level = 0;
103  bool in_string = false,
104  in_comment = false;
105  do {
106  switch(*var_end) {
107  case '(':
108  if(!in_string && !in_comment) {
109  ++paren_nesting_level;
110  }
111  break;
112  case ')':
113  if(!in_string && !in_comment) {
114  --paren_nesting_level;
115  }
116  break;
117  case '#':
118  if(!in_string) {
119  in_comment = !in_comment;
120  }
121  break;
122  case '\'':
123  if(!in_comment) {
124  in_string = !in_string;
125  }
126  break;
127  // TODO: support escape sequences when/if they are allowed in FormulaAI strings
128  }
    // The scan ends just past the matching ')' (nesting back at 0) or at the end of the string.
129  } while(++var_end != res.end() && paren_nesting_level > 0);
130  if(utils::detail::evaluate_formula == nullptr) {
131  WRN_NG << "Formula substitution ignored (and removed) because WFL engine is not present in the server.";
132  res.replace(var_begin, var_end, "");
133  continue;
134  }
135  if(paren_nesting_level > 0) {
136  ERR_NG << "Formula in WML string cannot be evaluated due to "
137  << "a missing closing parenthesis:\n\t--> \""
138  << std::string(var_begin, var_end) << "\"";
139  res.replace(var_begin, var_end, "");
140  continue;
141  }
    // var_begin+2 skips the leading "$(" and var_end-1 drops the closing ')',
    // so only the formula text itself is handed to the evaluator.
142  res.replace(var_begin, var_end, utils::detail::evaluate_formula(std::string(var_begin+2, var_end-1)));
143  continue;
144  }
145 
146  // Find the maximum extent of the variable name (it may be shortened later).
147  for(int bracket_nesting_level = 0; var_end != res.end(); ++var_end) {
148  const char c = *var_end;
149  if(c == '[') {
150  ++bracket_nesting_level;
151  }
152  else if(c == ']') {
153  if(--bracket_nesting_level < 0) {
154  break;
155  }
156  }
157  // isascii() breaks on mingw with -std=c++0x
158  else if (!(((c) & ~0x7f) == 0)/*isascii(c)*/ || (!isalnum(c) && c != '.' && c != '_')) {
159  break;
160  }
161  }
162 
163  // Two dots in a row cannot be part of a valid variable name.
164  // That matters for random=, e.g. $x..$y
165  var_end = std::adjacent_find(var_name_begin, var_end, two_dots);
166  // The default value is specified after a '?'.
167  const std::string::iterator default_start = var_end < res.end() && *var_end == '?' ? var_end + 1 : res.end();
168 
169  // If the last character is '.', then it can't be a sub-variable.
170  // It's probably meant to be a period instead. Don't include it.
171  // Would need to do it repetitively if there are multiple '.'s at the end,
172  // but don't actually need to do so because the previous check for adjacent '.'s would catch that.
173  // For example, "My score is $score." or "My score is $score..."
174  if(*(var_end-1) == '.'
175  // However, "$array[$i]" by itself does not name a variable,
176  // so if "$array[$i]." is encountered, then best to include the '.',
177  // so that it more closely follows the syntax of a variable (if only to get rid of all of it).
178  // (If it's the script writer's error, they'll have to fix it in either case.)
179  // For example in "$array[$i].$field_name", if field_name does not exist as a variable,
180  // then the result of the expansion should be "", not "." (which it would be if this exception did not exist).
181  && *(var_end-2) != ']') {
182  --var_end;
183  }
184 
185  const std::string var_name(var_name_begin, var_end);
186  if(default_start == res.end()) {
187  if(var_end != res.end() && *var_end == '|') {
188  // It's been used to end this variable name; now it has no more effect.
189  // This can allow use of things like "$$composite_var_name|.x"
190  // (Yes, that's a WML 'pointer' of sorts. They are sometimes useful.)
191  // If there should still be a '|' there afterwards to affect other variable names (unlikely),
192  // just put another '|' there, one matching each '$', e.g. "$$var_containing_var_name||blah"
193  ++var_end;
194  }
195 
196 
197  if (var_name.empty()) {
198  // Allow for a way to have $s in a string.
199  // $| will be replaced by $.
200  res.replace(var_begin, var_end, "$");
201  }
202  else {
203  // The variable is replaced with its value.
204  res.replace(var_begin, var_end,
205  set.get_variable_const(var_name));
206  }
207  }
208  else {
    // A default was given: advance var_end to the '|' that closes it (or to the end of the string).
209  var_end = default_start;
210  while(var_end != res.end() && *var_end != '|') {
211  ++var_end;
212  }
213  const std::string::iterator default_end = var_end;
214  const config::attribute_value& val = set.get_variable_const(var_name);
    // No closing '|': only the part before the '?' is replaced by the value;
    // the '?' and the text after it are left in place.
215  if(var_end == res.end()) {
216  res.replace(var_begin, default_start - 1, val);
217  }
    // Value is not empty: it replaces the whole "$name?default|" span.
218  else if(!val.empty()) {
219  res.replace(var_begin, var_end + 1, val);
220  }
    // Value is empty: the default text replaces the whole span instead.
221  else {
222  res.replace(var_begin, var_end + 1, std::string(default_start, default_end));
223  }
224  }
225  }
226 
227  return res;
228 }
229 
230 namespace utils {
231 
232 std::string interpolate_variables_into_string(const std::string &str, const string_map * const symbols)
233 {
234  auto set = string_map_variable_set<t_string>(*symbols);
235  return do_interpolation(str, set);
236 }
237 
238 std::string interpolate_variables_into_string(const std::string &str, const std::map<std::string,std::string> * const symbols)
239 {
240  auto set = string_map_variable_set<std::string>(*symbols);
241  return do_interpolation(str, set);
242 }
243 
244 std::string interpolate_variables_into_string(const std::string &str, const variable_set& variables)
245 {
246  return do_interpolation(str, variables);
247 }
248 
250 {
251  if(!tstr.str().empty()) {
252  std::string interp = utils::interpolate_variables_into_string(tstr.str(), variables);
253  if(tstr.str() != interp) {
254  return t_string(interp);
255  }
256  }
257  return tstr;
258 }
259 
260 std::string format_conjunct_list(const t_string& empty, const std::vector<t_string>& elems) {
261  switch(elems.size()) {
262  case 0: return empty;
263  case 1: return elems[0];
264  // TRANSLATORS: Formats a two-element conjunctive list.
265  case 2: return VGETTEXT("conjunct pair^$first and $second", {{"first", elems[0]}, {"second", elems[1]}});
266  }
267  // TRANSLATORS: Formats the first two elements of a conjunctive list.
268  std::string prefix = VGETTEXT("conjunct start^$first, $second", {{"first", elems[0]}, {"second", elems[1]}});
269  // For size=3 this loop is not entered
270  for(std::size_t i = 2; i < elems.size() - 1; i++) {
271  // TRANSLATORS: Formats successive elements of a conjunctive list.
272  prefix = VGETTEXT("conjunct mid^$prefix, $next", {{"prefix", prefix}, {"next", elems[i]}});
273  }
274  // TRANSLATORS: Formats the final element of a conjunctive list.
275  return VGETTEXT("conjunct end^$prefix, and $last", {{"prefix", prefix}, {"last", elems.back()}});
276 }
277 
278 std::string format_disjunct_list(const t_string& empty, const std::vector<t_string>& elems) {
279  switch(elems.size()) {
280  case 0: return empty;
281  case 1: return elems[0];
282  // TRANSLATORS: Formats a two-element disjunctive list.
283  case 2: return VGETTEXT("disjunct pair^$first or $second", {{"first", elems[0]}, {"second", elems[1]}});
284  }
285  // TRANSLATORS: Formats the first two elements of a disjunctive list.
286  std::string prefix = VGETTEXT("disjunct start^$first, $second", {{"first", elems[0]}, {"second", elems[1]}});
287  // For size=3 this loop is not entered
288  for(std::size_t i = 2; i < elems.size() - 1; i++) {
289  // TRANSLATORS: Formats successive elements of a disjunctive list.
290  prefix = VGETTEXT("disjunct mid^$prefix, $next", {{"prefix", prefix}, {"next", elems[i]}});
291  }
292  // TRANSLATORS: Formats the final element of a disjunctive list.
293  return VGETTEXT("disjunct end^$prefix, or $last", {{"prefix", prefix}, {"last", elems.back()}});
294 }
295 
296 }
297 
298 std::string vgettext_impl(const char *domain
299  , const char *msgid
300  , const utils::string_map& symbols)
301 {
302  const std::string orig(translation::dsgettext(domain, msgid));
303  const std::string msg = utils::interpolate_variables_into_string(orig, &symbols);
304  return msg;
305 }
306 
307 std::string vngettext_impl(const char* domain,
308  const char* singular,
309  const char* plural,
310  int count,
311  const utils::string_map& symbols)
312 {
313  const std::string orig(translation::dsngettext(domain, singular, plural, count));
314  const std::string msg = utils::interpolate_variables_into_string(orig, &symbols);
315  return msg;
316 }
317 
/**
 * Approximate edit distance between two strings.
 *
 * The common prefix and suffix are discarded first. If either remainder is
 * empty, the length of the other is the answer. Otherwise at most the first
 * 15 characters of each remainder are compared with a single-row dynamic
 * programme in the style of the optimal-string-alignment distance, where a
 * swap of two neighbouring characters is treated like a match on the diagonal.
 *
 * @return The approximate number of edits separating @a str_1 and @a str_2.
 */
[[nodiscard]] std::size_t edit_distance_approx(std::string_view str_1, std::string_view str_2) noexcept
{
	// Drop the shared prefix.
	const std::size_t shortest = std::min(str_1.size(), str_2.size());
	std::size_t shared = 0;
	while(shared < shortest && str_1[shared] == str_2[shared]) {
		++shared;
	}
	str_1.remove_prefix(shared);
	str_2.remove_prefix(shared);

	// Drop the shared suffix.
	while(!str_1.empty() && !str_2.empty() && str_1.back() == str_2.back()) {
		str_1.remove_suffix(1);
		str_2.remove_suffix(1);
	}

	if(str_1.empty()) {
		return str_2.size();
	}

	if(str_2.empty()) {
		return str_1.size();
	}

	// Only the first 15 remaining characters of each string take part.
	constexpr std::size_t max_len = 15;
	std::string_view outer = str_1.substr(0, max_len);
	std::string_view inner = str_2.substr(0, max_len);

	// The longer string drives the outer loop.
	if(outer.size() < inner.size()) {
		std::swap(outer, inner);
	}

	// One row of the distance matrix, initialised to 0, 1, 2, ..., 15.
	std::array<std::size_t, max_len + 1> row{};
	for(std::size_t k = 0; k != row.size(); ++k) {
		row[k] = k;
	}

	for(std::size_t r = 0; r != outer.size(); ++r) {
		// `diagonal` is the previous row's value one column to the left (substitution).
		std::size_t diagonal = r;
		row[0] = r + 1;

		for(std::size_t c = 0; c != inner.size(); ++c) {
			// `above` is the previous row's value in this column (deletion);
			// row[c] is this row's value to the left (insertion).
			const std::size_t above = row[c + 1];
			const bool same = outer[r] == inner[c];
			const bool swapped_pair = r > 0 && c > 0 && outer[r] == inner[c - 1] && outer[r - 1] == inner[c];

			row[c + 1] = (same || swapped_pair) ? diagonal : std::min({above, row[c], diagonal}) + 1;

			// Moving one column right, the old `above` becomes the new diagonal.
			diagonal = above;
		}
	}

	return row[inner.size()];
}
Variant for storing WML attributes.
bool empty() const
Tests for an attribute that either was never set or was set to "".
A config object defines a single node in a WML file, with access to child nodes.
Definition: config.hpp:172
const std::string & str() const
Definition: tstring.hpp:198
const std::map< std::string, T > & map_
virtual config::attribute_value get_variable_const(const std::string &key) const
string_map_variable_set(const std::map< std::string, T > &map)
std::shared_ptr< config > temp_
virtual variable_access_const get_variable_access_read(const std::string &varname) const
Information on a WML variable.
void swap(config &lhs, config &rhs)
Implement non-member swap function for std::swap (calls config::swap).
Definition: config.cpp:1343
Definitions for the interface to Wesnoth Markup Language (WML).
#define WRN_NG
std::string vngettext_impl(const char *domain, const char *singular, const char *plural, int count, const utils::string_map &symbols)
static lg::log_domain log_engine("engine")
static bool two_dots(char a, char b)
#define ERR_NG
std::string vgettext_impl(const char *domain, const char *msgid, const utils::string_map &symbols)
Implementation functions for the VGETTEXT and VNGETTEXT macros.
static std::string do_interpolation(const std::string &str, const variable_set &set)
std::size_t edit_distance_approx(std::string_view str_1, std::string_view str_2) noexcept
Calculate the approximate edit distance of two strings.
#define VGETTEXT(msgid,...)
Handy wrappers around interpolate_variables_into_string and gettext.
std::size_t i
Definition: function.cpp:1029
Standard logging facilities (interface).
void set(CURSOR_TYPE type)
Use the default parameter to reset cursors.
Definition: cursor.cpp:176
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:434
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:464
std::string(* evaluate_formula)(const std::string &formula)
std::string interpolate_variables_into_string(const std::string &str, const string_map *const symbols)
Function which will interpolate variables, starting with '$' in the string 'str' with the equivalent ...
t_string interpolate_variables_into_tstring(const t_string &tstr, const variable_set &variables)
Function that does the same as the above, for t_stringS.
std::string format_disjunct_list(const t_string &empty, const std::vector< t_string > &elems)
Format a disjunctive list.
std::string format_conjunct_list(const t_string &empty, const std::vector< t_string > &elems)
Format a conjunctive list.
std::map< std::string, t_string > string_map
std::string::const_iterator iterator
Definition: tokenizer.hpp:25
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
mock_char c
mock_party p
variable_info< const variable_info_implementation::vi_policy_const > variable_access_const
Read-only access.
#define b