The Battle for Wesnoth  1.19.0-dev
string_utils.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2005 - 2024
3  by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Copyright (C) 2003 by David White <dave@whitevine.net>
5  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
6 
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; either version 2 of the License, or
10  (at your option) any later version.
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY.
13 
14  See the COPYING file for more details.
15 */
16 
17 #define GETTEXT_DOMAIN "wesnoth-lib"
18 
19 #include "formula/string_utils.hpp"
20 #include "variable.hpp"
21 
22 #include "config.hpp"
23 #include "log.hpp"
24 #include "gettext.hpp"
25 
26 #include <algorithm>
27 #include <array>
28 #include <utility>
29 
// Log domain named "engine"; the two macros below write to it.
static lg::log_domain log_engine("engine");
// Stream for messages of severity `err` on log_engine.
#define ERR_NG LOG_STREAM(err, log_engine)
// Stream for messages of severity `warn` on log_engine.
#define WRN_NG LOG_STREAM(warn, log_engine)
33 
/** Binary predicate for std::adjacent_find: true when both characters are a '.'. */
static bool two_dots(char a, char b)
{
	return a == '.' && a == b;
}
35 
36 namespace utils {
37 
38  namespace detail {
39  std::string(* evaluate_formula)(const std::string& formula) = nullptr;
40  }
41 
42 template <typename T>
44 {
45 public:
46  string_map_variable_set(const std::map<std::string,T>& map) : map_(map) {}
47 
48  virtual config::attribute_value get_variable_const(const std::string &key) const
49  {
51  const auto itor = map_.find(key);
52  if (itor != map_.end())
53  val = itor->second;
54  return val;
55  }
56  virtual variable_access_const get_variable_access_read(const std::string& varname) const
57  {
58  temp_.reset(new config);
59  for(const auto& p : map_) {
60  temp_->insert(p.first, p.second);
61  }
62  return variable_access_const(varname, *temp_);
63  }
64 private:
65  const std::map<std::string,T>& map_;
66  mutable std::shared_ptr<config> temp_; // only used if get_variable_access_read called
67 };
68 }
69 
70 static std::string do_interpolation(const std::string &str, const variable_set& set)
71 {
72  std::string res = str;
73  // This needs to be able to store negative numbers to check for the while's condition
74  // (which is only false when the previous '$' was at index 0)
75  int rfind_dollars_sign_from = res.size();
76  while(rfind_dollars_sign_from >= 0) {
77  // Going in a backwards order allows nested variable-retrieval, e.g. in arrays.
78  // For example, "I am $creatures[$i].user_description!"
79  const std::string::size_type var_begin_loc = res.rfind('$', rfind_dollars_sign_from);
80 
81  // If there are no '$' left then we're done.
82  if(var_begin_loc == std::string::npos) {
83  break;
84  }
85 
86  // For the next iteration of the loop, search for more '$'
87  // (not from the same place because sometimes the '$' is not replaced)
88  rfind_dollars_sign_from = static_cast<int>(var_begin_loc) - 1;
89 
90 
91  const std::string::iterator var_begin = res.begin() + var_begin_loc;
92 
93  // The '$' is not part of the variable name.
94  const std::string::iterator var_name_begin = var_begin + 1;
95  std::string::iterator var_end = var_name_begin;
96 
97  if(var_name_begin == res.end()) {
98  // Any '$' at the end of a string is just a '$'
99  continue;
100  } else if(*var_name_begin == '(') {
101  // The $( ... ) syntax invokes a formula
102  int paren_nesting_level = 0;
103  bool in_string = false,
104  in_comment = false;
105  do {
106  switch(*var_end) {
107  case '(':
108  if(!in_string && !in_comment) {
109  ++paren_nesting_level;
110  }
111  break;
112  case ')':
113  if(!in_string && !in_comment) {
114  --paren_nesting_level;
115  }
116  break;
117  case '#':
118  if(!in_string) {
119  in_comment = !in_comment;
120  }
121  break;
122  case '\'':
123  if(!in_comment) {
124  in_string = !in_string;
125  }
126  break;
127  // TODO: support escape sequences when/if they are allowed in FormulaAI strings
128  }
129  } while(++var_end != res.end() && paren_nesting_level > 0);
130  if(utils::detail::evaluate_formula == nullptr) {
131  WRN_NG << "Formula substitution ignored (and removed) because WFL engine is not present in the server.";
132  res.replace(var_begin, var_end, "");
133  continue;
134  }
135  if(paren_nesting_level > 0) {
136  ERR_NG << "Formula in WML string cannot be evaluated due to "
137  << "a missing closing parenthesis:\n\t--> \""
138  << std::string(var_begin, var_end) << "\"";
139  res.replace(var_begin, var_end, "");
140  continue;
141  }
142  res.replace(var_begin, var_end, utils::detail::evaluate_formula(std::string(var_begin+2, var_end-1)));
143  continue;
144  }
145 
146  // Find the maximum extent of the variable name (it may be shortened later).
147  for(int bracket_nesting_level = 0; var_end != res.end(); ++var_end) {
148  const char c = *var_end;
149  if(c == '[') {
150  ++bracket_nesting_level;
151  }
152  else if(c == ']') {
153  if(--bracket_nesting_level < 0) {
154  break;
155  }
156  }
157  // isascii() breaks on mingw with -std=c++0x
158  else if (!(((c) & ~0x7f) == 0)/*isascii(c)*/ || (!isalnum(c) && c != '.' && c != '_')) {
159  break;
160  }
161  }
162 
163  // Two dots in a row cannot be part of a valid variable name.
164  // That matters for random=, e.g. $x..$y
165  var_end = std::adjacent_find(var_name_begin, var_end, two_dots);
166  // the default value is specified after ''?'
167  const std::string::iterator default_start = var_end < res.end() && *var_end == '?' ? var_end + 1 : res.end();
168 
169  // If the last character is '.', then it can't be a sub-variable.
170  // It's probably meant to be a period instead. Don't include it.
171  // Would need to do it repetitively if there are multiple '.'s at the end,
172  // but don't actually need to do so because the previous check for adjacent '.'s would catch that.
173  // For example, "My score is $score." or "My score is $score..."
174  if(*(var_end-1) == '.'
175  // However, "$array[$i]" by itself does not name a variable,
176  // so if "$array[$i]." is encountered, then best to include the '.',
177  // so that it more closely follows the syntax of a variable (if only to get rid of all of it).
178  // (If it's the script writer's error, they'll have to fix it in either case.)
179  // For example in "$array[$i].$field_name", if field_name does not exist as a variable,
180  // then the result of the expansion should be "", not "." (which it would be if this exception did not exist).
181  && *(var_end-2) != ']') {
182  --var_end;
183  }
184 
185  const std::string var_name(var_name_begin, var_end);
186  if(default_start == res.end()) {
187  if(var_end != res.end() && *var_end == '|') {
188  // It's been used to end this variable name; now it has no more effect.
189  // This can allow use of things like "$$composite_var_name|.x"
190  // (Yes, that's a WML 'pointer' of sorts. They are sometimes useful.)
191  // If there should still be a '|' there afterwards to affect other variable names (unlikely),
192  // just put another '|' there, one matching each '$', e.g. "$$var_containing_var_name||blah"
193  ++var_end;
194  }
195 
196 
197  if (var_name.empty()) {
198  // Allow for a way to have $s in a string.
199  // $| will be replaced by $.
200  res.replace(var_begin, var_end, "$");
201  }
202  else {
203  // The variable is replaced with its value.
204  res.replace(var_begin, var_end,
205  set.get_variable_const(var_name));
206  }
207  }
208  else {
209  var_end = default_start;
210  while(var_end != res.end() && *var_end != '|') {
211  ++var_end;
212  }
213  const std::string::iterator default_end = var_end;
214  const config::attribute_value& val = set.get_variable_const(var_name);
215  if(var_end == res.end()) {
216  res.replace(var_begin, default_start - 1, val);
217  }
218  else if(!val.empty()) {
219  res.replace(var_begin, var_end + 1, val);
220  }
221  else {
222  res.replace(var_begin, var_end + 1, std::string(default_start, default_end));
223  }
224  }
225  }
226 
227  return res;
228 }
229 
230 namespace utils {
231 
232 std::string interpolate_variables_into_string(const std::string &str, const string_map * const symbols)
233 {
234  auto set = string_map_variable_set<t_string>(*symbols);
235  return do_interpolation(str, set);
236 }
237 
238 std::string interpolate_variables_into_string(const std::string &str, const std::map<std::string,std::string> * const symbols)
239 {
240  auto set = string_map_variable_set<std::string>(*symbols);
241  return do_interpolation(str, set);
242 }
243 
244 std::string interpolate_variables_into_string(const std::string &str, const variable_set& variables)
245 {
246  return do_interpolation(str, variables);
247 }
248 
250 {
251  if(!tstr.str().empty()) {
252  std::string interp = utils::interpolate_variables_into_string(tstr.str(), variables);
253  if(tstr.str() != interp) {
254  return t_string(interp);
255  }
256  }
257  return tstr;
258 }
259 
260 std::string format_conjunct_list(const t_string& empty, const std::vector<t_string>& elems) {
261  switch(elems.size()) {
262  case 0: return empty;
263  case 1: return elems[0];
264  // TRANSLATORS: Formats a two-element conjunctive list.
265  case 2: return VGETTEXT("conjunct pair^$first and $second", {{"first", elems[0]}, {"second", elems[1]}});
266  }
267  // TRANSLATORS: Formats the first two elements of a conjunctive list.
268  std::string prefix = VGETTEXT("conjunct start^$first, $second", {{"first", elems[0]}, {"second", elems[1]}});
269  // For size=3 this loop is not entered
270  for(std::size_t i = 2; i < elems.size() - 1; i++) {
271  // TRANSLATORS: Formats successive elements of a conjunctive list.
272  prefix = VGETTEXT("conjunct mid^$prefix, $next", {{"prefix", prefix}, {"next", elems[i]}});
273  }
274  // TRANSLATORS: Formats the final element of a conjunctive list.
275  return VGETTEXT("conjunct end^$prefix, and $last", {{"prefix", prefix}, {"last", elems.back()}});
276 }
277 
278 std::string format_disjunct_list(const t_string& empty, const std::vector<t_string>& elems) {
279  switch(elems.size()) {
280  case 0: return empty;
281  case 1: return elems[0];
282  // TRANSLATORS: Formats a two-element disjunctive list.
283  case 2: return VGETTEXT("disjunct pair^$first or $second", {{"first", elems[0]}, {"second", elems[1]}});
284  }
285  // TRANSLATORS: Formats the first two elements of a disjunctive list.
286  std::string prefix = VGETTEXT("disjunct start^$first, $second", {{"first", elems[0]}, {"second", elems[1]}});
287  // For size=3 this loop is not entered
288  for(std::size_t i = 2; i < elems.size() - 1; i++) {
289  // TRANSLATORS: Formats successive elements of a disjunctive list.
290  prefix = VGETTEXT("disjunct mid^$prefix, $next", {{"prefix", prefix}, {"next", elems[i]}});
291  }
292  // TRANSLATORS: Formats the final element of a disjunctive list.
293  return VGETTEXT("disjunct end^$prefix, or $last", {{"prefix", prefix}, {"last", elems.back()}});
294 }
295 
296 std::string format_timespan(std::time_t time, bool detailed)
297 {
298  if(time <= 0) {
299  return _("timespan^expired");
300  }
301 
302  typedef std::tuple<std::time_t, const char*, const char*> time_factor;
303 
304  static const std::vector<time_factor> TIME_FACTORS{
305  // TRANSLATORS: The "timespan^$num xxxxx" strings originating from the same file
306  // as the string with this comment MUST be translated following the usual rules
307  // for WML variable interpolation -- that is, without including or translating
308  // the caret^ prefix, and leaving the $num variable specification intact, since
309  // it is technically code. The only translatable natural word to be found here
310  // is the time unit (year, month, etc.) For example, for French you would
311  // translate "timespan^$num years" as "$num ans", thus allowing the game UI to
312  // generate output such as "39 ans" after variable interpolation.
313  time_factor{ 31104000, N_n("timespan^$num year", "timespan^$num years") }, // 12 months
314  time_factor{ 2592000, N_n("timespan^$num month", "timespan^$num months") }, // 30 days
315  time_factor{ 604800, N_n("timespan^$num week", "timespan^$num weeks") },
316  time_factor{ 86400, N_n("timespan^$num day", "timespan^$num days") },
317  time_factor{ 3600, N_n("timespan^$num hour", "timespan^$num hours") },
318  time_factor{ 60, N_n("timespan^$num minute", "timespan^$num minutes") },
319  time_factor{ 1, N_n("timespan^$num second", "timespan^$num seconds") },
320  };
321 
322  std::vector<t_string> display_text;
323  string_map i18n;
324 
325  for(const auto& factor : TIME_FACTORS) {
326  const auto [ secs, fmt_singular, fmt_plural ] = factor;
327  const int amount = time / secs;
328 
329  if(amount) {
330  time -= secs * amount;
331  i18n["num"] = std::to_string(amount);
332  display_text.emplace_back(VNGETTEXT(fmt_singular, fmt_plural, amount, i18n));
333  if(!detailed) {
334  break;
335  }
336  }
337  }
338 
339  return format_conjunct_list(_("timespan^expired"), display_text);
340 }
341 
342 }
343 
344 std::string vgettext_impl(const char *domain
345  , const char *msgid
346  , const utils::string_map& symbols)
347 {
348  const std::string orig(translation::dsgettext(domain, msgid));
349  const std::string msg = utils::interpolate_variables_into_string(orig, &symbols);
350  return msg;
351 }
352 
353 std::string vngettext_impl(const char* domain,
354  const char* singular,
355  const char* plural,
356  int count,
357  const utils::string_map& symbols)
358 {
359  const std::string orig(translation::dsngettext(domain, singular, plural, count));
360  const std::string msg = utils::interpolate_variables_into_string(orig, &symbols);
361  return msg;
362 }
363 
/**
 * Approximate edit distance between two strings.
 *
 * The common prefix and common suffix are discarded first. If either remainder
 * is then empty, the length of the other remainder is returned as is. Otherwise
 * only the first 15 characters of each remainder are compared, using a
 * single-row variant of the optimal string alignment distance
 * (https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance).
 */
[[nodiscard]] std::size_t edit_distance_approx(std::string_view str_1, std::string_view str_2) noexcept
{
	// Discard the shared prefix.
	const auto diverge = std::mismatch(str_1.begin(), str_1.end(), str_2.begin(), str_2.end());
	const auto shared_prefix = static_cast<std::size_t>(diverge.first - str_1.begin());
	str_1.remove_prefix(shared_prefix);
	str_2.remove_prefix(shared_prefix);

	// Discard the shared suffix.
	while(!str_1.empty() && !str_2.empty() && str_1.back() == str_2.back()) {
		str_1.remove_suffix(1);
		str_2.remove_suffix(1);
	}

	// One side fully consumed: the rest of the other side is the distance.
	if(str_1.empty()) {
		return str_2.size();
	}
	if(str_2.empty()) {
		return str_1.size();
	}

	// Look at no more than 15 characters of each remainder.
	constexpr std::size_t max_len = 15;
	str_1 = str_1.substr(0, max_len);
	str_2 = str_2.substr(0, max_len);

	// Keep the longer string on the outer loop and the shorter one along the row.
	if(str_1.size() < str_2.size()) {
		std::swap(str_1, str_2);
	}

	// A single matrix row, initialised to 0, 1, 2, ..., 15.
	std::array<std::size_t, max_len + 1> row{};
	for(std::size_t k = 0; k != row.size(); ++k) {
		row[k] = k;
	}

	for(std::size_t r = 0; r != str_1.size(); ++r) {
		// `diag` is the previous row's value one column to the left (substitution),
		// `above` the previous row's value in this column (deletion),
		// and row[c] the current row's value to the left (insertion).
		std::size_t diag = r;
		row[0] = r + 1;

		for(std::size_t c = 0; c != str_2.size(); ++c) {
			const std::size_t above = row[c + 1];
			const bool same = str_1[r] == str_2[c];
			const bool swapped = r > 0 && c > 0 && str_1[r] == str_2[c - 1] && str_1[r - 1] == str_2[c];

			row[c + 1] = (same || swapped) ? diag : std::min({above, row[c], diag}) + 1;

			// Moving one column right, the old `above` becomes the new `diag`.
			diag = above;
		}
	}

	return row[str_2.size()];
}
Variant for storing WML attributes.
bool empty() const
Tests for an attribute that either was never set or was set to "".
A config object defines a single node in a WML file, with access to child nodes.
Definition: config.hpp:159
const std::string & str() const
Definition: tstring.hpp:190
const std::map< std::string, T > & map_
virtual config::attribute_value get_variable_const(const std::string &key) const
string_map_variable_set(const std::map< std::string, T > &map)
std::shared_ptr< config > temp_
virtual variable_access_const get_variable_access_read(const std::string &varname) const
Information on a WML variable.
void swap(config &lhs, config &rhs)
Implement non-member swap function for std::swap (calls config::swap).
Definition: config.cpp:1347
#define WRN_NG
std::string vngettext_impl(const char *domain, const char *singular, const char *plural, int count, const utils::string_map &symbols)
static lg::log_domain log_engine("engine")
static bool two_dots(char a, char b)
#define ERR_NG
std::string vgettext_impl(const char *domain, const char *msgid, const utils::string_map &symbols)
Implementation functions for the VGETTEXT and VNGETTEXT macros.
static std::string do_interpolation(const std::string &str, const variable_set &set)
std::size_t edit_distance_approx(std::string_view str_1, std::string_view str_2) noexcept
Calculate the approximate edit distance of two strings.
#define VGETTEXT(msgid,...)
Handy wrappers around interpolate_variables_into_string and gettext.
#define VNGETTEXT(msgid, msgid_plural, count,...)
std::size_t i
Definition: function.cpp:968
#define N_n(String1, String2)
Definition: gettext.hpp:102
static std::string _(const char *str)
Definition: gettext.hpp:93
Standard logging facilities (interface).
void set(CURSOR_TYPE type)
Use the default parameter to reset cursors.
Definition: cursor.cpp:176
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:434
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:464
std::string(* evaluate_formula)(const std::string &formula)
std::string interpolate_variables_into_string(const std::string &str, const string_map *const symbols)
Function which will interpolate variables, starting with '$' in the string 'str' with the equivalent ...
std::string format_timespan(std::time_t time, bool detailed)
Formats a timespan into human-readable text for player authentication functions.
t_string interpolate_variables_into_tstring(const t_string &tstr, const variable_set &variables)
Function that does the same as the above, for t_stringS.
std::string format_disjunct_list(const t_string &empty, const std::vector< t_string > &elems)
Format a disjunctive list.
std::string format_conjunct_list(const t_string &empty, const std::vector< t_string > &elems)
Format a conjunctive list.
std::map< std::string, t_string > string_map
std::string::const_iterator iterator
Definition: tokenizer.hpp:25
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
mock_char c
mock_party p
variable_info< const variable_info_implementation::vi_policy_const > variable_access_const
Read-only access.
#define a
#define b