The Battle for Wesnoth  1.19.15+dev
string_utils.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2005 - 2025
3  by Philippe Plantier <ayin@anathas.org>
4  Copyright (C) 2005 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
5  Copyright (C) 2003 by David White <dave@whitevine.net>
6  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY.
14 
15  See the COPYING file for more details.
16 */
17 
18 #pragma once
19 
20 #include "font/constants.hpp"
21 
22 #include <algorithm>
23 #include <map>
24 #include <set>
25 #include <sstream>
26 #include <string>
27 #include <string_view>
28 #include <utility>
29 #include <vector>
30 
31 class t_string;
32 
33 namespace utils {
34 
35 using string_map = std::map<std::string, t_string>;
36 
/**
 * Fixed priority order of the names that res_compare sorts ahead of
 * everything else.
 *
 * Declared @c inline so that all translation units including this header
 * share one object. A plain namespace-scope @c const object has internal
 * linkage, which would give every translation unit its own copy and make the
 * inline res_compare::operator() refer to a different entity in each of them.
 */
inline const std::vector<std::string> res_order = {"blade", "pierce", "impact", "fire", "cold", "arcane"};

/**
 * Comparator for ordered containers keyed by strings.
 *
 * Keys listed in res_order come first, in the order they appear there; all
 * remaining keys follow in ordinary lexicographical order.
 */
struct res_compare {
	/** Returns whether a < b, considering res_order. */
	bool operator()(const std::string& a, const std::string& b) const {
		for(const std::string& r : res_order) {
			if(b == r) {
				// b is reached no later than a (possibly a == b), so a < b is false.
				return false;
			}
			if(a == r) {
				// a is listed and b was not found earlier in the list, so a goes first.
				return true;
			}
		}

		// Fallback only reached when neither a nor b occur in res_order.
		return a < b;
	}
};
51 
52 using string_map_res = std::map<std::string, t_string, res_compare>;
53 
54 bool isnewline(const char c);
55 bool portable_isspace(const char c);
56 bool notspace(char c);
57 
/** Bit flags that can be OR-ed together and passed as the @a flags argument of the split functions. */
enum {
	/** REMOVE_EMPTY: remove empty elements. */
	REMOVE_EMPTY = 1 << 0,
	/** STRIP_SPACES: strips leading and trailing blank spaces. */
	STRIP_SPACES = 1 << 1
};
62 
63 void trim(std::string_view& s);
64 
/**
 * Invokes @a f once for every piece of @a s delimited by @a sep.
 *
 * An empty input produces no calls at all. Otherwise the callback is invoked
 * once more than the number of separators found, so adjacent, leading or
 * trailing separators yield empty pieces.
 *
 * @param s   Text to split; the pieces passed to @a f are views into it.
 * @param sep Separator character.
 * @param f   Callable accepting a std::string_view.
 */
template<typename F>
void split_foreach_impl(std::string_view s, char sep, const F& f)
{
	if(s.empty()) {
		return;
	}

	std::size_t piece_start = 0;
	for(std::size_t sep_pos = s.find(sep); sep_pos != std::string_view::npos; sep_pos = s.find(sep, piece_start)) {
		f(s.substr(piece_start, sep_pos - piece_start));
		piece_start = sep_pos + 1;
	}

	// Whatever follows the last separator (possibly nothing) is the final piece.
	f(s.substr(piece_start));
}
82 
83 template<typename F>
84 void split_foreach(std::string_view s, char sep, const int flags, const F& f)
85 {
86  split_foreach_impl(s, sep, [&](std::string_view item) {
87  if(flags & STRIP_SPACES) {
88  trim(item);
89  }
90  if(!(flags & REMOVE_EMPTY) || !item.empty()) {
91  f(item);
92  }
93  });
94 }
95 
96 /** Splits a (comma-)separated string into a vector of pieces. */
97 std::vector<std::string> split(std::string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
98 std::set<std::string> split_set(std::string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
99 
100 std::vector<std::string_view> split_view(std::string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
101 
102 /** You cannot take a non-owning view to a temporary string! */
103 std::vector<std::string_view> split_view(std::string&&) = delete;
104 
105 /**
106  * This function is identical to split(), except it does not split when it otherwise would if the
107  * previous character was identical to the parameter 'quote' (i.e. it does not split quoted commas).
108  * This method was added to make it possible to quote user input, particularly so commas in user input
109  * would not cause visual problems in menus.
110  *
111  * @todo Why not change split()? That would change the method's postcondition.
112  */
113 std::vector<std::string> quoted_split(const std::string& val, char c= ',', int flags = REMOVE_EMPTY | STRIP_SPACES, char quote = '\\');
114 
115 /**
116  * Splits a string based on two separators into a map.
117  *
118  * Major: the separator between elements of the map
119  * Minor: the separator between keys and values in one element
120  *
121  * For example, the string 'a:b,c:d,e:f' would be parsed into:
122  * a => b
123  * c => d
124  * e => f
125  */
126 std::map<std::string, std::string> map_split(
127  const std::string& val,
128  char major = ',',
129  char minor = ':',
130  int flags = REMOVE_EMPTY | STRIP_SPACES,
131  const std::string& default_value = "");
132 
133 /**
134  * Splits a string based on a separator, except when the text appears within the specified parentheses.
135  *
136  * If the separator is 0 (the default), it splits a string into an odd number of parts:
137  * - The part before the first '(',
138  * - the part between the first '('
139  * - and the matching right ')', etc ...
140  * and the remainder of the string.
141  *
142  * Note that one can use the same character for both the left and right parenthesis, which usually makes
143  * the most sense for this function.
144  *
145  * Note that this will find the first matching char in the left string and match against the corresponding
146  * char in the right string. A correctly processed string should return a vector with an odd number of
147  * elements. Empty elements are never removed as they are placeholders, hence REMOVE_EMPTY only works for
148  * the separator split.
149  *
150  * INPUT: ("a(b)c{d}e(f{g})h", 0, "({", ")}")
151  * RETURNS: {"a", "b", "c", "d", "e", "f{g}", "h"}
152  */
153 std::vector< std::string > parenthetical_split(
154  std::string_view val,
155  const char separator = 0,
156  std::string_view left = "(",
157  std::string_view right = ")",
158  const int flags = REMOVE_EMPTY | STRIP_SPACES);
159 
160 /**
161  * Similar to parenthetical_split, but also expands embedded square brackets.
162  *
163  * Notes:
164  * - The Separator must be specified and number of entries in each square bracket must match in each section.
165  * - Leading zeros are preserved if specified between square brackets.
166  * - An asterisk as in [a*n] indicates to expand 'a' n times
167  *
168  * This is useful for expanding animation WML code.
169  *
170  * Examples:
171  *
172  * INPUT: ("a[1~3](1,[5,6,7]),b[8,9]", ",")
173  * RETURNS: {"a1(1,5)", "a2(1,6)", "a3(1,7)", "b8", "b9"}
174  *
175  * INPUT: ("abc[07~10]")
176  * RETURNS: {"abc07", "abc08", "abc09", "abc10"}
177  *
178  * INPUT: ("a[1,2]b[3~4]:c[5,6]")
179  * RETURNS: {"a1b3:c5", "a2b4:c6"}
180  *
181  * INPUT: ("abc[3~1].png")
182  * RETURNS: {"abc3.png", "abc2.png", "abc1.png"}
183  *
184  * INPUT: ("abc[3,1].png")
185  * RETURNS: {"abc3.png", "abc1.png"}
186  *
187  * INPUT: ("abc[de,xyz]")
188  * RETURNS: {"abcde", "abcxyz"}
189  *
190  * INPUT: ("abc[1*3]")
191  * RETURNS: {"abc1", "abc1", "abc1"}
192  */
193 std::vector<std::string> square_parenthetical_split(
194  const std::string& val,
195  const char separator = ',',
196  const std::string& left = "([",
197  const std::string& right = ")]",
198  const int flags = REMOVE_EMPTY | STRIP_SPACES);
199 
/**
 * Concatenates the elements of a container into a single string.
 *
 * Every element is written with operator<<, and the delimiter is placed
 * between consecutive elements only (never before the first or after the
 * last one).
 *
 * @param v A container with elements.
 * @param s List delimiter.
 *
 * @returns The joined text; empty if @a v has no elements.
 */
template <typename T>
std::string join(const T& v, const std::string& s = ",")
{
	std::stringstream out;
	bool is_first = true;

	for(const auto& element : v) {
		if(!is_first) {
			out << s;
		}
		out << element;
		is_first = false;
	}

	return out.str();
}
220 
/**
 * Concatenates the entries of a map-like container into a single string.
 *
 * Each entry is written as its @c first member, then @a minor, then its
 * @c second member; @a major is placed between consecutive entries only.
 *
 * @param v     A container whose elements expose @c first and @c second.
 * @param major Delimiter between entries.
 * @param minor Delimiter between the two halves of one entry.
 *
 * @returns The joined text; empty if @a v has no elements.
 */
template <typename T>
std::string join_map(
	const T& v,
	const std::string& major = ",",
	const std::string& minor = ":")
{
	std::stringstream out;
	bool is_first = true;

	for(const auto& entry : v) {
		if(!is_first) {
			out << major;
		}
		out << entry.first << minor << entry.second;
		is_first = false;
	}

	return out.str();
}
238 
239 /**
240  * Generates a new string containing a bullet list.
241  *
242  * List items are preceded by the indentation blanks, a bullet string and
243  * another blank; all but the last item are followed by a newline.
244  *
245  * @param v A container with elements.
246  * @param indent Number of indentation blanks.
247  * @param bullet The leading bullet string.
248  */
249 template<typename T>
250 std::string bullet_list(const T& v, std::size_t indent = 4, const std::string& bullet = font::unicode_bullet)
251 {
252  std::ostringstream str;
253 
254  for(typename T::const_iterator i = v.begin(); i != v.end(); ++i) {
255  if(i != v.begin()) {
256  str << '\n';
257  }
258 
259  str << std::string(indent, ' ') << bullet << ' ' << *i;
260  }
261 
262  return str.str();
263 }
264 
265 /**
266  * Indent a block of text.
267  *
268  * Only lines with content are changed; empty lines are left intact. However,
269  * if @a string is an empty string itself, the indentation unit with the
270  * specified @a indent_size will be returned instead.
271  *
272  * @param string Text to indent.
273  * @param indent_size Number of indentation units to use.
274  */
275 std::string indent(const std::string& string, std::size_t indent_size = 4);
276 
277 /**
278  * Recognises the following patterns, and returns a {min, max} pair.
279  *
280  * * "1" returns {1, 1}
281  * * "1-3" returns {1, 3}
282  * * "1-infinity" returns {1, maximum int}
283  * * "-1" returns {-1, -1}
284  * * "-3--1" returns {-3, -1}
285  *
286  * Note that:
287  *
288  * * "3-1" returns {3, 3} and does not log an error
289  * * "-1--3" returns {-1, -1} and does not log an error
290  * * Although "-infinity--1", "2-infinity" and "-infinity-infinity" are all supported,
291  *   ranges that can't match a reasonable number, e.g. "-infinity" or "infinity..infinity", may be treated as errors.
292  */
293 std::pair<int, int> parse_range(std::string_view str);
294 
295 /**
296  * Handles a comma-separated list of inputs to parse_range, in a context that does not expect
297  * negative values. Will return an empty list if any of the ranges have a minimum that's below
298  * zero.
299  */
300 std::vector<std::pair<int, int>> parse_ranges_unsigned(const std::string& str);
301 
302 /**
303  * Handles a comma-separated list of inputs to parse_range.
304  */
305 std::vector<std::pair<int, int>> parse_ranges_int(const std::string& str);
306 
307 /**
308  * Recognises similar patterns to parse_range, and returns a {min, max} pair.
309  *
310  * For this function, "infinity" results in std::numeric_limits<double>::infinity.
311  */
312 std::pair<double, double> parse_range_real(std::string_view str);
313 
314 std::vector<std::pair<double, double>> parse_ranges_real(const std::string& str);
315 
316 int apply_modifier(const int number, const std::string &amount, const int minimum = 0);
317 
318 /** Add a "+" or replace the "-" par Unicode minus */
319 inline std::string print_modifier(const std::string &mod)
320 {
321  return mod[0] == '-' ? (font::unicode_minus + std::string(mod.begin() + 1, mod.end())) : ("+" + mod);
322 }
323 
/**
 * Format @a str as a WML value.
 *
 * Every double-quote character is doubled; all other characters are copied
 * through unchanged.
 */
inline std::string wml_escape_string(std::string_view str)
{
	std::string escaped;
	escaped.reserve(str.size());

	for(const char ch : str) {
		escaped.push_back(ch);
		if(ch == '"') {
			// A quote is escaped by writing it twice.
			escaped.push_back(ch);
		}
	}

	return escaped;
}
335 
/** Prepends a configurable set of characters with a backslash */
std::string escape(std::string_view str, const char *special_chars);

/**
 * Prepend all special characters with a backslash.
 *
 * Convenience overload that forwards to escape(str, special_chars) with the
 * default set of special characters:
 * #@{}+-,\*=
 */
inline std::string escape(std::string_view str)
{
	constexpr const char* default_special_chars = "#@{}+-,\\*=";
	return escape(str, default_special_chars);
}
349 
350 /** Remove all escape characters (backslash) */
351 std::string unescape(std::string_view str);
352 
353 /** Percent-escape characters in a UTF-8 string intended to be part of a URL. */
354 std::string urlencode(std::string_view str);
355 
356 /** Convert no, false, off, 0, 0.0 to false, empty to def, and others to true */
357 bool string_bool(const std::string& str,bool def=false);
358 
359 /** Converts a bool value to 'true' or 'false' */
360 std::string bool_string(const bool value);
361 
362 /** Convert into a signed value (using the Unicode "−" and +0 convention) */
363 std::string signed_value(int val);
364 
365 /** Sign with Unicode "−" if negative */
366 std::string half_signed_value(int val);
367 
368 /** Convert into a percentage (using the Unicode "−" and +0% convention */
369 inline std::string signed_percent(int val) {return signed_value(val) + "%";}
370 
371 /**
372  * Convert into a string with an SI-postfix.
373  *
374  * If the unit is to be translatable,
375  * a t_string should be passed as the third argument.
376  * _("unit_byte^B") is suggested as standard.
377  *
378  * There are no default values because they would not be translatable.
379  */
380 std::string si_string(double input, bool base2, const std::string& unit);
381 
382 /**
383  * Try to complete the last word of 'text' with the 'wordlist'.
384  *
385  * @param[in, out] text The parameter's usage is:
386  * - Input: Text where we try to complete the last word
387  * of.
388  * - Output: Text with completed last word.
389  * @param[in, out] wordlist
390  * The parameter's usage is:
391  * - Input: A vector of strings to complete against.
392  * - Output: A vector of strings that matched 'text'.
393  *
394  * @retval true iff text is just one word (no spaces)
395  */
396 bool word_completion(std::string& text, std::vector<std::string>& wordlist);
397 
398 /** Check if a message contains a word. */
399 bool word_match(const std::string& message, const std::string& word);
400 
401 /**
402  * @brief Performs pattern matching with wildcards.
403  *
404  * @param str Any byte-string.
405  * @param pat A string of characters with the following interpretation:
406  * - @c '*' represents zero or more characters.
407  * - @c '+' represents one or more characters.
408  * - @c '?' represents exactly one character.
409  * - All other characters are interpreted literally.
410  *
411  * @returns @c true if @p str matches @p pat
412  */
413 [[nodiscard]] bool wildcard_string_match(std::string_view str, std::string_view pat) noexcept;
414 
415 /**
416  * Converts '*' to '%' and optionally escapes '_'.
417  *
418  * @param str The original string.
419  * @param underscores Whether to escape underscore characters as well.
420  */
421 void to_sql_wildcards(std::string& str, bool underscores = true);
422 
423 /**
424  * Check if the username contains only valid characters.
425  *
426  * (all alpha-numeric characters plus underscore and hyphen)
427  */
428 bool isvalid_username(const std::string& login);
429 
430 /**
431  * Check if the username pattern contains only valid characters.
432  *
433  * (all alpha-numeric characters plus underscore, hyphen,
434  * question mark and asterisk)
435  */
436 bool isvalid_wildcard(const std::string& login);
437 
438 /**
439  * Truncates a string to a given utf-8 character count and then appends an ellipsis.
440  */
441 void ellipsis_truncate(std::string& str, const std::size_t size);
442 
443 } // end namespace utils
This class represents a single unit of a specific type.
Definition: unit.hpp:132
std::size_t i
Definition: function.cpp:1032
const std::string unicode_bullet
Definition: constants.cpp:47
const std::string unicode_minus
Definition: constants.cpp:42
std::size_t size(std::string_view str)
Length in characters of a UTF-8 string.
Definition: unicode.cpp:81
@ STRIP_SPACES
REMOVE_EMPTY: remove empty elements.
@ REMOVE_EMPTY
std::vector< std::string_view > split_view(std::string_view s, const char sep, const int flags)
std::string si_string(double input, bool base2, const std::string &unit)
Convert into a string with an SI-postfix.
void trim(std::string_view &s)
std::string indent(const std::string &string, std::size_t indent_size)
Indent a block of text.
std::map< std::string, std::string > map_split(const std::string &val, char major, char minor, int flags, const std::string &default_value)
Splits a string based on two separators into a map.
bool isvalid_wildcard(const std::string &username)
Check if the username pattern contains only valid characters.
std::set< std::string > split_set(std::string_view s, char sep, const int flags)
std::vector< std::string > quoted_split(const std::string &val, char c, int flags, char quote)
This function is identical to split(), except it does not split when it otherwise would if the previo...
std::string join_map(const T &v, const std::string &major=",", const std::string &minor=":")
std::vector< std::pair< int, int > > parse_ranges_int(const std::string &str)
Handles a comma-separated list of inputs to parse_range.
std::string bullet_list(const T &v, std::size_t indent=4, const std::string &bullet=font::unicode_bullet)
Generates a new string containing a bullet list.
void split_foreach_impl(std::string_view s, char sep, const F &f)
std::vector< std::string > parenthetical_split(std::string_view val, const char separator, std::string_view left, std::string_view right, const int flags)
Splits a string based either on a separator, except then the text appears within specified parenthesi...
const std::vector< std::string > res_order
bool wildcard_string_match(std::string_view str, std::string_view pat) noexcept
Performs pattern matching with wildcards.
std::string half_signed_value(int val)
Sign with Unicode "−" if negative.
std::string bool_string(const bool value)
Converts a bool value to 'true' or 'false'.
std::map< std::string, t_string, res_compare > string_map_res
void ellipsis_truncate(std::string &str, const std::size_t size)
Truncates a string to a given utf-8 character count and then appends an ellipsis.
std::vector< std::pair< int, int > > parse_ranges_unsigned(const std::string &str)
Handles a comma-separated list of inputs to parse_range, in a context that does not expect negative v...
std::string urlencode(std::string_view str)
Percent-escape characters in a UTF-8 string intended to be part of a URL.
void to_sql_wildcards(std::string &str, bool underscores)
Converts '*' to '%' and optionally escapes '_'.
void split_foreach(std::string_view s, char sep, const int flags, const F &f)
bool string_bool(const std::string &str, bool def)
Convert no, false, off, 0, 0.0 to false, empty to def, and others to true.
bool isvalid_username(const std::string &username)
Check if the username contains only valid characters.
bool portable_isspace(const char c)
int apply_modifier(const int number, const std::string &amount, const int minimum)
std::vector< std::string > square_parenthetical_split(const std::string &val, const char separator, const std::string &left, const std::string &right, const int flags)
Similar to parenthetical_split, but also expands embedded square brackets.
std::pair< int, int > parse_range(std::string_view str)
Recognises the following patterns, and returns a {min, max} pair.
bool isnewline(const char c)
bool notspace(const char c)
std::string join(const T &v, const std::string &s=",")
Generates a new string joining container items in a list.
std::string unescape(std::string_view str)
Remove all escape characters (backslash)
std::vector< std::pair< double, double > > parse_ranges_real(const std::string &str)
bool word_match(const std::string &message, const std::string &word)
Check if a message contains a word.
std::string escape(std::string_view str, const char *special_chars)
Prepends a configurable set of characters with a backslash.
std::string signed_value(int val)
Convert into a signed value (using the Unicode "−" and +0 convention.
std::map< std::string, t_string > string_map
std::string signed_percent(int val)
Convert into a percentage (using the Unicode "−" and +0% convention.
std::pair< double, double > parse_range_real(std::string_view str)
Recognises similar patterns to parse_range, and returns a {min, max} pair.
std::vector< std::string > split(const config_attribute_value &val)
bool word_completion(std::string &text, std::vector< std::string > &wordlist)
Try to complete the last word of 'text' with the 'wordlist'.
std::string wml_escape_string(std::string_view str)
Format str as a WML value
std::string print_modifier(const std::string &mod)
Add a "+" or replace the "-" par Unicode minus.
bool operator()(const std::string &a, const std::string &b) const
Returns whether a < b, considering res_order.
mock_char c
static map_location::direction s
#define f
#define b