The Battle for Wesnoth  1.19.4+dev
string_utils.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2005 - 2024
3  by Philippe Plantier <ayin@anathas.org>
4  Copyright (C) 2005 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
5  Copyright (C) 2003 by David White <dave@whitevine.net>
6  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY.
14 
15  See the COPYING file for more details.
16 */
17 
18 #pragma once
19 
20 #include "font/constants.hpp"
21 
22 #include <algorithm>
23 #include <map>
24 #include <set>
25 #include <sstream>
26 #include <string>
27 #include <string_view>
28 #include <utility>
29 #include <vector>
30 
31 class t_string;
32 
33 namespace utils {
34 
35 using string_map = std::map<std::string, t_string>;
36 
/** Keys listed here sort first, in exactly this order, when compared with res_compare. */
const std::vector<std::string> res_order = {"blade", "pierce", "impact", "fire", "cold", "arcane"};

/**
 * Ordering functor that ranks the strings found in res_order ahead of every
 * other string, and falls back to plain lexicographic order otherwise.
 */
struct res_compare {
	/** Returns whether a < b, considering res_order. */
	bool operator()(const std::string& a, const std::string& b) const {
		const auto rank_a = std::find(res_order.begin(), res_order.end(), a);
		const auto rank_b = std::find(res_order.begin(), res_order.end(), b);

		// Neither string is a known key: use the ordinary string ordering.
		if(rank_a == res_order.end() && rank_b == res_order.end()) {
			return a < b;
		}

		// At least one is known. An unknown string ranks at end(), i.e. after
		// every known key, and equal known keys yield false as required.
		return rank_a < rank_b;
	}
};
51 
52 using string_map_res = std::map<std::string, t_string, res_compare>;
53 
54 bool isnewline(const char c);
55 bool portable_isspace(const char c);
56 bool notspace(char c);
57 
/**
 * Bit flags accepted by the split functions; combine them with bitwise OR.
 *
 * The member comments use the trailing form so that Doxygen attaches each
 * description to the enumerator on the same line rather than to the next one.
 */
enum {
	REMOVE_EMPTY = 0x01, /**< Remove empty elements. */
	STRIP_SPACES = 0x02  /**< Strip leading and trailing blank spaces. */
};
62 
63 void trim(std::string_view& s);
64 
/**
 * Invokes @a f once for every piece of @a s delimited by @a sep.
 *
 * An empty input produces no calls at all. Otherwise pieces are passed
 * unmodified (they may be empty), so an input with k separators yields
 * exactly k + 1 calls.
 *
 * @param s   Text to split; the views handed to @a f point into it.
 * @param sep Separator character.
 * @param f   Callable taking a std::string_view.
 */
template<typename F>
void split_foreach_impl(std::string_view s, char sep, const F& f)
{
	if(s.empty()) {
		return;
	}

	std::size_t piece_begin = 0;
	for(std::size_t sep_pos = s.find(sep, piece_begin);
		sep_pos != std::string_view::npos;
		sep_pos = s.find(sep, piece_begin))
	{
		f(s.substr(piece_begin, sep_pos - piece_begin));
		piece_begin = sep_pos + 1;
	}

	// Whatever follows the last separator (possibly nothing) is the final piece.
	f(s.substr(piece_begin));
}
82 
83 template<typename F>
84 void split_foreach(std::string_view s, char sep, const int flags, const F& f)
85 {
86  split_foreach_impl(s, sep, [&](std::string_view item) {
87  if(flags & STRIP_SPACES) {
88  trim(item);
89  }
90  if(!(flags & REMOVE_EMPTY) || !item.empty()) {
91  f(item);
92  }
93  });
94 }
95 
96 /** Splits a (comma-)separated string into a vector of pieces. */
97 std::vector<std::string> split(std::string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
98 std::set<std::string> split_set(std::string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
99 
100 std::vector<std::string_view> split_view(std::string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
101 
102 /**
103  * This function is identical to split(), except it does not split when it otherwise would if the
104  * previous character was identical to the parameter 'quote' (i.e. it does not split quoted commas).
105  * This method was added to make it possible to quote user input, particularly so commas in user input
106  * would not cause visual problems in menus.
107  *
108  * @todo Why not change split()? That would change the method's postcondition.
109  */
110 std::vector<std::string> quoted_split(const std::string& val, char c= ',', int flags = REMOVE_EMPTY | STRIP_SPACES, char quote = '\\');
111 
112 /**
113  * Splits a string based on two separators into a map.
114  *
115  * Major: the separator between elements of the map
116  * Minor: the separator between keys and values in one element
117  *
118  * For example, the string 'a:b,c:d,e:f' would be parsed into:
119  * a => b
120  * c => d
121  * e => f
122  */
123 std::map<std::string, std::string> map_split(
124  const std::string& val,
125  char major = ',',
126  char minor = ':',
127  int flags = REMOVE_EMPTY | STRIP_SPACES,
128  const std::string& default_value = "");
129 
130 /**
131  * Splits a string based on a separator, except when the text appears within the specified parentheses.
132  *
133  * If the separator is "0" (default), it splits a string into an odd number of parts:
134  * - The part before the first '(',
135  * - the part between the first '('
136  * - and the matching right ')', etc ...
137  * and the remainder of the string.
138  *
139  * Note that one can use the same character for both the left and right parenthesis, which usually makes
140  * the most sense for this function.
141  *
142  * Note that this will find the first matching char in the left string and match against the corresponding
143  * char in the right string. A correctly processed string should return a vector with an odd number of
144  * elements. Empty elements are never removed as they are placeholders, hence REMOVE_EMPTY only works for
145  * the separator split.
146  *
147  * INPUT: ("a(b)c{d}e(f{g})h", 0, "({", ")}")
148  * RETURNS: {"a", "b", "c", "d", "e", "f{g}", "h"}
149  */
150 std::vector< std::string > parenthetical_split(
151  std::string_view val,
152  const char separator = 0,
153  std::string_view left = "(",
154  std::string_view right = ")",
155  const int flags = REMOVE_EMPTY | STRIP_SPACES);
156 
157 /**
158  * Similar to parenthetical_split, but also expands embedded square brackets.
159  *
160  * Notes:
161  * - The Separator must be specified and number of entries in each square bracket must match in each section.
162  * - Leading zeros are preserved if specified between square brackets.
163  * - An asterisk as in [a*n] indicates to expand 'a' n times
164  *
165  * This is useful for expanding animation WML code.
166  *
167  * Examples:
168  *
169  * INPUT: ("a[1~3](1,[5,6,7]),b[8,9]", ",")
170  * RETURNS: {"a1(1,5)", "a2(1,6)", "a3(1,7)", "b8", "b9"}
171  *
172  * INPUT: ("abc[07~10]")
173  * RETURNS: {"abc07", "abc08", "abc09", "abc10"}
174  *
175  * INPUT: ("a[1,2]b[3~4]:c[5,6]")
176  * RETURNS: {"a1b3:c5", "a2b4:c6"}
177  *
178  * INPUT: ("abc[3~1].png")
179  * RETURNS: {"abc3.png", "abc2.png", "abc1.png"}
180  *
181  * INPUT: ("abc[3,1].png")
182  * RETURNS: {"abc3.png", "abc1.png"}
183  *
184  * INPUT: ("abc[de,xyz]")
185  * RETURNS: {"abcde", "abcxyz"}
186  *
187  * INPUT: ("abc[1*3]")
188  * RETURNS: {"abc1", "abc1", "abc1"}
189  */
190 std::vector<std::string> square_parenthetical_split(
191  const std::string& val,
192  const char separator = ',',
193  const std::string& left = "([",
194  const std::string& right = ")]",
195  const int flags = REMOVE_EMPTY | STRIP_SPACES);
196 
/**
 * Concatenates the elements of a container into one string, placing a
 * delimiter between consecutive elements.
 *
 * Elements are formatted with operator<<. No delimiter is written before the
 * first or after the last element, and an empty container gives an empty string.
 *
 * @param v A container with elements.
 * @param s List delimiter.
 */
template <typename T>
std::string join(const T& v, const std::string& s = ",")
{
	std::ostringstream joined;
	bool is_first = true;

	for(const auto& element : v) {
		if(!is_first) {
			joined << s;
		}
		is_first = false;
		joined << element;
	}

	return joined.str();
}
217 
/**
 * Concatenates the entries of a map-like container into one string.
 *
 * Each entry is written as key, @a minor, value; consecutive entries are
 * separated by @a major. Keys and values are formatted with operator<<.
 *
 * @param v     A container whose elements expose @c first and @c second.
 * @param major Delimiter between entries.
 * @param minor Delimiter between a key and its value.
 */
template <typename T>
std::string join_map(
	const T& v,
	const std::string& major = ",",
	const std::string& minor = ":")
{
	std::ostringstream joined;
	bool is_first = true;

	for(const auto& entry : v) {
		if(!is_first) {
			joined << major;
		}
		is_first = false;
		joined << entry.first << minor << entry.second;
	}

	return joined.str();
}
235 
236 /**
237  * Generates a new string containing a bullet list.
238  *
239  * List items are preceded by the indentation blanks, a bullet string and
240  * another blank; all but the last item are followed by a newline.
241  *
242  * @param v A container with elements.
243  * @param indent Number of indentation blanks.
244  * @param bullet The leading bullet string.
245  */
246 template<typename T>
247 std::string bullet_list(const T& v, std::size_t indent = 4, const std::string& bullet = font::unicode_bullet)
248 {
249  std::ostringstream str;
250 
251  for(typename T::const_iterator i = v.begin(); i != v.end(); ++i) {
252  if(i != v.begin()) {
253  str << '\n';
254  }
255 
256  str << std::string(indent, ' ') << bullet << ' ' << *i;
257  }
258 
259  return str.str();
260 }
261 
262 /**
263  * Indent a block of text.
264  *
265  * Only lines with content are changed; empty lines are left intact. However,
266  * if @a string is an empty string itself, the indentation unit with the
267  * specified @a indent_size will be returned instead.
268  *
269  * @param string Text to indent.
270  * @param indent_size Number of indentation units to use.
271  */
272 std::string indent(const std::string& string, std::size_t indent_size = 4);
273 
274 /**
275  * Recognises the following patterns, and returns a {min, max} pair.
276  *
277  * * "1" returns {1, 1}
278  * * "1-3" returns {1, 3}
279  * * "1-infinity" returns {1, maximum int}
280  * * "-1" returns {-1, -1}
281  * * "-3--1" returns {-3, -1}
282  *
283  * Note that:
284  *
285  * * "3-1" returns {3, 3} and does not log an error
286  * * "-1--3" returns {-1, -1} and does not log an error
287  * * Although "-infinity--1", "2-infinity" and "-infinity-infinity" are all supported,
288  * * ranges that can't match a reasonable number, e.g. "-infinity" or "infinity..infinity", may be treated as errors.
289  */
290 std::pair<int, int> parse_range(const std::string& str);
291 
292 /**
293  * Handles a comma-separated list of inputs to parse_range, in a context that does not expect
294  * negative values. Will return an empty list if any of the ranges have a minimum that's below
295  * zero.
296  */
297 std::vector<std::pair<int, int>> parse_ranges_unsigned(const std::string& str);
298 
299 /**
300  * Handles a comma-separated list of inputs to parse_range.
301  */
302 std::vector<std::pair<int, int>> parse_ranges_int(const std::string& str);
303 
304 /**
305  * Recognises similar patterns to parse_range, and returns a {min, max} pair.
306  *
307  * For this function, "infinity" results in std::numeric_limits<double>::infinity.
308  */
309 std::pair<double, double> parse_range_real(const std::string& str);
310 
311 std::vector<std::pair<double, double>> parse_ranges_real(const std::string& str);
312 
313 int apply_modifier(const int number, const std::string &amount, const int minimum = 0);
314 
315 /** Add a "+" or replace the "-" par Unicode minus */
316 inline std::string print_modifier(const std::string &mod)
317 {
318  return mod[0] == '-' ? (font::unicode_minus + std::string(mod.begin() + 1, mod.end())) : ("+" + mod);
319 }
320 
321 /** Prepends a configurable set of characters with a backslash */
322 std::string escape(const std::string &str, const char *special_chars);
323 
324 /**
325  * Prepend all special characters with a backslash.
326  *
327  * Special characters are:
328  * #@{}+-,\*=
329  */
330 inline std::string escape(const std::string &str)
331 {
332  return escape(str, "#@{}+-,\\*=");
333 }
334 
335 /** Remove all escape characters (backslash) */
336 std::string unescape(const std::string &str);
337 
338 /** Percent-escape characters in a UTF-8 string intended to be part of a URL. */
339 std::string urlencode(const std::string &str);
340 
/**
 * Returns a copy of 'str' wrapped in double quotes.
 *
 * The content is copied verbatim; quotes already inside 'str' are not escaped.
 */
inline std::string quote(const std::string &str)
{
	std::string quoted;
	quoted.reserve(str.size() + 2);
	quoted += '"';
	quoted += str;
	quoted += '"';
	return quoted;
}
346 
347 /** Convert no, false, off, 0, 0.0 to false, empty to def, and others to true */
348 bool string_bool(const std::string& str,bool def=false);
349 
350 /** Converts a bool value to 'true' or 'false' */
351 std::string bool_string(const bool value);
352 
353 /** Convert into a signed value (using the Unicode "−" and +0 convention). */
354 std::string signed_value(int val);
355 
356 /** Sign with Unicode "−" if negative */
357 std::string half_signed_value(int val);
358 
359 /** Convert into a percentage (using the Unicode "−" and +0% convention */
360 inline std::string signed_percent(int val) {return signed_value(val) + "%";}
361 
362 /**
363  * Convert into a string with an SI-postfix.
364  *
365  * If the unit is to be translatable,
366  * a t_string should be passed as the third argument.
367  * _("unit_byte^B") is suggested as standard.
368  *
369  * There are no default values because they would not be translatable.
370  */
371 std::string si_string(double input, bool base2, const std::string& unit);
372 
373 /**
374  * Try to complete the last word of 'text' with the 'wordlist'.
375  *
376  * @param[in, out] text The parameter's usage is:
377  * - Input: Text where we try to complete the last word
378  * of.
379  * - Output: Text with completed last word.
380  * @param[in, out] wordlist
381  * The parameter's usage is:
382  * - Input: A vector of strings to complete against.
383  * - Output: A vector of strings that matched 'text'.
384  *
385  * @retval true iff text is just one word (no spaces)
386  */
387 bool word_completion(std::string& text, std::vector<std::string>& wordlist);
388 
389 /** Check if a message contains a word. */
390 bool word_match(const std::string& message, const std::string& word);
391 
392 /**
393  * Match using '*' as any number of characters (including none),
394  * '+' as one or more characters, and '?' as any one character.
395  */
396 bool wildcard_string_match(const std::string& str, const std::string& match);
397 
398 /**
399  * Converts '*' to '%' and optionally escapes '_'.
400  *
401  * @param str The original string.
402  * @param underscores Whether to escape underscore characters as well.
403  */
404 void to_sql_wildcards(std::string& str, bool underscores = true);
405 
406 /**
407  * Check if the username contains only valid characters.
408  *
409  * (all alpha-numeric characters plus underscore and hyphen)
410  */
411 bool isvalid_username(const std::string& login);
412 
413 /**
414  * Check if the username pattern contains only valid characters.
415  *
416  * (all alpha-numeric characters plus underscore, hyphen,
417  * question mark and asterisk)
418  */
419 bool isvalid_wildcard(const std::string& login);
420 
421 /**
422  * Truncates a string to a given utf-8 character count and then appends an ellipsis.
423  */
424 void ellipsis_truncate(std::string& str, const std::size_t size);
425 
426 } // end namespace utils
This class represents a single unit of a specific type.
Definition: unit.hpp:133
std::size_t i
Definition: function.cpp:1023
const std::string unicode_bullet
Definition: constants.cpp:47
const std::string unicode_minus
Definition: constants.cpp:42
std::pair< std::string, unsigned > item
Definition: help_impl.hpp:387
std::size_t size(const std::string &str)
Length in characters of a UTF-8 string.
Definition: unicode.cpp:85
@ STRIP_SPACES
REMOVE_EMPTY: remove empty elements.
@ REMOVE_EMPTY
std::vector< std::string_view > split_view(std::string_view s, const char sep, const int flags)
std::string si_string(double input, bool base2, const std::string &unit)
Convert into a string with an SI-postfix.
void trim(std::string_view &s)
std::string indent(const std::string &string, std::size_t indent_size)
Indent a block of text.
std::map< std::string, std::string > map_split(const std::string &val, char major, char minor, int flags, const std::string &default_value)
Splits a string based on two separators into a map.
bool isvalid_wildcard(const std::string &username)
Check if the username pattern contains only valid characters.
std::set< std::string > split_set(std::string_view s, char sep, const int flags)
std::vector< std::string > quoted_split(const std::string &val, char c, int flags, char quote)
This function is identical to split(), except it does not split when it otherwise would if the previo...
std::string join_map(const T &v, const std::string &major=",", const std::string &minor=":")
std::vector< std::pair< int, int > > parse_ranges_int(const std::string &str)
Handles a comma-separated list of inputs to parse_range.
std::string bullet_list(const T &v, std::size_t indent=4, const std::string &bullet=font::unicode_bullet)
Generates a new string containing a bullet list.
void split_foreach_impl(std::string_view s, char sep, const F &f)
std::pair< int, int > parse_range(const std::string &str)
Recognises the following patterns, and returns a {min, max} pair.
std::vector< std::string > parenthetical_split(std::string_view val, const char separator, std::string_view left, std::string_view right, const int flags)
Splits a string based either on a separator, except then the text appears within specified parenthesi...
const std::vector< std::string > res_order
std::string half_signed_value(int val)
Sign with Unicode "−" if negative.
std::string bool_string(const bool value)
Converts a bool value to 'true' or 'false'.
std::string urlencode(const std::string &str)
Percent-escape characters in a UTF-8 string intended to be part of a URL.
std::map< std::string, t_string, res_compare > string_map_res
std::string quote(const std::string &str)
Surround the string 'str' with double quotes.
void ellipsis_truncate(std::string &str, const std::size_t size)
Truncates a string to a given utf-8 character count and then appends an ellipsis.
std::vector< std::pair< int, int > > parse_ranges_unsigned(const std::string &str)
Handles a comma-separated list of inputs to parse_range, in a context that does not expect negative v...
void to_sql_wildcards(std::string &str, bool underscores)
Converts '*' to '%' and optionally escapes '_'.
void split_foreach(std::string_view s, char sep, const int flags, const F &f)
bool wildcard_string_match(const std::string &str, const std::string &match)
Match using '*' as any number of characters (including none), '+' as one or more characters,...
bool string_bool(const std::string &str, bool def)
Convert no, false, off, 0, 0.0 to false, empty to def, and others to true.
bool isvalid_username(const std::string &username)
Check if the username contains only valid characters.
std::string unescape(const std::string &str)
Remove all escape characters (backslash)
bool portable_isspace(const char c)
int apply_modifier(const int number, const std::string &amount, const int minimum)
std::vector< std::string > square_parenthetical_split(const std::string &val, const char separator, const std::string &left, const std::string &right, const int flags)
Similar to parenthetical_split, but also expands embedded square brackets.
bool isnewline(const char c)
bool notspace(const char c)
std::string join(const T &v, const std::string &s=",")
Generates a new string joining container items in a list.
std::string escape(const std::string &str, const char *special_chars)
Prepends a configurable set of characters with a backslash.
std::vector< std::pair< double, double > > parse_ranges_real(const std::string &str)
bool word_match(const std::string &message, const std::string &word)
Check if a message contains a word.
std::string signed_value(int val)
Convert into a signed value (using the Unicode "−" and +0 convention.
std::map< std::string, t_string > string_map
std::string signed_percent(int val)
Convert into a percentage (using the Unicode "−" and +0% convention.
std::vector< std::string > split(const config_attribute_value &val)
bool word_completion(std::string &text, std::vector< std::string > &wordlist)
Try to complete the last word of 'text' with the 'wordlist'.
std::string print_modifier(const std::string &mod)
Add a "+" or replace the "-" par Unicode minus.
std::pair< double, double > parse_range_real(const std::string &str)
Recognises similar patterns to parse_range, and returns a {min, max} pair.
bool operator()(const std::string &a, const std::string &b) const
Returns whether a < b, considering res_order.
mock_char c
static map_location::DIRECTION s
#define f
#define b