The Battle for Wesnoth  1.17.0-dev
string_utils.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2005 - 2021
3  by Philippe Plantier <ayin@anathas.org>
4  Copyright (C) 2005 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
5  Copyright (C) 2003 by David White <dave@whitevine.net>
6  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY.
14 
15  See the COPYING file for more details.
16 */
17 
18 #pragma once
19 
20 #include "font/constants.hpp"
21 
22 #include <algorithm>
23 #include <map>
24 #include <ostream>
25 #include <set>
26 #include <sstream>
27 #include <string>
28 #include <string_view>
29 #include <utility>
30 #include <vector>
31 
32 class t_string;
33 
34 namespace utils {
35 
36 using string_map = std::map<std::string, t_string>;
37 
38 bool isnewline(const char c);
39 bool portable_isspace(const char c);
40 bool notspace(char c);
41 
/** Bit flags selecting how the split functions post-process each piece. */
enum {
	REMOVE_EMPTY = 1 << 0, /**< Remove empty elements. */
	STRIP_SPACES = 1 << 1  /**< Strip leading and trailing blank spaces. */
};
46 
47 void trim(std::string_view& s);
48 
/**
 * Calls @a f once for every piece of @a s delimited by @a sep.
 *
 * Pieces are handed over in order as views into @a s. A piece may be empty,
 * for example between two adjacent separators or after a trailing separator.
 * If @a s itself is empty, @a f is not called at all.
 *
 * @param s   Text to split.
 * @param sep Separator character.
 * @param f   Callable invoked as f(std::string_view) for each piece.
 */
template<typename F>
void split_foreach_impl(std::string_view s, char sep, const F& f)
{
	if(s.empty()) {
		return;
	}

	while(true) {
		// Keep the offset in the view's own unsigned size type: narrowing it to
		// int would truncate positions beyond INT_MAX.
		const std::string_view::size_type partend = s.find(sep);
		if(partend == std::string_view::npos) {
			break;
		}

		f(s.substr(0, partend));
		s.remove_prefix(partend + 1);
	}

	// Whatever follows the last separator (possibly nothing) is the final piece.
	f(s);
}
66 
67 template<typename F>
68 void split_foreach(std::string_view s, char sep, const int flags, const F& f)
69 {
70  split_foreach_impl(s, sep, [&](std::string_view item) {
71  if(flags & STRIP_SPACES) {
72  trim(item);
73  }
74  if(!(flags & REMOVE_EMPTY) || !item.empty()) {
75  f(item);
76  }
77  });
78 }
79 
80 
81 
82 /** Splits a (comma-)separated string into a vector of pieces. */
83 std::vector<std::string> split(std::string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
84 std::set<std::string> split_set(std::string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
85 
86 /**
87  * This function is identical to split(), except it does not split when it otherwise would if the
88  * previous character was identical to the parameter 'quote' (i.e. it does not split quoted commas).
89  * This method was added to make it possible to quote user input, particularly so commas in user input
90  * would not cause visual problems in menus.
91  *
92  * @todo Why not change split()? That would change the method's postcondition.
93  */
94 std::vector<std::string> quoted_split(const std::string& val, char c= ',', int flags = REMOVE_EMPTY | STRIP_SPACES, char quote = '\\');
95 
96 /**
97  * Splits a (comma-)separated string into a set of pieces.
98  * See split() for the meanings of the parameters.
99  */
100 inline std::set<std::string> set_split(const std::string& val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES)
101 {
102  std::vector<std::string> vec_split = split(val, c, flags);
103  return std::set< std::string >(vec_split.begin(), vec_split.end());
104 }
105 
106 /**
107  * Splits a string based on two separators into a map.
108  *
109  * Major: the separator between elements of the map
110  * Minor: the separator between keys and values in one element
111  *
112  * For example, the string 'a:b,c:d,e:f' would be parsed into:
113  * a => b
114  * c => d
115  * e => f
116  */
117 std::map<std::string, std::string> map_split(
118  const std::string& val,
119  char major = ',',
120  char minor = ':',
121  int flags = REMOVE_EMPTY | STRIP_SPACES,
122  const std::string& default_value = "");
123 
124 /**
125  * Splits a string based on a separator, except when the text appears within the specified parentheses.
126  *
127  * If the separator is "0" (default), it splits a string into an odd number of parts:
128  * - The part before the first '(',
129  * - the part between the first '('
130  * - and the matching right ')', etc ...
131  * and the remainder of the string.
132  *
133  * Note that one can use the same character for both the left and right parenthesis, which usually makes
134  * the most sense for this function.
135  *
136  * Note that this will find the first matching char in the left string and match against the corresponding
137  * char in the right string. A correctly processed string should return a vector with an odd number of
138  * elements. Empty elements are never removed as they are placeholders, hence REMOVE EMPTY only works for
139  * the separator split.
140  *
141  * INPUT: ("a(b)c{d}e(f{g})h", 0, "({", ")}")
142  * RETURNS: {"a", "b", "c", "d", "e", "f{g}", "h"}
143  */
144 std::vector< std::string > parenthetical_split(
145  const std::string& val,
146  const char separator = 0,
147  const std::string& left = "(",
148  const std::string& right = ")",
149  const int flags = REMOVE_EMPTY | STRIP_SPACES);
150 
151 /**
152  * Similar to parenthetical_split, but also expands embedded square brackets.
153  *
154  * Notes:
155  * - The Separator must be specified and number of entries in each square bracket must match in each section.
156  * - Leading zeros are preserved if specified between square brackets.
157  * - An asterisk as in [a*n] indicates to expand 'a' n times
158  *
159  * This is useful for expanding animation WML code.
160  *
161  * Examples:
162  *
163  * INPUT: ("a[1~3](1,[5,6,7]),b[8,9]", ",")
164  * RETURNS: {"a1(1,5)", "a2(1,6)", "a3(1,7)", "b8", "b9"}
165  *
166  * INPUT: ("abc[07~10]")
167  * RETURNS: {"abc07", "abc08", "abc09", "abc10"}
168  *
169  * INPUT: ("a[1,2]b[3~4]:c[5,6]")
170  * RETURNS: {"a1b3:c5", "a2b4:c6"}
171  *
172  * INPUT: ("abc[3~1].png")
173  * RETURNS: {"abc3.png", "abc2.png", "abc1.png"}
174  *
175  * INPUT: ("abc[3,1].png")
176  * RETURNS: {"abc3.png", "abc1.png"}
177  *
178  * INPUT: ("abc[de,xyz]")
179  * RETURNS: {"abcde", "abcxyz"}
180  *
181  * INPUT: ("abc[1*3]")
182  * RETURNS: {"abc1", "abc1", "abc1"}
183  */
184 std::vector<std::string> square_parenthetical_split(
185  const std::string& val,
186  const char separator = ',',
187  const std::string& left = "([",
188  const std::string& right = ")]",
189  const int flags = REMOVE_EMPTY | STRIP_SPACES);
190 
/**
 * Generates a new string joining container items in a list.
 *
 * Each element is written with operator<< and consecutive elements are
 * separated by @a s; nothing is appended after the last element. An empty
 * container yields an empty string.
 *
 * @param v A container with elements (anything usable in a range-based for).
 * @param s List delimiter.
 */
template <typename T>
std::string join(const T& v, const std::string& s = ",")
{
	std::ostringstream out;

	// Emit the delimiter before every element except the first. This avoids
	// std::next (which needs <iterator>) and a nested T::const_iterator type.
	bool first = true;
	for(const auto& element : v) {
		if(!first) {
			out << s;
		}
		first = false;
		out << element;
	}

	return out.str();
}
211 
/**
 * Generates a new string joining the key/value entries of a container.
 *
 * Every entry is written as its `first` member, @a minor, then its `second`
 * member; consecutive entries are separated by @a major. An empty container
 * yields an empty string.
 *
 * @param v     A container whose elements expose `first` and `second`.
 * @param major Delimiter between entries.
 * @param minor Delimiter between the key and the value of one entry.
 */
template <typename T>
std::string join_map(
	const T& v,
	const std::string& major = ",",
	const std::string& minor = ":")
{
	std::ostringstream out;

	// Emit the entry delimiter before every entry except the first. This avoids
	// std::next (which needs <iterator>) and a nested T::const_iterator type.
	bool first = true;
	for(const auto& entry : v) {
		if(!first) {
			out << major;
		}
		first = false;
		out << entry.first << minor << entry.second;
	}

	return out.str();
}
229 
230 /**
231  * Generates a new string containing a bullet list.
232  *
233  * List items are preceded by the indentation blanks, a bullet string and
234  * another blank; all but the last item are followed by a newline.
235  *
236  * @param v A container with elements.
237  * @param indent Number of indentation blanks.
238  * @param bullet The leading bullet string.
239  */
240 template<typename T>
241 std::string bullet_list(const T& v, std::size_t indent = 4, const std::string& bullet = font::unicode_bullet)
242 {
243  std::ostringstream str;
244 
245  for(typename T::const_iterator i = v.begin(); i != v.end(); ++i) {
246  if(i != v.begin()) {
247  str << '\n';
248  }
249 
250  str << std::string(indent, ' ') << bullet << ' ' << *i;
251  }
252 
253  return str.str();
254 }
255 
256 /**
257  * Indent a block of text.
258  *
259  * Only lines with content are changed; empty lines are left intact. However,
260  * if @a string is an empty string itself, the indentation unit with the
261  * specified @a indent_size will be returned instead.
262  *
263  * @param string Text to indent.
264  * @param indent_size Number of indentation units to use.
265  */
266 std::string indent(const std::string& string, std::size_t indent_size = 4);
267 
268 std::pair<int, int> parse_range(const std::string& str);
269 
270 std::vector<std::pair<int, int>> parse_ranges(const std::string& str);
271 
272 int apply_modifier(const int number, const std::string &amount, const int minimum = 0);
273 
274 /** Add a "+" or replace the "-" par Unicode minus */
275 inline std::string print_modifier(const std::string &mod)
276 {
277  return mod[0] == '-' ? (font::unicode_minus + std::string(mod.begin() + 1, mod.end())) : ("+" + mod);
278 }
279 
280 /** Prepends a configurable set of characters with a backslash */
281 std::string escape(const std::string &str, const char *special_chars);
282 
283 /**
284  * Prepend all special characters with a backslash.
285  *
286  * Special characters are:
287  * #@{}+-,\*=
288  */
289 inline std::string escape(const std::string &str)
290 {
291  return escape(str, "#@{}+-,\\*=");
292 }
293 
294 /** Remove all escape characters (backslash) */
295 std::string unescape(const std::string &str);
296 
297 /** Percent-escape characters in a UTF-8 string intended to be part of a URL. */
298 std::string urlencode(const std::string &str);
299 
/** Returns a copy of 'str' wrapped in double quotes; 'str' itself is not escaped. */
inline std::string quote(const std::string &str)
{
	std::string quoted;
	quoted.reserve(str.size() + 2);
	quoted += '"';
	quoted += str;
	quoted += '"';
	return quoted;
}
305 
306 /** Convert no, false, off, 0, 0.0 to false, empty to def, and others to true */
307 bool string_bool(const std::string& str,bool def=false);
308 
309 /** Converts a bool value to 'true' or 'false' */
310 std::string bool_string(const bool value);
311 
312 /** Convert into a signed value (using the Unicode "−" and +0 convention). */
313 std::string signed_value(int val);
314 
315 /** Sign with Unicode "−" if negative */
316 std::string half_signed_value(int val);
317 
318 /** Convert into a percentage (using the Unicode "−" and +0% convention */
319 inline std::string signed_percent(int val) {return signed_value(val) + "%";}
320 
321 /**
322  * Convert into a string with an SI-postfix.
323  *
324  * If the unit is to be translatable,
325  * a t_string should be passed as the third argument.
326  * _("unit_byte^B") is suggested as standard.
327  *
328  * There are no default values because they would not be translatable.
329  */
330 std::string si_string(double input, bool base2, const std::string& unit);
331 
332 /**
333  * Try to complete the last word of 'text' with the 'wordlist'.
334  *
335  * @param[in, out] text The parameter's usage is:
336  * - Input: Text where we try to complete the last word
337  * of.
338  * - Output: Text with completed last word.
339  * @param[in, out] wordlist
340  * The parameter's usage is:
341  * - Input: A vector of strings to complete against.
342  * - Output: A vector of strings that matched 'text'.
343  *
344  * @retval true iff text is just one word (no spaces)
345  */
346 bool word_completion(std::string& text, std::vector<std::string>& wordlist);
347 
348 /** Check if a message contains a word. */
349 bool word_match(const std::string& message, const std::string& word);
350 
351 /**
352  * Match using '*' as any number of characters (including none),
353  * '+' as one or more characters, and '?' as any one character.
354  */
355 bool wildcard_string_match(const std::string& str, const std::string& match);
356 
357 /**
358  * Converts '*' to '%' and optionally escapes '_'.
359  *
360  * @param str The original string.
361  * @param underscores Whether to escape underscore characters as well.
362  */
363 void to_sql_wildcards(std::string& str, bool underscores = true);
364 
365 /**
366  * Check if the username contains only valid characters.
367  *
368  * (all alpha-numeric characters plus underscore and hyphen)
369  */
370 bool isvalid_username(const std::string& login);
371 
372 /**
373  * Check if the username pattern contains only valid characters.
374  *
375  * (all alpha-numeric characters plus underscore, hyphen,
376  * question mark and asterisk)
377  */
378 bool isvalid_wildcard(const std::string& login);
379 
380 /**
381  * Truncates a string to a given utf-8 character count and then appends an ellipsis.
382  */
383 void ellipsis_truncate(std::string& str, const std::size_t size);
384 
385 } // end namespace utils
bool isvalid_wildcard(const std::string &username)
Check if the username pattern contains only valid characters.
std::pair< int, int > parse_range(const std::string &str)
std::string join_map(const T &v, const std::string &major=",", const std::string &minor=":")
std::string urlencode(const std::string &str)
Percent-escape characters in a UTF-8 string intended to be part of a URL.
bool isvalid_username(const std::string &username)
Check if the username contains only valid characters.
std::map< std::string, t_string > string_map
This class represents a single unit of a specific type.
Definition: unit.hpp:121
std::string join(const T &v, const std::string &s=",")
Generates a new string joining container items in a list.
std::set< std::string > split_set(std::string_view s, char sep, const int flags)
std::string unescape(const std::string &str)
Remove all escape characters (backslash)
bool wildcard_string_match(const std::string &str, const std::string &match)
Match using '*' as any number of characters (including none), '+' as one or more characters, and '?' as any one character.
void ellipsis_truncate(std::string &str, const std::size_t size)
Truncates a string to a given utf-8 character count and then appends an ellipsis. ...
bool notspace(const char c)
std::map< std::string, std::string > map_split(const std::string &val, char major, char minor, int flags, const std::string &default_value)
Splits a string based on two separators into a map.
std::string quote(const std::string &str)
Surround the string 'str' with double quotes.
std::string half_signed_value(int val)
Sign with Unicode "−" if negative.
std::string bullet_list(const T &v, std::size_t indent=4, const std::string &bullet=font::unicode_bullet)
Generates a new string containing a bullet list.
std::size_t size(const std::string &str)
Length in characters of a UTF-8 string.
Definition: unicode.cpp:87
const std::string unicode_minus
Definition: constants.cpp:42
std::vector< std::pair< int, int > > parse_ranges(const std::string &str)
std::string si_string(double input, bool base2, const std::string &unit)
Convert into a string with an SI-postfix.
void to_sql_wildcards(std::string &str, bool underscores)
Converts '*' to '%' and optionally escapes '_'.
lu_byte right
Definition: lparser.cpp:1227
std::string escape(const std::string &str, const char *special_chars)
Prepends a configurable set of characters with a backslash.
std::string login()
std::size_t i
Definition: function.cpp:967
static map_location::DIRECTION s
void split_foreach_impl(std::string_view s, char sep, const F &f)
std::vector< std::string > quoted_split(const std::string &val, char c, int flags, char quote)
This function is identical to split(), except it does not split when it otherwise would if the previo...
std::string signed_percent(int val)
Convert into a percentage (using the Unicode "−" and +0% convention.
std::string bool_string(const bool value)
Converts a bool value to 'true' or 'false'.
bool string_bool(const std::string &str, bool def)
Convert no, false, off, 0, 0.0 to false, empty to def, and others to true.
const std::string unicode_bullet
Definition: constants.cpp:47
bool isnewline(const char c)
REMOVE_EMPTY: remove empty elements.
#define next(ls)
Definition: llex.cpp:32
void split_foreach(std::string_view s, char sep, const int flags, const F &f)
#define f
std::vector< std::string > split(const config_attribute_value &val)
int apply_modifier(const int number, const std::string &amount, const int minimum)
lu_byte left
Definition: lparser.cpp:1226
std::string indent(const std::string &string, std::size_t indent_size)
Indent a block of text.
static const char * match(MatchState *ms, const char *s, const char *p)
Definition: lstrlib.cpp:567
void trim(std::string_view &s)
std::set< std::string > set_split(const std::string &val, const char c=',', const int flags=REMOVE_EMPTY|STRIP_SPACES)
Splits a (comma-)separated string into a set of pieces.
bool portable_isspace(const char c)
std::string signed_value(int val)
Convert into a signed value (using the Unicode "−" and +0 convention.
mock_char c
std::string print_modifier(const std::string &mod)
Add a "+" or replace the "-" par Unicode minus.
bool word_completion(std::string &text, std::vector< std::string > &wordlist)
Try to complete the last word of 'text' with the 'wordlist'.
std::vector< std::string > square_parenthetical_split(const std::string &val, const char separator, const std::string &left, const std::string &right, const int flags)
Similar to parenthetical_split, but also expands embedded square brackets.
std::pair< std::string, unsigned > item
Definition: help_impl.hpp:410
bool word_match(const std::string &message, const std::string &word)
Check if a message contains a word.
std::vector< std::string > parenthetical_split(const std::string &val, const char separator, const std::string &left, const std::string &right, const int flags)
Splits a string based either on a separator, except then the text appears within specified parenthesi...