The Battle for Wesnoth  1.19.15+dev
unicode.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 - 2025
3  by Philippe Plantier <ayin@anathas.org>
4  Copyright (C) 2005 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
5  Copyright (C) 2003 by David White <dave@whitevine.net>
6  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY.
14 
15  See the COPYING file for more details.
16 */
17 
18 #pragma once
19 
20 #include "ucs4_iterator_base.hpp"
21 #include "ucs4_convert_impl.hpp"
22 #include "unicode_cast.hpp"
23 
24 #include <string>
25 
26 /**
27  * UTF-16 support, for the Win32 API.
28  *
29  * On Windows, wchar_t is a 16-bit type.
30  * Wide strings are expected to be UTF-16.
31  */
32 namespace utf16 {
34 }
35 
36 /**
37  * Functions for converting Unicode wide-char strings to UTF-8 encoded strings,
38  * back and forth.
39  */
40 namespace utf8 {
42 
43  /** Returns a lowercased version of the string. */
44  std::string lowercase(std::string_view s);
45 
46  /**
47  * Codepoint index corresponding to the nth character in a UTF-8 string.
48  * @throw invalid_utf8_exception if the string is not a valid UTF-8 string.
49  * The checking is partial. No matter what is in between, it will iterate from first byte
50  * of a character to the next first byte of the following character.
51  * @return str.length() if there are fewer than @p index characters.
52  */
53  std::size_t index(std::string_view str, const std::size_t index);
54 
55  /**
56  * Length in characters of a UTF-8 string, given as a string view or as an iterator range.
57  * @throw invalid_utf8_exception if the string is not a valid UTF-8 string.
58  * The checking is partial. No matter what is in between, it will iterate from first byte
59  * of a character to the next first byte of the following character.
60  */
61  std::size_t size(std::string_view str);
62  std::size_t size(const std::string::const_iterator& start, const std::string::const_iterator& end);
63 
64  /** Inserts a UTF-8 string at the specified position. */
65  std::string& insert(std::string& str, const std::size_t pos, const std::string& insert) ;
66 
67  /**
68  * Erases a portion of a UTF-8 string.
69  *
70  * @param str UTF-8 encoded string.
71  * @param start Start position.
72  * @param len Number of characters to erase. Defaults to std::string::npos.
73  *
74  * @note This implementation does not check for valid UTF-8. Don't use it
75  * for user input.
76  */
77  std::string& erase(std::string& str, const std::size_t start, const std::size_t len = std::string::npos);
78 
79  /**
80  * Truncates a UTF-8 string to the specified number of characters.
81  *
82  * @param str UTF-8 encoded string.
83  * @param size Size to truncate to.
84  *
85  * @note This implementation does not check for valid UTF-8. Don't use it
86  * for user input.
87  */
88  std::string& truncate(std::string& str, const std::size_t size);
89 
90  /**
91  * Truncates a UTF-8 string to the specified number of characters.
92  *
93  * If the string has more than @p size UTF-8 characters it will be
94  * truncated to this size.
95  *
96  * The output is guaranteed to be valid UTF-8.
97  *
98  * @param[in,out] str [in] String encoded in UTF-8.
99  * [out] String encoded in UTF-8 that contains at most @p size
100  * codepoints.
101  * @param size The size to truncate to.
102  */
103  void truncate_as_ucs4(std::string& str, const std::size_t size);
104 } // end namespace utf8
EXIT_STATUS start(bool clear_id, const std::string &filename, bool take_screenshot, const std::string &screenshot_filename)
Main interface for launching the editor from the title screen.
For Win32 API.
Definition: unicode.hpp:32
ucs4::iterator_base< std::u16string, ucs4_convert_impl::convert_impl< char16_t >::type > iterator
Definition: unicode.hpp:33
Functions for converting Unicode wide-char strings to UTF-8 encoded strings, back and forth.
Definition: unicode.cpp:33
std::string lowercase(std::string_view s)
Returns a lowercased version of the string.
Definition: unicode.cpp:50
std::string & insert(std::string &str, const std::size_t pos, const std::string &insert)
Insert a UTF-8 string at the specified position.
Definition: unicode.cpp:100
ucs4::iterator_base< std::string_view, ucs4_convert_impl::convert_impl< char >::type > iterator
Definition: unicode.hpp:41
std::string & erase(std::string &str, const std::size_t start, const std::size_t len)
Erases a portion of a UTF-8 string.
Definition: unicode.cpp:105
void truncate_as_ucs4(std::string &str, const std::size_t size)
Truncates a UTF-8 string to the specified number of characters.
Definition: unicode.cpp:123
std::size_t size(std::string_view str)
Length in characters of a UTF-8 string.
Definition: unicode.cpp:81
std::string & truncate(std::string &str, const std::size_t size)
Truncates a UTF-8 string to the specified number of characters.
Definition: unicode.cpp:118
std::size_t index(std::string_view str, const std::size_t index)
Codepoint index corresponding to the nth character in a UTF-8 string.
Definition: unicode.cpp:70
static map_location::direction s