/// Returns the constant identifier string "utf8".
///
/// The returned pointer refers to a string literal, so it remains valid for
/// the whole program lifetime and must not be modified or freed by callers.
static const char* get_name()
{
    return "utf8";
}
31 else if(ch < (1u << 11))
33 else if(ch < (1u << 16))
35 else if(ch < (1u << 21))
37 else if(ch < (1u << 26))
39 else if(ch < (1u << 31))
54 if (count == 1 || count > 6) {
69 template<
typename writer>
70 static inline void write(writer out, char32_t ch)
73 assert(out.can_push(count));
75 out.push(
static_cast<char>(ch));
77 for(
int j =
static_cast<int>(count) - 1; j >= 0; --j) {
78 unsigned char c = (ch >> (6 * j)) & 0x3f;
80 if(j ==
static_cast<int>(count) - 1) {
81 c |= 0xff << (8 - count);
95 template<
typename iitor_t>
96 static inline char32_t
read(iitor_t& input,
const iitor_t& end)
101 char32_t current_char =
static_cast<unsigned char>(*input);
105 current_char &= 0xFF >> (
size + 1);
111 for(std::size_t
i = 1;
i <
size; ++
i, ++input) {
116 if ((*input & 0xC0) != 0x80)
119 current_char = (current_char << 6) | (static_cast<unsigned char>(*input) & 0x3F);
134 template<
typename writer>
135 static inline void write(writer out, char32_t ch)
137 const char32_t bit17 = 0x10000;
141 assert(out.can_push(1));
142 out.push(
static_cast<char16_t
>(ch));
146 assert(out.can_push(2));
147 const char32_t char20 = ch - bit17;
148 assert(char20 < (1 << 20));
149 const char32_t lead = 0xD800 + (char20 >> 10);
150 const char32_t trail = 0xDC00 + (char20 & 0x3FF);
151 assert(lead < bit17);
152 assert(trail < bit17);
153 out.push(
static_cast<char16_t
>(lead));
154 out.push(
static_cast<char16_t
>(trail));
158 template<
typename iitor_t>
159 static inline char32_t
read(iitor_t& input,
const iitor_t& end)
161 const char32_t last10 = 0x3FF;
162 const char32_t type_filter = 0xFC00;
163 const char32_t type_lead = 0xD800;
164 const char32_t type_trail = 0xDC00;
166 assert(input != end);
167 char32_t current_char =
static_cast<char16_t
>(*input);
169 char32_t
type = current_char & type_filter;
170 if(
type == type_trail)
175 else if(
type == type_lead)
182 if((*input & type_filter) != type_trail)
186 current_char &= last10;
188 current_char += (*input & last10);
189 current_char += 0x10000;
199 template<
typename writer>
200 static inline void write(writer out, char32_t ch)
202 assert(out.can_push(1));
206 template<
typename iitor_t>
207 static inline char32_t
read(iitor_t& input,
const iitor_t& end)
209 assert(input != end);
210 char32_t current_char = *input;
216 template<
typename T_CHAR>
Thrown by operations encountering invalid UTF-8 data.
General math utility functions.
constexpr unsigned int count_leading_ones(N n)
Returns the number of leading 1 bits in n — i.e., the number of bits in n, minus the 1-based bit ...
std::size_t size(std::string_view str)
Length in characters of a UTF-8 string.
static char32_t read(iitor_t &input, const iitor_t &end)
static void write(writer out, char32_t ch)
static const char * get_name()
static const char * get_name()
static char32_t read(iitor_t &input, const iitor_t &end)
static void write(writer out, char32_t ch)
static void write(writer out, char32_t ch)
Writes a UCS-4 character to a UTF-8 stream.
static const char * get_name()
static char32_t read(iitor_t &input, const iitor_t &end)
Reads a UCS-4 character from a UTF-8 stream.
static int byte_size_from_utf8_first(char ch)
static std::size_t byte_size_from_ucs4_codepoint(char32_t ch)