The Battle for Wesnoth  1.15.0-dev
marked-up_text.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 - 2018 by David White <dave@whitevine.net>
3  Part of the Battle for Wesnoth Project http://www.wesnoth.org/
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation; either version 2 of the License, or
8  (at your option) any later version.
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY.
11 
12  See the COPYING file for more details.
13 */
14 
15 /**
16  * @file
17  * Support for simple markup in text (fonts, colors, images).
18  * E.g. "@Victory" will be shown in green.
19  */
20 
21 #include "font/marked-up_text.hpp"
22 
23 namespace font
24 {
/**
 * Determine if a char32_t is a CJK character.
 *
 * The ranges are taken from the "East Asian Scripts" part of the Unicode
 * code charts (http://unicode.org/charts/). Not every block of that part is
 * listed: only the ones that may plausibly show up in Wesnoth translations.
 *
 * @param ch  The Unicode code point to classify.
 * @return    true if @a ch lies inside one of the ranges listed below.
 */
bool is_cjk_char(const char32_t ch)
{
	//FIXME add range from Japanese-specific and Korean-specific section if you know the characters are used today.

	// Both ends of a range are inclusive.
	struct code_range
	{
		char32_t first;
		char32_t last;
	};

	static constexpr code_range cjk_ranges[] = {
		// Han Ideographs: all except Supplement
		{0x4e00, 0x9fff},   // CJK Unified Ideographs (whole block)
		{0x3400, 0x4dbf},   // CJK Unified Ideographs Extension A
		{0x20000, 0x2a6df}, // CJK Unified Ideographs Extension B
		{0xf900, 0xfaff},   // CJK Compatibility Ideographs
		{0x3190, 0x319f},   // Kanbun

		// Radicals: all except Ideographic Description
		{0x2e80, 0x2eff},   // CJK Radicals Supplement
		{0x2f00, 0x2fdf},   // Kangxi Radicals
		{0x31c0, 0x31ef},   // CJK Strokes

		// Chinese-specific: Bopomofo and Bopomofo Extended
		{0x3104, 0x312d},
		{0x31a0, 0x31ba},

		// Yi-specific: Yi Radicals, Yi Syllables
		{0xa490, 0xa4c6},
		{0xa000, 0xa48c},

		// Japanese-specific: Hiragana, Katakana, Kana Supplement
		{0x3040, 0x309f},
		{0x30a0, 0x30ff},
		{0x1b000, 0x1b001},

		// Ainu-specific: Katakana Phonetic Extensions
		{0x31f0, 0x31ff},

		// Korean-specific: Hangul Syllables, Hangul Jamo, Hangul Jamo Extended-A, Hangul Jamo Extended-B
		{0xac00, 0xd7af},
		{0x1100, 0x11ff},
		{0xa960, 0xa97c},
		{0xd7b0, 0xd7fb},

		// CJK Symbols and Punctuation
		{0x3000, 0x303f},

		// Halfwidth and Fullwidth Forms
		{0xff00, 0xffef},
	};

	// Shortcut for common non-CJK text. The lowest listed range is Hangul Jamo
	// (U+1100); a higher cut-off would make that range unreachable.
	if(ch < 0x1100) {
		return false;
	}

	for(const code_range& range : cjk_ranges) {
		if(ch >= range.first && ch <= range.last) {
			return true;
		}
	}

	return false;
}
79 
80 // Re-enable if we find use for these
81 #if 0
82 namespace {
83 
84 /*
85  * According to Kinsoku-Shori, Japanese rules about line-breaking:
86  *
87  * * the following characters cannot begin a line (so we will never break before them):
88  * 、。,.)〕]}〉》」』】’”ゝゞヽヾ々?!:;ぁぃぅぇぉゃゅょゎァィゥェォャュョヮっヵッヶ・…ー
89  *
90  * * the following characters cannot end a line (so we will never break after them):
91  * (〔[{〈《「『【‘“
92  *
93  * Unicode range that concerns word wrap for Chinese:
94  * 全角ASCII、全角中英文标点 (Fullwidth Character for ASCII, English punctuations and part of Chinese punctuations)
95  * http://www.unicode.org/charts/PDF/UFF00.pdf
96  * CJK 标点符号 (CJK punctuations)
97  * http://www.unicode.org/charts/PDF/U3000.pdf
98  */
/**
 * Tells whether a line must not be broken directly after @a ch.
 *
 * The code points listed are the left-hand sides of brackets and quotes.
 *
 * @param ch  The Unicode code point to test.
 * @return    true if @a ch is one of the listed code points.
 */
inline bool no_break_after(const char32_t ch)
{
	switch(ch) {
	/**
	 * don't break after these Japanese characters
	 */
	case 0x2018:
	case 0x201c:
	case 0x3008:
	case 0x300a:
	case 0x300c:
	case 0x300e:
	case 0x3010:
	case 0x3014:
	case 0xff08:
	case 0xff3b:
	case 0xff5b:

	/**
	 * FIXME don't break after these Korean characters
	 */

	/**
	 * don't break after these Chinese characters
	 * contains left side of different kinds of brackets and quotes
	 */
	case 0x3016:
	case 0x301a:
	case 0x301d:
		return true;

	default:
		return false;
	}
}
119 
/**
 * Tells whether a line must not be broken directly before @a ch.
 *
 * @param ch  The Unicode code point to test.
 * @return    true if @a ch is one of the listed code points.
 */
inline bool no_break_before(const char32_t ch)
{
	// Small katakana used in Ainu: the contiguous run U+31F0..U+31FF.
	if(ch >= 0x31f0 && ch <= 0x31ff) {
		return true;
	}

	switch(ch) {
	/**
	 * don't break before these Japanese characters
	 */
	case 0x2019: case 0x201d: case 0x2026: case 0x3001: case 0x3002:
	case 0x3005: case 0x3009: case 0x300b: case 0x300d: case 0x300f:
	case 0x3011: case 0x3015: case 0x3041: case 0x3043: case 0x3045:
	case 0x3047: case 0x3049: case 0x3063: case 0x3083: case 0x3085:
	case 0x3087: case 0x308e: case 0x309d: case 0x309e: case 0x30a1:
	case 0x30a3: case 0x30a5: case 0x30a7: case 0x30a9: case 0x30c3:
	case 0x30e3: case 0x30e5: case 0x30e7: case 0x30ee: case 0x30f5:
	case 0x30f6: case 0x30fb: case 0x30fc: case 0x30fd: case 0x30fe:
	case 0xff01: case 0xff09: case 0xff0c: case 0xff0e: case 0xff1a:
	case 0xff1b: case 0xff1f: case 0xff3d: case 0xff5d:

	/**
	 * FIXME don't break before these Korean characters
	 */

	/**
	 * don't break before these Chinese characters
	 * contains
	 * many Chinese punctuations that should not start a line
	 * and right side of different kinds of brackets, quotes
	 */
	case 0x301c: case 0xff0d: case 0xff64: case 0xff65: case 0x3017:
	case 0x301b: case 0x301e:
		return true;

	default:
		return false;
	}
}
156 
157 inline bool break_before(const char32_t ch)
158 {
159  if(no_break_before(ch))
160  return false;
161 
162  return is_cjk_char(ch);
163 }
164 
165 inline bool break_after(const char32_t ch)
166 {
167  if(no_break_after(ch))
168  return false;
169 
170  return is_cjk_char(ch);
171 }
172 
173 } // end of anon namespace
174 #endif
175 
176 } // end namespace font
Collection of helper functions relating to Pango formatting.
bool is_cjk_char(const char32_t ch)
Determine if a char32_t is a CJK character.