Unicode.cpp
1
//
// SFML - Simple and Fast Multimedia Library
// Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
//
// This software is provided 'as-is', without any express or implied warranty.
// In no event will the authors be held liable for any damages arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it freely,
// subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented;
//    you must not claim that you wrote the original software.
//    If you use this software in a product, an acknowledgment
//    in the product documentation would be appreciated but is not required.
//
// 2. Altered source versions must be plainly marked as such,
//    and must not be misrepresented as being the original software.
//
// 3. This notice may not be removed or altered from any source distribution.
//
24
26// Headers
28#include <SFML/System/Unicode.hpp>
29#include <stdexcept>
30#include <string.h>
31
32
// References:
//
// http://www.unicode.org/
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h
// http://people.w3.org/rishida/scripts/uniview/conversion
//
42
43namespace
44{
////////////////////////////////////////////////////////////
/// Count the elements of a zero-terminated string of any
/// character type. The terminating zero is not counted.
///
/// \param Str : Pointer to the first element (must not be null,
///              it is dereferenced unconditionally)
///
/// \return Number of elements found before the terminator
////////////////////////////////////////////////////////////
template <typename T>
std::size_t StrLen(const T* Str)
{
    // Advance a cursor up to the terminator; the distance travelled is the length
    const T* Cursor = Str;
    while (*Cursor)
        ++Cursor;

    return static_cast<std::size_t>(Cursor - Str);
}
56
////////////////////////////////////////////////////////////
/// Get the current system locale
///
/// \return The locale named "" when the implementation accepts
///         that name, otherwise a copy of the global C++ locale
////////////////////////////////////////////////////////////
std::locale GetCurrentLocale()
{
    // Start from a copy of the global locale: it is the fallback result
    std::locale Current;

    try
    {
        Current = std::locale("");
    }
    catch (const std::runtime_error&)
    {
        // It seems some implementations don't know the "" locale
        // (Mac OS, MinGW) -- keep the fallback obtained above
    }

    return Current;
}
74}
75
76namespace sf
77{
79// Static member data
81const int Unicode::UTF8TrailingBytes[256] =
82{
83 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
84 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
85 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
87 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
88 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
89 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
91};
92const Uint32 Unicode::UTF8Offsets[6] =
93{
94 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
95};
96const Uint8 Unicode::UTF8FirstBytes[7] =
97{
98 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
99};
100
101
106{
107 // Nothing to do
108}
109
110
114Unicode::Text::Text(const char* Str)
115{
116 if (Str)
117 {
118 std::size_t Length = StrLen(Str);
119 if (Length > 0)
120 {
121 myUTF32String.reserve(Length + 1);
122 Unicode::ANSIToUTF32(Str, Str + Length, std::back_inserter(myUTF32String));
123 }
124 }
125}
126Unicode::Text::Text(const wchar_t* Str)
127{
128 if (Str)
129 {
130 std::size_t Length = StrLen(Str);
131 if (Length > 0)
132 {
133 // See comments below, in Unicode::Text::Text(const std::wstring&)
134 myUTF32String.reserve(Length + 1);
135 switch (sizeof(wchar_t))
136 {
137 case 2 : Unicode::UTF16ToUTF32(Str, Str + Length, std::back_inserter(myUTF32String), 0); break;
138 case 4 : std::copy(Str, Str + Length, std::back_inserter(myUTF32String)); break;
139 default : break;
140 }
141 }
142 }
143}
144Unicode::Text::Text(const Uint8* Str)
145{
146 if (Str)
147 {
148 std::size_t Length = StrLen(Str);
149 if (Length > 0)
150 {
151 myUTF32String.reserve(Length + 1);
152 Unicode::UTF8ToUTF32(Str, Str + Length, std::back_inserter(myUTF32String), 0);
153 }
154 }
155}
156Unicode::Text::Text(const Uint16* Str)
157{
158 if (Str)
159 {
160 std::size_t Length = StrLen(Str);
161 if (Length > 0)
162 {
163 myUTF32String.reserve(Length+ 1);
164 Unicode::UTF16ToUTF32(Str, Str + Length, std::back_inserter(myUTF32String), 0);
165 }
166 }
167}
168Unicode::Text::Text(const Uint32* Str)
169{
170 if (Str)
171 myUTF32String = Str;
172}
173Unicode::Text::Text(const std::string& Str)
174{
175 myUTF32String.reserve(Str.length() + 1);
176 Unicode::ANSIToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String));
177}
178Unicode::Text::Text(const std::wstring& Str)
179{
180 // This function assumes that 2-byte large wchar_t are encoded in UTF-16 (Windows), and
181 // 4-byte large wchar_t are encoded using UTF-32 (Unix)
182 // Is that always true ? (some platforms may use JIS Japanese encoding)
183 // The macro __STDC_ISO_10646__ should help identifying UTF-32 compliant implementations
184
185 myUTF32String.reserve(Str.length() + 1);
186
187 // Select the proper function according to the (supposed) wchar_t system encoding
188 switch (sizeof(wchar_t))
189 {
190 // wchar_t uses UTF-16 -- need a conversion
191 case 2 :
192 {
193 Unicode::UTF16ToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String), 0);
194 break;
195 }
196
197 // wchar_t uses UTF-32 -- direct copy
198 case 4 :
199 {
200 std::copy(Str.begin(), Str.end(), std::back_inserter(myUTF32String));
201 break;
202 }
203
204 // This should never happen
205 default : break;
206 }
207}
209{
210 myUTF32String.reserve(Str.length() + 1);
211 Unicode::UTF8ToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String), 0);
212}
213Unicode::Text::Text(const Unicode::UTF16String& Str)
214{
215 myUTF32String.reserve(Str.length() + 1);
216 Unicode::UTF16ToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String), 0);
217}
218Unicode::Text::Text(const Unicode::UTF32String& Str)
219{
220 myUTF32String = Str;
221}
222
223
227Unicode::Text::operator std::string() const
228{
229 std::string Output;
230 Output.reserve(myUTF32String.length() + 1);
231 Unicode::UTF32ToANSI(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0, Unicode::GetDefaultLocale());
232 return Output;
233}
234Unicode::Text::operator std::wstring() const
235{
236 // This function assumes that 2-byte large wchar_t are encoded in UTF-16 (Windows), and
237 // 4-byte large wchar_t are encoded using UTF-32 (Unix)
238 // Is that always true ? (some platforms may use JIS Japanese encoding)
239 // The macro __STDC_ISO_10646__ should help identifying UTF-32 compliant implementations
240
241 std::wstring Output;
242 Output.reserve(myUTF32String.length() + 1);
243
244 // Select the proper function according to the (supposed) wchar_t system encoding
245 switch (sizeof(wchar_t))
246 {
247 // wchar_t uses UTF-16 -- need a conversion
248 case 2 :
249 {
250 UTF32ToUTF16(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0);
251 break;
252 }
253
254 // wchar_t uses UTF-32 -- direct copy
255 case 4 :
256 {
257 std::copy(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output));
258 break;
259 }
260
261 // This should never happen
262 default : break;
263 }
264 return Output;
265}
266Unicode::Text::operator sf::Unicode::UTF8String() const
267{
268 Unicode::UTF8String Output;
269 Output.reserve(myUTF32String.length() * 4 + 1);
270 Unicode::UTF32ToUTF8(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0);
271 return Output;
272}
273Unicode::Text::operator sf::Unicode::UTF16String() const
274{
275 Unicode::UTF16String Output;
276 Output.reserve(myUTF32String.length() * 2 + 1);
277 Unicode::UTF32ToUTF16(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0);
278 return Output;
279}
280Unicode::Text::operator const sf::Unicode::UTF32String&() const
281{
282 return myUTF32String;
283}
284
285
289const std::locale& Unicode::GetDefaultLocale()
290{
291 // It seems getting the default locale is a very expensive operation,
292 // so we only do it once and then store the locale for reuse.
293 // Warning : this code won't be aware of any change of the default locale during runtime
294
295 static std::locale DefaultLocale = GetCurrentLocale();
296
297 return DefaultLocale;
298}
299
300} // namespace sf
Text()
Default constructor (empty text).
Definition Unicode.cpp:105
static Out ANSIToUTF32(In Begin, In End, Out Output, const std::locale &Locale=GetDefaultLocale())
Generic function to convert an ANSI characters range to an UTF-32 characters range,...
Definition Unicode.hpp:68
std::basic_string< Uint8 > UTF8String
Define a string type for each encoding Warning : in UTF8 and UTF16 strings, one element doesn't neces...
Definition Unicode.hpp:54
static Out UTF32ToUTF8(In Begin, In End, Out Output, Uint8 Replacement='?')
Generic function to convert an UTF-32 characters range to an UTF-8 characters range,...
Definition Unicode.hpp:327
static Out UTF16ToUTF32(In Begin, In End, Out Output, Uint32 Replacement='?')
Generic function to convert an UTF-16 characters range to an UTF-32 characters range,...
Definition Unicode.hpp:281
static Out UTF32ToANSI(In Begin, In End, Out Output, char Replacement='?', const std::locale &Locale=GetDefaultLocale())
Generic function to convert an UTF-32 characters range to an ANSI characters range,...
Definition Unicode.hpp:32
static Out UTF32ToUTF16(In Begin, In End, Out Output, Uint16 Replacement='?')
Generic function to convert an UTF-32 characters range to an UTF-16 characters range,...
Definition Unicode.hpp:380
static Out UTF8ToUTF32(In Begin, In End, Out Output, Uint32 Replacement='?')
Generic function to convert an UTF-8 characters range to an UTF-32 characters range,...
Definition Unicode.hpp:164