Electroneum
Loading...
Searching...
No Matches
i18n.cpp
Go to the documentation of this file.
1// Copyrights(c) 2017-2021, The Electroneum Project
2// Copyrights(c) 2014-2019, The Monero Project
3//
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without modification, are
7// permitted provided that the following conditions are met:
8//
9// 1. Redistributions of source code must retain the above copyright notice, this list of
10// conditions and the following disclaimer.
11//
12// 2. Redistributions in binary form must reproduce the above copyright notice, this list
13// of conditions and the following disclaimer in the documentation and/or other
14// materials provided with the distribution.
15//
16// 3. Neither the name of the copyright holder nor the names of its contributors may be
17// used to endorse or promote products derived from this software without specific
18// prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
21// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
28// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30#include <stdlib.h>
31#include <string.h>
32#include <ctype.h>
33#include <string>
34#include <map>
35#include "file_io_utils.h"
36#include "common/i18n.h"
37#include "translation_files.h"
38#include <algorithm>
39
40#undef ELECTRONEUM_DEFAULT_LOG_CATEGORY
41#define ELECTRONEUM_DEFAULT_LOG_CATEGORY "i18n"
42
43#define MAX_LANGUAGE_SIZE 16
44
45static const unsigned char qm_magic[16] = {0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95, 0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd};
46
47static std::map<std::string,std::string> i18n_entries;
48
49/* Logging isn't initialized yet when this is run */
50/* add std::flush, because std::endl doesn't seem to flush, contrary to expected */
51// #define i18n_log(x) do { std::cout << __FILE__ << ":" << __LINE__ << ": " << x << std::endl; std::cout << std::flush; } while(0)
52#define i18n_log(x) ((void)0)
53
54std::string i18n_get_language()
55{
56 const char *e;
57
58 e = getenv("LANG");
59 i18n_log("LANG=" << e);
60 if (!e || !*e) {
61 e = getenv("LC_ALL");
62 i18n_log("LC_ALL=" << e);
63 }
64 if (!e || !*e)
65 e = "en";
66
67 std::string language = e;
68 language = language.substr(0, language.find("."));
69 language = language.substr(0, language.find("@"));
70
71 // check valid values
72 for (char c: language)
73 if (!strchr("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-.@", c))
74 return "en";
75
76 std::transform(language.begin(), language.end(), language.begin(), tolower);
77 if (language.size() > MAX_LANGUAGE_SIZE)
78 {
79 i18n_log("Language from LANG/LC_ALL suspiciously long, defaulting to en");
80 return "en";
81 }
82 return language;
83}
84
/**
 * Read a 32 bit big-endian unsigned integer.
 *
 * @param data pointer to at least 4 readable bytes
 * @return the value formed by data[0] (most significant) .. data[3]
 */
static uint32_t be32(const unsigned char *data)
{
  // Widen each byte to uint32_t before shifting: an unsigned char promotes to
  // (signed) int, and shifting a value >= 0x80 left by 24 would move a bit
  // into the sign position.
  return (static_cast<uint32_t>(data[0]) << 24) |
         (static_cast<uint32_t>(data[1]) << 16) |
         (static_cast<uint32_t>(data[2]) << 8) |
          static_cast<uint32_t>(data[3]);
}
89
/**
 * Convert big-endian UTF-16 data to a UTF-8 encoded std::string.
 *
 * A high surrogate (0xd800..0xdbff) immediately followed by a low surrogate
 * (0xdc00..0xdfff) is combined into a single code point and emitted as a
 * 4 byte sequence.  A surrogate that is not part of such a pair is passed
 * through and encoded like any other 16 bit value.  A trailing odd byte is
 * ignored.
 *
 * @param data pointer to the UTF-16 bytes
 * @param len  number of bytes (not code units) available at data
 * @return the UTF-8 encoding of the input
 */
static std::string utf16(const unsigned char *data, uint32_t len)
{
  std::string s;
  while (len >= 2) {
    uint32_t code = (static_cast<uint32_t>(data[0]) << 8) | data[1];
    data += 2;
    len -= 2;

    // Only a *high* surrogate may start a pair; the upper bound is 0xdbff.
    if (code >= 0xd800 && code <= 0xdbff && len >= 2) {
      const uint32_t next = (static_cast<uint32_t>(data[0]) << 8) | data[1];
      if (next >= 0xdc00 && next <= 0xdfff) {
        // 10 payload bits from each half, offset by 0x10000.
        code = 0x10000 + ((code - 0xd800) << 10) + (next - 0xdc00);
        data += 2;
        len -= 2;
      }
    }

    if (code <= 0x7f) {
      s += static_cast<char>(code);
    }
    else if (code <= 0x7ff) {
      s += static_cast<char>(0xc0 | (code >> 6));
      s += static_cast<char>(0x80 | (code & 0x3f));
    }
    else if (code <= 0xffff) {
      s += static_cast<char>(0xe0 | (code >> 12));
      s += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
      s += static_cast<char>(0x80 | (code & 0x3f));
    }
    else {
      s += static_cast<char>(0xf0 | (code >> 18));
      s += static_cast<char>(0x80 | ((code >> 12) & 0x3f));
      s += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
      s += static_cast<char>(0x80 | (code & 0x3f));
    }
  }
  return s;
}
126
/**
 * Copy len bytes of UTF-8 data into a std::string.
 *
 * The bytes are taken verbatim; no validation is performed, so the input is
 * assumed to be well formed.
 */
static std::string utf8(const unsigned char *data, uint32_t len)
{
  std::string copy;
  copy.assign(reinterpret_cast<const char *>(data), len);
  return copy;
}
132
133int i18n_set_language(const char *directory, const char *base, std::string language)
134{
135 std::string filename, contents;
136 const unsigned char *data;
137 size_t datalen;
138 size_t idx;
139 unsigned char chunk_type;
140 uint32_t chunk_size;
141 uint32_t num_messages = (uint32_t)-1;
142 uint32_t messages_idx = (uint32_t)-1;
143 uint32_t offsets_idx = (uint32_t)-1;
144 std::string translation, source, context;
145
146 i18n_log("i18n_set_language(" << directory << "," << base << ")");
147 if (!directory || !base)
148 return -1;
149
150 if (language.empty())
151 language = i18n_get_language();
152 filename = std::string(directory) + "/" + base + "_" + language + ".qm";
153 i18n_log("Loading translations for language " << language);
154
155 boost::system::error_code ignored_ec;
156 if (boost::filesystem::exists(filename, ignored_ec)) {
157 if (!epee::file_io_utils::load_file_to_string(filename, contents)) {
158 i18n_log("Failed to load translations file: " << filename);
159 return -1;
160 }
161 } else {
162 i18n_log("Translations file not found: " << filename);
163 filename = std::string(base) + "_" + language + ".qm";
164 if (!find_embedded_file(filename, contents)) {
165 i18n_log("Embedded translations file not found: " << filename);
166 const char *underscore = strchr(language.c_str(), '_');
167 if (underscore) {
168 std::string fallback_language = std::string(language, 0, underscore - language.c_str());
169 filename = std::string(directory) + "/" + base + "_" + fallback_language + ".qm";
170 i18n_log("Loading translations for language " << fallback_language);
171 if (boost::filesystem::exists(filename, ignored_ec)) {
172 if (!epee::file_io_utils::load_file_to_string(filename, contents)) {
173 i18n_log("Failed to load translations file: " << filename);
174 return -1;
175 }
176 } else {
177 i18n_log("Translations file not found: " << filename);
178 filename = std::string(base) + "_" + fallback_language + ".qm";
179 if (!find_embedded_file(filename, contents)) {
180 i18n_log("Embedded translations file not found: " << filename);
181 return -1;
182 }
183 }
184 } else {
185 return -1;
186 }
187 }
188 }
189
190 data = (const unsigned char*)contents.c_str();
191 datalen = contents.size();
192 idx = 0;
193 i18n_log("Translations file size: " << datalen);
194
195 /* Format of the QM file (AFAICT):
196 * 16 bytes magic
197 * chunk list: N instances of chunks:
198 * 1 byte: chunk type (0x42: offsets, 0x69: messages)
199 * 4 bytes: chunk length, big endian
200 * D bytes: "chunk length" bytes of data
201 *
202 * 0x42 chunk: N instances of subchunks:
203 * 1 byte: subchunk type
204 * 0x01: end, no data
205 * 0x02: unsupported
206 * 0x03: translation
207 * 4 bytes: string length, big endian
208 * N bytes: string data, UTF-16 (or UCS2-BE ?)
209 * 0x04: unsupported
210 * 0x05: obsolete, unsupported
211 * 0x06: source text
212 * 0x07: context
213 * 0x08: obsolete, unsupported
214 * other: unsupported
215 * 4 bytes: subchunk length, big endian - except for 0x01, which has none
216 * S bytes: "chunk length" bytes of data
217 * 0x69 chunk:
218 * string data indexed by the 0x42 chunk data
219 */
220 if (datalen < sizeof(qm_magic) || memcmp(data, qm_magic, sizeof(qm_magic))) {
221 i18n_log("Bad translations file format: " << filename);
222 return -1;
223 }
224 idx += sizeof(qm_magic);
225
226 while (idx < datalen) {
227 if (idx + 5 > datalen) {
228 i18n_log("Bad translations file format: " << filename);
229 return -1;
230 }
231 chunk_type = data[idx++];
232 chunk_size = be32(data+idx);
233 idx += 4;
234
235 i18n_log("Found " << chunk_type << " of " << chunk_size << " bytes");
236 if (chunk_size >= datalen || idx > datalen - chunk_size) {
237 i18n_log("Bad translations file format: " << filename);
238 return -1;
239 }
240
241 switch (chunk_type) {
242 case 0x42:
243 i18n_log("Found offsets at " << idx);
244 /* two 32 bit integers, and possible padding */
245 offsets_idx = idx;
246 num_messages = chunk_size / 8;
247 break;
248 case 0x69:
249 i18n_log("Found messages at " << idx);
250 messages_idx = idx;
251 break;
252 default:
253 i18n_log("Found unsupported chunk type: " << chunk_type);
254 break;
255 }
256
257 idx += chunk_size;
258 }
259
260 if (offsets_idx == (uint32_t)-1) {
261 i18n_log("No offsets chunk found");
262 return -1;
263 }
264 if (messages_idx == (uint32_t)-1) {
265 i18n_log("No messages chunk found");
266 return -1;
267 }
268
269 for (uint32_t m = 0; m < num_messages; ++m) {
270 be32(data+offsets_idx+m*8); // unused
271 idx = be32(data+offsets_idx+m*8+4);
272 idx += messages_idx;
273
274 if (idx > datalen || idx + 1 > datalen) {
275 i18n_log("Bad translations file format: " << filename);
276 return -1;
277 }
278
279 while (1) {
280 if (idx + 5 > datalen) {
281 i18n_log("Bad translations file format: " << filename);
282 return -1;
283 }
284 chunk_type = data[idx++];
285 chunk_size = 0;
286 if (chunk_type == 0x01) {
287 i18n_entries[context + std::string("",1) + source] = translation;
288 context = std::string();
289 source = std::string();
290 translation = std::string();
291 break;
292 }
293
294 chunk_size = be32(data+idx);
295 idx += 4;
296 i18n_log("Found " << chunk_type << " of " << chunk_size << " bytes");
297 if (chunk_size >= datalen || idx > datalen - chunk_size) {
298 i18n_log("Bad translations file format: " << filename);
299 return -1;
300 }
301 switch (chunk_type) {
302 case 0x03: // translation, UTF-16
303 translation = utf16(data+idx, chunk_size);
304 i18n_log("Found translation: " << translation);
305 break;
306 case 0x06: // source, UTF-8
307 source = utf8(data+idx, chunk_size);
308 i18n_log("Found source: " << source);
309 break;
310 case 0x07: // context, UTF-8
311 context = utf8(data+idx, chunk_size);
312 i18n_log("Found context: " << context);
313 break;
314 }
315 idx += chunk_size;
316 }
317 }
318
319 return 0;
320}
321
322/* The entries is constant by that time */
323const char *i18n_translate(const char *s, const std::string &context)
324{
325 const std::string key = context + std::string("", 1) + s;
326 std::map<std::string,std::string>::const_iterator i = i18n_entries.find(key);
327 if (i == i18n_entries.end())
328 return s;
329 return (*i).second.c_str();
330}
331
332
const char * key
#define MAX_LANGUAGE_SIZE
Definition i18n.cpp:43
#define i18n_log(x)
Definition i18n.cpp:52
std::string i18n_get_language()
Definition i18n.cpp:54
const char * i18n_translate(const char *s, const std::string &context)
Definition i18n.cpp:323
int i18n_set_language(const char *directory, const char *base, std::string language)
Definition i18n.cpp:133
bool load_file_to_string(const std::string &path_to_file, std::string &target_str, size_t max_size=1000000000)
const CharType(& source)[N]
Definition pointer.h:1147
unsigned int uint32_t
Definition stdint.h:126