Electroneum
i18n.cpp
Go to the documentation of this file.
1 // Copyrights(c) 2017-2021, The Electroneum Project
2 // Copyrights(c) 2014-2019, The Monero Project
3 //
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without modification, are
7 // permitted provided that the following conditions are met:
8 //
9 // 1. Redistributions of source code must retain the above copyright notice, this list of
10 // conditions and the following disclaimer.
11 //
12 // 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 // of conditions and the following disclaimer in the documentation and/or other
14 // materials provided with the distribution.
15 //
16 // 3. Neither the name of the copyright holder nor the names of its contributors may be
17 // used to endorse or promote products derived from this software without specific
18 // prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
21 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 // THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27 // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
28 // THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include <stdlib.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <string>
34 #include <map>
35 #include "file_io_utils.h"
36 #include "common/i18n.h"
37 #include "translation_files.h"
38 
39 #undef ELECTRONEUM_DEFAULT_LOG_CATEGORY
40 #define ELECTRONEUM_DEFAULT_LOG_CATEGORY "i18n"
41 
42 #define MAX_LANGUAGE_SIZE 16
43 
44 static const unsigned char qm_magic[16] = {0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95, 0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd};
45 
46 static std::map<std::string,std::string> i18n_entries;
47 
48 /* Logging isn't initialized yet when this is run */
49 /* add std::flush, because std::endl doesn't seem to flush, contrary to expected */
50 // #define i18n_log(x) do { std::cout << __FILE__ << ":" << __LINE__ << ": " << x << std::endl; std::cout << std::flush; } while(0)
51 #define i18n_log(x) ((void)0)
52 
54 {
55  const char *e;
56 
57  e = getenv("LANG");
58  i18n_log("LANG=" << e);
59  if (!e || !*e) {
60  e = getenv("LC_ALL");
61  i18n_log("LC_ALL=" << e);
62  }
63  if (!e || !*e)
64  e = "en";
65 
66  std::string language = e;
67  language = language.substr(0, language.find("."));
68  language = language.substr(0, language.find("@"));
69 
70  // check valid values
71  for (char c: language)
72  if (!strchr("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-.@", c))
73  return "en";
74 
75  std::transform(language.begin(), language.end(), language.begin(), tolower);
76  if (language.size() > MAX_LANGUAGE_SIZE)
77  {
78  i18n_log("Language from LANG/LC_ALL suspiciously long, defaulting to en");
79  return "en";
80  }
81  return language;
82 }
83 
/* Reads a 32 bit big endian unsigned integer from the 4 bytes at data.
 * The caller must ensure that 4 bytes are readable.
 */
static uint32_t be32(const unsigned char *data)
{
  /* widen to uint32_t before shifting: an unsigned char promotes to a signed
   * int, and shifting a byte >= 0x80 left by 24 would hit the sign bit */
  return (static_cast<uint32_t>(data[0]) << 24) |
         (static_cast<uint32_t>(data[1]) << 16) |
         (static_cast<uint32_t>(data[2]) << 8) |
          static_cast<uint32_t>(data[3]);
}
88 
/* Converts big endian UTF-16 data to a UTF-8 encoded std::string.
 *
 * data: input bytes
 * len:  number of input bytes; a trailing odd byte is ignored
 *
 * A high surrogate (0xd800-0xdbff) directly followed by a low surrogate
 * (0xdc00-0xdfff) is combined into one code point above 0xffff. Any other
 * 16 bit unit, including an unpaired surrogate, is encoded on its own.
 */
static std::string utf16(const unsigned char *data, uint32_t len)
{
  std::string s;
  while (len >= 2) {
    uint32_t code = (static_cast<uint32_t>(data[0]) << 8) | data[1];
    data += 2;
    len -= 2;
    /* only a high surrogate may start a pair */
    if (code >= 0xd800 && code <= 0xdbff && len >= 2) {
      const uint32_t next = (static_cast<uint32_t>(data[0]) << 8) | data[1];
      if (next >= 0xdc00 && next <= 0xdfff) {
        /* 10 bits from each half, offset by 0x10000 */
        code = 0x10000 + ((code - 0xd800) << 10) + (next - 0xdc00);
        data += 2;
        len -= 2;
      }
    }
    if (code <= 0x7f) {
      s += static_cast<char>(code);
    }
    else if (code <= 0x7ff) {
      s += static_cast<char>(0xc0 | (code >> 6));
      s += static_cast<char>(0x80 | (code & 0x3f));
    }
    else if (code <= 0xffff) {
      s += static_cast<char>(0xe0 | (code >> 12));
      s += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
      s += static_cast<char>(0x80 | (code & 0x3f));
    }
    else {
      s += static_cast<char>(0xf0 | (code >> 18));
      s += static_cast<char>(0x80 | ((code >> 12) & 0x3f));
      s += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
      s += static_cast<char>(0x80 | (code & 0x3f));
    }
  }
  return s;
}
125 
/* Wraps len bytes starting at data into a std::string, unchanged.
 * The bytes are taken to be well formed UTF-8 already; nothing is validated.
 */
static std::string utf8(const unsigned char *data, uint32_t len)
{
  const char *first = reinterpret_cast<const char *>(data);
  return std::string(first, first + len);
}
131 
132 int i18n_set_language(const char *directory, const char *base, std::string language)
133 {
134  std::string filename, contents;
135  const unsigned char *data;
136  size_t datalen;
137  size_t idx;
138  unsigned char chunk_type;
139  uint32_t chunk_size;
140  uint32_t num_messages = (uint32_t)-1;
141  uint32_t messages_idx = (uint32_t)-1;
142  uint32_t offsets_idx = (uint32_t)-1;
143  std::string translation, source, context;
144 
145  i18n_log("i18n_set_language(" << directory << "," << base << ")");
146  if (!directory || !base)
147  return -1;
148 
149  if (language.empty())
150  language = i18n_get_language();
151  filename = std::string(directory) + "/" + base + "_" + language + ".qm";
152  i18n_log("Loading translations for language " << language);
153 
154  boost::system::error_code ignored_ec;
155  if (boost::filesystem::exists(filename, ignored_ec)) {
156  if (!epee::file_io_utils::load_file_to_string(filename, contents)) {
157  i18n_log("Failed to load translations file: " << filename);
158  return -1;
159  }
160  } else {
161  i18n_log("Translations file not found: " << filename);
162  filename = std::string(base) + "_" + language + ".qm";
163  if (!find_embedded_file(filename, contents)) {
164  i18n_log("Embedded translations file not found: " << filename);
165  const char *underscore = strchr(language.c_str(), '_');
166  if (underscore) {
167  std::string fallback_language = std::string(language, 0, underscore - language.c_str());
168  filename = std::string(directory) + "/" + base + "_" + fallback_language + ".qm";
169  i18n_log("Loading translations for language " << fallback_language);
170  if (boost::filesystem::exists(filename, ignored_ec)) {
171  if (!epee::file_io_utils::load_file_to_string(filename, contents)) {
172  i18n_log("Failed to load translations file: " << filename);
173  return -1;
174  }
175  } else {
176  i18n_log("Translations file not found: " << filename);
177  filename = std::string(base) + "_" + fallback_language + ".qm";
178  if (!find_embedded_file(filename, contents)) {
179  i18n_log("Embedded translations file not found: " << filename);
180  return -1;
181  }
182  }
183  } else {
184  return -1;
185  }
186  }
187  }
188 
189  data = (const unsigned char*)contents.c_str();
190  datalen = contents.size();
191  idx = 0;
192  i18n_log("Translations file size: " << datalen);
193 
194  /* Format of the QM file (AFAICT):
195  * 16 bytes magic
196  * chunk list: N instances of chunks:
197  * 1 byte: chunk type (0x42: offsets, 0x69: messages)
198  * 4 bytes: chunk length, big endian
199  * D bytes: "chunk length" bytes of data
200  *
201  * 0x42 chunk: N instances of subchunks:
202  * 1 byte: subchunk type
203  * 0x01: end, no data
204  * 0x02: unsupported
205  * 0x03: translation
206  * 4 bytes: string length, big endian
207  * N bytes: string data, UTF-16 (or UCS2-BE ?)
208  * 0x04: unsupported
209  * 0x05: obsolete, unsupported
210  * 0x06: source text
211  * 0x07: context
212  * 0x08: obsolete, unsupported
213  * other: unsupported
214  * 4 bytes: subchunk length, big endian - except for 0x01, which has none
215  * S bytes: "chunk length" bytes of data
216  * 0x69 chunk:
217  * string data indexed by the 0x42 chunk data
218  */
219  if (datalen < sizeof(qm_magic) || memcmp(data, qm_magic, sizeof(qm_magic))) {
220  i18n_log("Bad translations file format: " << filename);
221  return -1;
222  }
223  idx += sizeof(qm_magic);
224 
225  while (idx < datalen) {
226  if (idx + 5 > datalen) {
227  i18n_log("Bad translations file format: " << filename);
228  return -1;
229  }
230  chunk_type = data[idx++];
231  chunk_size = be32(data+idx);
232  idx += 4;
233 
234  i18n_log("Found " << chunk_type << " of " << chunk_size << " bytes");
235  if (chunk_size >= datalen || idx > datalen - chunk_size) {
236  i18n_log("Bad translations file format: " << filename);
237  return -1;
238  }
239 
240  switch (chunk_type) {
241  case 0x42:
242  i18n_log("Found offsets at " << idx);
243  /* two 32 bit integers, and possible padding */
244  offsets_idx = idx;
245  num_messages = chunk_size / 8;
246  break;
247  case 0x69:
248  i18n_log("Found messages at " << idx);
249  messages_idx = idx;
250  break;
251  default:
252  i18n_log("Found unsupported chunk type: " << chunk_type);
253  break;
254  }
255 
256  idx += chunk_size;
257  }
258 
259  if (offsets_idx == (uint32_t)-1) {
260  i18n_log("No offsets chunk found");
261  return -1;
262  }
263  if (messages_idx == (uint32_t)-1) {
264  i18n_log("No messages chunk found");
265  return -1;
266  }
267 
268  for (uint32_t m = 0; m < num_messages; ++m) {
269  be32(data+offsets_idx+m*8); // unused
270  idx = be32(data+offsets_idx+m*8+4);
271  idx += messages_idx;
272 
273  if (idx > datalen || idx + 1 > datalen) {
274  i18n_log("Bad translations file format: " << filename);
275  return -1;
276  }
277 
278  while (1) {
279  if (idx + 5 > datalen) {
280  i18n_log("Bad translations file format: " << filename);
281  return -1;
282  }
283  chunk_type = data[idx++];
284  chunk_size = 0;
285  if (chunk_type == 0x01) {
286  i18n_entries[context + std::string("",1) + source] = translation;
287  context = std::string();
288  source = std::string();
289  translation = std::string();
290  break;
291  }
292 
293  chunk_size = be32(data+idx);
294  idx += 4;
295  i18n_log("Found " << chunk_type << " of " << chunk_size << " bytes");
296  if (chunk_size >= datalen || idx > datalen - chunk_size) {
297  i18n_log("Bad translations file format: " << filename);
298  return -1;
299  }
300  switch (chunk_type) {
301  case 0x03: // translation, UTF-16
302  translation = utf16(data+idx, chunk_size);
303  i18n_log("Found translation: " << translation);
304  break;
305  case 0x06: // source, UTF-8
306  source = utf8(data+idx, chunk_size);
307  i18n_log("Found source: " << source);
308  break;
309  case 0x07: // context, UTF-8
310  context = utf8(data+idx, chunk_size);
311  i18n_log("Found context: " << context);
312  break;
313  }
314  idx += chunk_size;
315  }
316  }
317 
318  return 0;
319 }
320 
321 /* The entries is constant by that time */
322 const char *i18n_translate(const char *s, const std::string &context)
323 {
324  const std::string key = context + std::string("", 1) + s;
325  std::map<std::string,std::string>::const_iterator i = i18n_entries.find(key);
326  if (i == i18n_entries.end())
327  return s;
328  return (*i).second.c_str();
329 }
330 
331 
const char * key
Definition: hmac_keccak.cpp:39
#define MAX_LANGUAGE_SIZE
Definition: i18n.cpp:42
const char * i18n_translate(const char *s, const std::string &context)
Definition: i18n.cpp:322
#define i18n_log(x)
Definition: i18n.cpp:51
std::string i18n_get_language()
Definition: i18n.cpp:53
int i18n_set_language(const char *directory, const char *base, std::string language)
Definition: i18n.cpp:132
bool load_file_to_string(const std::string &path_to_file, std::string &target_str, size_t max_size=1000000000)
std::unique_ptr< void, terminate > context
Unique ZMQ context handle, calls zmq_term on destruction.
Definition: zmq.h:98
::std::string string
Definition: gtest-port.h:1097
const CharType(& source)[N]
Definition: pointer.h:1147
unsigned int uint32_t
Definition: stdint.h:126