Monero
Loading...
Searching...
No Matches
language_base.h
Go to the documentation of this file.
1// Copyright (c) 2014-2022, The Monero Project
2//
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without modification, are
6// permitted provided that the following conditions are met:
7//
8// 1. Redistributions of source code must retain the above copyright notice, this list of
9// conditions and the following disclaimer.
10//
11// 2. Redistributions in binary form must reproduce the above copyright notice, this list
12// of conditions and the following disclaimer in the documentation and/or other
13// materials provided with the distribution.
14//
15// 3. Neither the name of the copyright holder nor the names of its contributors may be
16// used to endorse or promote products derived from this software without specific
17// prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
20// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
27// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
34
35#ifndef LANGUAGE_BASE_H
36#define LANGUAGE_BASE_H
37
38#include <vector>
39#include <unordered_map>
40#include <string>
41#include <boost/algorithm/string.hpp>
42#include "misc_log_ex.h"
43#include "fnv1.h"
44#include "common/utf8.h"
45
50namespace Language
51{
/*!
 * \brief Returns a string made of (at most) the first \p count UTF-8
 *        characters of \p s.
 *
 * A "character" here is one byte followed by every directly following
 * continuation byte (bit pattern 10xxxxxx). The input is not validated:
 * it is assumed to be well-formed UTF-8.
 *
 * \tparam T      String-like type exposing size() and data(), constructible
 *                from a string literal and supporting operator+=(char).
 * \param  s      The source string.
 * \param  count  Maximum number of characters to copy.
 * \return        The prefix; the whole of \p s when it holds fewer than
 *                \p count characters.
 */
template<typename T>
inline T utf8prefix(const T &s, size_t count)
{
  T result = "";
  const char *cursor = s.data();
  const char *const end = cursor + s.size();
  for (; count != 0 && cursor != end; --count)
  {
    // First byte of the character is copied unconditionally.
    result += *cursor++;
    // Then drag along every continuation byte (top two bits are "10"),
    // never reading past the end of the input.
    while (cursor != end && ((*cursor) & 0xc0) == 0x80)
      result += *cursor++;
  }
  return result;
}
76
77 struct WordHash
78 {
79 std::size_t operator()(const epee::wipeable_string &s) const
80 {
81 const epee::wipeable_string sc = tools::utf8canonical(s, [](wint_t c) -> wint_t { return std::towlower(c); });
82 return epee::fnv::FNV1a(sc.data(), sc.size());
83 }
84 };
85
86 struct WordEqual
87 {
89 {
90 const epee::wipeable_string s0c = tools::utf8canonical(s0, [](wint_t c) -> wint_t { return std::towlower(c); });
91 const epee::wipeable_string s1c = tools::utf8canonical(s1, [](wint_t c) -> wint_t { return std::towlower(c); });
92 return s0c == s1c;
93 }
94 };
95
// Holds one mnemonic word list together with the lookup maps built from it
// (word -> index, and trimmed word -> index) and the language name.
101 class Base
102 {
103 protected:
// NOTE(review): the enumerators of this enum are not visible in this listing.
// The symbol index attributes ALLOW_SHORT_WORDS and ALLOW_DUPLICATE_PREFIXES
// to it - confirm their names and values against the real header.
104 enum {
107 };
108 enum {
// Exact number of entries a word list must have; populate_maps() throws
// when word_list.size() differs, and set_words() copies exactly this many.
109 NWORDS = 1626
110 };
// The words of this language, in list order.
111 std::vector<std::string> word_list;
// Maps each word to its index in word_list. Keys are hashed and compared
// through WordHash / WordEqual rather than byte-wise.
112 std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual> word_map;
// Keyed by the trimmed form of each word (see the duplicate check below).
// NOTE(review): the statement that inserts into this map is not visible in
// this listing - presumably it stores the same index as word_map; confirm.
113 std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual> trimmed_word_map;
// Name of the language, used in the log and exception messages below and
// returned by get_language_name().
114 std::string language_name;
// NOTE(review): the header of the function owning the following body is not
// visible in this listing. The symbol index attributes it to
// `void populate_maps(uint32_t flags=0)` ("Populates the word maps after the
// list is ready") - confirm.
// Throws std::runtime_error when the list length is not NWORDS, and on the
// too-short-word / duplicate-prefix paths that are not downgraded to a log
// message.
121 {
122 int ii;
123 std::vector<std::string>::const_iterator it;
124 if (word_list.size () != NWORDS)
125 throw std::runtime_error("Wrong word list length for " + language_name);
// Walk the list with `ii` tracking the index of the current word.
126 for (it = word_list.begin(), ii = 0; it != word_list.end(); it++, ii++)
127 {
128 word_map[*it] = ii;
// Compares the word's size in bytes (std::string::size) against the
// prefix length.
129 if ((*it).size() < unique_prefix_length)
130 {
// NOTE(review): the condition choosing between logging and throwing is not
// visible here - presumably a test of the ALLOW_SHORT_WORDS flag; confirm.
132 MINFO(language_name << " word '" << *it << "' is shorter than its prefix length, " << unique_prefix_length);
133 else
134 throw std::runtime_error("Too short word in " + language_name + " word list: " + *it);
135 }
// NOTE(review): the declaration of `trimmed` and the body of the branch
// below are not visible here - presumably the word is shortened to
// unique_prefix_length characters (utf8prefix); confirm.
137 if (it->length() > unique_prefix_length)
138 {
140 }
141 else
142 {
// Words no longer than the prefix length are used unchanged.
143 trimmed = *it;
144 }
// A trimmed form that is already present means two words share a prefix.
145 if (trimmed_word_map.find(trimmed) != trimmed_word_map.end())
146 {
// NOTE(review): the condition choosing between warning and throwing is not
// visible here - presumably a test of the ALLOW_DUPLICATE_PREFIXES flag;
// confirm.
148 MWARNING("Duplicate prefix in " << language_name << " word list: " << std::string(trimmed.data(), trimmed.size()));
149 else
150 throw std::runtime_error("Duplicate prefix in " + language_name + " word list: " + std::string(trimmed.data(), trimmed.size()));
151 }
153 }
154 }
155 public:
// Copies `words` into word_list and stores `prefix_length` as
// unique_prefix_length. The maps are not filled here.
// NOTE(review): the initializers for language_name and english_language_name
// are not visible in this listing - confirm.
156 Base(const char *language_name, const char *english_language_name, const std::vector<std::string> &words, uint32_t prefix_length):
157 word_list(words),
158 unique_prefix_length(prefix_length),
161 {
162 }
// Virtual destructor with an empty body.
163 virtual ~Base()
164 {
165 }
// Replaces the word list with the first NWORDS entries of `words`.
// Reads words[0] .. words[NWORDS - 1] unconditionally, so the caller must
// pass an array holding at least NWORDS C strings.
166 void set_words(const char * const words[])
167 {
168 word_list.resize(NWORDS);
169 for (size_t i = 0; i < NWORDS; ++i)
170 word_list[i] = words[i];
171 }
172
// Returns a const reference to the word list.
176 const std::vector<std::string>& get_word_list() const
177 {
178 return word_list;
179 }
180
// Returns a const reference to the word -> index map.
184 const std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual>& get_word_map() const
185 {
186 return word_map;
187 }
188
// Returns a const reference to the trimmed-word map.
192 const std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual>& get_trimmed_word_map() const
193 {
194 return trimmed_word_map;
195 }
196
// Returns a const reference to the language name.
200 const std::string &get_language_name() const
201 {
202 return language_name;
203 }
204
// NOTE(review): the body of this getter is not visible in this listing; the
// symbol index describes it as returning the name of the language in
// English - confirm.
208 const std::string &get_english_language_name() const
209 {
211 }
212
// NOTE(review): the symbol index also lists
// `uint32_t get_unique_prefix_length() const` as defined at this point; it
// is not visible in this listing.
220 };
221}
222
223#endif
#define s(x, c)
Definition aesb.c:47
virtual ~Base()
Definition language_base.h:163
@ ALLOW_DUPLICATE_PREFIXES
Definition language_base.h:106
@ ALLOW_SHORT_WORDS
Definition language_base.h:105
const std::vector< std::string > & get_word_list() const
Returns a const reference to the word list.
Definition language_base.h:176
std::unordered_map< epee::wipeable_string, uint32_t, WordHash, WordEqual > word_map
Definition language_base.h:112
const std::string & get_language_name() const
Returns the name of the language.
Definition language_base.h:200
std::string language_name
Definition language_base.h:114
const std::unordered_map< epee::wipeable_string, uint32_t, WordHash, WordEqual > & get_trimmed_word_map() const
Returns a const reference to the trimmed word map.
Definition language_base.h:192
const std::unordered_map< epee::wipeable_string, uint32_t, WordHash, WordEqual > & get_word_map() const
Returns a const reference to the word map.
Definition language_base.h:184
void set_words(const char *const words[])
Definition language_base.h:166
std::vector< std::string > word_list
Definition language_base.h:111
std::string english_language_name
Definition language_base.h:115
Base(const char *language_name, const char *english_language_name, const std::vector< std::string > &words, uint32_t prefix_length)
Definition language_base.h:156
const std::string & get_english_language_name() const
Returns the name of the language in English.
Definition language_base.h:208
void populate_maps(uint32_t flags=0)
Populates the word maps after the list is ready.
Definition language_base.h:120
std::unordered_map< epee::wipeable_string, uint32_t, WordHash, WordEqual > trimmed_word_map
Definition language_base.h:113
uint32_t unique_prefix_length
Definition language_base.h:116
@ NWORDS
Definition language_base.h:109
uint32_t get_unique_prefix_length() const
Returns the number of unique starting characters to be used for matching.
Definition language_base.h:216
Definition wipeable_string.h:41
const char * data() const noexcept
Definition wipeable_string.h:61
size_t size() const noexcept
Definition wipeable_string.h:63
static int flags
Definition mdb_load.c:31
Mnemonic language related namespace.
T utf8prefix(const T &s, size_t count)
Returns a string made of (at most) the first count characters in s. Assumes well formedness....
Definition language_base.h:60
uint64_t FNV1a(const char *ptr, size_t sz)
Definition fnv1.h:37
T utf8canonical(const T &s, Transform t=[](wint_t c) ->wint_t { return c;})
Definition utf8.h:39
unsigned int uint32_t
Definition stdint.h:126
Definition language_base.h:87
bool operator()(const epee::wipeable_string &s0, const epee::wipeable_string &s1) const
Definition language_base.h:88
Definition language_base.h:78
std::size_t operator()(const epee::wipeable_string &s) const
Definition language_base.h:79
static epee::wipeable_string trimmed(const char *s)
Definition wipeable_string.cpp:143
#define T(x)