Monero
language_base.h
Go to the documentation of this file.
1 // Copyright (c) 2014-2022, The Monero Project
2 //
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without modification, are
6 // permitted provided that the following conditions are met:
7 //
8 // 1. Redistributions of source code must retain the above copyright notice, this list of
9 // conditions and the following disclaimer.
10 //
11 // 2. Redistributions in binary form must reproduce the above copyright notice, this list
12 // of conditions and the following disclaimer in the documentation and/or other
13 // materials provided with the distribution.
14 //
15 // 3. Neither the name of the copyright holder nor the names of its contributors may be
16 // used to endorse or promote products derived from this software without specific
17 // prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
20 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22 // THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26 // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
27 // THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
35 #ifndef LANGUAGE_BASE_H
36 #define LANGUAGE_BASE_H
37 
38 #include <vector>
39 #include <unordered_map>
40 #include <string>
41 #include <boost/algorithm/string.hpp>
42 #include "misc_log_ex.h"
43 #include "fnv1.h"
44 #include "common/utf8.h"
45 
50 namespace Language
51 {
/*!
 * \brief Returns a string made of (at most) the first count characters in s.
 *
 * A "character" here is one lead byte followed by every UTF-8 continuation
 * byte (bit pattern 10xxxxxx) that immediately follows it. The input is
 * assumed to be well-formed UTF-8; no validation is performed.
 *
 * \tparam T     String-like type: constructible from "", appendable with
 *               operator+=(char), and exposing data() and size().
 * \param  s     The string to take the prefix of.
 * \param  count Maximum number of characters (not bytes) to copy.
 * \return       The prefix; all of s if it holds fewer than count characters.
 */
template<typename T>
inline T utf8prefix(const T &s, size_t count)
{
  T prefix = "";
  const char *cursor = s.data();
  const char *const end = cursor + s.size();
  // Explicit bounds instead of post-decrementing unsigned counters, so
  // neither the character budget nor the byte budget ever wraps past zero.
  for (size_t taken = 0; taken < count && cursor != end; ++taken)
  {
    // Lead byte of the next character.
    prefix += *cursor++;
    // Continuation bytes belonging to the same character.
    while (cursor != end && ((*cursor) & 0xc0) == 0x80)
    {
      prefix += *cursor++;
    }
  }
  return prefix;
}
76 
77  struct WordHash
78  {
79  std::size_t operator()(const epee::wipeable_string &s) const
80  {
81  const epee::wipeable_string sc = tools::utf8canonical(s, [](wint_t c) -> wint_t { return std::towlower(c); });
82  return epee::fnv::FNV1a(sc.data(), sc.size());
83  }
84  };
85 
86  struct WordEqual
87  {
88  bool operator()(const epee::wipeable_string &s0, const epee::wipeable_string &s1) const
89  {
90  const epee::wipeable_string s0c = tools::utf8canonical(s0, [](wint_t c) -> wint_t { return std::towlower(c); });
91  const epee::wipeable_string s1c = tools::utf8canonical(s1, [](wint_t c) -> wint_t { return std::towlower(c); });
92  return s0c == s1c;
93  }
94  };
95 
// A base language class which all languages have to inherit from. It holds
// the word list plus two lookup maps (full word -> index and trimmed
// word -> index) keyed through WordHash / WordEqual.
// NOTE(review): this listing omits several source lines (105-106, 114-120,
// 136, 139, 147, 152, 159-160, 200, 208, 216); each gap is flagged below.
101  class Base
102  {
103  protected:
// Flag bits tested against the `flags` value in the map-population code.
// NOTE(review): the enumerators (listing lines 105-106) are missing here;
// ALLOW_SHORT_WORDS, used below, is presumably one of them - confirm.
104  enum {
107  };
108  enum {
// Required size of word_list: the map-population code throws if the size
// differs, and set_words() copies exactly this many entries.
109  NWORDS = 1626
110  };
// The words of this language, in index order.
111  std::vector<std::string> word_list;
// Maps each full word to its position in word_list.
112  std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual> word_map;
// Looked up with the trimmed form of each word to detect duplicate prefixes.
113  std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual> trimmed_word_map;
// NOTE(review): listing lines 114-120 are absent. The cross-reference index
// at the end of this page lists std::string language_name (114),
// std::string english_language_name (115), uint32_t unique_prefix_length
// (116) and void populate_maps(uint32_t flags=0) (120), so the body below is
// presumably populate_maps - confirm against the original header.
121  {
122  int ii;
123  std::vector<std::string>::const_iterator it;
// Refuse to build the maps from a list of the wrong length.
124  if (word_list.size () != NWORDS)
125  throw std::runtime_error("Wrong word list length for " + language_name);
// Walk the list once; ii tracks the index of the word *it.
126  for (it = word_list.begin(), ii = 0; it != word_list.end(); it++, ii++)
127  {
128  word_map[*it] = ii;
// size() counts bytes here, so this compares a byte length against the
// prefix length.
129  if ((*it).size() < unique_prefix_length)
130  {
// A short word is only logged when ALLOW_SHORT_WORDS is set; otherwise it
// is a hard error.
131  if (flags & ALLOW_SHORT_WORDS)
132  MINFO(language_name << " word '" << *it << "' is shorter than its prefix length, " << unique_prefix_length);
133  else
134  throw std::runtime_error("Too short word in " + language_name + " word list: " + *it);
135  }
// NOTE(review): listing line 136 is missing - presumably the declaration of
// `trimmed`, which is assigned and read below.
137  if (it->length() > unique_prefix_length)
138  {
// NOTE(review): listing line 139 is missing - presumably assigns the word's
// leading unique_prefix_length characters to `trimmed`; confirm.
140  }
141  else
142  {
// The word is no longer than the prefix length: use it unchanged.
143  trimmed = *it;
144  }
// A hit means an earlier word already produced the same trimmed form.
145  if (trimmed_word_map.find(trimmed) != trimmed_word_map.end())
146  {
// NOTE(review): listing line 147 is missing - presumably an `if` on a flag
// that downgrades the duplicate to a warning; the `else` below pairs with it.
148  MWARNING("Duplicate prefix in " << language_name << " word list: " << std::string(trimmed.data(), trimmed.size()));
149  else
150  throw std::runtime_error("Duplicate prefix in " + language_name + " word list: " + std::string(trimmed.data(), trimmed.size()));
151  }
// NOTE(review): listing line 152 is missing - presumably records
// trimmed -> ii in trimmed_word_map; not visible in this extract.
153  }
154  }
155  public:
// Copies `words` into word_list and stores prefix_length as
// unique_prefix_length.
// NOTE(review): listing lines 159-160 are missing - presumably the
// initialisers for language_name and english_language_name; confirm.
156  Base(const char *language_name, const char *english_language_name, const std::vector<std::string> &words, uint32_t prefix_length):
157  word_list(words),
158  unique_prefix_length(prefix_length),
161  {
162  }
// Virtual destructor with an empty body.
163  virtual ~Base()
164  {
165  }
// Replaces word_list with the first NWORDS C strings of `words`. The array
// length cannot be checked here, so the caller must supply at least NWORDS
// entries.
166  void set_words(const char * const words[])
167  {
168  word_list.resize(NWORDS);
169  for (size_t i = 0; i < NWORDS; ++i)
170  word_list[i] = words[i];
171  }
// Returns a const reference to the word list.
176  const std::vector<std::string>& get_word_list() const
177  {
178  return word_list;
179  }
// Returns a const reference to the full-word map.
184  const std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual>& get_word_map() const
185  {
186  return word_map;
187  }
// Returns a const reference to the trimmed-word map.
192  const std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual>& get_trimmed_word_map() const
193  {
194  return trimmed_word_map;
195  }
// NOTE(review): the signature line (listing line 200) is missing; the index
// at the end of this page lists `const std::string & get_language_name() const`.
201  {
202  return language_name;
203  }
// NOTE(review): the signature line (listing line 208) is missing; the index
// lists `const std::string & get_english_language_name() const`.
209  {
210  return english_language_name;
211  }
// NOTE(review): the signature line (listing line 216) is missing; the index
// lists `uint32_t get_unique_prefix_length() const`.
217  {
218  return unique_prefix_length;
219  }
220  };
221 }
222 
223 #endif
const uint32_t T[512]
Definition: groestl_tables.h:36
Base(const char *language_name, const char *english_language_name, const std::vector< std::string > &words, uint32_t prefix_length)
Definition: language_base.h:156
const std::unordered_map< epee::wipeable_string, uint32_t, WordHash, WordEqual > & get_trimmed_word_map() const
Returns a const reference to the trimmed word map.
Definition: language_base.h:192
size_t size() const noexcept
Definition: wipeable_string.h:63
int * count
Definition: gmock_stress_test.cc:176
std::string language_name
Definition: language_base.h:114
int i
Definition: pymoduletest.py:23
::std::string string
Definition: gtest-port.h:1097
std::vector< std::string > word_list
Definition: language_base.h:111
A base language class which all languages have to inherit from for polymorphism.
Definition: language_base.h:101
const char * s
Definition: minissdp.c:596
void set_words(const char *const words[])
Definition: language_base.h:166
static epee::wipeable_string trimmed(const char *s)
Definition: wipeable_string.cpp:143
const std::unordered_map< epee::wipeable_string, uint32_t, WordHash, WordEqual > & get_word_map() const
Returns a const reference to the word map.
Definition: language_base.h:184
Definition: language_base.h:105
static int flags
Definition: mdb_load.c:31
Definition: language_base.h:86
uint32_t get_unique_prefix_length() const
Returns the number of unique starting characters to be used for matching.
Definition: language_base.h:216
Mnemonic language related namespace.
unsigned int uint32_t
Definition: stdint.h:126
std::unordered_map< epee::wipeable_string, uint32_t, WordHash, WordEqual > trimmed_word_map
Definition: language_base.h:113
T utf8prefix(const T &s, size_t count)
Returns a string made of (at most) the first count characters in s. Assumes s is well-formed UTF-8; no check is made for this.
Definition: language_base.h:60
std::string english_language_name
Definition: language_base.h:115
const std::vector< std::string > & get_word_list() const
Returns a const reference to the word list.
Definition: language_base.h:176
uint64_t FNV1a(const char *ptr, size_t sz)
Definition: fnv1.h:37
virtual ~Base()
Definition: language_base.h:163
Definition: language_base.h:109
const std::string & get_english_language_name() const
Returns the name of the language in English.
Definition: language_base.h:208
T utf8canonical(const T &s, Transform t=[](wint_t c) ->wint_t { return c;})
Definition: utf8.h:39
Definition: language_base.h:77
std::unordered_map< epee::wipeable_string, uint32_t, WordHash, WordEqual > word_map
Definition: language_base.h:112
void populate_maps(uint32_t flags=0)
Populates the word maps after the list is ready.
Definition: language_base.h:120
Definition: language_base.h:106
Definition: wipeable_string.h:40
prefix
Definition: check.py:15
const std::string & get_language_name() const
Returns the name of the language.
Definition: language_base.h:200
bool operator()(const epee::wipeable_string &s0, const epee::wipeable_string &s1) const
Definition: language_base.h:88
const char * data() const noexcept
Definition: wipeable_string.h:61
uint32_t unique_prefix_length
Definition: language_base.h:116
c
Definition: pymoduletest.py:79
std::size_t operator()(const epee::wipeable_string &s) const
Definition: language_base.h:79