Electroneum
Loading...
Searching...
No Matches
Language Namespace Reference

Mnemonic language related namespace. More...

Classes

class  Chinese_Simplified
class  Dutch
class  English
class  EnglishOld
class  Esperanto
class  French
class  German
class  Italian
class  Japanese
struct  WordHash
struct  WordEqual
class  Base
 A base language class which all languages have to inherit from for polymorphism. More...
class  Lojban
class  Portuguese
class  Russian
class  Singleton
 Singleton helper class. More...
class  Spanish

Functions

template<typename T>
T utf8prefix (const T &s, size_t count)
 Returns a string made of (at most) the first count characters in s. Assumes s is well-formed UTF-8; no check is made for this.
template<typename T>
T utf8canonical (const T &s)

Detailed Description

Mnemonic language related namespace.

Function Documentation

◆ utf8canonical()

template<typename T>
T Language::utf8canonical ( const T & s)
inline

Definition at line 78 of file language_base.h.

/**
 * \brief Returns a lower-cased copy of the UTF-8 string s.
 *
 * Each UTF-8 sequence in s is decoded to a code point, passed through
 * std::towlower, and re-encoded as UTF-8 into the result.
 *
 * The re-encoded length is derived from the lowered code point itself, not
 * from the length of the input sequence. Case mapping may yield a code point
 * whose UTF-8 length differs from the input's, and an over-long input
 * sequence (e.g. C1 81 for 'A') is thereby emitted in its shortest form.
 *
 * \tparam T  String-like type providing size(), data(), construction from
 *            "" and from (const char *, length), and operator+=.
 * \param  s  The UTF-8 encoded string to canonicalize.
 * \return    The lower-cased string.
 * \throws std::runtime_error ("Invalid UTF-8") if a lead byte is not a valid
 *         UTF-8 lead byte, if a sequence is truncated, or if a trailing byte
 *         is not a continuation byte (10xxxxxx).
 */
template<typename T>
inline T utf8canonical(const T &s)
{
  T sc = "";
  size_t avail = s.size();
  const char *ptr = s.data();
  char wbuf[4];

  while (avail > 0)
  {
    // Work on unsigned bytes so the masks below are independent of the
    // signedness of plain char.
    const unsigned char lead = static_cast<unsigned char>(*ptr++);
    --avail;

    // Classify the lead byte: payload bits and number of trailing bytes.
    wint_t cp = 0;
    size_t trailing = 0;
    if ((lead & 0x80) == 0)
    {
      cp = lead;
    }
    else if ((lead & 0xe0) == 0xc0)
    {
      cp = lead & 0x1f;
      trailing = 1;
    }
    else if ((lead & 0xf0) == 0xe0)
    {
      cp = lead & 0x0f;
      trailing = 2;
    }
    else if ((lead & 0xf8) == 0xf0)
    {
      cp = lead & 0x07;
      trailing = 3;
    }
    else
      throw std::runtime_error("Invalid UTF-8");

    // A sequence cut short by the end of the string is malformed.
    if (avail < trailing)
      throw std::runtime_error("Invalid UTF-8");
    avail -= trailing;

    for (; trailing > 0; --trailing)
    {
      const unsigned char cont = static_cast<unsigned char>(*ptr++);
      // Every trailing byte must look like 10xxxxxx; anything else means the
      // lead byte announced more bytes than the sequence really has.
      if ((cont & 0xc0) != 0x80)
        throw std::runtime_error("Invalid UTF-8");
      cp = (cp << 6) | (cont & 0x3f);
    }

    cp = std::towlower(cp);

    // Encode using the length the lowered code point actually requires.
    int bytes = 0;
    if (cp < 0x80)
    {
      wbuf[0] = static_cast<char>(cp);
      bytes = 1;
    }
    else if (cp < 0x800)
    {
      wbuf[0] = static_cast<char>(0xc0 | (cp >> 6));
      wbuf[1] = static_cast<char>(0x80 | (cp & 0x3f));
      bytes = 2;
    }
    else if (cp < 0x10000)
    {
      wbuf[0] = static_cast<char>(0xe0 | (cp >> 12));
      wbuf[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
      wbuf[2] = static_cast<char>(0x80 | (cp & 0x3f));
      bytes = 3;
    }
    else if (cp < 0x200000)
    {
      wbuf[0] = static_cast<char>(0xf0 | (cp >> 18));
      wbuf[1] = static_cast<char>(0x80 | ((cp >> 12) & 0x3f));
      wbuf[2] = static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
      wbuf[3] = static_cast<char>(0x80 | (cp & 0x3f));
      bytes = 4;
    }
    else
      throw std::runtime_error("Invalid UTF-8");

    sc += T(wbuf, bytes);
  }
  return sc;
}
#define T(x)
Here is the caller graph for this function:

◆ utf8prefix()

template<typename T>
T Language::utf8prefix ( const T & s,
size_t count )
inline

Returns a string made of (at most) the first count characters in s. Assumes s is well-formed UTF-8; no check is made for this.

Parameters
s: The string from which to return the first count characters.
count: How many characters to return.
Returns
A string consisting of the first count characters in s.

Definition at line 60 of file language_base.h.

/**
 * \brief Returns the leading characters of a UTF-8 string.
 *
 * Copies at most count characters from the start of s. A character is taken
 * to be one byte followed by every directly following continuation byte
 * (10xxxxxx). The input is not validated.
 *
 * \tparam T      String-like type providing size(), data(), construction
 *                from "" and operator+= for a single char.
 * \param  s      The string from which to take the prefix.
 * \param  count  Maximum number of characters to copy.
 * \return        A string holding the first count characters of s, or all of
 *                s if it has fewer characters.
 */
template<typename T>
inline T utf8prefix(const T &s, size_t count)
{
  T prefix = "";
  const char *cursor = s.data();
  const char *const end = cursor + s.size();

  for (; count != 0 && cursor != end; --count)
  {
    // First byte of the character is copied unconditionally.
    prefix += *cursor++;

    // Then take every continuation byte that belongs to it.
    for (; cursor != end && (*cursor & 0xc0) == 0x80; ++cursor)
      prefix += *cursor;
  }
  return prefix;
}
Here is the caller graph for this function: