libosmscout  1.1.1
utf8helper.h
Go to the documentation of this file.
1 /*
2  This source is part of the libosmscout library
3  Copyright (C) 2021 Jean-Luc Barriere
4 
5  This library is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Lesser General Public
7  License as published by the Free Software Foundation; either
8  version 2.1 of the License, or (at your option) any later version.
9 
10  This library is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with this library; if not, write to the Free Software
17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19 
20 #ifndef UTF8HELPER_H
21 #define UTF8HELPER_H
22 
24 
25 #include <string>
26 #include <vector>
27 
28 namespace utf8helper
29 {
30 
/**
 * Whole-string convenience helpers.
 *
 * Each function takes the input by const reference and returns a new
 * std::string by value. Only the declarations are visible in this header;
 * the per-function notes below are inferred from the names and must be
 * confirmed against the implementation.
 *
 * NOTE(review): `text` is presumably expected to be UTF-8 encoded - confirm.
 */
// Presumably returns an upper-cased copy of text - confirm.
31 extern std::string UTF8ToUpper(const std::string& text);
32 
// Presumably returns a lower-cased copy of text - confirm.
33 extern std::string UTF8ToLower(const std::string& text);
34 
// Presumably returns a normalized copy of text; the normalization rules are
// not visible here - confirm.
35 extern std::string UTF8Normalize(const std::string& text);
36 
// Presumably returns a capitalized copy of text; whether this is per word or
// per string is not visible here - confirm.
37 extern std::string UTF8Capitalize(const std::string& text);
38 
// Presumably returns a transliterated copy of text; the target alphabet is
// not visible here - confirm.
39 extern std::string UTF8Transliterate(const std::string& text);
40 
/**
 * Pointer-to-function type implementing a per-character transformation.
 *
 * It takes two arguments, a `const character*` and an int `context`, and
 * returns a `codepoint`. The generated index for this header gives
 * `codepoint` as uint32_t (utf8helper_charmap.h:37).
 *
 * NOTE(review): `character` and `codepoint` are not declared by the includes
 * visible in this listing; they presumably come from utf8helper_charmap.h -
 * confirm the include is present in the real header.
 */
49 using Transform = codepoint (*)(const character*, int context);
50 
// Transformation functions defined elsewhere; each matches the Transform
// signature above. Their exact behaviour is not visible here; the names
// suggest they are the per-character counterparts of the UTF8* string
// helpers - confirm. TransformNop is used below as the default `func`
// argument of UTF8String::Insert and UTF8String::Append.
51 extern codepoint TransformNop(const character*, int);
52 extern codepoint TransformUpper(const character*, int);
53 extern codepoint TransformLower(const character*, int);
54 extern codepoint TransformCapitalize(const character*, int);
55 extern codepoint TransformNormalize(const character*, int);
56 extern codepoint TransformTransliterate(const character*, int);
57 
/**
 * Parse and transform a UTF-8 string (description taken from the generated
 * index for this header).
 *
 * NOTE(review): this listing skips original lines 71, 72, 76 and 77. The
 * generated index references members that do not appear here:
 * `utf8helper::Transform func` (line 71), `Exit (*run)(Parser*, byte)`
 * (line 72), `codepoint u` (line 76) and a `Parser(utf8helper::Transform func)`
 * constructor. Consult the real header before relying on this declaration.
 */
69 struct Parser {
   // Status codes: Done is 0, Continue is 1, Error is 2. The index shows a
   // `run` function pointer returning Exit, so these are presumably the
   // result of one parsing step - confirm.
70  enum Exit { Done = 0, Continue, Error };
   // Integer context value. Presumably forwarded as the `context` argument
   // of the Transform function - confirm.
73  int context;
   // Three-element byte buffer (`byte` is uint8_t per the index).
   // NOTE(review): presumably holds bytes of a multi-byte sequence that is
   // still being decoded - confirm.
74  byte b[3];
   // NOTE(review): presumably the size in bytes of the sequence currently
   // being decoded - confirm.
75  char u_size;
   // Defaulted copy constructor: member-wise copy.
78  Parser(const Parser&) = default;
   // Resets the parser; defined elsewhere, so the exact state restored is
   // not visible here.
79  void Reset();
80 };
81 
82 
/**
 * String stored as a sequence of codepoints (std::vector<codepoint>) and fed
 * through a Parser. Descriptions marked "per the index" are taken from the
 * generated index for this header; everything else is read from the
 * declarations below.
 *
 * NOTE(review): the index also documents `bool Insert(size_t pos, codepoint u, ...)`,
 * `UTF8String& Append(codepoint u, ...)` and
 * `UTF8String& Transform(utf8helper::Transform)`, none of which appear in
 * this listing. Consult the real header for the complete interface.
 */
83 class UTF8String {
   // Underlying container: one element per character.
84  using storage_type = std::vector<codepoint>;
85 public:
   // Constructs an empty UTF8String with the no-op transformation (per the index).
89  UTF8String();
90 
   // Constructs from a std::string. Explicit, so no implicit conversion from
   // std::string. NOTE(review): presumably parses `text` as UTF-8 without
   // transforming it - confirm.
94  explicit UTF8String(const std::string& text);
95 
100 
   // Constructs from a std::string and a transformation function.
   // NOTE(review): presumably applies the transformation to each parsed
   // character - confirm.
104  UTF8String(const std::string& text, utf8helper::Transform);
105 
   // Defaulted copy constructor: member-wise copy of parser, store and rawSize.
106  UTF8String(const UTF8String&) = default;
107 
   // True when the store holds no characters.
112  bool Empty() const { return store.empty(); }
113 
   // Number of characters in this string (element count of the store).
118  size_t Size() const { return store.size(); }
119 
   // Number of bytes of this string (per the index); returns rawSize.
124  size_t RawSize() const { return rawSize; }
125 
   // Attempts to allocate memory for at least sz characters (per the index);
   // forwards to store.reserve.
130  void Reserve(size_t sz) { store.reserve(sz); }
131 
   // Clears the contents of the string and makes it null (per the index).
135  void Clear();
136 
   // Pushes one more byte into this string. A character is added on each
   // valid sequence (per the index; the index text is truncated after that).
143  void WriteByte(char cc);
144 
   // Stream-style form of WriteByte; returns *this so calls can be chained.
151  UTF8String& operator<<(char cc) {
152  WriteByte(cc);
153  return *this;
154  }
155 
   // Returns the character at position pos. Uses std::vector::operator[],
   // which performs no bounds check, so pos must be less than Size().
161  codepoint operator[](size_t pos) const { return store[pos]; }
162 
   // Read-only access to the characters contained in this string.
167  const storage_type& Data() const { return store; }
168 
   // Removes n characters from position pos (per the index); n defaults to 1.
   // The meaning of the bool result is not visible here.
175  bool Remove(size_t pos, size_t n = 1);
176 
186 
   // Container overload of Insert; func defaults to TransformNop.
   // NOTE(review): presumably inserts `data` at position pos after applying
   // func to each character; the meaning of the size_t result is not visible
   // here - confirm.
194  size_t Insert(size_t pos, const storage_type& data, utf8helper::Transform func = utf8helper::TransformNop);
195 
203 
   // Container overload of Append; func defaults to TransformNop. Returns a
   // reference to a UTF8String (presumably *this, for chaining - confirm).
   // NOTE(review): presumably appends `data` after applying func - confirm.
210  UTF8String& Append(const storage_type& data, utf8helper::Transform func = utf8helper::TransformNop);
211 
218 
   // NOTE(review): presumably parses `text` and applies the given
   // transformation; whether the current contents are replaced or extended
   // is not visible here - confirm. This member is named Transform, which is
   // likely why the function-pointer type is spelled utf8helper::Transform
   // throughout the class.
225  UTF8String& Transform(const std::string& text, utf8helper::Transform);
226 
   // Returns a std::string with the data contained in this string (per the index).
231  std::string ToStdString() const;
232 
   // Returns a std::string with the substring of n characters starting at
   // position pos (per the index; the index text is truncated). The default
   // n = (-1) converts to the largest size_t value.
   // NOTE(review): presumably that means "to the end of the string" - confirm.
240  std::string Substr(size_t pos, size_t n = (-1)) const;
241 
242 private:
   // Parser state. Presumably driven by WriteByte - confirm.
243  Parser parser;
   // The characters of the string.
244  storage_type store;
   // Value reported by RawSize().
245  size_t rawSize;
246 };
247 
248 }
249 
250 #endif // UTF8HELPER_H
byte b[3]
Definition: utf8helper.h:74
std::string UTF8ToUpper(const std::string &text)
std::string Substr(size_t pos, size_t n=(-1)) const
Returns a std::string object with the substring of n characters of this string, starting at the posit...
Exit
Definition: utf8helper.h:70
std::string UTF8ToLower(const std::string &text)
bool Remove(size_t pos, size_t n=1)
Removes n characters starting at the position pos.
UTF8String()
Constructs an empty UTF8String object with the no-op transformation.
codepoint u
Definition: utf8helper.h:76
codepoint TransformTransliterate(const character *, int)
uint32_t codepoint
Definition: utf8helper_charmap.h:37
int context
Definition: utf8helper.h:73
bool Empty() const
Definition: utf8helper.h:112
Definition: utf8helper.h:83
Definition: utf8helper.h:70
Definition: utf8helper.h:70
codepoint TransformCapitalize(const character *, int)
std::string UTF8Normalize(const std::string &text)
std::string UTF8Transliterate(const std::string &text)
Parse and transform a UTF-8 string.
Definition: utf8helper.h:69
Parser(utf8helper::Transform func)
size_t RawSize() const
Returns the number of bytes of this string.
Definition: utf8helper.h:124
UTF8String & operator<<(char cc)
Definition: utf8helper.h:151
Exit(* run)(Parser *, byte)
Definition: utf8helper.h:72
utf8helper::Transform func
Definition: utf8helper.h:71
codepoint operator[](size_t pos) const
Returns the character at the position pos.
Definition: utf8helper.h:161
void Reserve(size_t sz)
Attempts to allocate memory for at least sz characters.
Definition: utf8helper.h:130
codepoint TransformNormalize(const character *, int)
codepoint TransformNop(const character *, int)
UTF8String & Transform(utf8helper::Transform)
Apply transformation to the string.
Definition: utf8helper.h:28
Definition: utf8helper.h:70
Definition: utf8helper_charmap.h:41
std::string UTF8Capitalize(const std::string &text)
uint8_t byte
Definition: utf8helper_charmap.h:36
bool Insert(size_t pos, codepoint u, utf8helper::Transform func=utf8helper::TransformNop)
Insert the character u at the position pos.
void WriteByte(char cc)
Push one more byte into this string. A character is added to this string on each valid sequence...
size_t Size() const
Returns the number of characters in this string.
Definition: utf8helper.h:118
codepoint TransformLower(const character *, int)
void Clear()
Clears the contents of the string and makes it null.
codepoint(*)(const character *, int context) Transform
A functor that implements the desired transformation of a character. It has 2 arguments:
Definition: utf8helper.h:49
const storage_type & Data() const
Returns the characters contained in this string.
Definition: utf8helper.h:167
UTF8String & Append(codepoint u, utf8helper::Transform func=utf8helper::TransformNop)
Add character to the string.
std::string ToStdString() const
Returns a std::string object with the data contained in this string.
char u_size
Definition: utf8helper.h:75
codepoint TransformUpper(const character *, int)