libosmscout  1.1.1
utf8helper_charmap.h
Go to the documentation of this file.
1 /*
2  This source is part of the libosmscout library
3  Copyright (C) 2021 Jean-Luc Barriere
4 
5  This library is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Lesser General Public
7  License as published by the Free Software Foundation; either
8  version 2.1 of the License, or (at your option) any later version.
9 
10  This library is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with this library; if not, write to the Free Software
17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19 
20 #ifndef UTF8HELPER_CHARMAP_H
21 #define UTF8HELPER_CHARMAP_H
22 
23 #include <cstdint>
24 
25 namespace utf8helper
26 {
27 
28 constexpr int None = 0x00; // category bit flags, one distinct bit each; None = no category
29 constexpr int IsSpace = 0x01; // character is a space (breaking or non-breaking)
30 constexpr int IsBreaker = 0x02; // character is a breaker
31 constexpr int IsControl = 0x04; // character is a control sequence
32 constexpr int IsModifier = 0x08; // character is a modifier
33 constexpr int IsDiacritic = 0x10; // character is a diacritic
34 constexpr int IsPunctuation = 0x20; // character is punctuation
35 
36 using byte = uint8_t; // unsigned 8-bit value; presumably one byte of a UTF-8 sequence — confirm against users
37 using codepoint = uint32_t; // codepoint of a UTF-8 character: unsigned 32 bits
38 
39 constexpr codepoint NullCodepoint = 0; // the null codepoint (value 0): stands for "no character"
40 
41 struct character { // one character map entry: codepoint, case mappings, category flags, translation
42  const codepoint code; // the codepoint of this entry
43  const codepoint upper; // codepoint of the upper-case form, otherwise the codepoint itself
44  const codepoint lower; // codepoint of the lower-case form, otherwise the codepoint itself
45  const int category; // bit flags to match by category (presumably a combination of the Is* constants — confirm)
46  const char* translate; // translated UTF-8 string, at most 4 bytes (presumably NUL-terminated — confirm)
47 };
48 
49 /* character map for 1-byte sequences (7-bit US-ASCII) */
50 extern const character charmap_us7ascii[]; // defined elsewhere; size is not visible from this header
51 
52 /* character maps for 2-byte sequences, lead bytes C0-DF */
53 extern const character* pagemap_16[32]; // 32 slots = number of lead bytes in C0-DF; presumably indexed by (lead byte - 0xC0) — confirm
54 extern const character charmap_c2[];
55 extern const character charmap_c3[]; // latin-1
56 extern const character charmap_c4[]; // latin-1 (NOTE(review): UTF-8 lead byte C4 encodes U+0100-U+013F, i.e. Latin Extended-A — confirm this label)
57 extern const character charmap_c5[];
58 extern const character charmap_c6[];
59 extern const character charmap_c7[];
60 extern const character charmap_c8[];
61 extern const character charmap_c9[];
62 extern const character charmap_ca[];
63 extern const character charmap_cb[];
64 extern const character charmap_cc[];
65 extern const character charmap_cd[];
66 extern const character charmap_ce[];
67 extern const character charmap_cf[];
68 extern const character charmap_d0[];
69 extern const character charmap_d1[];
70 extern const character charmap_d2[];
71 extern const character charmap_d3[];
72 extern const character charmap_d4[];
73 extern const character charmap_d5[];
74 extern const character charmap_d6[];
75 
76 /* character maps for 3-byte sequences with lead byte E1 */
77 extern const character* pagemap_24_e1[]; // page table for lead byte E1; presumably indexed by the second byte — confirm
78 extern const character charmap_e1_82[]; // map names carry the first two bytes of the sequence (E1 82, ...)
79 extern const character charmap_e1_83[];
80 extern const character charmap_e1_b8[];
81 extern const character charmap_e1_b9[];
82 extern const character charmap_e1_ba[];
83 extern const character charmap_e1_bb[];
84 extern const character charmap_e1_bc[];
85 extern const character charmap_e1_bd[];
86 extern const character charmap_e1_be[];
87 extern const character charmap_e1_bf[];
88 
89 /* character maps for 3-byte sequences with lead byte E2 */
90 extern const character* pagemap_24_e2[]; // page table for lead byte E2; presumably indexed by the second byte — confirm
91 extern const character charmap_e2_80[];
92 extern const character charmap_e2_81[];
93 extern const character charmap_e2_82[];
94 extern const character charmap_e2_b4[];
95 
96 /* character maps for 4-byte sequences starting with bytes F0 90 */
97 extern const character* pagemap_32_f0_90[]; // page table for prefix F0 90; presumably indexed by the third byte — confirm
98 extern const character charmap_f0_90_92[];
99 extern const character charmap_f0_90_93[];
100 
101 /* character maps for 4-byte sequences starting with bytes F0 9E */
102 extern const character* pagemap_32_f0_9e[]; // page table for prefix F0 9E; presumably indexed by the third byte — confirm
103 extern const character charmap_f0_9e_a4[];
104 
105 }
106 
107 #endif // UTF8HELPER_CHARMAP_H
constexpr int IsPunctuation
Definition: utf8helper_charmap.h:34
const character * pagemap_16[32]
const character charmap_e1_bd[]
const character charmap_ce[]
const character charmap_e2_81[]
const character charmap_e2_b4[]
const character * pagemap_24_e1[]
const character charmap_c6[]
constexpr int IsModifier
Definition: utf8helper_charmap.h:32
const character charmap_f0_9e_a4[]
uint32_t codepoint
Definition: utf8helper_charmap.h:37
const character charmap_c8[]
constexpr int IsControl
Definition: utf8helper_charmap.h:31
constexpr int IsDiacritic
Definition: utf8helper_charmap.h:33
const codepoint code
Definition: utf8helper_charmap.h:42
const character charmap_cb[]
const character * pagemap_24_e2[]
const character charmap_d3[]
constexpr int None
Definition: utf8helper_charmap.h:28
constexpr int IsBreaker
Definition: utf8helper_charmap.h:30
const char * translate
Definition: utf8helper_charmap.h:46
const character charmap_e1_bf[]
const character charmap_cc[]
const character charmap_c5[]
const character charmap_e1_b9[]
const character charmap_e2_82[]
const character charmap_d6[]
const character charmap_e1_ba[]
constexpr codepoint NullCodepoint
Definition: utf8helper_charmap.h:39
constexpr int IsSpace
Definition: utf8helper_charmap.h:29
const codepoint upper
Definition: utf8helper_charmap.h:43
const character charmap_e1_82[]
const character charmap_e2_80[]
const character charmap_c4[]
const character charmap_e1_bc[]
const character charmap_d1[]
const character charmap_f0_90_93[]
const character charmap_d4[]
Definition: utf8helper.h:28
const character charmap_c2[]
const character charmap_cf[]
utf8helper::character
Definition: utf8helper_charmap.h:41
const character charmap_e1_b8[]
const int category
Definition: utf8helper_charmap.h:45
const character charmap_d2[]
const codepoint lower
Definition: utf8helper_charmap.h:44
uint8_t byte
Definition: utf8helper_charmap.h:36
const character charmap_e1_bb[]
const character charmap_e1_be[]
const character * pagemap_32_f0_9e[]
const character charmap_d5[]
const character charmap_c9[]
const character charmap_ca[]
const character charmap_c3[]
const character charmap_c7[]
const character charmap_us7ascii[]
const character charmap_e1_83[]
const character charmap_cd[]
const character charmap_f0_90_92[]
const character charmap_d0[]
const character * pagemap_32_f0_90[]