libosmscout 1.1.1
Loading...
Searching...
No Matches
utf8helper_charmap.h
Go to the documentation of this file.
1/*
2 This source is part of the libosmscout library
3 Copyright (C) 2021 Jean-Luc Barriere
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18*/
19
20#ifndef UTF8HELPER_CHARMAP_H
21#define UTF8HELPER_CHARMAP_H
22
23#include <cstdint>
24
25namespace utf8helper
26{
27
28constexpr int None = 0x00; // no category (no flag bit set)
29constexpr int IsSpace = 0x01; // flag bit: character is a space (breaking or non-breaking)
30constexpr int IsBreaker = 0x02; // flag bit: character is a breaker
31constexpr int IsControl = 0x04; // flag bit: character is a control sequence
32constexpr int IsModifier = 0x08; // flag bit: character is a modifier
33constexpr int IsDiacritic = 0x10; // flag bit: character is a diacritic
34constexpr int IsPunctuation = 0x20; // flag bit: character is punctuation
35
36using byte = uint8_t; // unsigned 8-bit value
37using codepoint = uint32_t; // UTF8 codepoint: unsigned 32 bits
38
39constexpr codepoint NullCodepoint = 0; // the null codepoint (no character); its value is 0
40
41struct character { // one entry of a character map
42 const codepoint code; // the codepoint of this entry
43 const codepoint upper; // codepoint of the upper-case form, or the codepoint itself when there is none
44 const codepoint lower; // codepoint of the lower-case form, or the codepoint itself when there is none
45 const int category; // category bit flags (presumably a combination of the Is* constants; None when unset)
46 const char* translate; // translated UTF-8 string, up to 4 bytes
47};
48
49/* character map for 1-byte sequences (US 7-bit ASCII); defined in another translation unit */
50extern const character charmap_us7ascii[];
51
52/* character maps for 2-byte sequences, lead bytes C0-DF; one charmap_XX per declared lead byte XX */
53extern const character* pagemap_16[32]; // 32 slots, one per lead byte C0..DF; presumably indexed by (lead byte - 0xC0) -- confirm against the definition
54extern const character charmap_c2[];
55extern const character charmap_c3[]; // latin-1
56extern const character charmap_c4[]; // latin-1 -- NOTE(review): lead byte C4 encodes U+0100..U+013F (Latin Extended-A); confirm this label
57extern const character charmap_c5[];
58extern const character charmap_c6[];
59extern const character charmap_c7[];
60extern const character charmap_c8[];
61extern const character charmap_c9[];
62extern const character charmap_ca[];
63extern const character charmap_cb[];
64extern const character charmap_cc[];
65extern const character charmap_cd[];
66extern const character charmap_ce[];
67extern const character charmap_cf[];
68extern const character charmap_d0[];
69extern const character charmap_d1[];
70extern const character charmap_d2[];
71extern const character charmap_d3[];
72extern const character charmap_d4[];
73extern const character charmap_d5[];
74extern const character charmap_d6[]; // last 2-byte map declared here; no charmap for lead bytes C0, C1 or D7..DF in this header
75
76/* character maps for 3-byte sequences with lead byte E1; charmap_e1_XX is named after the second byte XX */
77extern const character* pagemap_24_e1[]; // page table of charmap pointers; presumably selected by the second byte -- confirm against the definition
78extern const character charmap_e1_82[];
79extern const character charmap_e1_83[];
80extern const character charmap_e1_b8[];
81extern const character charmap_e1_b9[];
82extern const character charmap_e1_ba[];
83extern const character charmap_e1_bb[];
84extern const character charmap_e1_bc[];
85extern const character charmap_e1_bd[];
86extern const character charmap_e1_be[];
87extern const character charmap_e1_bf[];
88
89/* character maps for 3-byte sequences with lead byte E2; charmap_e2_XX is named after the second byte XX */
90extern const character* pagemap_24_e2[]; // page table of charmap pointers; presumably selected by the second byte -- confirm against the definition
91extern const character charmap_e2_80[];
92extern const character charmap_e2_81[];
93extern const character charmap_e2_82[];
94extern const character charmap_e2_b4[];
95
96/* character maps for 4-byte sequences starting with bytes F0 90; charmap_f0_90_XX is named after the third byte XX */
97extern const character* pagemap_32_f0_90[]; // page table of charmap pointers; presumably selected by the third byte -- confirm against the definition
98extern const character charmap_f0_90_92[];
99extern const character charmap_f0_90_93[];
100
101/* character maps for 4-byte sequences starting with bytes F0 9E; charmap_f0_9e_XX is named after the third byte XX */
102extern const character* pagemap_32_f0_9e[]; // page table of charmap pointers; presumably selected by the third byte -- confirm against the definition
103extern const character charmap_f0_9e_a4[];
104
105}
106
107#endif // UTF8HELPER_CHARMAP_H
Definition utf8helper.h:29
const character charmap_cb[]
const character charmap_c2[]
const character charmap_c4[]
const character charmap_e2_82[]
const character * pagemap_32_f0_9e[]
const character charmap_f0_9e_a4[]
const character charmap_d2[]
uint8_t byte
Definition utf8helper_charmap.h:36
constexpr int IsBreaker
Definition utf8helper_charmap.h:30
constexpr int IsModifier
Definition utf8helper_charmap.h:32
const character charmap_c5[]
const character charmap_ca[]
const character charmap_e1_bf[]
const character charmap_e2_80[]
const character charmap_cf[]
const character charmap_d4[]
const character * pagemap_16[32]
const character charmap_us7ascii[]
const character charmap_e1_b9[]
constexpr codepoint NullCodepoint
Definition utf8helper_charmap.h:39
constexpr int IsPunctuation
Definition utf8helper_charmap.h:34
const character charmap_cd[]
const character charmap_c9[]
const character charmap_cc[]
const character charmap_e1_bc[]
constexpr int IsDiacritic
Definition utf8helper_charmap.h:33
const character charmap_e1_b8[]
const character charmap_e1_ba[]
const character charmap_d6[]
const character * pagemap_24_e1[]
const character charmap_e2_b4[]
const character * pagemap_24_e2[]
const character charmap_f0_90_92[]
const character charmap_d1[]
const character charmap_e1_be[]
const character charmap_c6[]
const character charmap_e1_83[]
const character charmap_ce[]
constexpr int None
Definition utf8helper_charmap.h:28
const character charmap_e1_bd[]
uint32_t codepoint
Definition utf8helper_charmap.h:37
const character * pagemap_32_f0_90[]
const character charmap_f0_90_93[]
constexpr int IsControl
Definition utf8helper_charmap.h:31
const character charmap_c3[]
constexpr int IsSpace
Definition utf8helper_charmap.h:29
const character charmap_e1_bb[]
const character charmap_e2_81[]
const character charmap_d0[]
const character charmap_e1_82[]
const character charmap_d3[]
const character charmap_d5[]
const character charmap_c8[]
const character charmap_c7[]
Definition utf8helper_charmap.h:41
const codepoint upper
Definition utf8helper_charmap.h:43
const codepoint lower
Definition utf8helper_charmap.h:44
const codepoint code
Definition utf8helper_charmap.h:42
const int category
Definition utf8helper_charmap.h:45
const char * translate
Definition utf8helper_charmap.h:46