15#ifndef RAPIDJSON_INTERNAL_REGEX_H_
16#define RAPIDJSON_INTERNAL_REGEX_H_
24RAPIDJSON_DIAG_OFF(padded)
25RAPIDJSON_DIAG_OFF(
switch-
enum)
26RAPIDJSON_DIAG_OFF(implicit-fallthrough)
27#elif defined(_MSC_VER)
29RAPIDJSON_DIAG_OFF(4512)
34RAPIDJSON_DIAG_OFF(effc++)
36RAPIDJSON_DIAG_OFF(implicit-fallthrough)
40#ifndef RAPIDJSON_REGEX_VERBOSE
41#define RAPIDJSON_REGEX_VERBOSE 0
50template <
typename SourceStream,
typename Encoding>
78template <
typename Encoding,
typename Allocator>
113template <
typename Encoding,
typename Allocator = CrtAllocator>
117 typedef typename Encoding::Ch
Ch;
171 return states_.template Bottom<State>()[index];
176 return states_.template Bottom<State>()[index];
181 return ranges_.template Bottom<Range>()[index];
186 return ranges_.template Bottom<Range>()[index];
189 template <
typename InputStream>
196 *atomCountStack.template Push<unsigned>() = 0;
199 while (ds.
Peek() != 0) {
200 switch (codepoint = ds.
Take()) {
210 while (!operatorStack.
Empty() && *operatorStack.template Top<Operator>() <
kAlternation)
211 if (!
Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
213 *operatorStack.template Push<Operator>() =
kAlternation;
214 *atomCountStack.template Top<unsigned>() = 0;
219 *atomCountStack.template Push<unsigned>() = 0;
224 if (!
Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
226 if (operatorStack.
Empty())
228 operatorStack.template Pop<Operator>(1);
229 atomCountStack.template Pop<unsigned>(1);
254 if (ds.
Peek() ==
',') {
256 if (ds.
Peek() ==
'}')
282 *operandStack.template Push<Frag>() = Frag(
s,
s,
s);
298 while (!operatorStack.
Empty())
299 if (!
Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
303 if (operandStack.
GetSize() ==
sizeof(Frag)) {
304 Frag* e = operandStack.template Pop<Frag>(1);
308#if RAPIDJSON_REGEX_VERBOSE
309 printf(
"root: %d\n",
root_);
312 printf(
"[%2d] out: %2d out1: %2d c: '%c'\n", i,
s.out,
s.out1, (
char)
s.codepoint);
320 State*
s =
states_.template Push<State>();
323 s->codepoint = codepoint;
330 *operandStack.template Push<Frag>() = Frag(
s,
s,
s);
334 if (*atomCountStack.template Top<unsigned>())
336 (*atomCountStack.template Top<unsigned>())++;
359 Frag e2 = *operandStack.template Pop<Frag>(1);
360 Frag e1 = *operandStack.template Pop<Frag>(1);
361 Patch(e1.out, e2.start);
362 *operandStack.template Push<Frag>() = Frag(e1.start, e2.out,
Min(e1.minIndex, e2.minIndex));
367 if (operandStack.
GetSize() >=
sizeof(Frag) * 2) {
368 Frag e2 = *operandStack.template Pop<Frag>(1);
369 Frag e1 = *operandStack.template Pop<Frag>(1);
371 *operandStack.template Push<Frag>() = Frag(
s,
Append(e1.out, e2.out),
Min(e1.minIndex, e2.minIndex));
377 if (operandStack.
GetSize() >=
sizeof(Frag)) {
378 Frag e = *operandStack.template Pop<Frag>(1);
380 *operandStack.template Push<Frag>() = Frag(
s,
Append(e.out,
s), e.minIndex);
386 if (operandStack.
GetSize() >=
sizeof(Frag)) {
387 Frag e = *operandStack.template Pop<Frag>(1);
390 *operandStack.template Push<Frag>() = Frag(
s,
s, e.minIndex);
397 if (operandStack.
GetSize() >=
sizeof(Frag)) {
398 Frag e = *operandStack.template Pop<Frag>(1);
401 *operandStack.template Push<Frag>() = Frag(e.start,
s, e.minIndex);
419 for (
unsigned i = 0; i < m - 1; i++)
421 for (
unsigned i = 0; i < m - 1; i++)
427 for (
unsigned i = 0; i < n - 1; i++)
435 for (
unsigned i = n; i < m - 1; i++)
437 for (
unsigned i = n; i < m; i++)
441 for (
unsigned i = 0; i < n - 1; i++)
450 const Frag src = *operandStack.template Top<Frag>();
452 State*
s =
states_.template Push<State>(count);
454 for (
SizeType j = 0; j < count; j++) {
460 *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
464 template <
typename InputStream>
467 if (ds.
Peek() <
'0' || ds.
Peek() >
'9')
469 while (ds.
Peek() >=
'0' && ds.
Peek() <=
'9') {
470 if (r >= 429496729 && ds.
Peek() >
'5')
472 r = r * 10 + (ds.
Take() -
'0');
478 template <
typename InputStream>
486 while ((codepoint = ds.
Take()) != 0) {
489 if (codepoint ==
'^') {
510 if (ds.
Peek() ==
'b') {
521 if (codepoint ==
'-') {
550 Range* r =
ranges_.template Push<Range>();
551 r->start = r->end = codepoint;
556 template <
typename InputStream>
559 switch (codepoint = ds.
Take()) {
574 *escapedCodepoint = codepoint;
return true;
575 case 'f': *escapedCodepoint = 0x000C;
return true;
576 case 'n': *escapedCodepoint = 0x000A;
return true;
577 case 'r': *escapedCodepoint = 0x000D;
return true;
578 case 't': *escapedCodepoint = 0x0009;
return true;
579 case 'v': *escapedCodepoint = 0x000B;
return true;
598template <
typename RegexType,
typename Allocator = CrtAllocator>
602 typedef typename Encoding::Ch
Ch;
621 template <
typename InputStream>
631 template <
typename InputStream>
642 typedef typename RegexType::State
State;
643 typedef typename RegexType::Range
Range;
645 template <
typename InputStream>
656 while (!current->
Empty() && (codepoint = ds.
Take()) != 0) {
660 for (
const SizeType*
s = current->template Bottom<SizeType>();
s != current->template End<SizeType>(); ++
s) {
662 if (sr.codepoint == codepoint ||
663 sr.codepoint == RegexType::kAnyCharacterClass ||
664 (sr.codepoint == RegexType::kRangeCharacterClass &&
MatchRange(sr.rangeStart, codepoint)))
666 matched =
AddState(*next, sr.out) || matched;
667 if (!anchorEnd && matched)
680 return (
regex_.stateCount_ + 31) / 32 * 4;
692 else if (!(
stateSet_[index >> 5] & (1u << (index & 31)))) {
693 stateSet_[index >> 5] |= (1u << (index & 31));
694 *l.template PushUnsafe<SizeType>() = index;
700 bool yes = (
regex_.GetRange(rangeIndex).
start & RegexType::kRangeNegationFlag) == 0;
703 if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
728#if defined(__clang__) || defined(_MSC_VER)
#define s(x, c)
Definition aesb.c:47
cryptonote::block b
Definition block.cpp:40
DecodedStream(SourceStream &ss)
Definition regex.h:53
unsigned Take()
Definition regex.h:55
unsigned codepoint_
Definition regex.h:69
unsigned Peek()
Definition regex.h:54
SourceStream & ss_
Definition regex.h:68
void Decode()
Definition regex.h:63
Stack< CrtAllocator > state1_
Definition regex.h:714
RegexType::State State
Definition regex.h:642
size_t GetStateSetSize() const
Definition regex.h:679
bool MatchRange(SizeType rangeIndex, unsigned codepoint) const
Definition regex.h:699
uint32_t * stateSet_
Definition regex.h:715
Stack< CrtAllocator > state0_
Definition regex.h:713
GenericRegexSearch(const RegexType &regex, Allocator *allocator=0)
Definition regex.h:604
bool Search(InputStream &is)
Definition regex.h:632
RegexType::EncodingType Encoding
Definition regex.h:601
CrtAllocator * ownAllocator_
Definition regex.h:712
bool SearchWithAnchoring(InputStream &is, bool anchorBegin, bool anchorEnd)
Definition regex.h:646
Encoding::Ch Ch
Definition regex.h:602
bool Search(const Ch *s)
Definition regex.h:636
bool Match(const Ch *s)
Definition regex.h:626
bool AddState(Stack< Allocator > &l, SizeType index)
Definition regex.h:684
CrtAllocator * allocator_
Definition regex.h:711
RegexType::Range Range
Definition regex.h:643
const Regex & regex_
Definition regex.h:710
~GenericRegexSearch()
Definition regex.h:616
bool Match(InputStream &is)
Definition regex.h:622
Regular expression engine with subset of ECMAScript grammar.
Definition regex.h:114
void Parse(DecodedStream< InputStream, Encoding > &ds)
Definition regex.h:190
static SizeType Min(SizeType a, SizeType b)
Definition regex.h:447
bool Eval(Stack< Allocator > &operandStack, Operator op)
Definition regex.h:354
static const unsigned kInfinityQuantifier
Definition regex.h:591
bool ParseUnsigned(DecodedStream< InputStream, Encoding > &ds, unsigned *u)
Definition regex.h:465
void PushOperand(Stack< Allocator > &operandStack, unsigned codepoint)
Definition regex.h:328
static const unsigned kAnyCharacterClass
Definition regex.h:145
SizeType root_
Definition regex.h:587
bool IsValid() const
Definition regex.h:131
GenericRegex(const Ch *source, Allocator *allocator=0)
Definition regex.h:120
void ImplicitConcatenation(Stack< Allocator > &atomCountStack, Stack< Allocator > &operatorStack)
Definition regex.h:333
void CloneTopOperand(Stack< Allocator > &operandStack)
Definition regex.h:449
Encoding::Ch Ch
Definition regex.h:117
Stack< CrtAllocator > states_
Definition regex.h:585
~GenericRegex()
Definition regex.h:129
bool EvalQuantifier(Stack< Allocator > &operandStack, unsigned n, unsigned m)
Definition regex.h:408
const Range & GetRange(SizeType index) const
Definition regex.h:184
Range & GetRange(SizeType index)
Definition regex.h:179
SizeType NewRange(unsigned codepoint)
Definition regex.h:549
Encoding EncodingType
Definition regex.h:116
void Patch(SizeType l, SizeType s)
Definition regex.h:347
friend class GenericRegexSearch
Definition regex.h:118
SizeType Append(SizeType l1, SizeType l2)
Definition regex.h:339
bool ParseRange(DecodedStream< InputStream, Encoding > &ds, SizeType *range)
Definition regex.h:479
bool anchorBegin_
Definition regex.h:594
Operator
Definition regex.h:136
@ kOneOrMore
Definition regex.h:139
@ kAlternation
Definition regex.h:141
@ kConcatenation
Definition regex.h:140
@ kZeroOrMore
Definition regex.h:138
@ kZeroOrOne
Definition regex.h:137
@ kLeftParenthesis
Definition regex.h:142
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
Definition regex.h:319
SizeType rangeCount_
Definition regex.h:589
const State & GetState(SizeType index) const
Definition regex.h:174
bool anchorEnd_
Definition regex.h:595
static const unsigned kRangeNegationFlag
Definition regex.h:147
bool CharacterEscape(DecodedStream< InputStream, Encoding > &ds, unsigned *escapedCodepoint)
Definition regex.h:557
State & GetState(SizeType index)
Definition regex.h:169
static const unsigned kRangeCharacterClass
Definition regex.h:146
SizeType stateCount_
Definition regex.h:588
Stack< CrtAllocator > ranges_
Definition regex.h:586
A type-unsafe stack for storing different types of data.
Definition stack.h:36
bool Empty() const
Definition stack.h:176
size_t GetSize() const
Definition stack.h:177
Concept for allocating, resizing and freeing memory block.
Concept for encoding of Unicode characters.
void * memcpy(void *a, const void *b, size_t c)
Definition glibc_compat.cpp:16
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:411
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:121
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:124
Definition document.h:406
GenericRegexSearch< Regex > RegexSearch
Definition regex.h:719
void Swap(T &a, T &b) RAPIDJSON_NOEXCEPT
Custom swap() to avoid dependency on C++ <algorithm> header.
Definition swap.h:33
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition regex.h:75
GenericRegex< UTF8<> > Regex
Definition regex.h:718
static const SizeType kRegexInvalidRange
Definition regex.h:76
const CharType(& source)[N]
Definition pointer.h:1147
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1124
#define RAPIDJSON_DELETE(x)
Customization point for global delete.
Definition rapidjson.h:610
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.).
Definition rapidjson.h:389
#define RAPIDJSON_NEW(TypeName)
Customization point for global new.
Definition rapidjson.h:606
unsigned int uint32_t
Definition stdint.h:126
Read-only string stream.
Definition stream.h:154
Frag(SizeType s, SizeType o, SizeType m)
Definition regex.h:163
SizeType out
link-list of all output states
Definition regex.h:165
SizeType minIndex
Definition regex.h:166
SizeType start
Definition regex.h:164
unsigned end
Definition regex.h:151
unsigned start
Definition regex.h:150
SizeType next
Definition regex.h:152
SizeType out1
Equals to non-kInvalid for split.
Definition regex.h:157
SizeType out
Equals to kInvalid for matching state.
Definition regex.h:156
SizeType rangeStart
Definition regex.h:158
unsigned codepoint
Definition regex.h:159