15 #ifndef RAPIDJSON_INTERNAL_REGEX_H_ 16 #define RAPIDJSON_INTERNAL_REGEX_H_ 18 #include "../allocators.h" 19 #include "../stream.h" 24 RAPIDJSON_DIAG_OFF(padded)
25 RAPIDJSON_DIAG_OFF(
switch-
enum)
26 RAPIDJSON_DIAG_OFF(implicit-fallthrough)
27 #elif defined(_MSC_VER) 29 RAPIDJSON_DIAG_OFF(4512)
34 RAPIDJSON_DIAG_OFF(effc++)
36 RAPIDJSON_DIAG_OFF(implicit-fallthrough)
40 #ifndef RAPIDJSON_REGEX_VERBOSE 41 #define RAPIDJSON_REGEX_VERBOSE 0 50 template <
typename SourceStream,
typename Encoding>
78 template <
typename Encoding,
typename Allocator>
113 template <
typename Encoding,
typename Allocator = CrtAllocator>
171 return states_.template Bottom<State>()[index];
176 return states_.template Bottom<State>()[index];
181 return ranges_.template Bottom<Range>()[index];
186 return ranges_.template Bottom<Range>()[index];
189 template <
typename InputStream>
196 *atomCountStack.template Push<unsigned>() = 0;
199 while (ds.
Peek() != 0) {
200 switch (codepoint = ds.
Take()) {
210 while (!operatorStack.
Empty() && *operatorStack.template Top<Operator>() <
kAlternation)
211 if (!
Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
213 *operatorStack.template Push<Operator>() =
kAlternation;
214 *atomCountStack.template Top<unsigned>() = 0;
219 *atomCountStack.template Push<unsigned>() = 0;
224 if (!
Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
226 if (operatorStack.
Empty())
228 operatorStack.template Pop<Operator>(1);
229 atomCountStack.template Pop<unsigned>(1);
254 if (ds.
Peek() ==
',') {
256 if (ds.
Peek() ==
'}')
282 *operandStack.template Push<Frag>() =
Frag(
s,
s,
s);
298 while (!operatorStack.
Empty())
299 if (!
Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
304 Frag*
e = operandStack.template Pop<Frag>(1);
308 #if RAPIDJSON_REGEX_VERBOSE 309 printf(
"root: %d\n",
root_);
312 printf(
"[%2d] out: %2d out1: %2d c: '%c'\n",
i,
s.out,
s.out1, (
char)
s.codepoint);
323 s->codepoint = codepoint;
330 *operandStack.template Push<Frag>() =
Frag(
s,
s,
s);
334 if (*atomCountStack.template Top<unsigned>())
336 (*atomCountStack.template Top<unsigned>())++;
359 Frag e2 = *operandStack.template Pop<Frag>(1);
360 Frag e1 = *operandStack.template Pop<Frag>(1);
362 *operandStack.template Push<Frag>() =
Frag(e1.start, e2.
out,
Min(e1.minIndex, e2.
minIndex));
368 Frag e2 = *operandStack.template Pop<Frag>(1);
369 Frag e1 = *operandStack.template Pop<Frag>(1);
378 Frag e = *operandStack.template Pop<Frag>(1);
380 *operandStack.template Push<Frag>() =
Frag(
s,
Append(
e.out,
s),
e.minIndex);
387 Frag e = *operandStack.template Pop<Frag>(1);
390 *operandStack.template Push<Frag>() =
Frag(
s,
s,
e.minIndex);
398 Frag e = *operandStack.template Pop<Frag>(1);
401 *operandStack.template Push<Frag>() =
Frag(
e.start,
s,
e.minIndex);
419 for (
unsigned i = 0;
i < m - 1;
i++)
421 for (
unsigned i = 0;
i < m - 1;
i++)
427 for (
unsigned i = 0;
i < n - 1;
i++)
435 for (
unsigned i = n;
i < m - 1;
i++)
437 for (
unsigned i = n;
i < m;
i++)
441 for (
unsigned i = 0;
i < n - 1;
i++)
450 const Frag src = *operandStack.template Top<Frag>();
464 template <
typename InputStream>
467 if (ds.
Peek() <
'0' || ds.
Peek() >
'9')
469 while (ds.
Peek() >=
'0' && ds.
Peek() <=
'9') {
470 if (
r >= 429496729 && ds.
Peek() >
'5')
472 r =
r * 10 + (ds.
Take() -
'0');
478 template <
typename InputStream>
486 while ((codepoint = ds.
Take()) != 0) {
489 if (codepoint ==
'^') {
510 if (ds.
Peek() ==
'b') {
521 if (codepoint ==
'-') {
551 r->start =
r->end = codepoint;
556 template <
typename InputStream>
559 switch (codepoint = ds.
Take()) {
574 *escapedCodepoint = codepoint;
return true;
575 case 'f': *escapedCodepoint = 0x000C;
return true;
576 case 'n': *escapedCodepoint = 0x000A;
return true;
577 case 'r': *escapedCodepoint = 0x000D;
return true;
578 case 't': *escapedCodepoint = 0x0009;
return true;
579 case 'v': *escapedCodepoint = 0x000B;
return true;
598 template <
typename RegexType,
typename Allocator = CrtAllocator>
621 template <
typename InputStream>
631 template <
typename InputStream>
642 typedef typename RegexType::State
State;
643 typedef typename RegexType::Range
Range;
645 template <
typename InputStream>
656 while (!current->
Empty() && (codepoint = ds.
Take()) != 0) {
660 for (
const SizeType*
s = current->template Bottom<SizeType>();
s != current->template End<SizeType>(); ++
s) {
662 if (sr.codepoint == codepoint ||
663 sr.codepoint == RegexType::kAnyCharacterClass ||
664 (sr.codepoint == RegexType::kRangeCharacterClass &&
MatchRange(sr.rangeStart, codepoint)))
667 if (!anchorEnd && matched)
680 return (
regex_.stateCount_ + 31) / 32 * 4;
692 else if (!(
stateSet_[index >> 5] & (1
u << (index & 31)))) {
693 stateSet_[index >> 5] |= (1
u << (index & 31));
694 *
l.template PushUnsafe<SizeType>() = index;
700 bool yes = (
regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
703 if (codepoint >= (
r.start & ~RegexType::kRangeNegationFlag) && codepoint <=
r.end)
728 #if defined(__clang__) || defined(_MSC_VER) 732 #endif // RAPIDJSON_INTERNAL_REGEX_H_ SizeType stateCount_
Definition: regex.h:588
unsigned Take()
Definition: regex.h:55
Allocator * ownAllocator_
Definition: regex.h:712
bool Eval(Stack< Allocator > &operandStack, Operator op)
Definition: regex.h:354
SizeType NewRange(unsigned codepoint)
Definition: regex.h:549
const CharType(& source)[N]
Definition: pointer.h:1147
const Range & GetRange(SizeType index) const
Definition: regex.h:184
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:389
SizeType out
linked list of all output states
Definition: regex.h:165
int * count
Definition: gmock_stress_test.cc:176
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:411
unsigned codepoint_
Definition: regex.h:69
int i
Definition: pymoduletest.py:23
bool anchorBegin_
Definition: regex.h:594
u
Definition: pymoduletest.py:20
Read-only string stream.
Definition: fwd.h:47
SizeType minIndex
Definition: regex.h:166
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:124
GenericRegexSearch< Regex > RegexSearch
Definition: regex.h:719
Stack< Allocator > state0_
Definition: regex.h:713
Stack< Allocator > ranges_
Definition: regex.h:586
unsigned codepoint
Definition: regex.h:159
SizeType out1
Not equal to kInvalid for a split state.
Definition: regex.h:157
#define Ch(x, y, z)
Definition: sha512-blocks.c:34
size_t GetStateSetSize() const
Definition: regex.h:679
A type-unsafe stack for storing different types of data.
Definition: stack.h:36
~GenericRegex()
Definition: regex.h:129
const char * s
Definition: minissdp.c:596
const RegexType & regex_
Definition: regex.h:710
Operator
Definition: regex.h:136
Regular expression engine with subset of ECMAscript grammar.
Definition: regex.h:114
void Decode()
Definition: regex.h:63
bool AddState(Stack< Allocator > &l, SizeType index)
Definition: regex.h:684
GenericRegex< UTF8<> > Regex
Definition: regex.h:718
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:121
Allocator * allocator_
Definition: regex.h:711
static const unsigned kInfinityQuantifier
Definition: regex.h:591
e
Definition: pymoduletest.py:79
void CloneTopOperand(Stack< Allocator > &operandStack)
Definition: regex.h:449
SizeType out
Equal to kInvalid for a matching state.
Definition: regex.h:156
#define RAPIDJSON_NEW(TypeName)
Customization point for global new.
Definition: rapidjson.h:606
RegexType::State State
Definition: regex.h:642
SourceStream & ss_
Definition: regex.h:68
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
Definition: regex.h:319
void PushOperand(Stack< Allocator > &operandStack, unsigned codepoint)
Definition: regex.h:328
bool MatchRange(SizeType rangeIndex, unsigned codepoint) const
Definition: regex.h:699
unsigned int uint32_t
Definition: stdint.h:126
SizeType rangeCount_
Definition: regex.h:589
bool SearchWithAnchoring(InputStream &is, bool anchorBegin, bool anchorEnd)
Definition: regex.h:646
DecodedStream(SourceStream &ss)
Definition: regex.h:53
bool IsValid() const
Definition: regex.h:131
static const unsigned kRangeNegationFlag
Definition: regex.h:147
GenericRegexSearch(const RegexType &regex, Allocator *allocator=0)
Definition: regex.h:604
Encoding EncodingType
Definition: regex.h:116
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
size_t GetSize() const
Definition: stack.h:177
Stack< Allocator > state1_
Definition: regex.h:714
SizeType start
Definition: regex.h:164
SizeType next
Definition: regex.h:152
void Swap(T &a, T &b) RAPIDJSON_NOEXCEPT
Custom swap() to avoid dependency on C++ <algorithm> header.
Definition: swap.h:33
#define RAPIDJSON_DELETE(x)
Customization point for global delete.
Definition: rapidjson.h:610
void Parse(DecodedStream< InputStream, Encoding > &ds)
Definition: regex.h:190
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1124
r
Definition: testupnpigd.py:61
bool ParseRange(DecodedStream< InputStream, Encoding > &ds, SizeType *range)
Definition: regex.h:479
bool Search(InputStream &is)
Definition: regex.h:632
static SizeType Min(SizeType a, SizeType b)
Definition: regex.h:447
Definition: document.h:406
State & GetState(SizeType index)
Definition: regex.h:169
Encoding::Ch Ch
Definition: regex.h:602
SizeType rangeStart
Definition: regex.h:158
Frag(SizeType s, SizeType o, SizeType m)
Definition: regex.h:163
void * memcpy(void *a, const void *b, size_t c)
Definition: glibc_compat.cpp:16
static const unsigned kRangeCharacterClass
Definition: regex.h:146
unsigned end
Definition: regex.h:151
unsigned start
Definition: regex.h:150
static const unsigned kAnyCharacterClass
For '.'.
Definition: regex.h:145
bool EvalQuantifier(Stack< Allocator > &operandStack, unsigned n, unsigned m)
Definition: regex.h:408
~GenericRegexSearch()
Definition: regex.h:616
bool anchorEnd_
Definition: regex.h:595
bool Match(InputStream &is)
Definition: regex.h:622
bool Search(const Ch *s)
Definition: regex.h:636
SizeType root_
Definition: regex.h:587
Encoding::Ch Ch
Definition: regex.h:117
RegexType::Range Range
Definition: regex.h:643
void Patch(SizeType l, SizeType s)
Definition: regex.h:347
bool ParseUnsigned(DecodedStream< InputStream, Encoding > &ds, unsigned *u)
Definition: regex.h:465
RegexType::EncodingType Encoding
Definition: regex.h:601
bool Empty() const
Definition: stack.h:176
SizeType Append(SizeType l1, SizeType l2)
Definition: regex.h:339
def next(obj)
Definition: ast.py:58
bool Match(const Ch *s)
Definition: regex.h:626
Range & GetRange(SizeType index)
Definition: regex.h:179
bool CharacterEscape(DecodedStream< InputStream, Encoding > &ds, unsigned *escapedCodepoint)
Definition: regex.h:557
uint32_t * stateSet_
Definition: regex.h:715
void ImplicitConcatenation(Stack< Allocator > &atomCountStack, Stack< Allocator > &operatorStack)
Definition: regex.h:333
static const SizeType kRegexInvalidRange
Definition: regex.h:76
const State & GetState(SizeType index) const
Definition: regex.h:174
c
Definition: pymoduletest.py:79
cryptonote::block b
Definition: block.cpp:40
unsigned Peek()
Definition: regex.h:54
Stack< Allocator > states_
Definition: regex.h:585
GenericRegex(const Ch *source, Allocator *allocator=0)
Definition: regex.h:120