Pattern class holds a regex pattern and its compiled FSM opcode table or code for the reflex::Matcher engine. More...
#include <pattern.h>
Classes | |
struct | Chars |
Set of chars and meta chars. More... | |
struct | Const |
Common constants. More... | |
struct | DFA |
DFA created by subset construction from regex patterns. More... | |
struct | ModConst |
Modifiers 'i', 'm', 'q', 's', 'u' (enable) 'I', 'M', 'Q', 'S', 'U' (disable) More... | |
struct | Option |
Global modifier modes, syntax flags, and compiler options. More... | |
struct | Position |
Finite state machine construction position information. More... | |
Public Types | |
typedef uint8_t | Pred |
predict match bits More... | |
typedef uint16_t | Hash |
hash value type, max value is Const::HASH More... | |
typedef uint32_t | Index |
index into opcodes array Pattern::opc_ and subpattern indexing More... | |
typedef uint32_t | Accept |
group capture index More... | |
typedef uint32_t | Opcode |
32 bit opcode word More... | |
typedef void(* | FSM) (class Matcher &) |
Public Member Functions | |
Pattern () | |
Construct an unset pattern. More... | |
Pattern (const char *regex, const char *options=NULL) | |
Construct a pattern object given a regex string. More... | |
Pattern (const char *regex, const std::string &options) | |
Construct a pattern object given a regex string. More... | |
Pattern (const std::string ®ex, const char *options=NULL) | |
Construct a pattern object given a regex string. More... | |
Pattern (const std::string ®ex, const std::string &options) | |
Construct a pattern object given a regex string. More... | |
Pattern (const Opcode *code, const uint8_t *pred=NULL) | |
Construct a pattern object given an opcode table. More... | |
Pattern (FSM fsm, const uint8_t *pred=NULL) | |
Construct a pattern object given a function pointer to FSM code. More... | |
Pattern (const Pattern &pattern) | |
Copy constructor. More... | |
virtual | ~Pattern () |
Destructor, deletes internal code array when owned and allocated. More... | |
void | clear () |
Clear and delete pattern data. More... | |
Pattern & | assign (const char *regex, const char *options=NULL) |
Assign a (new) pattern. More... | |
Pattern & | assign (const char *regex, const std::string &options) |
Assign a (new) pattern. More... | |
Pattern & | assign (const std::string ®ex, const char *options=NULL) |
Assign a (new) pattern. More... | |
Pattern & | assign (const std::string ®ex, const std::string &options) |
Assign a (new) pattern. More... | |
Pattern & | assign (const Opcode *code, const uint8_t *pred=NULL) |
Assign a (new) pattern. More... | |
Pattern & | assign (FSM fsm, const uint8_t *pred=NULL) |
Assign a (new) pattern. More... | |
Pattern & | operator= (const Pattern &pattern) |
Assign a (new) pattern. More... | |
Pattern & | operator= (const char *regex) |
Assign a (new) pattern. More... | |
Pattern & | operator= (const std::string ®ex) |
Assign a (new) pattern. More... | |
Pattern & | operator= (const Opcode *code) |
Assign a (new) pattern. More... | |
Pattern & | operator= (FSM fsm) |
Assign a (new) pattern. More... | |
Accept | size () const |
Get the number of subpatterns of this pattern object. More... | |
bool | empty () const |
Return true if this pattern is not assigned. More... | |
const std::string | operator[] (Accept choice) const |
Get subpattern regex of this pattern object or the whole regex with index 0. More... | |
bool | reachable (Accept choice) const |
Check if subpattern is reachable by a match. More... | |
size_t | nodes () const |
Get the number of finite state machine nodes (vertices). More... | |
size_t | edges () const |
Get the number of finite state machine edges (transitions on input characters). More... | |
size_t | words () const |
Get the code size in number of words. More... | |
float | parse_time () const |
Get elapsed regex parsing and analysis time. More... | |
float | nodes_time () const |
Get elapsed DFA vertices construction time. More... | |
float | edges_time () const |
Get elapsed DFA edges construction time. More... | |
float | words_time () const |
Get elapsed code words assembly time. More... | |
Static Public Member Functions | |
static bool | predict_match (const Pred pmh[], const char *s, size_t n) |
Returns true when match is predicted, based on s[0..3..e-1] (e >= s + 4). More... | |
static size_t | predict_match (const Pred pma[], const char *s) |
Returns zero when match is predicted or nonzero shift value, based on s[0..3]. More... | |
Protected Member Functions | |
virtual void | error (regex_error_type code, size_t pos=0) const |
Throw an error. More... | |
Private Types | |
enum | Meta { META_MIN = 0x100, META_NWB = 0x101, META_NWE = 0x102, META_BWB = 0x103, META_EWB = 0x104, META_BWE = 0x105, META_EWE = 0x106, META_BOL = 0x107, META_EOL = 0x108, META_BOB = 0x109, META_EOB = 0x10A, META_UND = 0x10B, META_IND = 0x10C, META_DED = 0x10D, META_MAX } |
Meta characters. More... | |
typedef uint8_t | Mod |
typedef uint16_t | Char |
typedef uint8_t | Lazy |
typedef uint16_t | Iter |
typedef uint16_t | Lookahead |
typedef std::set< Lookahead > | Lookaheads |
typedef uint32_t | Location |
typedef ORanges< Location > | Locations |
typedef std::map< int, Locations > | Map |
typedef Locations | Mods[10] |
typedef std::vector< Lazy > | Lazyset |
typedef std::vector< Position > | Positions |
typedef std::map< Position, Positions > | Follow |
typedef std::pair< Chars, Positions > | Move |
typedef std::list< Move > | Moves |
Private Member Functions | |
void | init (const char *options, const uint8_t *pred=NULL) |
Initialize the pattern at construction. More... | |
void | init_options (const char *options) |
void | parse (Positions &startpos, Follow &followpos, Mods modifiers, Map &lookahead) |
void | parse1 (bool begin, Location &loc, Positions &firstpos, Positions &lastpos, bool &nullable, Follow &followpos, Lazy &lazyidx, Lazyset &lazyset, Mods modifiers, Locations &lookahead, Iter &iter) |
void | parse2 (bool begin, Location &loc, Positions &firstpos, Positions &lastpos, bool &nullable, Follow &followpos, Lazy &lazyidx, Lazyset &lazyset, Mods modifiers, Locations &lookahead, Iter &iter) |
void | parse3 (bool begin, Location &loc, Positions &firstpos, Positions &lastpos, bool &nullable, Follow &followpos, Lazy &lazyidx, Lazyset &lazyset, Mods modifiers, Locations &lookahead, Iter &iter) |
void | parse4 (bool begin, Location &loc, Positions &firstpos, Positions &lastpos, bool &nullable, Follow &followpos, Lazy &lazyidx, Lazyset &lazyset, Mods modifiers, Locations &lookahead, Iter &iter) |
Char | parse_esc (Location &loc, Chars *chars=NULL) const |
void | compile (DFA::State *start, Follow &followpos, const Mods modifiers, const Map &lookahead) |
void | lazy (const Lazyset &lazyset, Positions &pos) const |
void | lazy (const Lazyset &lazyset, const Positions &pos, Positions &pos1) const |
void | greedy (Positions &pos) const |
void | trim_anchors (Positions &follow, const Position p) const |
void | trim_lazy (Positions *pos) const |
void | compile_transition (DFA::State *state, Follow &followpos, const Mods modifiers, const Map &lookahead, Moves &moves) const |
void | transition (Moves &moves, Chars &chars, const Positions &follow) const |
void | compile_list (Location loc, Chars &chars, const Mods modifiers) const |
void | posix (size_t index, Chars &chars) const |
void | flip (Chars &chars) const |
void | assemble (DFA::State *start) |
void | compact_dfa (DFA::State *start) |
void | encode_dfa (DFA::State *start) |
void | gencode_dfa (const DFA::State *start) const |
void | check_dfa_closure (const DFA::State *state, int nest, bool &peek, bool &prev) const |
void | gencode_dfa_closure (FILE *fd, const DFA::State *start, int nest, bool peek) const |
void | graph_dfa (const DFA::State *start) const |
void | export_code () const |
void | predict_match_dfa (DFA::State *start) |
void | gen_predict_match (DFA::State *state) |
void | gen_predict_match_transitions (DFA::State *state, std::map< DFA::State *, ORanges< Hash > > &states) |
void | gen_predict_match_transitions (size_t level, DFA::State *state, ORanges< Hash > &labels, std::map< DFA::State *, ORanges< Hash > > &states) |
void | write_predictor (FILE *fd) const |
void | write_namespace_open (FILE *fd) const |
void | write_namespace_close (FILE *fd) const |
size_t | find_at (Location loc, char c) const |
Char | at (Location k) const |
bool | eq_at (Location loc, const char *s) const |
Char | escape_at (Location loc) const |
Char | escapes_at (Location loc, const char *escapes) const |
Static Private Member Functions | |
static void | pos_insert (Positions &s1, const Positions &s2) |
static void | pos_add (Positions &s, const Position &e) |
static void | lazy_insert (Lazyset &s1, const Lazyset &s2) |
static void | lazy_add (Lazyset &s, const Lazy &e) |
static bool | is_modified (Mod mod, const Mods modifiers, Location loc) |
static void | update_modified (Mod mod, Mods modifiers, Location from, Location to) |
static uint16_t | hash_pos (const Positions *pos) |
static bool | valid_goto_index (Index index) |
static bool | valid_take_index (Index index) |
static bool | valid_lookahead_index (Index index) |
static bool | is_meta (Char c) |
static Opcode | opcode_long (Index index) |
static Opcode | opcode_take (Index index) |
static Opcode | opcode_redo () |
static Opcode | opcode_tail (Index index) |
static Opcode | opcode_head (Index index) |
static Opcode | opcode_goto (Char lo, Char hi, Index index) |
static Opcode | opcode_halt () |
static bool | is_opcode_long (Opcode opcode) |
static bool | is_opcode_take (Opcode opcode) |
static bool | is_opcode_redo (Opcode opcode) |
static bool | is_opcode_tail (Opcode opcode) |
static bool | is_opcode_head (Opcode opcode) |
static bool | is_opcode_halt (Opcode opcode) |
static bool | is_opcode_goto (Opcode opcode) |
static bool | is_opcode_meta (Opcode opcode) |
static bool | is_opcode_goto (Opcode opcode, unsigned char c) |
static Char | meta_of (Opcode opcode) |
static Char | lo_of (Opcode opcode) |
static Char | hi_of (Opcode opcode) |
static Index | index_of (Opcode opcode) |
static Index | long_index_of (Opcode opcode) |
static Lookahead | lookahead_of (Opcode opcode) |
static Char | lowercase (Char c) |
static Char | uppercase (Char c) |
static Char | reversecase (Char c) |
static Hash | hash (Hash h, uint8_t b) |
static Hash | hash (Hash h) |
Private Attributes | |
Option | opt_ |
pattern compiler options More... | |
DFA | tfa_ |
tree DFA constructed from strings More... | |
DFA | dfa_ |
DFA constructed from regex with subset construction using firstpos/lastpos/followpos. More... | |
std::string | rex_ |
regular expression string More... | |
std::vector< Location > | end_ |
entries point to the subpattern's ending '|' or '\0' More... | |
std::vector< bool > | acc_ |
true if subpattern n is accepting (state is reachable) More... | |
size_t | vno_ |
number of finite state machine vertices |V| More... | |
size_t | eno_ |
number of finite state machine edges |E| More... | |
const Opcode * | opc_ |
points to the opcode table More... | |
Index | nop_ |
number of opcodes generated More... | |
FSM | fsm_ |
function pointer to FSM code More... | |
size_t | len_ |
prefix length of pre_[], less or equal to 255 More... | |
size_t | min_ |
patterns after the prefix are at least this long but no more than 8 More... | |
char | pre_ [256] |
pattern prefix, shorter or equal to 255 bytes More... | |
Pred | bit_ [256] |
bitap array More... | |
Pred | pmh_ [Const::HASH] |
predict-match hash array More... | |
Pred | pma_ [Const::HASH] |
predict-match array More... | |
float | pms_ |
ms elapsed time to parse regex More... | |
float | vms_ |
ms elapsed time to compile DFA vertices More... | |
float | ems_ |
ms elapsed time to compile DFA edges More... | |
float | wms_ |
ms elapsed time to assemble code words More... | |
bool | one_ |
true if matching one string in pre_[] without meta/anchors More... | |
Friends | |
class | Matcher |
permit access by the reflex::Matcher engine More... | |
class | FuzzyMatcher |
permit access by the reflex::FuzzyMatcher engine More... | |
Pattern class holds a regex pattern and its compiled FSM opcode table or code for the reflex::Matcher engine.
typedef uint32_t reflex::Pattern::Accept |
group capture index
|
private |
|
private |
typedef void(* reflex::Pattern::FSM) (class Matcher &) |
function pointer to FSM code
typedef uint16_t reflex::Pattern::Hash |
hash value type, max value is Const::HASH
typedef uint32_t reflex::Pattern::Index |
index into opcodes array Pattern::opc_ and subpattern indexing
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
typedef uint32_t reflex::Pattern::Opcode |
32 bit opcode word
|
private |
typedef uint8_t reflex::Pattern::Pred |
predict match bits
|
private |
Meta characters.
|
inline |
Construct an unset pattern.
|
inlineexplicit |
Construct a pattern object given a regex string.
|
inline |
Construct a pattern object given a regex string.
|
inlineexplicit |
Construct a pattern object given a regex string.
|
inline |
Construct a pattern object given a regex string.
|
inlineexplicit |
Construct a pattern object given an opcode table.
|
inlineexplicit |
Construct a pattern object given a function pointer to FSM code.
|
inline |
Copy constructor.
pattern | pattern to copy |
|
inlinevirtual |
Destructor, deletes internal code array when owned and allocated.
|
private |
|
inline |
Assign a (new) pattern.
|
inline |
Assign a (new) pattern.
|
inline |
Assign a (new) pattern.
|
inline |
Assign a (new) pattern.
Assign a (new) pattern.
Assign a (new) pattern.
|
private |
|
inline |
Clear and delete pattern data.
|
private |
|
private |
|
private |
|
private |
|
inline |
Get the number of finite state machine edges (transitions on input characters).
|
inline |
Get elapsed DFA edges construction time.
|
inline |
Return true if this pattern is not assigned.
|
private |
|
inlineprivate |
|
protectedvirtual |
Throw an error.
code | error code |
pos | optional location of the error in regex string Pattern::rex_ |
|
private |
|
inlineprivate |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
inlinestaticprivate |
|
private |
Initialize the pattern at construction.
|
private |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
private |
|
inline |
Get the number of finite state machine nodes (vertices).
|
inline |
Get elapsed DFA vertices construction time.
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inline |
Assign a (new) pattern.
|
inline |
Assign a (new) pattern.
const std::string reflex::Pattern::operator[] | ( | Accept | choice | ) | const |
Get subpattern regex of this pattern object or the whole regex with index 0.
|
private |
|
private |
|
private |
|
private |
|
private |
|
inline |
Get elapsed regex parsing and analysis time.
|
inlinestaticprivate |
|
private |
|
inlinestatic |
Returns true when match is predicted, based on s[0..3..e-1] (e >= s + 4).
|
inlinestatic |
Returns zero when match is predicted or nonzero shift value, based on s[0..3].
|
private |
|
inline |
Check if subpattern is reachable by a match.
|
inline |
Get the number of subpatterns of this pattern object.
|
private |
|
private |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inlinestaticprivate |
|
inline |
Get the code size in number of words.
|
inline |
Get elapsed code words assembly time.
|
private |
|
private |
|
private |
|
friend |
permit access by the reflex::FuzzyMatcher engine
|
friend |
permit access by the reflex::Matcher engine
|
private |
true if subpattern n is accepting (state is reachable)
|
private |
bitap array
|
private |
DFA constructed from regex with subset construction using firstpos/lastpos/followpos.
|
private |
ms elapsed time to compile DFA edges
|
private |
entries point to the subpattern's ending '|' or '\0'
|
private |
number of finite state machine edges |E|
|
private |
function pointer to FSM code
|
private |
prefix length of pre_[], less or equal to 255
|
private |
patterns after the prefix are at least this long but no more than 8
|
private |
number of opcodes generated
|
private |
true if matching one string in pre_[] without meta/anchors
|
private |
points to the opcode table
|
private |
pattern compiler options
|
private |
predict-match array
|
private |
predict-match hash array
|
private |
ms elapsed time to parse regex
|
private |
pattern prefix, shorter or equal to 255 bytes
|
private |
regular expression string
|
private |
ms elapsed time to compile DFA vertices
|
private |
number of finite state machine vertices |V|
|
private |
ms elapsed time to assemble code words