|
| | Pattern () |
| | Construct an unset pattern. More...
|
| |
| | Pattern (const char *regex, const char *options=NULL) |
| | Construct a pattern object given a regex string. More...
|
| |
| | Pattern (const char *regex, const std::string &options) |
| | Construct a pattern object given a regex string. More...
|
| |
| | Pattern (const std::string &regex, const char *options=NULL) |
| | Construct a pattern object given a regex string. More...
|
| |
| | Pattern (const std::string &regex, const std::string &options) |
| | Construct a pattern object given a regex string. More...
|
| |
| | Pattern (const Opcode *code, const char *pred=NULL) |
| | Construct a pattern object given a FSM opcode table and predictor table. More...
|
| |
| | Pattern (FSM fsm, const char *pred=NULL) |
| | Construct a pattern object given a function pointer to FSM code and predictor table. More...
|
| |
| | Pattern (const Pattern &pattern) |
| | Copy constructor. More...
|
| |
| virtual | ~Pattern () |
| | Destructor, deletes internal code array when owned and allocated. More...
|
| |
| void | clear () |
| | Clear and delete pattern data. More...
|
| |
| Pattern & | assign (const char *regex, const char *options=NULL) |
| | Assign a (new) pattern. More...
|
| |
| Pattern & | assign (const char *regex, const std::string &options) |
| | Assign a (new) pattern. More...
|
| |
| Pattern & | assign (const std::string &regex, const char *options=NULL) |
| | Assign a (new) pattern. More...
|
| |
| Pattern & | assign (const std::string &regex, const std::string &options) |
| | Assign a (new) pattern. More...
|
| |
| Pattern & | assign (const Opcode *code, const char *pred=NULL) |
| | Assign a (new) pattern given a FSM opcode table and predictor table. More...
|
| |
| Pattern & | assign (FSM fsm, const char *pred=NULL) |
| | Assign a (new) pattern given a function pointer to FSM code and predictor table. More...
|
| |
| Pattern & | operator= (const Pattern &pattern) |
| | Assign a (new) pattern. More...
|
| |
| Pattern & | operator= (const char *regex) |
| | Assign a (new) pattern. More...
|
| |
| Pattern & | operator= (const std::string &regex) |
| | Assign a (new) pattern. More...
|
| |
| Pattern & | operator= (const Opcode *code) |
| | Assign a (new) pattern. More...
|
| |
| Pattern & | operator= (FSM fsm) |
| | Assign a (new) pattern. More...
|
| |
| Accept | size () const |
| | Get the number of subpatterns of this pattern object. More...
|
| |
| bool | empty () const |
| | Return true if this pattern is not assigned. More...
|
| |
| const std::string | operator[] (Accept choice) const |
| | Get subpattern regex of this pattern object or the whole regex with index 0. More...
|
| |
| bool | reachable (Accept choice) const |
| | Check if subpattern is reachable by a match. More...
|
| |
| size_t | nodes () const |
| | Get the number of finite state machine nodes (vertices). More...
|
| |
| size_t | edges () const |
| | Get the number of finite state machine edges (transitions on input characters). More...
|
| |
| size_t | words () const |
| | Get the code size in number of words. More...
|
| |
| size_t | hashes () const |
| | Get the total number of indexing hash tables constructed for the optional HFA. More...
|
| |
| float | parse_time () const |
| | Get elapsed regex parsing and analysis time. More...
|
| |
| float | nodes_time () const |
| | Get elapsed DFA vertices construction time. More...
|
| |
| float | edges_time () const |
| | Get elapsed DFA edges construction time. More...
|
| |
| float | words_time () const |
| | Get elapsed code words assembly time. More...
|
| |
| float | analysis_time () const |
| | Get elapsed time of DFA analysis to predict matches and construct an optional HFA. More...
|
| |
| bool | predict_match (const char *s) const |
| | Returns true when match is predicted using my PM3+PM5 logic for min>=1. More...
|
| |
| bool | predict_match_quick (const char *s) const |
| | Returns true when match is predicted using my PM3 logic for min>=1 prior to PM3+PM5. More...
|
| |
| bool | predict_match_min (const char *s) const |
| | Returns true when match is predicted using my PM3+PM5 logic for min>=3. More...
|
| |
| bool | has_hfa () const |
| |
| bool | match_hfa (const uint8_t *indexed, size_t size) const |
| |
|
| void | init (const char *options, const char *pred=NULL) |
| | Initialize the pattern at construction. More...
|
| |
| void | init_options (const char *options) |
| |
| void | parse (Positions &startpos, Follow &followpos, Lazypos &lazypos, Mods modifiers, Map &lookahead) |
| |
| void | parse1 (bool begin, Location &loc, Positions &firstpos, Positions &lastpos, bool &nullable, Follow &followpos, Lazy &lazyidx, Lazypos &lazypos, Mods modifiers, Locations &lookahead, Iter &iter) |
| |
| void | parse2 (bool begin, Location &loc, Positions &firstpos, Positions &lastpos, bool &nullable, Follow &followpos, Lazy &lazyidx, Lazypos &lazypos, Mods modifiers, Locations &lookahead, Iter &iter) |
| |
| void | parse3 (bool begin, Location &loc, Positions &firstpos, Positions &lastpos, bool &nullable, Follow &followpos, Lazy &lazyidx, Lazypos &lazypos, Mods modifiers, Locations &lookahead, Iter &iter) |
| |
| void | parse4 (bool begin, Location &loc, Positions &firstpos, Positions &lastpos, bool &nullable, Follow &followpos, Lazy &lazyidx, Lazypos &lazypos, Mods modifiers, Locations &lookahead, Iter &iter) |
| |
| Char | parse_esc (Location &loc, Chars *chars=NULL) const |
| |
| void | compile (DFA::State *start, Follow &followpos, const Lazypos &lazypos, const Mods modifiers, const Map &lookahead) |
| |
| void | lazy (const Lazypos &lazypos, Positions &pos) const |
| |
| void | lazy (const Lazypos &lazypos, const Positions &pos, Positions &pos1) const |
| |
| void | greedy (Positions &pos) const |
| |
| void | trim_anchors (Positions &follow) const |
| |
| void | trim_lazy (Positions *pos, const Lazypos &lazypos) const |
| |
| void | compile_transition (DFA::State *state, Follow &followpos, const Lazypos &lazypos, const Mods modifiers, const Map &lookahead, Moves &moves) const |
| |
| void | transition (Moves &moves, Chars &chars, const Positions &follow) const |
| |
| void | compile_list (Location loc, Chars &chars, const Mods modifiers) const |
| |
| void | posix (size_t index, Chars &chars) const |
| |
| void | flip (Chars &chars) const |
| |
| void | assemble (DFA::State *start) |
| |
| void | compact_dfa (DFA::State *start) |
| |
| void | encode_dfa (DFA::State *start) |
| |
| void | gencode_dfa (const DFA::State *start) const |
| |
| void | check_dfa_closure (const DFA::State *state, int nest, bool &peek) const |
| |
| void | gencode_dfa_closure (FILE *fd, const DFA::State *start, int nest, bool peek) const |
| |
| void | graph_dfa (const DFA::State *start) const |
| |
| void | export_code () const |
| |
| void | analyze_dfa (DFA::State *start) |
| |
| void | gen_min (std::set< DFA::State * > &states) |
| |
| void | gen_predict_match (std::set< DFA::State * > &states) |
| |
| void | gen_predict_match_start (std::set< DFA::State * > &states, std::map< DFA::State *, std::pair< ORanges< Hash >, ORanges< Char > > > &first_hashes) |
| |
| void | gen_predict_match_transitions (uint16_t level, DFA::State *state, const std::pair< ORanges< Hash >, ORanges< Char > > &previous, std::map< DFA::State *, std::pair< ORanges< Hash >, ORanges< Char > > > &level_hashes) |
| |
| void | gen_match_hfa (DFA::State *start) |
| |
| void | gen_match_hfa_start (DFA::State *start, HFA::State &index, HFA::StateHashes &hashes) |
| |
| bool | gen_match_hfa_transitions (size_t level, size_t &max_level, DFA::State *state, const HFA::HashRanges &previous, HFA::State &index, HFA::StateHashes &hashes) |
| |
| bool | match_hfa_transitions (size_t level, const HFA::Hashes &hashes, const uint8_t *indexed, size_t size, HFA::VisitSet &visit, HFA::VisitSet &next_visit, bool &accept) const |
| |
| void | write_predictor (FILE *fd) const |
| |
| void | write_namespace_open (FILE *fd) const |
| |
| void | write_namespace_close (FILE *fd) const |
| |
| size_t | find_at (Location loc, char c) const |
| |
| Char | at (Location k) const |
| |
| bool | eq_at (Location loc, const char *s) const |
| |
| Char | escape_at (Location loc) const |
| |
| Char | escapes_at (Location loc, const char *escapes) const |
| |
|
| static void | pos_insert (Positions &s1, const Positions &s2) |
| |
| static void | pos_add (Positions &s, const Position &e) |
| |
| static void | lazy_insert (Lazypos &s1, const Lazypos &s2) |
| |
| static void | lazy_add (Lazypos &s, const Lazy i, Location p) |
| |
| static bool | is_modified (Mod mod, const Mods modifiers, Location loc) |
| |
| static void | update_modified (Mod mod, Mods modifiers, Location from, Location to) |
| |
| static uint16_t | hash_pos (const Positions *pos) |
| |
| static bool | valid_goto_index (Index index) |
| |
| static bool | valid_take_index (Index index) |
| |
| static bool | valid_lookahead_index (Index index) |
| |
| static bool | is_meta (Char c) |
| |
| static Opcode | opcode_long (Index index) |
| |
| static Opcode | opcode_take (Index index) |
| |
| static Opcode | opcode_redo () |
| |
| static Opcode | opcode_tail (Index index) |
| |
| static Opcode | opcode_head (Index index) |
| |
| static Opcode | opcode_goto (Char lo, Char hi, Index index) |
| |
| static Opcode | opcode_halt () |
| |
| static bool | is_opcode_long (Opcode opcode) |
| |
| static bool | is_opcode_take (Opcode opcode) |
| |
| static bool | is_opcode_redo (Opcode opcode) |
| |
| static bool | is_opcode_tail (Opcode opcode) |
| |
| static bool | is_opcode_head (Opcode opcode) |
| |
| static bool | is_opcode_halt (Opcode opcode) |
| |
| static bool | is_opcode_goto (Opcode opcode) |
| |
| static bool | is_opcode_meta (Opcode opcode) |
| |
| static bool | is_opcode_goto (Opcode opcode, unsigned char c) |
| |
| static Char | meta_of (Opcode opcode) |
| |
| static Char | lo_of (Opcode opcode) |
| |
| static Char | hi_of (Opcode opcode) |
| |
| static Index | index_of (Opcode opcode) |
| |
| static Index | long_index_of (Opcode opcode) |
| |
| static Lookahead | lookahead_of (Opcode opcode) |
| |
| static bool | islowercase (Char c) |
| | check if lower case More...
|
| |
| static bool | isuppercase (Char c) |
| | check if upper case More...
|
| |
| static bool | isanycase (Char c) |
| | check if lower or upper case More...
|
| |
| static Char | lowercase (Char c) |
| | convert to lower case if c is a letter a-z, A-Z. More...
|
| |
| static Char | uppercase (Char c) |
| | convert to upper case if c is a letter a-z, A-Z. More...
|
| |
| static uint32_t | hash (uint32_t h, uint8_t b) |
| | predict match hash 0 <= hash() < Const::HASH, must be additive: hash(h,b+1) = hash(h,b)+1 modulo Const::HASH. More...
|
| |
| static uint32_t | bihash (uint8_t a, uint8_t b) |
| | bitap character pairs hash. More...
|
| |
| static uint32_t | indexhash (Hash h, uint8_t b) |
| | file indexing hash 0 <= indexhash() < 65536, must be additive: indexhash(x,b+1) = indexhash(x,b)+1 modulo 2^16. More...
|
| |
|
| Option | opt_ |
| | pattern compiler options More...
|
| |
| HFA | hfa_ |
| | indexing hash finite state automaton More...
|
| |
| DFA | tfa_ |
| | tree DFA constructed from strings More...
|
| |
| DFA | dfa_ |
| | DFA constructed from regex with subset construction using firstpos/lastpos/followpos. More...
|
| |
| std::string | rex_ |
| | regular expression string More...
|
| |
| std::vector< Location > | end_ |
| | entries point to the subpattern's ending '|' or '\0' More...
|
| |
| std::vector< bool > | acc_ |
| | true if subpattern n is accepting (state is reachable) More...
|
| |
| uint32_t | vno_ |
| | number of finite state machine vertices |V| (nodes) More...
|
| |
| uint32_t | eno_ |
| | number of finite state machine edges |E| (arrows) More...
|
| |
| uint32_t | hno_ |
| | number of indexing hash tables (HFA edges) More...
|
| |
| const Opcode * | opc_ |
| | points to the table with compiled finite state machine opcodes More...
|
| |
| FSM | fsm_ |
| | function pointer to FSM code More...
|
| |
| Index | nop_ |
| | number of opcodes generated More...
|
| |
| Index | cut_ |
| | DFA s-t cut to improve predict match and HFA accuracy with lbk_ and cbk_. More...
|
| |
| uint16_t | len_ |
| | length of chr_[], less than or equal to 255 More...
|
| |
| uint16_t | min_ |
| | patterns after the prefix are at least this long but no more than Const::BITS More...
|
| |
| uint16_t | pin_ |
| | number of needles, 0 to 16 More...
|
| |
| std::bitset< 256 > | cbk_ |
| | characters to look back over when lbk_ > 0, never includes '\n' More...
|
| |
| std::bitset< 256 > | fst_ |
| | the beginning characters of the pattern More...
|
| |
| char | chr_ [256] |
| | pattern prefix string or character needles for needle-based search More...
|
| |
| Bitap | bit_ [256] |
| | bitsets of characters for the first positions (one position per bit) More...
|
| |
| Bitap | tap_ [Const::BTAP] |
| | bitap hashed character pairs array More...
|
| |
| Pred | pma_ [Const::HASH] |
| | predict-match array More...
|
| |
| uint16_t | lbk_ |
| | lookback distance or 0xffff unlimited lookback or 0 for no lookback (empty cbk_) More...
|
| |
| uint16_t | lbm_ |
| | lookback minimum distance when lbk_ > 0 More...
|
| |
| uint16_t | lcp_ |
| | primary least common character position in the pattern or 0xffff More...
|
| |
| uint16_t | lcs_ |
| | secondary least common character position in the pattern or 0xffff More...
|
| |
| uint16_t | bmd_ |
| | Boyer-Moore jump distance on mismatch, B-M is enabled when bmd_ > 0 (<= 255) More...
|
| |
| uint8_t | bms_ [256] |
| | Boyer-Moore skip array. More...
|
| |
| float | pms_ |
| | ms elapsed time to parse regex More...
|
| |
| float | vms_ |
| | ms elapsed time to compile DFA vertices More...
|
| |
| float | ems_ |
| | ms elapsed time to compile DFA edges More...
|
| |
| float | wms_ |
| | ms elapsed time to assemble code words More...
|
| |
| float | ams_ |
| | ms elapsed time to analyze DFA for predict match and HFA More...
|
| |
| uint16_t | npy_ |
| | entropy derived from the bitap array bit_[] More...
|
| |
| bool | one_ |
| | true if matching one string stored in chr_[] without meta/anchors More...
|
| |
| bool | bol_ |
| | true if matching all patterns at the beginning of a line with anchor ^ More...
|
| |
Pattern class holds a regex pattern and its compiled FSM opcode table or code for the reflex::Matcher engine.