2874c5fd28426 (Thomas Gleixner 2019-05-27 08:55:01 +0200 1) // SPDX-License-Identifier: GPL-2.0-or-later
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 2) /*
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 3) * lib/ts_fsm.c A naive finite state machine text search approach
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 4) *
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 5) * Authors: Thomas Graf <tgraf@suug.ch>
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 6) *
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 7) * ==========================================================================
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 8) *
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 9) * A finite state machine consists of n states (struct ts_fsm_token)
7433a8d6fa60a (Randy Dunlap 2017-10-20 12:15:52 -0700 10) * representing the pattern as a finite automaton. The data is read
1497b2749babb (Andreas Mohr 2006-09-29 02:01:28 -0700 11) * sequentially on an octet basis. Every state token specifies the number
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 12) * of recurrences and the type of value accepted which can be either a
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 13) * specific character or ctype based set of characters. The available
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 14) * type of recurrences include 1, (0|1), [0 n], and [1 n].
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 15) *
1497b2749babb (Andreas Mohr 2006-09-29 02:01:28 -0700 16) * The algorithm differs between strict/non-strict mode specifying
1497b2749babb (Andreas Mohr 2006-09-29 02:01:28 -0700 17) * whether the pattern has to start at the first octet. Strict mode
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 18) * is enabled by default and can be disabled by inserting
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 19) * TS_FSM_HEAD_IGNORE as the first token in the chain.
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 20) *
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 21) * The runtime performance of the algorithm should be around O(n),
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 22) * however while in strict mode the average runtime can be better.
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 23) */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 24)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 25) #include <linux/module.h>
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 26) #include <linux/types.h>
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 27) #include <linux/string.h>
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 28) #include <linux/ctype.h>
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 29) #include <linux/textsearch.h>
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 30) #include <linux/textsearch_fsm.h>
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 31)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 32) struct ts_fsm
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 33) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 34) unsigned int ntokens;
842ae1f52b44a (Gustavo A. R. Silva 2020-04-06 20:10:03 -0700 35) struct ts_fsm_token tokens[];
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 36) };
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 37)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 38) /* other values derived from ctype.h */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 39) #define _A 0x100 /* ascii */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 40) #define _W 0x200 /* wildcard */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 41)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 42) /* Map to _ctype flags and some magic numbers */
1497b2749babb (Andreas Mohr 2006-09-29 02:01:28 -0700 43) static const u16 token_map[TS_FSM_TYPE_MAX+1] = {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 44) [TS_FSM_SPECIFIC] = 0,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 45) [TS_FSM_WILDCARD] = _W,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 46) [TS_FSM_CNTRL] = _C,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 47) [TS_FSM_LOWER] = _L,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 48) [TS_FSM_UPPER] = _U,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 49) [TS_FSM_PUNCT] = _P,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 50) [TS_FSM_SPACE] = _S,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 51) [TS_FSM_DIGIT] = _D,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 52) [TS_FSM_XDIGIT] = _D | _X,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 53) [TS_FSM_ALPHA] = _U | _L,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 54) [TS_FSM_ALNUM] = _U | _L | _D,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 55) [TS_FSM_PRINT] = _P | _U | _L | _D | _SP,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 56) [TS_FSM_GRAPH] = _P | _U | _L | _D,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 57) [TS_FSM_ASCII] = _A,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 58) };
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 59)
1497b2749babb (Andreas Mohr 2006-09-29 02:01:28 -0700 60) static const u16 token_lookup_tbl[256] = {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 61) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 0- 3 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 62) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 4- 7 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 63) _W|_A|_C, _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C|_S, /* 8- 11 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 64) _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C, _W|_A|_C, /* 12- 15 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 65) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 16- 19 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 66) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 20- 23 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 67) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 24- 27 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 68) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 28- 31 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 69) _W|_A|_S|_SP, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 32- 35 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 70) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 36- 39 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 71) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 40- 43 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 72) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 44- 47 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 73) _W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 48- 51 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 74) _W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 52- 55 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 75) _W|_A|_D, _W|_A|_D, _W|_A|_P, _W|_A|_P, /* 56- 59 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 76) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 60- 63 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 77) _W|_A|_P, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, /* 64- 67 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 78) _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U, /* 68- 71 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 79) _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 72- 75 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 80) _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 76- 79 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 81) _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 80- 83 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 82) _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 84- 87 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 83) _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_P, /* 88- 91 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 84) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 92- 95 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 85) _W|_A|_P, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, /* 96- 99 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 86) _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L, /* 100-103 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 87) _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 104-107 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 88) _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 108-111 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 89) _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 112-115 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 90) _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 116-119 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 91) _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_P, /* 120-123 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 92) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_C, /* 124-127 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 93) _W, _W, _W, _W, /* 128-131 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 94) _W, _W, _W, _W, /* 132-135 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 95) _W, _W, _W, _W, /* 136-139 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 96) _W, _W, _W, _W, /* 140-143 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 97) _W, _W, _W, _W, /* 144-147 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 98) _W, _W, _W, _W, /* 148-151 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 99) _W, _W, _W, _W, /* 152-155 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 100) _W, _W, _W, _W, /* 156-159 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 101) _W|_S|_SP, _W|_P, _W|_P, _W|_P, /* 160-163 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 102) _W|_P, _W|_P, _W|_P, _W|_P, /* 164-167 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 103) _W|_P, _W|_P, _W|_P, _W|_P, /* 168-171 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 104) _W|_P, _W|_P, _W|_P, _W|_P, /* 172-175 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 105) _W|_P, _W|_P, _W|_P, _W|_P, /* 176-179 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 106) _W|_P, _W|_P, _W|_P, _W|_P, /* 180-183 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 107) _W|_P, _W|_P, _W|_P, _W|_P, /* 184-187 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 108) _W|_P, _W|_P, _W|_P, _W|_P, /* 188-191 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 109) _W|_U, _W|_U, _W|_U, _W|_U, /* 192-195 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 110) _W|_U, _W|_U, _W|_U, _W|_U, /* 196-199 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 111) _W|_U, _W|_U, _W|_U, _W|_U, /* 200-203 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 112) _W|_U, _W|_U, _W|_U, _W|_U, /* 204-207 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 113) _W|_U, _W|_U, _W|_U, _W|_U, /* 208-211 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 114) _W|_U, _W|_U, _W|_U, _W|_P, /* 212-215 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 115) _W|_U, _W|_U, _W|_U, _W|_U, /* 216-219 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 116) _W|_U, _W|_U, _W|_U, _W|_L, /* 220-223 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 117) _W|_L, _W|_L, _W|_L, _W|_L, /* 224-227 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 118) _W|_L, _W|_L, _W|_L, _W|_L, /* 228-231 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 119) _W|_L, _W|_L, _W|_L, _W|_L, /* 232-235 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 120) _W|_L, _W|_L, _W|_L, _W|_L, /* 236-239 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 121) _W|_L, _W|_L, _W|_L, _W|_L, /* 240-243 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 122) _W|_L, _W|_L, _W|_L, _W|_P, /* 244-247 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 123) _W|_L, _W|_L, _W|_L, _W|_L, /* 248-251 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 124) _W|_L, _W|_L, _W|_L, _W|_L}; /* 252-255 */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 125)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 126) static inline int match_token(struct ts_fsm_token *t, u8 d)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 127) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 128) if (t->type)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 129) return (token_lookup_tbl[d] & t->type) != 0;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 130) else
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 131) return t->value == d;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 132) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 133)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 134) static unsigned int fsm_find(struct ts_config *conf, struct ts_state *state)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 135) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 136) struct ts_fsm *fsm = ts_config_priv(conf);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 137) struct ts_fsm_token *cur = NULL, *next;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 138) unsigned int match_start, block_idx = 0, tok_idx;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 139) unsigned block_len = 0, strict, consumed = state->offset;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 140) const u8 *data;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 141)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 142) #define GET_NEXT_BLOCK() \
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 143) ({ consumed += block_idx; \
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 144) block_idx = 0; \
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 145) block_len = conf->get_next_block(consumed, &data, conf, state); })
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 146)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 147) #define TOKEN_MISMATCH() \
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 148) do { \
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 149) if (strict) \
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 150) goto no_match; \
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 151) block_idx++; \
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 152) goto startover; \
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 153) } while(0)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 154)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 155) #define end_of_data() unlikely(block_idx >= block_len && !GET_NEXT_BLOCK())
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 156)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 157) if (end_of_data())
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 158) goto no_match;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 159)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 160) strict = fsm->tokens[0].recur != TS_FSM_HEAD_IGNORE;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 161)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 162) startover:
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 163) match_start = consumed + block_idx;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 164)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 165) for (tok_idx = 0; tok_idx < fsm->ntokens; tok_idx++) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 166) cur = &fsm->tokens[tok_idx];
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 167)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 168) if (likely(tok_idx < (fsm->ntokens - 1)))
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 169) next = &fsm->tokens[tok_idx + 1];
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 170) else
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 171) next = NULL;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 172)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 173) switch (cur->recur) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 174) case TS_FSM_SINGLE:
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 175) if (end_of_data())
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 176) goto no_match;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 177)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 178) if (!match_token(cur, data[block_idx]))
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 179) TOKEN_MISMATCH();
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 180) break;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 181)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 182) case TS_FSM_PERHAPS:
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 183) if (end_of_data() ||
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 184) !match_token(cur, data[block_idx]))
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 185) continue;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 186) break;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 187)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 188) case TS_FSM_MULTI:
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 189) if (end_of_data())
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 190) goto no_match;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 191)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 192) if (!match_token(cur, data[block_idx]))
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 193) TOKEN_MISMATCH();
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 194)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 195) block_idx++;
4c1ca831adb10 (Nick Desaulniers 2020-11-15 20:35:31 -0800 196) fallthrough;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 197)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 198) case TS_FSM_ANY:
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 199) if (next == NULL)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 200) goto found_match;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 201)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 202) if (end_of_data())
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 203) continue;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 204)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 205) while (!match_token(next, data[block_idx])) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 206) if (!match_token(cur, data[block_idx]))
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 207) TOKEN_MISMATCH();
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 208) block_idx++;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 209) if (end_of_data())
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 210) goto no_match;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 211) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 212) continue;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 213)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 214) /*
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 215) * Optimization: Prefer small local loop over jumping
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 216) * back and forth until garbage at head is munched.
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 217) */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 218) case TS_FSM_HEAD_IGNORE:
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 219) if (end_of_data())
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 220) continue;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 221)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 222) while (!match_token(next, data[block_idx])) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 223) /*
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 224) * Special case, don't start over upon
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 225) * a mismatch, give the user the
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 226) * chance to specify the type of data
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 227) * allowed to be ignored.
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 228) */
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 229) if (!match_token(cur, data[block_idx]))
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 230) goto no_match;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 231)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 232) block_idx++;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 233) if (end_of_data())
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 234) goto no_match;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 235) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 236)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 237) match_start = consumed + block_idx;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 238) continue;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 239) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 240)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 241) block_idx++;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 242) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 243)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 244) if (end_of_data())
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 245) goto found_match;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 246)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 247) no_match:
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 248) return UINT_MAX;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 249)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 250) found_match:
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 251) state->offset = consumed + block_idx;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 252) return match_start;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 253) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 254)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 255) static struct ts_config *fsm_init(const void *pattern, unsigned int len,
43138833ee9af (Joonwoo Park 2008-07-08 02:38:27 -0700 256) gfp_t gfp_mask, int flags)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 257) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 258) int i, err = -EINVAL;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 259) struct ts_config *conf;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 260) struct ts_fsm *fsm;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 261) struct ts_fsm_token *tokens = (struct ts_fsm_token *) pattern;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 262) unsigned int ntokens = len / sizeof(*tokens);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 263) size_t priv_size = sizeof(*fsm) + len;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 264)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 265) if (len % sizeof(struct ts_fsm_token) || ntokens < 1)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 266) goto errout;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 267)
43138833ee9af (Joonwoo Park 2008-07-08 02:38:27 -0700 268) if (flags & TS_IGNORECASE)
43138833ee9af (Joonwoo Park 2008-07-08 02:38:27 -0700 269) goto errout;
43138833ee9af (Joonwoo Park 2008-07-08 02:38:27 -0700 270)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 271) for (i = 0; i < ntokens; i++) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 272) struct ts_fsm_token *t = &tokens[i];
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 273)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 274) if (t->type > TS_FSM_TYPE_MAX || t->recur > TS_FSM_RECUR_MAX)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 275) goto errout;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 276)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 277) if (t->recur == TS_FSM_HEAD_IGNORE &&
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 278) (i != 0 || i == (ntokens - 1)))
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 279) goto errout;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 280) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 281)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 282) conf = alloc_ts_config(priv_size, gfp_mask);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 283) if (IS_ERR(conf))
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 284) return conf;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 285)
43138833ee9af (Joonwoo Park 2008-07-08 02:38:27 -0700 286) conf->flags = flags;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 287) fsm = ts_config_priv(conf);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 288) fsm->ntokens = ntokens;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 289) memcpy(fsm->tokens, pattern, len);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 290)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 291) for (i = 0; i < fsm->ntokens; i++) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 292) struct ts_fsm_token *t = &fsm->tokens[i];
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 293) t->type = token_map[t->type];
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 294) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 295)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 296) return conf;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 297)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 298) errout:
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 299) return ERR_PTR(err);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 300) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 301)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 302) static void *fsm_get_pattern(struct ts_config *conf)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 303) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 304) struct ts_fsm *fsm = ts_config_priv(conf);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 305) return fsm->tokens;
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 306) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 307)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 308) static unsigned int fsm_get_pattern_len(struct ts_config *conf)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 309) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 310) struct ts_fsm *fsm = ts_config_priv(conf);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 311) return fsm->ntokens * sizeof(struct ts_fsm_token);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 312) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 313)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 314) static struct ts_ops fsm_ops = {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 315) .name = "fsm",
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 316) .find = fsm_find,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 317) .init = fsm_init,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 318) .get_pattern = fsm_get_pattern,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 319) .get_pattern_len = fsm_get_pattern_len,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 320) .owner = THIS_MODULE,
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 321) .list = LIST_HEAD_INIT(fsm_ops.list)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 322) };
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 323)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 324) static int __init init_fsm(void)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 325) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 326) return textsearch_register(&fsm_ops);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 327) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 328)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 329) static void __exit exit_fsm(void)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 330) {
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 331) textsearch_unregister(&fsm_ops);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 332) }
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 333)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 334) MODULE_LICENSE("GPL");
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 335)
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 336) module_init(init_fsm);
6408f79cce401 (Thomas Graf 2005-06-23 20:59:16 -0700 337) module_exit(exit_fsm);