regexp.c revision 6e7c7370
1#include <linux/init.h>
2#include <linux/module.h>
3#include <linux/types.h>
4#include <linux/mm.h>
5#include <linux/slab.h>
6//#include "regexp.h"
7
8typedef enum{CHAR, DOT, BEGIN, END, STAR, PLUS, QUES, LIST, TYPENUM}TYPE;
9
10typedef struct RE{
11	TYPE type;
12	int ch;
13	char *ccl;
14	int nccl;
15	struct RE *next;
16}RE;
17
18int match_longest = 0;
19char *match_first = NULL;
20
21
22static void * getmem(size_t size)
23{
24	void *tmp;
25	if((tmp = kmalloc(size, GFP_ATOMIC))==NULL)
26	{
27		printk("malloc failed");
28		return NULL;
29	}
30	return tmp;
31}
32
33static size_t creat_list(char *str, int start, int end)
34{
35	size_t cnt = end - start + 1;
36	for(; start <= end ;start++)
37		*str++ = start;
38	return (cnt > 0)?cnt:0;
39}
40
41static int in_list(char ch, RE *regexp)
42{
43	char *str = regexp->ccl;
44	if(regexp->type != LIST)
45		return 0;
46	for(; *str && ch != *str; str++)
47		;
48	return (*str != '\0') ^ regexp->nccl;
49}
50
51static void regexp_free(RE *regexp)
52{
53	RE *tmp;
54	for(; regexp; regexp = tmp)
55	{
56		tmp = regexp->next;
57		kfree(regexp);
58	}
59}
60
61static RE* compile(char *regexp)
62{
63	RE head, *tail, *tmp;
64	char *pstr;
65	int err_flag = 0;
66
67	for(tail = &head; *regexp != '\0' && err_flag == 0; regexp++)
68	{
69		tmp = getmem(sizeof(RE));
70		switch(*regexp){
71			case '\\':
72				regexp++;
73				if(*regexp == 'd')
74				{
75					tmp->type = LIST;
76					tmp->nccl = 0;
77					tmp->ccl = getmem(11);
78					creat_list(tmp->ccl, '0','9');
79					tmp->ccl[11] = '\0';
80				}else if(*regexp == 'D')
81				{
82					tmp->type = LIST;
83					tmp->nccl = 1;
84					tmp->ccl = getmem(11);
85					creat_list(tmp->ccl, '0','9');
86					tmp->ccl[11] = '\0';
87				}else
88				{
89					tmp->type = CHAR;
90					tmp->ch = *(regexp + 1);
91				}
92				break;
93			case '.':
94				tmp->type = DOT;
95				break;
96			case '^':
97				tmp->type = BEGIN;
98				tmp->ch = '^';
99				break;
100			case '$':
101				tmp->type = END;
102				tmp->ch = '$';
103				break;
104			case '*':
105				tmp->type = STAR;
106				break;
107			case '+':
108				tmp->type = PLUS;
109				break;
110			case '?':
111				tmp->type = QUES;
112				break;
113			case '[':
114				pstr = tmp->ccl = getmem(256);
115				tmp->nccl = 0;
116				if(*++regexp == '^')
117				{
118					tmp->nccl = 1;
119					regexp++;
120				}
121				while(*regexp != '\0' && *regexp != ']')
122				{
123					if(*regexp != '-')
124					{
125						*pstr++ = *regexp++;
126						continue;
127					}
128					if(pstr == tmp->ccl || *(regexp + 1) == ']')
129					{
130						err_flag = 1;
131						break;
132					}
133					pstr += creat_list(pstr, *(regexp - 1) + 1, *(regexp + 1));
134					regexp += 2;
135				}
136				*pstr = '\0';
137				if(*regexp == '\0')
138					err_flag = 1;
139				tmp->type = LIST;
140				break;
141			default:
142				tmp->type = CHAR;
143				tmp->ch = *regexp;
144		}
145
146		tail->next = tmp;
147		tail = tmp;
148	}
149
150	tail->next = NULL;
151	if(err_flag)
152	{
153		regexp_free(head.next);
154		return NULL;
155	}
156	return head.next;
157}
158
159#define MATCH_ONE(reg, text) \
160   	(reg->type == DOT || in_list(*text, reg) || *text == reg->ch)
161#define MATCH_ONE_P(reg, text) \
162   	(in_list(*text++, reg) || *(text - 1) == reg->ch || reg->type == DOT)
163
164static int matchhere(RE *regexp, char *text);
165
166static int matchstar(RE *cur, RE *regexp, char *text)
167{
168	do{
169		if(matchhere(regexp, text))
170			return 1;
171	}while(*text != '\0' && MATCH_ONE_P(cur, text));
172	return 0;
173}
174
175static int matchstar_l(RE *cur, RE *regexp, char *text)
176{
177	char *t;
178	for(t = text; *t != '\0' && MATCH_ONE(cur, t); t++)
179		;
180	do{
181		if(matchhere(regexp, t))
182			return 1;
183	}while(t-- > text);
184	return 0;
185}
186
187static int matchplus(RE *cur, RE *regexp, char *text)
188{
189	while(*text != '\0' && MATCH_ONE_P(cur, text))
190	{
191		if(matchhere(regexp, text))
192			return 1;
193	}
194	return 0;
195}
196
197static int matchplus_l(RE *cur, RE *regexp, char *text)
198{
199	char *t;
200	for(t = text; *t != '\0' && MATCH_ONE(cur, t); t++)
201		;
202	for(; t > text; t--)
203	{
204		if(matchhere(regexp, t))
205			return 1;
206	}
207	return 0;
208}
209
210static int matchques(RE *cur, RE *regexp, char *text)
211{
212	int cnt = 1;
213	char *t = text;
214	if(*t != '\0' && cnt-- && MATCH_ONE(cur, t))
215		t++;
216	do{
217		if(matchhere(regexp, t))
218			return 1;
219	}while(t-- > text);
220	return 0;
221}
222
223static int (*matchfun[TYPENUM][2])(RE *, RE *, char *) = {
224	0, 0, 0, 0, 0, 0, 0, 0,
225	matchstar, matchstar_l,
226	matchplus, matchplus_l,
227	matchques, matchques,
228};
229
230static int matchhere(RE *regexp, char *text)
231{
232	if(regexp == NULL)
233		return 1;
234	if(regexp->type == END && regexp->next == NULL)
235		return *text == '\0';
236	if(regexp->next && matchfun[regexp->next->type][match_longest])
237		return matchfun[regexp->next->type][match_longest](regexp, regexp->next->next, text);
238
239	if(*text != '\0' && MATCH_ONE(regexp, text))
240		return matchhere(regexp->next, text + 1);
241	return 0;
242}
243
244/*
245 * return value:
246 *		-1		error
247 *		0		not match
248 *		1		matched
249 */
250int regexp_match(char *reg, char *text)
251{
252	int ret;
253	RE *regexp = compile(reg);
254	if(regexp == NULL)
255		return -1;
256
257	if(regexp->type == BEGIN)
258	{
259		ret = matchhere(regexp->next, text);
260		goto out;
261	}
262
263	do{
264		if(ret = matchhere(regexp, text))
265		{
266			goto out;
267		}
268	}while(*text++ != '\0');
269
270out:
271	regexp_free(regexp);
272	return ret;
273}
274
275
276void TEST_reg_func(char *reg, char * str, int ret)
277{
278
279	if (ret != regexp_match(reg, str)) {
280		if (reg)
281			printk("reg = %s,", reg);
282		else
283			printk("reg = null");
284		if (str)
285			printk("str = %s ", str);
286		else
287			printk("str= null");
288		printk("error, unit test.... failed, ret = %d\n",ret);
289	}
290	else {
291		if (reg && str)
292			printk("[unit test] %s %s......ok,ret = %d\n", reg, str, ret);
293	}
294}
295
296void TEST_regexp(void)
297{
298	TEST_reg_func(".*baidu.com$", "www.baidu.com", 1);
299	TEST_reg_func("^sina.com", "www.sina.com.cn", 0);
300	TEST_reg_func("^sina.com", "sina.com.cn", 1);
301	TEST_reg_func(".*baidu.com$", "www.baidu.com223", 0);
302}
303