regexp.c revision 6e7c7370
1#include <linux/init.h> 2#include <linux/module.h> 3#include <linux/types.h> 4#include <linux/mm.h> 5#include <linux/slab.h> 6//#include "regexp.h" 7 8typedef enum{CHAR, DOT, BEGIN, END, STAR, PLUS, QUES, LIST, TYPENUM}TYPE; 9 10typedef struct RE{ 11 TYPE type; 12 int ch; 13 char *ccl; 14 int nccl; 15 struct RE *next; 16}RE; 17 18int match_longest = 0; 19char *match_first = NULL; 20 21 22static void * getmem(size_t size) 23{ 24 void *tmp; 25 if((tmp = kmalloc(size, GFP_ATOMIC))==NULL) 26 { 27 printk("malloc failed"); 28 return NULL; 29 } 30 return tmp; 31} 32 33static size_t creat_list(char *str, int start, int end) 34{ 35 size_t cnt = end - start + 1; 36 for(; start <= end ;start++) 37 *str++ = start; 38 return (cnt > 0)?cnt:0; 39} 40 41static int in_list(char ch, RE *regexp) 42{ 43 char *str = regexp->ccl; 44 if(regexp->type != LIST) 45 return 0; 46 for(; *str && ch != *str; str++) 47 ; 48 return (*str != '\0') ^ regexp->nccl; 49} 50 51static void regexp_free(RE *regexp) 52{ 53 RE *tmp; 54 for(; regexp; regexp = tmp) 55 { 56 tmp = regexp->next; 57 kfree(regexp); 58 } 59} 60 61static RE* compile(char *regexp) 62{ 63 RE head, *tail, *tmp; 64 char *pstr; 65 int err_flag = 0; 66 67 for(tail = &head; *regexp != '\0' && err_flag == 0; regexp++) 68 { 69 tmp = getmem(sizeof(RE)); 70 switch(*regexp){ 71 case '\\': 72 regexp++; 73 if(*regexp == 'd') 74 { 75 tmp->type = LIST; 76 tmp->nccl = 0; 77 tmp->ccl = getmem(11); 78 creat_list(tmp->ccl, '0','9'); 79 tmp->ccl[11] = '\0'; 80 }else if(*regexp == 'D') 81 { 82 tmp->type = LIST; 83 tmp->nccl = 1; 84 tmp->ccl = getmem(11); 85 creat_list(tmp->ccl, '0','9'); 86 tmp->ccl[11] = '\0'; 87 }else 88 { 89 tmp->type = CHAR; 90 tmp->ch = *(regexp + 1); 91 } 92 break; 93 case '.': 94 tmp->type = DOT; 95 break; 96 case '^': 97 tmp->type = BEGIN; 98 tmp->ch = '^'; 99 break; 100 case '$': 101 tmp->type = END; 102 tmp->ch = '$'; 103 break; 104 case '*': 105 tmp->type = STAR; 106 break; 107 case '+': 108 tmp->type = PLUS; 109 break; 110 case '?': 111 tmp->type = QUES; 112 break; 113 case '[': 114 pstr = tmp->ccl = getmem(256); 115 tmp->nccl = 0; 116 if(*++regexp == '^') 117 { 118 tmp->nccl = 1; 119 regexp++; 120 } 121 while(*regexp != '\0' && *regexp != ']') 122 { 123 if(*regexp != '-') 124 { 125 *pstr++ = *regexp++; 126 continue; 127 } 128 if(pstr == tmp->ccl || *(regexp + 1) == ']') 129 { 130 err_flag = 1; 131 break; 132 } 133 pstr += creat_list(pstr, *(regexp - 1) + 1, *(regexp + 1)); 134 regexp += 2; 135 } 136 *pstr = '\0'; 137 if(*regexp == '\0') 138 err_flag = 1; 139 tmp->type = LIST; 140 break; 141 default: 142 tmp->type = CHAR; 143 tmp->ch = *regexp; 144 } 145 146 tail->next = tmp; 147 tail = tmp; 148 } 149 150 tail->next = NULL; 151 if(err_flag) 152 { 153 regexp_free(head.next); 154 return NULL; 155 } 156 return head.next; 157} 158 159#define MATCH_ONE(reg, text) \ 160 (reg->type == DOT || in_list(*text, reg) || *text == reg->ch) 161#define MATCH_ONE_P(reg, text) \ 162 (in_list(*text++, reg) || *(text - 1) == reg->ch || reg->type == DOT) 163 164static int matchhere(RE *regexp, char *text); 165 166static int matchstar(RE *cur, RE *regexp, char *text) 167{ 168 do{ 169 if(matchhere(regexp, text)) 170 return 1; 171 }while(*text != '\0' && MATCH_ONE_P(cur, text)); 172 return 0; 173} 174 175static int matchstar_l(RE *cur, RE *regexp, char *text) 176{ 177 char *t; 178 for(t = text; *t != '\0' && MATCH_ONE(cur, t); t++) 179 ; 180 do{ 181 if(matchhere(regexp, t)) 182 return 1; 183 }while(t-- > text); 184 return 0; 185} 186 187static int matchplus(RE *cur, RE *regexp, char *text) 188{ 189 while(*text != '\0' && MATCH_ONE_P(cur, text)) 190 { 191 if(matchhere(regexp, text)) 192 return 1; 193 } 194 return 0; 195} 196 197static int matchplus_l(RE *cur, RE *regexp, char *text) 198{ 199 char *t; 200 for(t = text; *t != '\0' && MATCH_ONE(cur, t); t++) 201 ; 202 for(; t > text; t--) 203 { 204 if(matchhere(regexp, t)) 205 return 1; 206 } 207 return 0; 208} 209 210static int matchques(RE *cur, RE *regexp, char *text) 211{ 212 int cnt = 1; 213 char *t = text; 214 if(*t != '\0' && cnt-- && MATCH_ONE(cur, t)) 215 t++; 216 do{ 217 if(matchhere(regexp, t)) 218 return 1; 219 }while(t-- > text); 220 return 0; 221} 222 223static int (*matchfun[TYPENUM][2])(RE *, RE *, char *) = { 224 0, 0, 0, 0, 0, 0, 0, 0, 225 matchstar, matchstar_l, 226 matchplus, matchplus_l, 227 matchques, matchques, 228}; 229 230static int matchhere(RE *regexp, char *text) 231{ 232 if(regexp == NULL) 233 return 1; 234 if(regexp->type == END && regexp->next == NULL) 235 return *text == '\0'; 236 if(regexp->next && matchfun[regexp->next->type][match_longest]) 237 return matchfun[regexp->next->type][match_longest](regexp, regexp->next->next, text); 238 239 if(*text != '\0' && MATCH_ONE(regexp, text)) 240 return matchhere(regexp->next, text + 1); 241 return 0; 242} 243 244/* 245 * return value: 246 * -1 error 247 * 0 not match 248 * 1 matched 249 */ 250int regexp_match(char *reg, char *text) 251{ 252 int ret; 253 RE *regexp = compile(reg); 254 if(regexp == NULL) 255 return -1; 256 257 if(regexp->type == BEGIN) 258 { 259 ret = matchhere(regexp->next, text); 260 goto out; 261 } 262 263 do{ 264 if(ret = matchhere(regexp, text)) 265 { 266 goto out; 267 } 268 }while(*text++ != '\0'); 269 270out: 271 regexp_free(regexp); 272 return ret; 273} 274 275 276void TEST_reg_func(char *reg, char * str, int ret) 277{ 278 279 if (ret != regexp_match(reg, str)) { 280 if (reg) 281 printk("reg = %s,", reg); 282 else 283 printk("reg = null"); 284 if (str) 285 printk("str = %s ", str); 286 else 287 printk("str= null"); 288 printk("error, unit test.... failed, ret = %d\n",ret); 289 } 290 else { 291 if (reg && str) 292 printk("[unit test] %s %s......ok,ret = %d\n", reg, str, ret); 293 } 294} 295 296void TEST_regexp(void) 297{ 298 TEST_reg_func(".*baidu.com$", "www.baidu.com", 1); 299 TEST_reg_func("^sina.com", "www.sina.com.cn", 0); 300 TEST_reg_func("^sina.com", "sina.com.cn", 1); 301 TEST_reg_func(".*baidu.com$", "www.baidu.com223", 0); 302} 303