1
1
/*
2
- Copyright (C) 2010, Bruce Ediger
2
+ Copyright (C) 2010-2011 , Bruce Ediger
3
3
4
4
This file is part of acl.
5
5
18
18
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
19
20
20
*/
21
- /* $Id: aho_corasick.c,v 1.3 2010/08/10 20:50:39 bediger Exp $ */
21
+ /* $Id: aho_corasick.c,v 1.8 2011/06/12 18:22:00 bediger Exp $ */
22
22
23
23
/*
24
24
* This code based on:
@@ -56,6 +56,7 @@ struct stack_elem {
56
56
struct node * n ; /* 1 */
57
57
int state_at_n ; /* 2 */
58
58
int visited ; /* 3 */
59
+ int node_number ;
59
60
};
60
61
61
62
void set_output_length (struct gto * p , int state , int node_count );
@@ -258,7 +259,6 @@ construct_failure(struct gto *g)
258
259
int i ;
259
260
struct queue * q ;
260
261
261
-
262
262
g -> failure = malloc (g -> ary_len * sizeof (int ));
263
263
264
264
for (i = 0 ; i < g -> ary_len ; ++ i )
@@ -394,6 +394,7 @@ algorithm_d(struct gto *g, struct node *t, int subject_node_count, int pat_path_
394
394
{
395
395
int top = 1 ;
396
396
int matched = 0 ;
397
+ int breadth_counter = 0 ;
397
398
int next_state ;
398
399
int i ;
399
400
int * count ;
@@ -415,23 +416,34 @@ algorithm_d(struct gto *g, struct node *t, int subject_node_count, int pat_path_
415
416
stack [top ].n = t ;
416
417
stack [top ].state_at_n = next_state ;
417
418
stack [top ].visited = 0 ;
419
+ stack [top ].node_number = breadth_counter ++ ;
418
420
419
421
matched += tabulate (g , stack , top , next_state , pat_path_cnt , count );
420
422
421
423
if (!matched )
422
424
{
423
- if (var_in_tree (t , abstr_var_name ))
424
- next_state = g -> delta [0 ][(int )'+' ];
425
- else
426
- next_state = g -> delta [0 ][(int )'-' ];
427
-
428
- matched += tabulate (g , stack , top , next_state , pat_path_cnt , count );
425
+ if (any_var_in_tree (t ))
426
+ {
427
+ if (var_in_tree (t , abstr_var_name ))
428
+ next_state = g -> delta [0 ][(int )'+' ];
429
+ else
430
+ next_state = g -> delta [0 ][(int )'-' ];
431
+ matched += tabulate (g , stack , top , next_state , pat_path_cnt , count );
432
+ } else {
433
+ next_state = g -> delta [0 ][(int )'!' ];
434
+ matched += tabulate (g , stack , top , next_state , pat_path_cnt , count );
435
+ if (!matched )
436
+ {
437
+ next_state = g -> delta [0 ][(int )'-' ];
438
+ matched += tabulate (g , stack , top , next_state , pat_path_cnt , count );
439
+ }
440
+ }
429
441
}
430
442
431
443
while (!matched && top > 0 )
432
444
{
433
445
struct node * next_node , * this_node = stack [top ].n ;
434
- int intstate , next_state , this_state = stack [top ].state_at_n ;
446
+ int intstate , nxt_st , this_state = stack [top ].state_at_n ;
435
447
int visited = stack [top ].visited ;
436
448
437
449
if (visited == 2 || this_node -> typ == ATOM || top > g -> max_node_count )
@@ -444,27 +456,35 @@ algorithm_d(struct gto *g, struct node *t, int subject_node_count, int pat_path_
444
456
matched += tabulate (g , stack , top , intstate , pat_path_cnt , count );
445
457
446
458
next_node = (visited == 1 )? this_node -> left : this_node -> right ;
447
- next_state = intstate ;
459
+ nxt_st = intstate ;
448
460
449
461
p = (next_node -> name != abstr_var_name )? next_node -> name : abstr_meta_var ;
450
462
while (* p )
451
- next_state = g -> delta [next_state ][(int )* p ++ ];
463
+ nxt_st = g -> delta [nxt_st ][(int )* p ++ ];
452
464
453
465
++ top ;
454
466
stack [top ].n = next_node ;
455
- stack [top ].state_at_n = next_state ;
467
+ stack [top ].state_at_n = nxt_st ;
456
468
stack [top ].visited = 0 ;
469
+ stack [top ].node_number = breadth_counter ++ ;
457
470
458
471
if (top <= g -> max_node_count )
459
472
{
460
- matched += tabulate (g , stack , top , next_state , pat_path_cnt , count );
461
-
462
- if (var_in_tree (next_node , abstr_var_name ))
463
- next_state = g -> delta [intstate ][(int )'+' ];
464
- else
465
- next_state = g -> delta [intstate ][(int )'-' ];
466
-
467
- matched += tabulate (g , stack , top , next_state , pat_path_cnt , count );
473
+ matched += tabulate (g , stack , top , nxt_st , pat_path_cnt , count );
474
+
475
+ if (any_var_in_tree (next_node ))
476
+ {
477
+ if (var_in_tree (next_node , abstr_var_name ))
478
+ nxt_st = g -> delta [intstate ][(int )'+' ];
479
+ else
480
+ nxt_st = g -> delta [intstate ][(int )'-' ];
481
+ } else {
482
+ nxt_st = g -> delta [intstate ][(int )'!' ];
483
+ if (0 == nxt_st )
484
+ nxt_st = g -> delta [intstate ][(int )'-' ];
485
+ }
486
+
487
+ matched += tabulate (g , stack , top , nxt_st , pat_path_cnt , count );
468
488
}
469
489
}
470
490
}
@@ -494,11 +514,11 @@ tabulate(struct gto *g, struct stack_elem *stack, int top, int state, int pat_le
494
514
495
515
for (i = 0 ; r == 0 && i < oxt -> len ; ++ i )
496
516
{
497
- struct node * n = stack [ top - oxt -> out [i ] + 1 ]. n ;
517
+ int idx = top - oxt -> out [i ] + 1 ;
498
518
499
- ++ count [n -> node_number ];
519
+ ++ count [stack [ idx ]. node_number ];
500
520
501
- if (count [n -> node_number ] == pat_leaf_count )
521
+ if (count [stack [ idx ]. node_number ] == pat_leaf_count )
502
522
r = 1 ;
503
523
}
504
524
0 commit comments