Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- --- dfa.c.back 2013-08-05 16:53:47.824514544 +0400
- +++ dfa.c 2013-08-25 04:23:18.783885918 +0400
- @@ -1992,6 +1992,115 @@
- s->elems[i] = s->elems[i + 1];
- }
- +/* merge implementation of sorted sets, then number of sets smore than 2
- + priority queue aka 'heap' data structure used to select maximum of
- + many elements */
- +
- +typedef struct {
- + position p;
- + size_t set_index; /* remember which set this element was taken from */
- +} heap_elem;
- +
- +
- +static void
- +heap_add (heap_elem *heap, size_t *heap_size, heap_elem e)
- +{
- + size_t i = *heap_size;
- + while (i) {
- + size_t parent = (i-1) / 2;
- + if (e.p.index < heap[parent].p.index)
- + break;
- +
- + heap[i] = heap[parent];
- + i = parent;
- + }
- + heap[i] = e;
- + *heap_size += 1;
- +}
- +
- +static void
- +heap_remove_top_and_add (heap_elem *heap, size_t heap_size, heap_elem e)
- +{
- + size_t i = 0;
- + size_t lchild = 2*i + 1;
- + size_t n = heap_size;
- + while (lchild < n) {
- + size_t rchild = lchild + 1;
- + size_t nextchild = lchild;
- + if (rchild < n && heap[lchild].p.index < heap[rchild].p.index)
- + nextchild = rchild;
- + if (e.p.index >= heap[nextchild].p.index)
- + break;
- + heap[i] = heap[nextchild];
- + i = nextchild;
- + lchild = 2*i + 1;
- + }
- + heap[i] = e;
- +}
- +
- +
- +static void
- +heap_remove_top (heap_elem *heap, size_t *heap_size)
- +{
- + *heap_size -= 1;
- + if (*heap_size == 0)
- + return;
- + heap_remove_top_and_add(heap, *heap_size, heap[*heap_size]);
- +}
- +
- +
- +static void
- +merge_many_sets (position_set *sets, size_t n, position_set *result)
- +{
- + //TODO: special cases for n == 1, and n == 2
- + //heap_elem *heap = (heap_elem*)malloc(n * sizeof(heap_elem));
- + size_t heap_size = 0;
- + heap_elem *heap = XNMALLOC(n, heap_elem);
- + size_t *ids = XNMALLOC(n, size_t);
- + size_t sum_of_sizes = 0;
- + size_t i;
- +
- + for (i = 0; i < n; ++i) {
- + sum_of_sizes += sets[i].nelem;
- + /* special value for "end of set", to reduce
- + dependent memory reads */
- + ids[i] = ~(size_t)0;
- + if (sets[i].nelem) {
- + heap_elem e = { sets[i].elems[0], i };
- + heap_add(heap, &heap_size, e);
- + if (sets[i].nelem > 1)
- + ids[i] = 1;
- + }
- + }
- +
- + position_set r;
- + REALLOC_IF_NECESSARY(r.elems, r.alloc, sum_of_sizes);
- + r.nelem = 0;
- +
- + while (heap_size) {
- + heap_elem e = heap[0];
- + if (r.nelem && r.elems[r.nelem-1].index == e.p.index)
- + r.elems[r.nelem-1].constraint |= e.p.constraint;
- + else {
- + r.elems[r.nelem++] = e.p;
- + }
- + i = e.set_index;
- + if (ids[i] != ~(size_t)0) {
- + heap_elem e2 = { sets[i].elems[ids[i]], i };
- + heap_remove_top_and_add(heap, heap_size, e2);
- + ++ids[i];
- + if (ids[i] == sets[i].nelem)
- + ids[i] = ~(size_t)0;
- + }
- + else {
- + heap_remove_top(heap, &heap_size);
- + }
- + }
- + *result = r;
- + free(ids);
- + free(heap);
- +}
- +
- /* Find the index of the state corresponding to the given position set with
- the given preceding context, or create a new state if there is no such
- state. Context tells whether we got here on a newline or letter. */
- @@ -2482,7 +2591,7 @@
- charclass leftovers; /* Stuff in the label that didn't match. */
- int leftoversf; /* True if leftovers is nonempty. */
- position_set follows; /* Union of the follows of some group. */
- - position_set tmp; /* Temporary space for merging sets. */
- + position_set follows_tmp; /* Temporary space for merging sets. */
- int possible_contexts; /* Contexts that this group can match. */
- int separate_contexts; /* Context that new state wants to know. */
- state_num state; /* New state. */
- @@ -2607,7 +2716,11 @@
- }
- alloc_position_set (&follows, d->nleaves);
- - alloc_position_set (&tmp, d->nleaves);
- + alloc_position_set (&follows_tmp, d->nleaves);
- +
- + position_set *sets = 0;
- + size_t sets_n = 0, sets_alloc = 0;
- +
- /* If we are a searching matcher, the default transition is to a state
- containing the positions of state 0, otherwise the default transition
- @@ -2637,13 +2750,12 @@
- for (i = 0; i < ngrps; ++i)
- {
- - follows.nelem = 0;
- -
- - /* Find the union of the follows of the positions of the group.
- - This is a hideously inefficient loop. Fix it someday. */
- + /* Find the union of the follows of the positions of the group. */
- + sets_n = grps[i].nelem;
- + REALLOC_IF_NECESSARY(sets, sets_alloc, sets_n);
- for (j = 0; j < grps[i].nelem; ++j)
- - for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
- - insert (d->follows[grps[i].elems[j]].elems[k], &follows);
- + sets[j] = d->follows[grps[i].elems[j]];
- + merge_many_sets(sets, sets_n, &follows_tmp);
- if (d->mb_cur_max > 1)
- {
- @@ -2666,9 +2778,9 @@
- <mb A>, so we cannot add state[0]. */
- next_isnt_1st_byte = 0;
- - for (j = 0; j < follows.nelem; ++j)
- + for (j = 0; j < follows_tmp.nelem; ++j)
- {
- - if (!(d->multibyte_prop[follows.elems[j].index] & 1))
- + if (!(d->multibyte_prop[follows_tmp.elems[j].index] & 1))
- {
- next_isnt_1st_byte = 1;
- break;
- @@ -2679,11 +2791,8 @@
- /* If we are building a searching matcher, throw in the positions
- of state 0 as well. */
- if (d->searchflag
- - && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte))) {
- - for (j = 0; j < d->states[0].elems.nelem; ++j) {
- - insert(d->states[0].elems.elems[j], &follows);
- - }
- - }
- + && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte)))
- + merge(&follows_tmp, &d->states[0].elems, &follows);
- /* Find out if the new state will want any context information. */
- possible_contexts = charclass_context (labels[i]);
- @@ -2722,7 +2831,8 @@
- for (i = 0; i < ngrps; ++i)
- free (grps[i].elems);
- free (follows.elems);
- - free (tmp.elems);
- + free (sets);
- + free (follows_tmp.elems);
- free (grps);
- free (labels);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement