Advertisement
Guest User

Untitled

a guest
Aug 24th, 2013
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 6.13 KB | None | 0 0
  1. --- dfa.c.back  2013-08-05 16:53:47.824514544 +0400
  2. +++ dfa.c       2013-08-25 04:23:18.783885918 +0400
  3. @@ -1992,6 +1992,115 @@
  4.        s->elems[i] = s->elems[i + 1];
  5.  }
  6.  
  7. +/* merge implementation of sorted sets, then number of sets smore than 2
  8. +   priority queue aka 'heap' data structure used to select maximum of
  9. +   many elements */
  10. +
  11. +typedef struct {
  12. +  position p;
  13. +  size_t set_index; /* remember which set this element was taken from */
  14. +} heap_elem;
  15. +
  16. +
  17. +static void
  18. +heap_add (heap_elem *heap, size_t *heap_size, heap_elem e)
  19. +{
  20. +  size_t i = *heap_size;
  21. +  while (i) {
  22. +    size_t parent = (i-1) / 2;
  23. +    if (e.p.index < heap[parent].p.index)
  24. +      break;
  25. +
  26. +    heap[i] = heap[parent];
  27. +    i = parent;
  28. +  }
  29. +  heap[i] = e;
  30. +  *heap_size += 1;
  31. +}
  32. +
  33. +static void
  34. +heap_remove_top_and_add (heap_elem *heap, size_t heap_size, heap_elem e)
  35. +{
  36. +  size_t i = 0;
  37. +  size_t lchild  = 2*i + 1;
  38. +  size_t n = heap_size;
  39. +  while (lchild < n) {
  40. +    size_t rchild = lchild + 1;
  41. +    size_t nextchild = lchild;
  42. +    if (rchild < n && heap[lchild].p.index < heap[rchild].p.index)
  43. +      nextchild = rchild;
  44. +    if (e.p.index >= heap[nextchild].p.index)
  45. +      break;
  46. +    heap[i] = heap[nextchild];
  47. +    i = nextchild;
  48. +    lchild = 2*i + 1;
  49. +  }
  50. +  heap[i] = e;
  51. +}
  52. +
  53. +
  54. +static void
  55. +heap_remove_top (heap_elem *heap, size_t *heap_size)
  56. +{
  57. +  *heap_size -= 1;
  58. +  if (*heap_size == 0)
  59. +    return;
  60. +  heap_remove_top_and_add(heap, *heap_size, heap[*heap_size]);
  61. +}
  62. +
  63. +
  64. +static void
  65. +merge_many_sets (position_set *sets, size_t n, position_set *result)
  66. +{
  67. +  //TODO: special cases for n == 1, and n == 2
  68. +  //heap_elem *heap = (heap_elem*)malloc(n * sizeof(heap_elem));
  69. +  size_t heap_size = 0;
  70. +  heap_elem *heap = XNMALLOC(n, heap_elem);
  71. +  size_t *ids = XNMALLOC(n, size_t);
  72. +  size_t sum_of_sizes = 0;
  73. +  size_t i;
  74. +
  75. +  for (i = 0; i < n; ++i) {
  76. +    sum_of_sizes += sets[i].nelem;
  77. +    /* special value for "end of set", to reduce
  78. +       dependent memory reads */
  79. +    ids[i] = ~(size_t)0;
  80. +    if (sets[i].nelem) {
  81. +      heap_elem e = { sets[i].elems[0], i };
  82. +      heap_add(heap, &heap_size, e);
  83. +      if (sets[i].nelem > 1)
  84. +        ids[i] = 1;
  85. +    }
  86. +  }
  87. +
  88. +  position_set r;
  89. +  REALLOC_IF_NECESSARY(r.elems, r.alloc, sum_of_sizes);
  90. +  r.nelem = 0;
  91. +
  92. +  while (heap_size) {
  93. +    heap_elem e = heap[0];
  94. +    if (r.nelem && r.elems[r.nelem-1].index == e.p.index)
  95. +      r.elems[r.nelem-1].constraint |= e.p.constraint;
  96. +    else {
  97. +      r.elems[r.nelem++] = e.p;
  98. +    }
  99. +    i = e.set_index;
  100. +    if (ids[i] != ~(size_t)0) {
  101. +      heap_elem e2 = { sets[i].elems[ids[i]], i };
  102. +      heap_remove_top_and_add(heap, heap_size, e2);
  103. +      ++ids[i];
  104. +      if (ids[i] == sets[i].nelem)
  105. +        ids[i] = ~(size_t)0;
  106. +    }
  107. +    else {
  108. +      heap_remove_top(heap, &heap_size);
  109. +    }
  110. +  }
  111. +  *result = r;
  112. +  free(ids);
  113. +  free(heap);
  114. +}
  115. +
  116.  /* Find the index of the state corresponding to the given position set with
  117.     the given preceding context, or create a new state if there is no such
  118.     state.  Context tells whether we got here on a newline or letter. */
  119. @@ -2482,7 +2591,7 @@
  120.    charclass leftovers;          /* Stuff in the label that didn't match. */
  121.    int leftoversf;               /* True if leftovers is nonempty. */
  122.    position_set follows;         /* Union of the follows of some group. */
  123. -  position_set tmp;             /* Temporary space for merging sets. */
  124. +  position_set follows_tmp;             /* Temporary space for merging sets. */
  125.    int possible_contexts;        /* Contexts that this group can match. */
  126.    int separate_contexts;        /* Context that new state wants to know. */
  127.    state_num state;              /* New state. */
  128. @@ -2607,7 +2716,11 @@
  129.      }
  130.  
  131.    alloc_position_set (&follows, d->nleaves);
  132. -  alloc_position_set (&tmp, d->nleaves);
  133. +  alloc_position_set (&follows_tmp, d->nleaves);
  134. +
  135. +  position_set *sets = 0;
  136. +  size_t sets_n = 0, sets_alloc = 0;
  137. +
  138.  
  139.    /* If we are a searching matcher, the default transition is to a state
  140.       containing the positions of state 0, otherwise the default transition
  141. @@ -2637,13 +2750,12 @@
  142.  
  143.    for (i = 0; i < ngrps; ++i)
  144.      {
  145. -      follows.nelem = 0;
  146. -
  147. -      /* Find the union of the follows of the positions of the group.
  148. -         This is a hideously inefficient loop.  Fix it someday. */
  149. +      /* Find the union of the follows of the positions of the group.  */
  150. +      sets_n = grps[i].nelem;
  151. +      REALLOC_IF_NECESSARY(sets, sets_alloc, sets_n);
  152.        for (j = 0; j < grps[i].nelem; ++j)
  153. -        for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
  154. -          insert (d->follows[grps[i].elems[j]].elems[k], &follows);
  155. +        sets[j] = d->follows[grps[i].elems[j]];
  156. +      merge_many_sets(sets, sets_n, &follows_tmp);
  157.  
  158.        if (d->mb_cur_max > 1)
  159.          {
  160. @@ -2666,9 +2778,9 @@
  161.               <mb A>, so we cannot add state[0].  */
  162.  
  163.            next_isnt_1st_byte = 0;
  164. -          for (j = 0; j < follows.nelem; ++j)
  165. +          for (j = 0; j < follows_tmp.nelem; ++j)
  166.              {
  167. -              if (!(d->multibyte_prop[follows.elems[j].index] & 1))
  168. +              if (!(d->multibyte_prop[follows_tmp.elems[j].index] & 1))
  169.                  {
  170.                    next_isnt_1st_byte = 1;
  171.                    break;
  172. @@ -2679,11 +2791,8 @@
  173.        /* If we are building a searching matcher, throw in the positions
  174.           of state 0 as well. */
  175.        if (d->searchflag
  176. -          && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte))) {
  177. -        for (j = 0; j < d->states[0].elems.nelem; ++j) {
  178. -          insert(d->states[0].elems.elems[j], &follows);
  179. -        }
  180. -      }
  181. +          && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte)))
  182. +        merge(&follows_tmp, &d->states[0].elems, &follows);
  183.  
  184.        /* Find out if the new state will want any context information. */
  185.        possible_contexts = charclass_context (labels[i]);
  186. @@ -2722,7 +2831,8 @@
  187.    for (i = 0; i < ngrps; ++i)
  188.      free (grps[i].elems);
  189.    free (follows.elems);
  190. -  free (tmp.elems);
  191. +  free (sets);
  192. +  free (follows_tmp.elems);
  193.    free (grps);
  194.    free (labels);
  195.  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement