Guest User

Untitled

a guest
Aug 24th, 2013
42
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. --- dfa.c.back  2013-08-05 16:53:47.824514544 +0400
  2. +++ dfa.c       2013-08-25 04:23:18.783885918 +0400
  3. @@ -1992,6 +1992,115 @@
  4.        s->elems[i] = s->elems[i + 1];
  5.  }
  6.  
  7. +/* merge implementation of sorted sets, then number of sets smore than 2
  8. +   priority queue aka 'heap' data structure used to select maximum of
  9. +   many elements */
  10. +
  11. +typedef struct {
  12. +  position p;
  13. +  size_t set_index; /* remember which set this element was taken from */
  14. +} heap_elem;
  15. +
  16. +
  17. +static void
  18. +heap_add (heap_elem *heap, size_t *heap_size, heap_elem e)
  19. +{
  20. +  size_t i = *heap_size;
  21. +  while (i) {
  22. +    size_t parent = (i-1) / 2;
  23. +    if (e.p.index < heap[parent].p.index)
  24. +      break;
  25. +
  26. +    heap[i] = heap[parent];
  27. +    i = parent;
  28. +  }
  29. +  heap[i] = e;
  30. +  *heap_size += 1;
  31. +}
  32. +
  33. +static void
  34. +heap_remove_top_and_add (heap_elem *heap, size_t heap_size, heap_elem e)
  35. +{
  36. +  size_t i = 0;
  37. +  size_t lchild  = 2*i + 1;
  38. +  size_t n = heap_size;
  39. +  while (lchild < n) {
  40. +    size_t rchild = lchild + 1;
  41. +    size_t nextchild = lchild;
  42. +    if (rchild < n && heap[lchild].p.index < heap[rchild].p.index)
  43. +      nextchild = rchild;
  44. +    if (e.p.index >= heap[nextchild].p.index)
  45. +      break;
  46. +    heap[i] = heap[nextchild];
  47. +    i = nextchild;
  48. +    lchild = 2*i + 1;
  49. +  }
  50. +  heap[i] = e;
  51. +}
  52. +
  53. +
  54. +static void
  55. +heap_remove_top (heap_elem *heap, size_t *heap_size)
  56. +{
  57. +  *heap_size -= 1;
  58. +  if (*heap_size == 0)
  59. +    return;
  60. +  heap_remove_top_and_add(heap, *heap_size, heap[*heap_size]);
  61. +}
  62. +
  63. +
  64. +static void
  65. +merge_many_sets (position_set *sets, size_t n, position_set *result)
  66. +{
  67. +  //TODO: special cases for n == 1, and n == 2
  68. +  //heap_elem *heap = (heap_elem*)malloc(n * sizeof(heap_elem));
  69. +  size_t heap_size = 0;
  70. +  heap_elem *heap = XNMALLOC(n, heap_elem);
  71. +  size_t *ids = XNMALLOC(n, size_t);
  72. +  size_t sum_of_sizes = 0;
  73. +  size_t i;
  74. +
  75. +  for (i = 0; i < n; ++i) {
  76. +    sum_of_sizes += sets[i].nelem;
  77. +    /* special value for "end of set", to reduce
  78. +       dependent memory reads */
  79. +    ids[i] = ~(size_t)0;
  80. +    if (sets[i].nelem) {
  81. +      heap_elem e = { sets[i].elems[0], i };
  82. +      heap_add(heap, &heap_size, e);
  83. +      if (sets[i].nelem > 1)
  84. +        ids[i] = 1;
  85. +    }
  86. +  }
  87. +
  88. +  position_set r;
  89. +  REALLOC_IF_NECESSARY(r.elems, r.alloc, sum_of_sizes);
  90. +  r.nelem = 0;
  91. +
  92. +  while (heap_size) {
  93. +    heap_elem e = heap[0];
  94. +    if (r.nelem && r.elems[r.nelem-1].index == e.p.index)
  95. +      r.elems[r.nelem-1].constraint |= e.p.constraint;
  96. +    else {
  97. +      r.elems[r.nelem++] = e.p;
  98. +    }
  99. +    i = e.set_index;
  100. +    if (ids[i] != ~(size_t)0) {
  101. +      heap_elem e2 = { sets[i].elems[ids[i]], i };
  102. +      heap_remove_top_and_add(heap, heap_size, e2);
  103. +      ++ids[i];
  104. +      if (ids[i] == sets[i].nelem)
  105. +        ids[i] = ~(size_t)0;
  106. +    }
  107. +    else {
  108. +      heap_remove_top(heap, &heap_size);
  109. +    }
  110. +  }
  111. +  *result = r;
  112. +  free(ids);
  113. +  free(heap);
  114. +}
  115. +
  116.  /* Find the index of the state corresponding to the given position set with
  117.     the given preceding context, or create a new state if there is no such
  118.     state.  Context tells whether we got here on a newline or letter. */
  119. @@ -2482,7 +2591,7 @@
  120.    charclass leftovers;          /* Stuff in the label that didn't match. */
  121.    int leftoversf;               /* True if leftovers is nonempty. */
  122.    position_set follows;         /* Union of the follows of some group. */
  123. -  position_set tmp;             /* Temporary space for merging sets. */
  124. +  position_set follows_tmp;             /* Temporary space for merging sets. */
  125.    int possible_contexts;        /* Contexts that this group can match. */
  126.    int separate_contexts;        /* Context that new state wants to know. */
  127.    state_num state;              /* New state. */
  128. @@ -2607,7 +2716,11 @@
  129.      }
  130.  
  131.    alloc_position_set (&follows, d->nleaves);
  132. -  alloc_position_set (&tmp, d->nleaves);
  133. +  alloc_position_set (&follows_tmp, d->nleaves);
  134. +
  135. +  position_set *sets = 0;
  136. +  size_t sets_n = 0, sets_alloc = 0;
  137. +
  138.  
  139.    /* If we are a searching matcher, the default transition is to a state
  140.       containing the positions of state 0, otherwise the default transition
  141. @@ -2637,13 +2750,12 @@
  142.  
  143.    for (i = 0; i < ngrps; ++i)
  144.      {
  145. -      follows.nelem = 0;
  146. -
  147. -      /* Find the union of the follows of the positions of the group.
  148. -         This is a hideously inefficient loop.  Fix it someday. */
  149. +      /* Find the union of the follows of the positions of the group.  */
  150. +      sets_n = grps[i].nelem;
  151. +      REALLOC_IF_NECESSARY(sets, sets_alloc, sets_n);
  152.        for (j = 0; j < grps[i].nelem; ++j)
  153. -        for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
  154. -          insert (d->follows[grps[i].elems[j]].elems[k], &follows);
  155. +        sets[j] = d->follows[grps[i].elems[j]];
  156. +      merge_many_sets(sets, sets_n, &follows_tmp);
  157.  
  158.        if (d->mb_cur_max > 1)
  159.          {
  160. @@ -2666,9 +2778,9 @@
  161.               <mb A>, so we cannot add state[0].  */
  162.  
  163.            next_isnt_1st_byte = 0;
  164. -          for (j = 0; j < follows.nelem; ++j)
  165. +          for (j = 0; j < follows_tmp.nelem; ++j)
  166.              {
  167. -              if (!(d->multibyte_prop[follows.elems[j].index] & 1))
  168. +              if (!(d->multibyte_prop[follows_tmp.elems[j].index] & 1))
  169.                  {
  170.                    next_isnt_1st_byte = 1;
  171.                    break;
  172. @@ -2679,11 +2791,8 @@
  173.        /* If we are building a searching matcher, throw in the positions
  174.           of state 0 as well. */
  175.        if (d->searchflag
  176. -          && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte))) {
  177. -        for (j = 0; j < d->states[0].elems.nelem; ++j) {
  178. -          insert(d->states[0].elems.elems[j], &follows);
  179. -        }
  180. -      }
  181. +          && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte)))
  182. +        merge(&follows_tmp, &d->states[0].elems, &follows);
  183.  
  184.        /* Find out if the new state will want any context information. */
  185.        possible_contexts = charclass_context (labels[i]);
  186. @@ -2722,7 +2831,8 @@
  187.    for (i = 0; i < ngrps; ++i)
  188.      free (grps[i].elems);
  189.    free (follows.elems);
  190. -  free (tmp.elems);
  191. +  free (sets);
  192. +  free (follows_tmp.elems);
  193.    free (grps);
  194.    free (labels);
  195.  }
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×