Advertisement
Guest User

Untitled

a guest
Jul 3rd, 2016
174
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 17.08 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4.  
  5. /**
  6.   *  POOL ALLOCATOR
  7.   */
  8. #define POOL_CAP 1024
  9. typedef struct mem_pool
  10. {
  11.     struct mem_pool* next;
  12.     size_t pos;
  13.     char data[];
  14. } mem_pool;
  15.  
  16. /**
  17.   * Creates a new memory pool
  18.   */
  19. mem_pool*
  20. new_mem_pool ()
  21. {
  22.     mem_pool* pool = malloc(sizeof(mem_pool) + POOL_CAP);
  23.     if (pool == NULL)
  24.     {
  25.         fprintf(stderr, "failed to allocate memory pool\n");
  26.         exit(-1);
  27.     }
  28.  
  29.     pool->next = NULL;
  30.     pool->pos = 0;
  31.     return pool;
  32. }
  33.  
  34. /**
  35.   * Free memory pool (and all sub-pools)
  36.   */
  37. void
  38. free_mem_pool (mem_pool* pool)
  39. {
  40.     mem_pool* next;
  41.     while (pool != NULL)
  42.     {
  43.         next = pool->next;
  44.         free(pool);
  45.         pool = next;
  46.     }
  47. }
  48.  
  49. /**
  50.   * Allocate a block from `pool'
  51.   * Returns `NULL' if memory could not be allocated, or a
  52.   *  new pointer to atleast `siz' bytes
  53.   */
  54. void*
  55. mem_pool_alloc (mem_pool* pool, size_t siz)
  56. {
  57.     /* keep aligned to word boundary */
  58.     if (siz & 3)
  59.         siz = 1 + (siz | 3);
  60.  
  61.     /* impossible to alloc */
  62.     if (siz > POOL_CAP || pool == NULL)
  63.         return NULL;
  64.  
  65.     while (pool->pos + siz > POOL_CAP)
  66.     {
  67.         if (pool->next == NULL)
  68.             pool->next = new_mem_pool();
  69.  
  70.         pool = pool->next;
  71.     }
  72.  
  73.     void* ptr = pool->data + pool->pos;
  74.     pool->pos += siz;
  75.     return ptr;
  76. }
  77.  
  78.  
  79.  
  80. #define RE_CHAR   0   //  [c]
  81. #define RE_CHARS  1   //  [abc]
  82. #define RE_WILD   2   //  .
  83. #define RE_AND    3   //  xy
  84. #define RE_OR     4   //  x|y
  85. #define RE_MANY   5   //  x*
  86. #define RE_MORE   6   //  x+
  87. #define RE_MAYBE  7   //  x?
  88.  
  89.  
  90. #define RE_TRANS_EPS    0
  91. #define RE_TRANS_SINGLE 1
  92. #define RE_TRANS_WILD   2
  93. #define RE_TRANS_MAP    3
  94.  
  95.  
  96. /**
  97.   * Structure for AST node in the regular expression
  98.   * .kind:  What form of expression
  99.   *     RE_CHAR    <char>
  100.   *     RE_CHARS   [<chars>]
  101.   *     RE_WILD    .
  102.   *     RE_AND     (e[0] e[1])
  103.   *     RE_OR      (e[0] | e[1])
  104.   *     RE_MANY    (e[0])*
  105.   *     RE_MORE    (e[0])+
  106.   *     RE_MAYBE   (e[0])?
  107.   * .ch  which character for RE_CHAR kind
  108.   * .e   sub-expressions
  109.   */
  110. typedef struct rexpr
  111. {
  112.     int kind;
  113.     union {
  114.         char ch;
  115.         int* char_map;
  116.     };
  117.     struct rexpr* e[];
  118. } rexpr;
  119.  
  120.  
  121. #define CHAR_MAP_SIZE (256 / sizeof(int))
  122. #define char_map_idx(c)  (((unsigned char) (c)) / sizeof(int))
  123. #define char_map_mask(c) (1 << ((unsigned char) (c)) % sizeof(int))
  124.  
  125. static rexpr
  126. static_wildcard_expr =
  127. { .kind = RE_WILD };
  128.  
  129. static int char_map_space[] = {
  130.     13824,0,1,0,0,0,0,0,
  131.     0,0,0,0,0,0,0,0, };
  132. static int char_map_digit[] = {
  133.     0,0,0,1023,0,0,0,0,
  134.     0,0,0,0,0,0,0,0, };
  135. static int char_map_alpha[] = {
  136.     0,0,0,0,65534,2047,65534,2047,
  137.     0,0,0,0,0,0,0,0, };
  138. static int char_map_i_space[] = {
  139.     -13825,-1,-2,-1,-1,-1,-1,-1,
  140.     -1,-1,-1,-1,-1,-1,-1,-1, };
  141. static int char_map_i_digit[] = {
  142.     -1,-1,-1,-1024,-1,-1,-1,-1,
  143.     -1,-1,-1,-1,-1,-1,-1,-1, };
  144. static int char_map_i_alpha[] = {
  145.     -1,-1,-1,-1,-65535,-2048,-65535,-2048,
  146.     -1,-1,-1,-1,-1,-1,-1,-1, };
  147.  
  148. static int single_char_maps[CHAR_MAP_SIZE * 256] = {0,};
  149. static int* single_char_map (char c)
  150. {
  151.     int* map = single_char_maps(CHAR_MAP_SIZE * (unsigned char) c);
  152.     map[char_map_idx(c)] = char_map_mask(c);
  153.     return map;
  154. }
  155.  
  156. #if 0
  157. void generate_mask (const char* name, int invert, const char* chars)
  158. {
  159.     int mask[256 / 16];
  160.     memset(mask, invert ? -1 : 0, sizeof(mask));
  161.  
  162.     int i;
  163.     for (i = 0; chars[i]; i++)
  164.     {
  165.         unsigned char c = chars[i];
  166.         if (invert) mask[c / 16] &= ~(1 << (c % 16));
  167.         else        mask[c / 16] |=  (1 << (c % 16));
  168.     }
  169.     printf("static int %s[] = {", name);
  170.     for (i = 0; i < sizeof(mask) / sizeof(*mask); i++)
  171.     {
  172.         printf("%s%d,",
  173.             (i % 8) ? "" : "\n\t",
  174.             mask[i]);
  175.     }
  176.     printf(" };\n");
  177. }
  178. int main (void)
  179. {
  180.     generate_mask("char_map_space",   0, " \n\r\t\f");
  181.     generate_mask("char_map_digit",   0, "0123456789");
  182.     generate_mask("char_map_alpha",   0, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
  183.     generate_mask("char_map_i_space", 1, " \n\r\t\f");
  184.     generate_mask("char_map_i_digit", 1, "0123456789");
  185.     generate_mask("char_map_i_alpha", 1, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
  186.     return 0;
  187. }
  188. #endif
  189.  
  190.  
  191.  
  192. /* error handling */
  193. int re_error;
  194. static char re_error_string[256];
  195. #define re_errorf(...) \
  196.     re_error = 1, \
  197.     snprintf(re_error_string, sizeof(re_error_string), \
  198.             __VA_ARGS__)
  199.  
  200. /* public interface to internal error string */
  201. const char*
  202. re_get_error () { return re_error_string; }
  203.  
  204.  
  205. /* memory allocation */
  206. mem_pool* re_pool;
  207. #define re_pool_ALLOC_EXPR(sub_exprs) \
  208.     (rexpr*) mem_pool_alloc(re_pool, \
  209.                 sizeof(rexpr) + (sub_exprs) * sizeof(rexpr*))
  210. #define re_pool_ALLOC_CHARS() (char*)   mem_pool_alloc(re_pool, CHAR_MAP_SIZE)
  211.  
  212.  
  213.  
  214.  
  215. /**
  216.   *  RegExp AST PARSER
  217.   */
  218.  
  219. static rexpr*
  220. re_parse_expr (const char* rstr, const char** strp);
  221.  
  222. static rexpr*
  223. re_parse_term (const char* rstr, const char** strp);
  224.  
  225. static int
  226. re_parse_suffix (const char* rstr, const char** strp, rexpr** exprp);
  227.  
  228. static int
  229. re_parse_and_or (const char* rstr, const char** strp, rexpr** exprp);
  230.  
  231.  
  232. /**
  233.   *  Parse a full RegExp expression
  234.   *  rstr: input string
  235.   *  [optional] strp: final string position
  236.   */
  237. static rexpr*
  238. re_parse_expr (const char* rstr, const char** strp)
  239. {
  240.     rexpr* expr = re_parse_term(rstr, &rstr);
  241.     while (re_parse_and_or(rstr, &rstr, &expr))
  242.     {
  243.         if (re_error)
  244.             break;
  245.     }
  246.  
  247.     if (strp) *strp = rstr;
  248.     return expr;
  249. }
  250.  
  251. /**
  252.   * Parse a simple `term'
  253.   *  <char> or `.' or [<range>]
  254.   *  followed by `*', `+' or `?'
  255.   */
  256. static rexpr*
  257. re_parse_term (const char* rstr, const char** strp)
  258. {
  259.     char ch;
  260.     rexpr* expr;
  261.  
  262.     switch (ch = *rstr++)
  263.     {
  264.     case ')':
  265.     case '\0':
  266.         re_errorf("expression missing");
  267.         expr = NULL;
  268.         break;
  269.  
  270.     case '|':
  271.     case '*':
  272.     case '+':
  273.     case '?':
  274.         re_errorf("invalid '%c' before expression", ch);
  275.         expr = NULL;
  276.         break;
  277.  
  278.     case '(':
  279.         expr = re_parse_expr(rstr, &rstr);
  280.         if (!re_error && *rstr++ != ')')
  281.         {
  282.             re_errorf("missing closing ')'");
  283.             expr = NULL;
  284.         }
  285.         break;
  286.  
  287.     case '.':
  288.         expr = &static_wildcard_expr;
  289.         break;
  290.  
  291.     case '[':
  292.         /* TODO: char map */
  293.  
  294.     default:
  295.         expr = re_pool_ALLOC_EXPR(0);
  296.         expr->kind = RE_CHAR;
  297.         expr->ch = ch;
  298.         break;
  299.     }
  300.  
  301.     /* e* e? e+ */
  302.     while (re_parse_suffix(rstr, &rstr, &expr))
  303.     {
  304.         if (re_error)
  305.             break;
  306.     }
  307.  
  308.     if (strp) *strp = rstr;
  309.     return expr;
  310. }
  311.  
  312. /**
  313.   * Parse a suffix: `*', `+', or `?'
  314.   * Returns 0 if none more to parse
  315.   */
  316. static int
  317. re_parse_suffix (const char* rstr, const char** strp, rexpr** exprp)
  318. {
  319.     rexpr* expr;
  320.     char ch;
  321.  
  322.     switch (ch = *rstr)
  323.     {
  324.     case '*':
  325.     case '+':
  326.     case '?':
  327.         rstr++;
  328.         expr = re_pool_ALLOC_EXPR(1);
  329.         expr->kind =
  330.             ch == '*' ? RE_MANY :
  331.             ch == '+' ? RE_MORE :
  332.             /* == '?' */ RE_MAYBE;
  333.         expr->e[0] = *exprp;
  334.         break;
  335.     default:
  336.         return 0;
  337.     }
  338.  
  339.     *exprp = expr;
  340.     if (strp) *strp = rstr;
  341.     return 1;
  342. }
  343.  
  344. /**
  345.   * Parse and/or terms
  346.   *  ab  and  a|b
  347.   * Returns 0 if none more to parse
  348.   */
  349. static int
  350. re_parse_and_or (const char* rstr, const char** strp, rexpr** exprp)
  351. {
  352.     int retval = 1;
  353.     char ch;
  354.     rexpr* expr = NULL, *expr2;
  355.  
  356.     switch (ch = *rstr)
  357.     {
  358.     case '\0':
  359.     case ')':
  360.         expr = *exprp;
  361.         retval = 0;
  362.         break;
  363.  
  364.     case '|':
  365.         expr2 = re_parse_expr(rstr + 1, &rstr);
  366.         if (re_error)
  367.             break;
  368.  
  369.         expr = re_pool_ALLOC_EXPR(2);
  370.         expr->kind = RE_OR;
  371.         expr->e[0] = *exprp;
  372.         expr->e[1] = expr2;
  373.         retval = 0;
  374.         break;
  375.  
  376.     default:
  377.         expr2 = re_parse_term(rstr, &rstr);
  378.         if (re_error)
  379.             break;
  380.  
  381.         expr = re_pool_ALLOC_EXPR(2);
  382.         expr->kind = RE_AND;
  383.         expr->e[0] = *exprp;
  384.         expr->e[1] = expr2;
  385.         break;
  386.     }
  387.  
  388.     *exprp = expr;
  389.     if (strp) *strp = rstr;
  390.     return retval;
  391. }
  392.  
  393.  
  394.  
  395. /**
  396.   * State table, contains states and their transitions
  397.   * .list:  Array of states
  398.   * .siz/.cap:  Size/capacity for resizable array
  399.   */
  400. typedef struct re_st_table
  401. {
  402.     int siz, cap;
  403.     struct re_state* list;
  404. } re_st_table;
  405.  
  406. /**
  407.   * A single state, contains list of transitions
  408.   * .trans:  Array of transitions
  409.   * .siz/.cap:  Size/capacity for resizable array
  410.   */
  411. typedef struct re_state
  412. {
  413.     int siz, cap;
  414.     struct re_st_trans* trans;
  415. } re_state;
  416.  
  417. /**
  418.   * A state transition
  419.   * .kind  What kind of transition
  420.   *    RE_TRANS_EPS     epsilon transition (automatic)
  421.   *    RE_TRANS_SINGLE  single character
  422.   *    RE_TRANS_WILD    wildcard (any character)
  423.   *    RE_TRANS_MAP     character map (any of a group a chars)
  424.   * .dest  Destination state after the transition
  425.   * .ch    Which character (for RE_TRANS_SINGLE)
  426.   * .chars Character map (for RE_TRANS_MAP)
  427.   */
  428. typedef struct re_st_trans
  429. {
  430.     int kind;
  431.     int dest;
  432.     union {
  433.         char ch;
  434.         int* char_map;
  435.     };
  436. } re_st_trans;
  437.  
  438. /**
  439.   * Compiled regular expression
  440.   */
  441. typedef struct re_regexp
  442. {
  443.     re_st_table tbl;
  444.     int init_state;
  445.     int accept_state;
  446. } re_regexp;
  447.  
  448.  
  449.  
  450.  
  451. /**
  452.   * Create a new state table
  453.   */
  454. static re_st_table
  455. new_re_st_table ()
  456. {
  457.     const int CAP = 16;
  458.     re_state* list = malloc(CAP * sizeof(re_state));
  459.     return (re_st_table) {
  460.         .siz = 0,
  461.         .cap = CAP,
  462.         .list = list,
  463.     };
  464. }
  465.  
  466. /**
  467.   * Free resources from state table (and
  468.   *  all sub-states / transitions)
  469.   */
  470. static void
  471. re_st_table_free (re_st_table tbl)
  472. {
  473.     int i, j;
  474.     for (i = 0; i < tbl.siz; i++)
  475.     {
  476.         for (j = 0; j < tbl.list[i].siz; j++)
  477.         {
  478.             if (tbl.list[i].trans[j].kind == RE_TRANS_MAP)
  479.                 free(tbl.list[i].trans[j].d.chars);
  480.         }
  481.         free(tbl.list[i].trans);
  482.     }
  483.     free(tbl.list);
  484. }
  485.  
  486. /**
  487.   * Create a new state in table `tbl'
  488.   */
  489. static int
  490. new_state (re_st_table* tbl)
  491. {
  492.     if (tbl->cap <= tbl->siz)
  493.     {
  494.         tbl->cap *= 2;
  495.         tbl->list = realloc(tbl->list, tbl->cap * sizeof(re_state));
  496.     }
  497.  
  498.     const int CAP = 4;
  499.     re_st_trans* trans = malloc(CAP * sizeof(re_st_trans));
  500.  
  501.     re_state* state = &tbl->list[tbl->siz];
  502.     state->siz = 0;
  503.     state->cap = CAP;
  504.     state->trans = trans;
  505.     return tbl->siz++;
  506. }
  507.  
  508. /**
  509.   * Create a transition in `tbl' from state `st_id' to `st_dest'
  510.   *  with character `ch' (for RE_TRANS_SINGLE)
  511.   */
  512. static re_st_trans*
  513. transition (re_st_table* tbl, int st_id, int st_dest, int kind, char ch)
  514. {
  515.     re_state* state = &tbl->list[st_id];
  516.     if (state->cap <= state->siz)
  517.     {
  518.         state->cap *= 2;
  519.         state->trans = realloc(state->trans, state->cap * sizeof(re_st_trans));
  520.     }
  521.  
  522.     re_st_trans* tr = &state->trans[state->siz];
  523.     tr->kind = kind;
  524.     tr->dest = st_dest;
  525.     tr->ch = ch;
  526.     state->siz++;
  527.     return tr;
  528. }
  529.  
  530. /**
  531.   * Same as above ( transition(...) ), but without argument `ch'
  532.   */
  533. #define transition_(tbl,src,dst,k) transition(tbl,src,dst,k,'\0')
  534.  
  535.  
  536. /**
  537.   * Create a character map transition (RE_TRANS_MAP) with
  538.   *  map `chars'
  539.   */
  540. static re_st_trans*
  541. transition_map (re_st_table* tbl, int st_id, int st_dest, int* char_map)
  542. {
  543.     re_st_trans* tr = transition_(tbl, st_id, st_dest, RE_TRANS_MAP);
  544.     tr->char_map = char_map;
  545.     return tr;
  546. }
  547.  
  548.  
  549.  
  550. /**
  551.   *   RegExp COMPILE
  552.   */
  553.  
  554. /**
  555.   * Compile an expression into state transitions (into `tbl'),
  556.   *  from previous state `st_from'. Returns the new state after
  557.   *  the transition(s).
  558.   */
  559. static int
  560. compile (re_st_table* tbl, int st_from, rexpr* expr)
  561. {
  562.     int st_final, st_1, st_2;
  563.  
  564. head:
  565.     switch (expr->kind)
  566.     {
  567.     case RE_CHAR:
  568.         st_final = new_state(tbl);
  569.         transition(tbl, st_from, st_final, RE_TRANS_SINGLE, expr->ch);
  570.         return st_final;
  571.  
  572.     case RE_WILD:
  573.         st_final = new_state(tbl);
  574.         transition_(tbl, st_from, st_final, RE_TRANS_WILD);
  575.         return st_final;
  576.  
  577.     case RE_AND:
  578.         st_from = compile(tbl, st_from, expr->e[0]);
  579.         expr = expr->e[1];
  580.         /* sorry, EWD */
  581.         /* the semantically consistent call would be
  582.               st_final = compile(tbl, st_from, expr->e[1]);
  583.            but the goto ensures that we don't add any
  584.            stack space (also a for(;;) loop would indent too far) */
  585.         goto head;
  586.  
  587.     case RE_OR:
  588.         st_1 = new_state(tbl);
  589.         st_2 = new_state(tbl);
  590.         st_final = new_state(tbl);
  591.         transition_(tbl, st_from, st_1, RE_TRANS_EPS);
  592.         transition_(tbl, st_from, st_2, RE_TRANS_EPS);
  593.         st_1 = compile(tbl, st_1, expr->e[0]);
  594.         st_2 = compile(tbl, st_2, expr->e[1]);
  595.         transition_(tbl, st_1, st_final, RE_TRANS_EPS);
  596.         transition_(tbl, st_2, st_final, RE_TRANS_EPS);
  597.         return st_final;
  598.  
  599.     case RE_MANY:
  600.         st_final = compile(tbl, st_from, expr->e[0]);
  601.         transition_(tbl, st_from, st_final, RE_TRANS_EPS);
  602.         transition_(tbl, st_final, st_from, RE_TRANS_EPS);
  603.         return st_final;
  604.  
  605.     case RE_MORE:
  606.         st_final = compile(tbl, st_from, expr->e[0]);
  607.         transition_(tbl, st_final, st_from, RE_TRANS_EPS);
  608.         return st_final;
  609.  
  610.     case RE_MAYBE:
  611.         st_final = compile(tbl, st_from, expr->e[0]);
  612.         transition_(tbl, st_from, st_final, RE_TRANS_EPS);
  613.         return st_final;
  614.  
  615.     default:
  616.         fprintf(stderr, "invalid expression compile\n");
  617.         exit(-1);
  618.     }
  619. }
  620.  
  621.  
  622.  
  623. /**
  624.   *   RegExp EXECUTE
  625.   */
  626.  
  627. static int
  628. does_match (const re_st_trans* trans, char ch)
  629. {
  630.     switch (trans->kind)
  631.     {
  632.     case RE_TRANS_SINGLE:
  633.         return trans->ch == ch;
  634.     case RE_TRANS_WILD:
  635.         return 1;
  636.     case RE_TRANS_MAP:
  637.         return trans->chars[(unsigned char) ch];
  638.     default:
  639.         return 0;
  640.     }
  641. }
  642.  
  643. static int
  644. follow_matches (const re_st_table* tbl, int* states, int* target, char ch)
  645. {
  646.     int retval = 0;
  647.     int i, j;
  648.     re_state state;
  649.  
  650.     for (i = 0; i < tbl->siz; i++)
  651.     {
  652.         if (!states[i])
  653.             continue;
  654.  
  655.         state = tbl->list[i];
  656.         for (j = 0; j < state.siz; j++)
  657.         {
  658.             if (does_match(&state.trans[j], ch))
  659.             {
  660.                 target[state.trans[j].dest] = 1;
  661.                 retval = 1;
  662.             }
  663.         }
  664.     }
  665.     return retval;
  666. }
  667.  
  668. static void
  669. follow_epsilon (const re_st_table* tbl, int* states)
  670. {
  671.     /* if in state `s1' and there is a transition 's1: eps -> s2',
  672.        then also go in state `s2' */
  673.  
  674.     int i, j, dest;
  675.     re_state state;
  676.  
  677.     for (i = 0; i < tbl->siz; i++)
  678.     {
  679.         if (!states[i])
  680.             continue;
  681.  
  682.         state = tbl->list[i];
  683.         for (j = 0; j < state.siz; j++)
  684.         {
  685.             /* only follow RE_TRANS_EPS transitions */
  686.             if (state.trans[j].kind != RE_TRANS_EPS)
  687.                 continue;
  688.  
  689.             /* backtrack the dest state? */
  690.             dest = state.trans[j].dest;
  691.             if (dest < i && !states[dest])
  692.                 i = dest - 1;
  693.  
  694.             states[dest] = 1;
  695.         }
  696.     }
  697. }
  698.  
  699. /*
  700.  * Finds the longest match to get from `init_state' to
  701.  *  `accept_state' in `string'. Returns -1 if no match,
  702.  *  or the length of the substring match if a match is found
  703.  */
  704. static int
  705. compute_longest_match (const re_st_table* tbl, int init_state, int accept_state, const char* string)
  706. {
  707. #define CLEAR(b) memset((b), 0, sizeof(int) * tbl->siz)
  708.  
  709.     const char* str = string;
  710.     char ch;
  711.     int length = -1;
  712.  
  713.     /* `flip' is kind of like a "backbuffer" for `states'
  714.        we compute the new set of states into `flip' and then
  715.        swap it with `states'.
  716.     */
  717.     int* states, *flip, *temp;
  718.     states = malloc(sizeof(int) * tbl->siz);
  719.     flip = malloc(sizeof(int) * tbl->siz);
  720.  
  721.     /* set up initial state */
  722.     CLEAR(flip);
  723.     CLEAR(states);
  724.     states[init_state] = 1;
  725.     follow_epsilon(tbl, states);
  726.  
  727.     for (;;) {
  728.         /* if accept_state, then it is an acceptable end-of-match */
  729.         if (states[accept_state])
  730.             length = (int) (str - string);
  731.  
  732.         ch = *str++;
  733.  
  734. /*      printf("(char %c)", ch);
  735.  *      int i;
  736.  *      for (i = 0; i < tbl->siz; i++)
  737.  *      {
  738.  *          if (states[i])
  739.  *              printf(" %s%d",
  740.  *                  (i == accept_state) ? "*" : "",
  741.  *                  i);
  742.  *      }
  743.  *      printf("\n");
  744.  */
  745.  
  746.         /* `follow_matches' returns 0 if the new set of
  747.            states is empty, in which case we should stop
  748.            checking additional characters */
  749.         if (!follow_matches(tbl, states, flip, ch))
  750.             break;
  751.  
  752.         /* swap */
  753.         temp = states;
  754.         states = flip;
  755.         flip = temp;
  756.  
  757.         CLEAR(flip);
  758.         follow_epsilon(tbl, states);
  759.     }
  760.  
  761.     free(states);
  762.     free(flip);
  763.     return length;
  764. #undef CLEAR
  765. }
  766.  
  767.  
  768. int
  769. re_compile (re_regexp* out, const char* regexp)
  770. {
  771.     int retval = 0;
  772.  
  773.     /* initialize RE parser */
  774.     re_pool = new_mem_pool();
  775.     re_error = 0;
  776.     *re_error_string = '\0';
  777.  
  778.     /* parse input regexp */
  779.     rexpr* expr = re_parse_expr(regexp, NULL);
  780.     if (re_error)
  781.         goto cleanup;
  782.  
  783.     /* compile expr -> states */
  784.     re_st_table tbl = new_re_st_table();
  785.  
  786.     int init_state = new_state(&tbl);
  787.     int accept_state = compile(&tbl, init_state, expr);
  788.     if (re_error)
  789.     {
  790.         re_st_table_free(tbl);
  791.         goto cleanup;
  792.     }
  793.  
  794.     /* return as re_regexp object */
  795.     if (out)
  796.     {
  797.         out->tbl = tbl;
  798.         out->init_state = init_state;
  799.         out->accept_state = accept_state;
  800.     }
  801.     else /* no output?? */
  802.         re_st_table_free(tbl);
  803.  
  804.     /* okay! */
  805.     retval = 1;
  806. cleanup:
  807.     /* deinitialize */
  808.     free_mem_pool(re_pool);
  809.     return retval;
  810. }
  811.  
  812. /* front-end for interfaces */
  813. /* (same return value as above) */
  814. int
  815. re_match (const re_regexp* regexp, const char* string)
  816. {
  817.     return compute_longest_match(&regexp->tbl,
  818.                 regexp->init_state,
  819.                 regexp->accept_state,
  820.                 string);
  821. }
  822.  
  823. void
  824. re_free (re_regexp regexp)
  825. {
  826.     re_st_table_free(regexp.tbl);
  827. }
  828.  
  829.  
  830. int
  831. main (int argc, char** argv)
  832. {
  833.     if (argc < 3)
  834.     {
  835.         printf("usage: re [regexp] [string]\n");
  836.         return 0;
  837.     }
  838.  
  839.     const char* re_string = argv[1];
  840.     const char* inp_string = argv[2];
  841.  
  842.     re_regexp regexp;
  843.     if (!re_compile(&regexp, re_string))
  844.     {
  845.         fprintf(stderr, "regexp error: %s\n", re_get_error());
  846.         return 1;
  847.     }
  848.  
  849.     int length = re_match(&regexp, inp_string);
  850.     if (length == -1)
  851.     {
  852.         printf("no match.\n");
  853.     }
  854.     else
  855.     {
  856.         char matched[length + 1];
  857.         memcpy(matched, inp_string, length);
  858.         matched[length] = '\0';
  859.         printf("matched string: '%s'\n", matched);
  860.     }
  861.  
  862.     re_free(regexp);
  863.     return 0;
  864. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement