1. <?php
  2. /**
  3.  * Program for enumerating the words of a context-free language from
  4.  * a grammar.
  5.  *
  6.  * Copyright (C) Ambroz Bizjak <ambrop7@gmail.com>
  7.  *
  8.  * This program is free software: you can redistribute it and/or modify
  9.  * it under the terms of the GNU General Public License version 2
  10.  * as published by the Free Software Foundation.
  11.  *
  12.  * This program is distributed in the hope that it will be useful,
  13.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15.  * GNU General Public License for more details.
  16.  */
  17.  
  18. /*
  19. // simple iteration
  20. $grammar = array(
  21.     "S" => array("a", "aS")
  22. );
  23. */
  24. /*
  25. // correctly nested parantheses
  26. $grammar = array(
  27.     "S" => array("", "(S)", "(S)L"),
  28.     "L" => array("(S)", "(S)L")
  29. );
  30. */
  31.  
  32. // arithmetic expressions
  33. $grammar = array(
  34.     "S" => array("P+S", "L*P", "x", "(S)"),
  35.     "P" => array("L*P", "x", "(S)"),
  36.     "L" => array("x", "(S)")
  37. );
  38.  
  39.  
  40. $maxlen = (count($argv) < 2 ? 8 : $argv[1]);
  41.  
  42. function array_tail (&$arr)
  43. {
  44.     $arr2 = $arr;
  45.     array_shift($arr2);
  46.     return $arr2;
  47. }
  48.  
  49. function array_cons ($head, &$arr)
  50. {
  51.     $arr2 = $arr;
  52.     array_unshift($arr2, $head);
  53.     return $arr2;
  54. }
  55.  
  56. // Returns all possible divisions of a string of length $length into $count substrings
  57. // Examples:
  58. //   enum_splits(3, 2) = array(array(0, 3), array(1, 2), array(2, 1), array(3, 1))
  59. //   enum_splits(1, 3) = array(array(0, 0, 1), array(0, 1, 0), array(1, 0, 0))
  60. //
  61. function enum_splits ($length, $count)
  62. {
  63.     assert($length >= 0);
  64.     assert($count > 0);
  65.  
  66.     if ($count == 1) {
  67.         return array(array($length));
  68.     }
  69.  
  70.     $results = array();
  71.  
  72.     for ($i = 0; $i <= $length; $i++) {
  73.         $rest_results = enum_splits($length - $i, $count - 1);
  74.  
  75.         foreach ($rest_results as $rest_result) {
  76.             $results[] = array_merge(array($i), $rest_result);
  77.         }
  78.     }
  79.  
  80.     return $results;
  81. }
  82.  
  83. // Retuns an array of all variables in a production, in the order they appear.
  84. // Example: prod_vars("aAbBB", array("A", "B", "C")) = array("A", "B", "B")
  85. function prod_vars ($prod, $allvars)
  86. {
  87.     $vars = array();
  88.  
  89.     for ($i = 0; $i < strlen($prod); $i++) {
  90.         if (in_array($prod[$i], $allvars)) {
  91.             $vars[] = $prod[$i];
  92.         }
  93.     }
  94.  
  95.     return $vars;
  96. }
  97.  
  98. // Instantiates a production by substituting words for variables.
  99. // Example: instantiate_prod("aAbBB", array("A", "B", "C"), array("xx", "yy", "zz")) = "axxbyyzz"
  100. function instantiate_prod ($prod, $vars, $var_values)
  101. {
  102.     $result = "";
  103.  
  104.     for ($i = 0; $i < strlen($prod); $i++) {
  105.         if (in_array($prod[$i], $vars)) {
  106.             $result .= array_shift($var_values);
  107.         } else {
  108.             $result .= $prod[$i];
  109.         }
  110.     }
  111.  
  112.     return $result;
  113. }
  114.  
  115. // Check if a grammar is suitable for use with the enumaration algorithm.
  116. function verify_grammar ($gram)
  117. {
  118.     foreach ($gram as $var => $prods) {
  119.         foreach ($prods as $prod) {
  120.             $prod_vars = prod_vars($prod, array_keys($gram));
  121.             $prod_nvars = count($prod_vars);
  122.             $prod_nterms = strlen($prod) - $prod_nvars;
  123.  
  124.             if ($prod_nvars > 0 && $prod_nterms == 0) {
  125.                 return false;
  126.             }
  127.         }
  128.     }
  129.  
  130.     return true;
  131. }
  132.  
  133. function enum_split_values (&$table, $vars, $var_lengths)
  134. {
  135.     assert(count($vars) == count($var_lengths));
  136.     assert(count($vars) > 0);
  137.  
  138.     $var = $vars[0];
  139.     $var_len = $var_lengths[0];
  140.  
  141.     $results = array();
  142.  
  143.     foreach ($table[$var][$var_len] as $word) {
  144.         if (count($vars) == 1) {
  145.             $results[] = array($word);
  146.         } else {
  147.             $rest = enum_split_values($table, array_tail($vars), array_tail($var_lengths));
  148.             foreach ($rest as $r) {
  149.                 $results[] = array_cons($word, $r);
  150.             }
  151.         }
  152.     }
  153.  
  154.     return $results;
  155. }
  156.  
  157. function add_word (&$table, $var, $word, $startvar)
  158. {
  159.     if ($var == $startvar) {
  160.         echo "$word\n";
  161.     }
  162.     $table[$var][strlen($word)][] = $word;
  163. }
  164.  
  165. function enumerate_grammar ($grammar, $maxlen)
  166. {
  167.     global $stderr;
  168.     assert(verify_grammar($grammar));
  169.  
  170.     $vars = array_keys($grammar);
  171.  
  172.     $table = array();
  173.     foreach ($vars as $var) {
  174.         $table[$var] = array();
  175.     }
  176.  
  177.     for ($i = 0; $i <= $maxlen; $i++) {
  178.         $start_time = microtime(TRUE);
  179.  
  180.         foreach ($vars as $var) {
  181.             $table[$var][$i] = array();
  182.  
  183.             foreach ($grammar[$var] as $prod) {
  184.                 $prod_vars = prod_vars($prod, $vars);
  185.                 $prod_nvars = count($prod_vars);
  186.                 $prod_nterms = strlen($prod) - $prod_nvars;
  187.                 $vars_len = $i - $prod_nterms;
  188.  
  189.                 if ($vars_len >= 0) {
  190.                     if ($prod_nvars == 0) {
  191.                         if ($vars_len == 0) {
  192.                             add_word($table, $var, $prod, $vars[0]);
  193.                         }
  194.                     } else {
  195.                         assert($vars_len < $i);
  196.  
  197.                         foreach (enum_splits($vars_len, $prod_nvars) as $split) {
  198.                             foreach (enum_split_values($table, $prod_vars, $split) as $var_values) {
  199.                                 add_word($table, $var, instantiate_prod($prod, $prod_vars, $var_values), $vars[0]);
  200.                             }
  201.                         }
  202.                     }
  203.                 }
  204.             }
  205.         }
  206.  
  207.         $time = microtime(TRUE) - $start_time;
  208.         $count = count($table[$vars[0]][$i]);
  209.  
  210.         $freq = ($time == 0 ? "" : $count / $time);
  211.  
  212.         fwrite($stderr, "$i: f = $freq\n");
  213.     }
  214. }
  215.  
  216. $stderr = fopen("php://stderr", "w");
  217.  
  218. if (!verify_grammar($grammar)) {
  219.     echo "Invalid grammar (a production has one or more variables but no terminals)!\n";
  220.     exit(1);
  221. }
  222.  
  223. enumerate_grammar($grammar, $maxlen);