<?php
/**
* Program for enumerating the words of a context-free language from
* a grammar.
*
* Copyright (C) Ambroz Bizjak <ambrop7@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
/*
// simple iteration
$grammar = array(
"S" => array("a", "aS")
);
*/
/*
// correctly nested parantheses
$grammar = array(
"S" => array("", "(S)", "(S)L"),
"L" => array("(S)", "(S)L")
);
*/
// arithmetic expressions
$grammar = array(
"S" => array("P+S", "L*P", "x", "(S)"),
"P" => array("L*P", "x", "(S)"),
"L" => array("x", "(S)")
);
$maxlen = (count($argv) < 2 ? 8 : $argv[1]);
function array_tail (&$arr)
{
$arr2 = $arr;
array_shift($arr2);
return $arr2;
}
function array_cons ($head, &$arr)
{
$arr2 = $arr;
array_unshift($arr2, $head);
return $arr2;
}
// Returns all possible divisions of a string of length $length into $count substrings
// Examples:
// enum_splits(3, 2) = array(array(0, 3), array(1, 2), array(2, 1), array(3, 1))
// enum_splits(1, 3) = array(array(0, 0, 1), array(0, 1, 0), array(1, 0, 0))
//
function enum_splits ($length, $count)
{
assert($length >= 0);
assert($count > 0);
if ($count == 1) {
return array(array($length));
}
$results = array();
for ($i = 0; $i <= $length; $i++) {
$rest_results = enum_splits($length - $i, $count - 1);
foreach ($rest_results as $rest_result) {
$results[] = array_merge(array($i), $rest_result);
}
}
return $results;
}
// Retuns an array of all variables in a production, in the order they appear.
// Example: prod_vars("aAbBB", array("A", "B", "C")) = array("A", "B", "B")
function prod_vars ($prod, $allvars)
{
$vars = array();
for ($i = 0; $i < strlen($prod); $i++) {
if (in_array($prod[$i], $allvars)) {
$vars[] = $prod[$i];
}
}
return $vars;
}
// Instantiates a production by substituting words for variables.
// Example: instantiate_prod("aAbBB", array("A", "B", "C"), array("xx", "yy", "zz")) = "axxbyyzz"
function instantiate_prod ($prod, $vars, $var_values)
{
$result = "";
for ($i = 0; $i < strlen($prod); $i++) {
if (in_array($prod[$i], $vars)) {
$result .= array_shift($var_values);
} else {
$result .= $prod[$i];
}
}
return $result;
}
// Check if a grammar is suitable for use with the enumaration algorithm.
function verify_grammar ($gram)
{
foreach ($gram as $var => $prods) {
foreach ($prods as $prod) {
$prod_vars = prod_vars($prod, array_keys($gram));
$prod_nvars = count($prod_vars);
$prod_nterms = strlen($prod) - $prod_nvars;
if ($prod_nvars > 0 && $prod_nterms == 0) {
return false;
}
}
}
return true;
}
function enum_split_values (&$table, $vars, $var_lengths)
{
assert(count($vars) == count($var_lengths));
assert(count($vars) > 0);
$var = $vars[0];
$var_len = $var_lengths[0];
$results = array();
foreach ($table[$var][$var_len] as $word) {
if (count($vars) == 1) {
$results[] = array($word);
} else {
$rest = enum_split_values($table, array_tail($vars), array_tail($var_lengths));
foreach ($rest as $r) {
$results[] = array_cons($word, $r);
}
}
}
return $results;
}
function add_word (&$table, $var, $word, $startvar)
{
if ($var == $startvar) {
echo "$word\n";
}
$table[$var][strlen($word)][] = $word;
}
function enumerate_grammar ($grammar, $maxlen)
{
global $stderr;
assert(verify_grammar($grammar));
$vars = array_keys($grammar);
$table = array();
foreach ($vars as $var) {
$table[$var] = array();
}
for ($i = 0; $i <= $maxlen; $i++) {
$start_time = microtime(TRUE);
foreach ($vars as $var) {
$table[$var][$i] = array();
foreach ($grammar[$var] as $prod) {
$prod_vars = prod_vars($prod, $vars);
$prod_nvars = count($prod_vars);
$prod_nterms = strlen($prod) - $prod_nvars;
$vars_len = $i - $prod_nterms;
if ($vars_len >= 0) {
if ($prod_nvars == 0) {
if ($vars_len == 0) {
add_word($table, $var, $prod, $vars[0]);
}
} else {
assert($vars_len < $i);
foreach (enum_splits($vars_len, $prod_nvars) as $split) {
foreach (enum_split_values($table, $prod_vars, $split) as $var_values) {
add_word($table, $var, instantiate_prod($prod, $prod_vars, $var_values), $vars[0]);
}
}
}
}
}
}
$time = microtime(TRUE) - $start_time;
$count = count($table[$vars[0]][$i]);
$freq = ($time == 0 ? "" : $count / $time);
fwrite($stderr, "$i: f = $freq\n");
}
}
$stderr = fopen("php://stderr", "w");
if (!verify_grammar($grammar)) {
echo "Invalid grammar (a production has one or more variables but no terminals)!\n";
exit(1);
}
enumerate_grammar($grammar, $maxlen);