/*
* JFLAP - Formal Languages and Automata Package
*
*
* Susan H. Rodger
* Computer Science Department
* Duke University
* August 27, 2009
* Copyright (c) 2002-2009
* All rights reserved.
* JFLAP is open source software. Please see the LICENSE for terms.
*
*/
package grammar.parse;
import grammar.Grammar;
import grammar.Production;
import grammar.UnrestrictedGrammar;
import java.util.*;
/**
* This class is a utility class for determining some facts about unrestricted
* grammars. As structures equivalent in power to Turing machines, a brute force
* parse of an unrestricted grammar may, in some situations, not be recognized.
*
* @author Thomas Finley
*/
public class Unrestricted {
/**
* Dang class aint for instantiation! Get along, lil doggie.
*/
private Unrestricted() {
}
/**
* Given a string and a smaller set, this returns the minimum length that
* the string can derive as indicated by the smaller set.
*
* @param string
* the string to get the "smaller"
* @param smaller
* the "smaller" set, as returned by {@link #smallerSymbols}
*/
public static int minimumLength(String string, Set smaller) {
int length = 0;
for (int j = 0; j < string.length(); j++)
if (!smaller.contains(string.substring(j, j + 1)))
length++;
return length;
}
/**
* Counts the number of characters in a given string.
*
* @param s
* the string
* @param c
* the character
* @return the number of occurances of the character in the string
*/
private static int count(String s, char c) {
int count = 0;
for (int i = 0; i < s.length(); i++)
if (s.charAt(i) == c)
count++;
return count;
}
/**
* Returns a set of those symbols in the grammar that can derive some string
* smaller than it. For a normal grammar, of course, this would be just
* those variables with, but for an unrestricted grammar this can include
* the symbol b and c where babca - aa is a rule.
* a is not included because there are a terminals in the
* result.
*
* @param grammar
* the grammar to find the "small" symbols for
*/
public static Set smallerSymbols(Grammar grammar) {
Set smaller = new HashSet();
Production[] prods = grammar.getProductions();
boolean added;
do {
added = false;
for (int i = 0; i < prods.length; i++) {
String left = prods[i].getLHS();
String right = prods[i].getRHS();
int rightLength = minimumLength(right, smaller);
int leftLength = minimumLength(left, smaller);
if (leftLength > rightLength) {
for (int j = 0; j < left.length(); j++) {
String symbol = left.substring(j, j + 1);
char s = symbol.charAt(0);
if (smaller.contains(symbol))
continue;
if (count(left, s) <= count(right, s))
continue;
smaller.add(symbol);
added = true;
}
}
}
} while (added);
return smaller;
}
/**
* Returns if a grammar is truly unrestricted.
*
* @param grammar
* the grammar to test
* @return if a grammar is unrestricted
*/
public static boolean isUnrestricted(Grammar grammar) {
Production[] prods = grammar.getProductions();
for (int i = 0; i < prods.length; i++)
if (prods[i].getLHS().length() != 1)
return true;
return false;
}
/**
* Given an unrestricted grammar, this will return an unrestricted grammar
* with fewer productions that accepts the same language.
*
* @param grammar
* the input grammar
* @return a grammar with productions some subset of the original grammar,
* or null
in the special case where no production
* with just the start variable on the LHS exists (i.e. the grammar
* accepts no language, though if a grammar accepts no language this
* method is NOT gauranteed to return null
)
*/
public static UnrestrictedGrammar optimize(Grammar grammar) {
String startVariable = grammar.getStartVariable();
Production[] prods = grammar.getProductions();
// Which symbols in the grammar may possibly lead to just
// terminals? First, we just add all those symbols with just
// terminals on the right hand side.
Set terminating = new HashSet();
// Add those variables that lead to success.
boolean[] added = new boolean[prods.length];
for (int i = 0; i < prods.length; i++) {
added[i] = false;
if (prods[i].getVariablesOnRHS().length == 0) {
terminating.addAll(Arrays.asList(prods[i].getSymbols()));
added[i] = true;
}
}
// Repeat
boolean changed;
do {
changed = false;
// If a production has only "terminating" variables, add it.
for (int i = 0; i < prods.length; i++) {
List l = Arrays.asList(prods[i].getVariablesOnRHS());
if (!added[i] && terminating.containsAll(l)) {
terminating.addAll(Arrays.asList(prods[i].getSymbols()));
added[i] = changed = true;
}
}
} while (changed);
UnrestrictedGrammar g = new UnrestrictedGrammar();
g.setStartVariable(grammar.getStartVariable());
// Need to find a production with just the start var on LHS.
int i;
for (i = 0; i < prods.length; i++)
if (added[i] && prods[i].getLHS().equals(startVariable))
break;
if (i == prods.length)
return null;
g.addProduction(prods[i]);
added[i] = false;
for (i = 0; i < prods.length; i++)
if (added[i])
g.addProduction(prods[i]);
return g;
}
}