/*
PseudoCode Interpreted Language (PCIL): Part of the algoviz@vt collection of algorithm visualizations.
Copyright (C) 2008 Brandon Malone, Frank Hadlock
This file is part of the PseudoCode Interpreted Language.
PseudoCode Interpreted Language is free software: you can redistribute
it and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
PseudoCode Interpreted Language is distributed in the hope that it will
be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.
You should have received a copy of the GNU General Public License along with
PseudoCode Interpreted Language. If not, see
.
*/
/*
* SyntaxAnalyzer.java
*
* Created on March 2, 2008, 6:35 PM
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package syntacticanalysis;
import dynamicmvc.Primitive;
import dynamicmvc.PrimitiveFactory;
import java.util.ArrayList;
import java.util.Stack;
/**
*
* @author Brandon
*/
public class SyntaxAnalyzer {
private Grammar g;
private ArrayList unmatchedSententialForm;
private Stack ifStack;
private Stack whileStack;
private Stack forStack;
private ArrayList functionList;
private int currentLineNumber;
/** Creates a new instance of SyntaxAnalyzer */
public SyntaxAnalyzer(Grammar g) {
this.g = g;
unmatchedSententialForm = new ArrayList();
}
public ArrayList analyze(String sourceTokenFilePath) throws Exception {
ArrayList sourceTokens = lex.SourceToken.readFromFile(sourceTokenFilePath);
return analyze(sourceTokens);
}
public ArrayList analyze(ArrayList sourceTokens) throws Exception {
unmatchedSententialForm = new ArrayList();
unmatchedSententialForm.add(g.getStartSymbol());
ArrayList postfixTokens = new ArrayList();
ifStack = new Stack();
whileStack = new Stack();
forStack = new Stack();
setFunctionList(new ArrayList());
int currentSourceTokenIndex = 0;
currentLineNumber = 0;
while(currentSourceTokenIndex < sourceTokens.size()) {
GrammarToken sententialFormToken = unmatchedSententialForm.get(0);
Terminal sourceToken = new Terminal(sourceTokens.get(currentSourceTokenIndex));
// look at the first unmatched token type
String type = sententialFormToken.getType();
if (type.equals("Nonterminal")) {
// use the parse table to determine which rule to apply
GrammarRule rule = g.determineRule((Nonterminal)sententialFormToken, sourceToken);
// replace the current token with the rhs of this rule
this.unmatchedSententialForm.remove(0);
if(rule == null) {
throw new Exception("Invalid source. Expecting: <" + sententialFormToken.name + ">, but found: <" + sourceToken.name + "> near token #" + currentSourceTokenIndex + ".");
}
this.unmatchedSententialForm.addAll(0, rule.getRhs());
} else if (type.equals("Terminal")) {
// make sure they match
if(sententialFormToken.name.equals(sourceToken.name)) {
// add a new postfix token
lex.SourceToken st = sourceTokens.get(currentSourceTokenIndex);
PostfixToken t = this.generateToken("", st.getLexicalValue(), st.getSourceValue());
postfixTokens.add(t);
// look at the next source character
currentSourceTokenIndex++;
this.unmatchedSententialForm.remove(0);
} else {
// ignore lambda
if (sententialFormToken.name.equals("#")) {
this.unmatchedSententialForm.remove(0);;
} else if (sententialFormToken.name.equals("relationalOperator") && sourceToken.name.equals("=")) {
// add a new postfix token
lex.SourceToken st = sourceTokens.get(currentSourceTokenIndex);
PostfixToken t = this.generateToken("", "relationalOperator", st.getSourceValue());
postfixTokens.add(t);
// look at the next source character
currentSourceTokenIndex++;
this.unmatchedSententialForm.remove(0);
}else {
/* find surrounding text */
int start = currentSourceTokenIndex - 10;
int finish = currentSourceTokenIndex + 10;
if (start < 0) {
start = 0;
}
if (finish > sourceTokens.size()) {
finish = sourceTokens.size();
}
StringBuilder sb = new StringBuilder();
for(int i = start; i < finish; i++) {
sb.append(sourceTokens.get(i).getSourceValue());
sb.append(" ");
}
throw new Exception("Invalid source. Expecting: [" + sententialFormToken.name + "], but found: [" + sourceToken.name + "] at token #" + currentSourceTokenIndex + ".\nAround:" + sb.toString());
}
}
} else if (type.equals("ActionSymbol")) {
// add a new postfix token
PostfixToken t = generateToken("actionSymbol", "actionSymbol", sententialFormToken.name);
postfixTokens.add(t);
this.unmatchedSententialForm.remove(0);
}
}
return postfixTokens;
}
private PostfixToken generateToken(String semanticValue, String lexicalValue, String sourceValue) throws Exception {
PostfixToken t = new PostfixToken(semanticValue, lexicalValue, sourceValue, currentLineNumber);
// is this an if?
if(lexicalValue.equals("if")) {
ifStack.push(t);
} else if (lexicalValue.equals("then")) {
if (ifStack.empty()) {
throw new Exception("Incorrectly nested then.");
}
// pop the if
PostfixToken ifToken = ifStack.pop();
// set the semantic value of the if to be this address
ifToken.setSemanticValue(String.valueOf(t.getId()));
// also, push this on the if stack, so the endIf will match it
ifStack.push(t);
} else if (lexicalValue.equals("else")) {
// pop the if or then
PostfixToken ifToken = ifStack.pop();
// set the semantic value to be this address
ifToken.setSemanticValue(String.valueOf(t.getId()));
ifStack.push(t);
} else if (lexicalValue.equals("endIf")) {
if (ifStack.empty()) {
throw new Exception("Incorrectly nested endIf.");
}
// pop the if or then or else
PostfixToken ifToken = ifStack.pop();
if (ifToken != null) {
// set the semantic value to be this address
ifToken.setSemanticValue(String.valueOf(t.getId()));
} else {
// no need to do anything; there was an else
// actually, an else should also receive the semantic value of the end if
}
// no need to push anything back on the stack
} else if (lexicalValue.equals("while")) {
whileStack.push(t);
} else if (lexicalValue.equals("endWhile")) {
if (whileStack.empty()) {
throw new Exception("Incorrectly nested endWhile.");
}
// pop the while
PostfixToken whileToken = whileStack.pop();
// set the semantic value to be this address
t.setSemanticValue(String.valueOf(whileToken.getId()));
// and the other direction
whileToken.setSemanticValue(String.valueOf(t.getId()));
// no need to push anything back on the stack
}else if (lexicalValue.equals("forEach")) {
forStack.push(t);
} else if (lexicalValue.equals("next")) {
if (forStack.empty()) {
throw new Exception("Incorrectly nested next.");
}
// pop the for each
PostfixToken forToken = forStack.pop();
// set the semantic value to be this address
t.setSemanticValue(String.valueOf(forToken.getId()));
// and the other direction
// this does not need to jump to the for each,
// but rather, the first statement in the for each
forToken.setSemanticValue(String.valueOf(t.getId()));
// no need to push anything back on the stack
} else if (lexicalValue.equals("function")) {
getFunctionList().add(t);
} else if (lexicalValue.equals("nl")) {
currentLineNumber++;
}
return t;
}
public ArrayList findFunctions(ArrayList postfixTokens) {
ArrayList functions = new ArrayList();
// find each function
for(PostfixToken token : functionList) {
Function f = findFunction(token, postfixTokens);
functions.add(f);
}
return functions;
}
private Function findFunction(PostfixToken functionKeyword, ArrayList postfixTokens) {
// assume a function declaration looks something like:
//~function~function~42
//~variableName~calculate~43
//~(~(~44
//actionSymbol~actionSymbol~19~45
//~variableName~Integer~46
//~variableName~M~47
//~,~,~48
//~variableName~Integer~49
//~variableName~N~50
//~)~)~51
//~nl~nl~52
// functionKeyword is the token that has the token keyword (42 in example)
String name = "";
int address = -1;
ArrayList parameterList = new ArrayList();
// the next token should be the function name
PostfixToken t = nextToken(functionKeyword, postfixTokens);
name = t.getSourceValue();
// now, find the arguments
// according to the example, skip two symbols (the left paren and action symbol #19)
// then there are a number pairs, separated by a comma
// a right paren indicates the end of the parameter list
// skip two symbols
t = nextToken(t, postfixTokens); // left parenthesis
t = nextToken(t, postfixTokens); // action symbol #19
t = nextToken(t, postfixTokens); // either the data type name, or ')'
while (!t.getSourceValue().equals(")")) {
String dataTypeName = t.getSourceValue();
t = nextToken(t, postfixTokens); // the parameter name
String parameterName = t.getSourceValue();
Primitive parameter = PrimitiveFactory.create(dataTypeName, parameterName).dereference();
parameterList.add(parameter);
t = nextToken(t, postfixTokens); // either a comma or a )
// skip past commas
if (t.getSourceValue().equals(",")) {
t = nextToken(t, postfixTokens);
}
}
// so the current token is the )
// skip the ) and the newLine
// the next token actually begin the function
t = nextToken(t, postfixTokens); // t is now the newList
t = nextToken(t, postfixTokens); // t is now the beginning of the function
address = t.getId();
Function f = new Function(name, address, parameterList);
return f;
}
private static PostfixToken nextToken(PostfixToken current, ArrayList postfixTokens) {
return postfixTokens.get(current.getId() + 1);
}
public ArrayList getFunctionList() {
return functionList;
}
public void setFunctionList(ArrayList functionList) {
this.functionList = functionList;
}
}