/* PseudoCode Interpreted Language (PCIL): Part of the algoviz@vt collection of algorithm visualizations. Copyright (C) 2008 Brandon Malone, Frank Hadlock This file is part of the PseudoCode Interpreted Language. PseudoCode Interpreted Language is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. PseudoCode Interpreted Language is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with PseudoCode Interpreted Language. If not, see . */ /* * SyntaxAnalyzer.java * * Created on March 2, 2008, 6:35 PM * * To change this template, choose Tools | Template Manager * and open the template in the editor. */ package syntacticanalysis; import dynamicmvc.Primitive; import dynamicmvc.PrimitiveFactory; import java.util.ArrayList; import java.util.Stack; /** * * @author Brandon */ public class SyntaxAnalyzer { private Grammar g; private ArrayList unmatchedSententialForm; private Stack ifStack; private Stack whileStack; private Stack forStack; private ArrayList functionList; private int currentLineNumber; /** Creates a new instance of SyntaxAnalyzer */ public SyntaxAnalyzer(Grammar g) { this.g = g; unmatchedSententialForm = new ArrayList(); } public ArrayList analyze(String sourceTokenFilePath) throws Exception { ArrayList sourceTokens = lex.SourceToken.readFromFile(sourceTokenFilePath); return analyze(sourceTokens); } public ArrayList analyze(ArrayList sourceTokens) throws Exception { unmatchedSententialForm = new ArrayList(); unmatchedSententialForm.add(g.getStartSymbol()); ArrayList postfixTokens = new ArrayList(); ifStack = new Stack(); whileStack = new Stack(); forStack = new Stack(); setFunctionList(new ArrayList()); int currentSourceTokenIndex = 0; currentLineNumber = 0; while(currentSourceTokenIndex < sourceTokens.size()) { GrammarToken sententialFormToken = unmatchedSententialForm.get(0); Terminal sourceToken = new Terminal(sourceTokens.get(currentSourceTokenIndex)); // look at the first unmatched token type String type = sententialFormToken.getType(); if (type.equals("Nonterminal")) { // use the parse table to determine which rule to apply GrammarRule rule = g.determineRule((Nonterminal)sententialFormToken, sourceToken); // replace the current token with the rhs of this rule this.unmatchedSententialForm.remove(0); if(rule == null) { throw new Exception("Invalid source. Expecting: <" + sententialFormToken.name + ">, but found: <" + sourceToken.name + "> near token #" + currentSourceTokenIndex + "."); } this.unmatchedSententialForm.addAll(0, rule.getRhs()); } else if (type.equals("Terminal")) { // make sure they match if(sententialFormToken.name.equals(sourceToken.name)) { // add a new postfix token lex.SourceToken st = sourceTokens.get(currentSourceTokenIndex); PostfixToken t = this.generateToken("", st.getLexicalValue(), st.getSourceValue()); postfixTokens.add(t); // look at the next source character currentSourceTokenIndex++; this.unmatchedSententialForm.remove(0); } else { // ignore lambda if (sententialFormToken.name.equals("#")) { this.unmatchedSententialForm.remove(0);; } else if (sententialFormToken.name.equals("relationalOperator") && sourceToken.name.equals("=")) { // add a new postfix token lex.SourceToken st = sourceTokens.get(currentSourceTokenIndex); PostfixToken t = this.generateToken("", "relationalOperator", st.getSourceValue()); postfixTokens.add(t); // look at the next source character currentSourceTokenIndex++; this.unmatchedSententialForm.remove(0); }else { /* find surrounding text */ int start = currentSourceTokenIndex - 10; int finish = currentSourceTokenIndex + 10; if (start < 0) { start = 0; } if (finish > sourceTokens.size()) { finish = sourceTokens.size(); } StringBuilder sb = new StringBuilder(); for(int i = start; i < finish; i++) { sb.append(sourceTokens.get(i).getSourceValue()); sb.append(" "); } throw new Exception("Invalid source. Expecting: [" + sententialFormToken.name + "], but found: [" + sourceToken.name + "] at token #" + currentSourceTokenIndex + ".\nAround:" + sb.toString()); } } } else if (type.equals("ActionSymbol")) { // add a new postfix token PostfixToken t = generateToken("actionSymbol", "actionSymbol", sententialFormToken.name); postfixTokens.add(t); this.unmatchedSententialForm.remove(0); } } return postfixTokens; } private PostfixToken generateToken(String semanticValue, String lexicalValue, String sourceValue) throws Exception { PostfixToken t = new PostfixToken(semanticValue, lexicalValue, sourceValue, currentLineNumber); // is this an if? if(lexicalValue.equals("if")) { ifStack.push(t); } else if (lexicalValue.equals("then")) { if (ifStack.empty()) { throw new Exception("Incorrectly nested then."); } // pop the if PostfixToken ifToken = ifStack.pop(); // set the semantic value of the if to be this address ifToken.setSemanticValue(String.valueOf(t.getId())); // also, push this on the if stack, so the endIf will match it ifStack.push(t); } else if (lexicalValue.equals("else")) { // pop the if or then PostfixToken ifToken = ifStack.pop(); // set the semantic value to be this address ifToken.setSemanticValue(String.valueOf(t.getId())); ifStack.push(t); } else if (lexicalValue.equals("endIf")) { if (ifStack.empty()) { throw new Exception("Incorrectly nested endIf."); } // pop the if or then or else PostfixToken ifToken = ifStack.pop(); if (ifToken != null) { // set the semantic value to be this address ifToken.setSemanticValue(String.valueOf(t.getId())); } else { // no need to do anything; there was an else // actually, an else should also receive the semantic value of the end if } // no need to push anything back on the stack } else if (lexicalValue.equals("while")) { whileStack.push(t); } else if (lexicalValue.equals("endWhile")) { if (whileStack.empty()) { throw new Exception("Incorrectly nested endWhile."); } // pop the while PostfixToken whileToken = whileStack.pop(); // set the semantic value to be this address t.setSemanticValue(String.valueOf(whileToken.getId())); // and the other direction whileToken.setSemanticValue(String.valueOf(t.getId())); // no need to push anything back on the stack }else if (lexicalValue.equals("forEach")) { forStack.push(t); } else if (lexicalValue.equals("next")) { if (forStack.empty()) { throw new Exception("Incorrectly nested next."); } // pop the for each PostfixToken forToken = forStack.pop(); // set the semantic value to be this address t.setSemanticValue(String.valueOf(forToken.getId())); // and the other direction // this does not need to jump to the for each, // but rather, the first statement in the for each forToken.setSemanticValue(String.valueOf(t.getId())); // no need to push anything back on the stack } else if (lexicalValue.equals("function")) { getFunctionList().add(t); } else if (lexicalValue.equals("nl")) { currentLineNumber++; } return t; } public ArrayList findFunctions(ArrayList postfixTokens) { ArrayList functions = new ArrayList(); // find each function for(PostfixToken token : functionList) { Function f = findFunction(token, postfixTokens); functions.add(f); } return functions; } private Function findFunction(PostfixToken functionKeyword, ArrayList postfixTokens) { // assume a function declaration looks something like: //~function~function~42 //~variableName~calculate~43 //~(~(~44 //actionSymbol~actionSymbol~19~45 //~variableName~Integer~46 //~variableName~M~47 //~,~,~48 //~variableName~Integer~49 //~variableName~N~50 //~)~)~51 //~nl~nl~52 // functionKeyword is the token that has the token keyword (42 in example) String name = ""; int address = -1; ArrayList parameterList = new ArrayList(); // the next token should be the function name PostfixToken t = nextToken(functionKeyword, postfixTokens); name = t.getSourceValue(); // now, find the arguments // according to the example, skip two symbols (the left paren and action symbol #19) // then there are a number pairs, separated by a comma // a right paren indicates the end of the parameter list // skip two symbols t = nextToken(t, postfixTokens); // left parenthesis t = nextToken(t, postfixTokens); // action symbol #19 t = nextToken(t, postfixTokens); // either the data type name, or ')' while (!t.getSourceValue().equals(")")) { String dataTypeName = t.getSourceValue(); t = nextToken(t, postfixTokens); // the parameter name String parameterName = t.getSourceValue(); Primitive parameter = PrimitiveFactory.create(dataTypeName, parameterName).dereference(); parameterList.add(parameter); t = nextToken(t, postfixTokens); // either a comma or a ) // skip past commas if (t.getSourceValue().equals(",")) { t = nextToken(t, postfixTokens); } } // so the current token is the ) // skip the ) and the newLine // the next token actually begin the function t = nextToken(t, postfixTokens); // t is now the newList t = nextToken(t, postfixTokens); // t is now the beginning of the function address = t.getId(); Function f = new Function(name, address, parameterList); return f; } private static PostfixToken nextToken(PostfixToken current, ArrayList postfixTokens) { return postfixTokens.get(current.getId() + 1); } public ArrayList getFunctionList() { return functionList; } public void setFunctionList(ArrayList functionList) { this.functionList = functionList; } }