//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // PLEASE DO NOT EDIT WITHOUT THE EXPLICIT CONSENT OF THE AUTHOR! \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ using System.IO; using System.Text; using System.Collections; using Ilog.Language.IO; using Ilog.Language.Util; using Ilog.Language.Tools; namespace Ilog.Language.Parsing { using Stack = Ilog.Language.Util.Stack; /** * This is the generic parser that is inherited by all parser classes * generated by Syntax.ParserGenerator. It is further * subclassed by two other abstract classes: StaticParser and * DynamicParser. * * @see Syntax.ParserGenerator * @see StaticParser * @see DynamicParser * * @version Last modified on Tue May 31 07:58:05 2005 by hak * @author Hassan Aït-Kaci * @copyright © 2000 ILOG, S.A. */ public abstract class GenericParser { /** * The members of this class are divided into a static group and * non-static group. The latter is the information proper to each * individual instance. The former is all the (read-only) * information that is intrinsic to the parser's class and * shared by all parser instances. It comprises all the tables * defining the parser symbols and rules, as well as the parsing * automaton tables since all such are read-only shareable * ressources. These tables are filled in the generated parser's * class by a static constructor, and are therefore defined at * most once per application upon loading the parser's class. By * constrast, the members that are proper to an individual * parser's instance are created when a parser's instance * constructor is called. Thus, a single application may create, * and run concurrently, several parsers that are instances of the * same class without interference, while still avoiding the * substantial waste of having redundant tables. */ /* **************************************************************************** */ /* INITIALIZATION */ public GenericParser () { Initialize(); } /** * One may override this method to execute whatever initializations one wishes. * The default method is empty. */ public virtual void Initialize () { } /* **************************************************************************** */ /* STATIC INFORMATION */ /** * A constant indicating that no parse tree must be built. */ public const int NO_TREE = 0; /** * A constant indicating that a compact parse tree must be built. */ public const int COMPACT_TREE = 1; /** * A constant indicating that the full (concrete) parse tree must * be built. */ public const int FULL_TREE = 2; /** * The parser terminals. */ protected static Terminal[] terminals; internal static Terminal[] Terminals { get { return terminals; } } /** * The parser nonterminals. */ protected static NonTerminal[] nonterminals; internal static NonTerminal[] Nonterminals { get { return nonterminals; } } /** * The parser rules. */ protected static Rule[] rules; internal static Rule[] Rules { get { return rules; } } /** * The parser states. */ protected static State[] states; internal static State[] States { get { return states; } } /** * The parser actions. */ protected static Action[] actions; internal static Action[] Actions { get { return actions; } } /** * The action tables. */ protected static Hashtable[] actionTables; /** * The goto tables. */ protected static Hashtable[] gotoTables; /** * The table associating identifiers to terminals. */ private static Hashtable terminalTable = new Hashtable(); /** * The table associating identifiers to nonterminals. */ private static Hashtable nonterminalTable = new Hashtable(); /* **************************************************************************** */ /** * Canonical token denoting the end of input. It gets initialized * by the method NewTerminal. */ private static ParseNode _E_O_I; public static ParseNode E_O_I { get { return _E_O_I; } } /** * Canonical terminal to identify error tokens. It gets * initialized by the method NewTerminal. */ private static Terminal _ERROR_SYMBOL; public static Terminal ERROR_SYMBOL { get { return _ERROR_SYMBOL; } } /** * Returns an error token whose SValue is set to the * supplied string. */ public static ParseNode Error (string errval) { ParseNode ERROR = new ParseNode(_ERROR_SYMBOL); ERROR.SValue = errval; return ERROR; } /** * Returns an error token whose SValue is set to the * supplied parse node's printed value, and whose location extent * is set to that of the supplied node. */ public static ParseNode Error (ParseNode node) { if (node is DynamicToken) { ((DynamicToken)node).Original = node.Copy(); node.SValue = node.ToString(); node.Symbol = _ERROR_SYMBOL; return node; } ParseNode ERROR = new ParseNode(_ERROR_SYMBOL); ERROR.SValue = node.ToString(); ERROR.SetSpan(node); return ERROR; } /* **************************************************************************** */ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // INITIALIZATION METHODS \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /** * The following are conveniences for initializing some of the above. * They are used only by the generated parser to set up its parameters. */ /** * Defines a new parser terminal symbol with the specified index, * name, precedence, and associativity. */ protected static void NewTerminal (int index, string name, int precedence, int associativity) { name = string.Intern(name); terminalTable[name] = new Terminal(name,index,precedence,associativity); if (name == "$E_O_I$") _E_O_I = new ParseNode(terminals[index]); if (name == "error") _ERROR_SYMBOL = terminals[index]; } /** * Defines a new parser nonterminal symbol with the specified * index and name. */ protected internal static void NewNonTerminal (int index, string name) { nonterminalTable[name] = new NonTerminal(name,index); } /** * Returns the terminal symbol of specified name. */ protected internal static Terminal @Terminal (string name) { name = string.Intern(name); return (Terminal)terminalTable[name]; } /** * Returns the nonterminal symbol of specified name. */ protected static NonTerminal Nonterminal (string name) { name = string.Intern(name); return (NonTerminal)nonterminalTable[name]; } /** * Defines a new parser action with the specified index, type, * and info. */ protected static void NewAction (int index, int type, int info) { new Action(type,info,index); } /** * Defines a new parser state with the specified index. */ protected static void NewState (int index) { new State(index); } /** * Installs the terminal and action at the specified indices in * the action table at index table. */ protected static void SetAction (int table, int terminal, int action) { actionTables[table][terminals[terminal]] = actions[action]; } /** * Installs the nonterminal and state at the specified indices in * the goto table at index table. */ protected static void SetGoto (int table, int nonterminal, int state) { gotoTables[table][nonterminals[nonterminal]] = states[state]; } /** * Allocates the table of action tables to be of the specified * size. */ protected static void NewActionTables (int size) { actionTables = new Hashtable[size]; } /** * Allocates the action table for the state at the specified index * to be of the specified size. */ protected static void NewActionTable (int state, int size) { actionTables[state] = size > 0 ? new Hashtable(size) : new Hashtable(); } /** * Allocates the table of goto tables to be of the specified * size. */ protected static void NewGotoTables (int size) { gotoTables = new Hashtable[size]; } /** * Allocates the goto table for the state at the specified index * to be of the specified size. */ protected static void NewGotoTable (int state, int size) { gotoTables[state] = size > 0 ? new Hashtable(size) : new Hashtable(); } /** * Defines the action and goto tables at the specified indices * to be those of the state at specifies index. */ protected static void SetTables (int state, int actions, int gotos) { states[state].SetTables(actionTables[actions],gotoTables[gotos]); } /* **************************************************************************** */ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // TOKENIZING METHODS \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /** * The following methods are public conveniences that may be used by the * class implementing the tokenizer interface. */ /** * Returns a parse node whose symbol is the terminal named as the * specified string symbol if one exists, with an SValue * set to the specified string token. If no terminal with * this name exists, this returns an error parse node whose SValue * is set to a string or the form symbol(token). */ public static ParseNode SymbolToken (string symbol, string token) { Terminal term = @Terminal(symbol); return term == null ? Error(symbol + "( " + token + ")") : new ParseNode(term,string.Intern(token)); } /** * Returns a parse node whose symbol is the terminal named as the * specified string symbol if one exists, with an NValue * set to the specified double num. If no terminal with * this name exists, this returns an error parse node whose SValue * is set to a string or the form symbol(num). */ public static ParseNode NumberToken (string symbol, double num) { Terminal term = @Terminal(symbol); return term == null ? Error(symbol + "( " + num + ")") : new ParseNode(term,num); } /** * Returns a parse node whose symbol is the terminal named as the * specified string symbol if one exists, with an NValue * set to the specified int num. If no terminal with this * name exists, this returns an error parse node whose SValue is * set to a string or the form symbol(num). */ public static ParseNode NumberToken (string symbol, int num) { Terminal term = @Terminal(symbol); return term == null ? Error(symbol + "( " + num + ")") : new ParseNode(term,num); } /** * Returns a parse node whose symbol is the terminal named as the * specified string symbol if one exists. If no terminal * with this name exists, this returns an error parse node whose * SValue is set to the string value of symbol. */ public static ParseNode LiteralToken (string symbol) { Terminal term = @Terminal(symbol); return term == null ? Error(symbol) : new ParseNode(term); } /* **************************************************************************** * The following are static local utilities... * ****************************************************************************/ /** * This is the initial parse state. */ protected static State InitialState { get { return states[0]; } } /** * This is the canonical error action. */ protected static Action ErrorAction { get { return actions[0]; } } /** * This is the canonical accept action. */ protected static Action AcceptAction { get { return actions[1]; } } /* **************************************************************************** */ /* NON-STATIC INFORMATION */ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // INHERITED INFORMATION \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /** * This is used by the parser to build, or not, a parse tree. * The default is No_TREE. */ private int parseTreeType = NO_TREE; public int ParseTreeType { get { return parseTreeType; } set { parseTreeType = value; } } /** * This is the output stream used by the parser. */ private TextWriter _out = System.Console.Out; public TextWriter Out { get { return _out; } set { _out = value; } } /** * This is the error stream used by the parser. */ private TextWriter err = System.Console.Error; public TextWriter Err { get { return err; } set { err = value; } } //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // THE PARSER'S PARAMETERS \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /** * The following items must be supplied by the generated parser subclass. */ /** * The tokenizer. */ protected Tokenizer input; public Tokenizer GetTokenizer () { return input; } public void SetTokenizer (Tokenizer input) { this.input = input; } /** * The semantic action method. */ protected abstract ParseNode SemanticAction (Rule r); // throws IOException /** * Reads the next token into tokenNode. */ protected abstract void ReadToken (); // throws IOException /** * Performs the parse action and returns false iff it is the * ACCEPT action. */ protected abstract bool PerformParseAction (); // throws IOException /** * Sets the appropriate parser action for the current token in the * current state. If none exists, the parser action is set to the * canonical error action. */ protected abstract void GetParseAction (); // throws IOException /** * Whenever this flag is true, this indicates that a new * token needs to be read. */ protected bool readTokenFlag = false; /** * Returns an error token. */ public virtual ParseNode Error () { return new ParseNode(_ERROR_SYMBOL); } /** * This the result returned by the most recent semantic action. */ public ParseNode CurrentNode { get { if (parsedNode.Symbol.Name == "$ROOTS$" && parsedNode.HasChildren) return (ParseNode)parsedNode.LastChild; return parsedNode; } } /** * Returns the current token. */ protected ParseNode TokenNode () // throws IOException { if (readTokenFlag) ReadToken(); return tokenNode; } /** * Returns the token that was the last one actually read. For a * StaticParser, this is equivalent to * TokenNode(), but not necessarily for a * DynamicParser, where it is the bottom of the read * stack when it is not empty - it is TokenNode() * otherwise. */ protected virtual ParseNode LatestToken () // throws IOException { return TokenNode(); } /** * Pushes the current state and the given node on the parser * stack. This method is overridden in DynamicicParser. */ protected virtual void Push (ParseNode node) { stack.Push(new StackElement(parseState,node)); } /** * Traces the specified action. */ protected abstract void Trace (Action a); // throws IOException /* **************************************************************************** */ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // PARSING METHODS \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /* The following are members used for parsing. */ /** * The parser stack. */ protected Stack stack = new Stack(); /** * This is an artificial parse node set by a partial parsing * method to the switch needed to divert the parser to the partial * parsing mode that is appropriate for the partial root. */ private ParseNode switchToken = null; /** * This method is used by a partial parsing method to set the partial * parse switch token appropriately. */ public void SetSwitchToken (ParseNode token) { switchToken = token; } /** * If the switch token is non-null, this returns it and sets it to * null; otherwise, returns the next token supplied by the * tokenizer */ protected ParseNode NextToken () // throws IOException { if (switchToken == null) return input.NextToken(); ParseNode token = switchToken; switchToken = null; return token; } /** * Resets the parser to a virginal configuration. */ public virtual void ResetParser () { parseState = null; // latest state of the parse previousState = null; // previous state of the parse parseAction = null; // parse action to perform parseRule = null; // rule to use for reduction parseHandle = null; // recognized handle being reduced tokenNode = null; // latest token read parsedNode = null; // result returned by the action previousCulprit = null; // previous error causing token stack.Clear(); } /** * This is the method to invoke for parsing a token stream using * the Tokenizer instance specified as input until the * end of input. */ public void Parse () // throws IOException { ResetParser(); stack.Push(new StackElement(InitialState,_E_O_I)); readTokenFlag = true;; do { SetParseState(CurrentState); GetParseAction(); } while (PerformParseAction()); } /** * Same as Parse() with the tokenizer's reader set to the * specified one. */ public void Parse (TextReader reader) // throws IOException { TextReader rd = input.GetReader(); if (rd != null) rd.Close(); input.SetReader(reader); Parse(); } /** * Same as Parse() with the tokenizer's reader set to one * reading from the file having the specified name. */ public void Parse (string file) // throws IOException { Parse(new IncludeReader(file)); } /** * Same as Parse() with the tokenizer set to the * specified one. */ public void Parse (Tokenizer input) // throws IOException { this.input = input; Parse(); } /** * Same as Parse(), but specifies what type of parse tree * to build. The value must be one of NO_TREE, * COMPACT_TREE, or FULL_TREE. */ public void Parse (int type) // throws IOException { parseTreeType = type; Parse(); } /** * Same as parse(), but specifies building the parse * tree: true is equivalent to FULL_TREE, and * false is equivalent to COMPACT_TREE. */ public void Parse (bool fullTree) // throws IOException { parseTreeType = fullTree ? FULL_TREE : COMPACT_TREE; Parse(); } /** * This method returns the node in the parser stack corresponding * to the nth symbol of the current handle in the process * of being shifted onto the stack (the rule r's RHS). * This node is at offset l-n from the top of the stack, * where l is the length of the rule r. This * method is used essentially by the semantic actions and its uses * result from translating the pseudo-variables occurring in * grammar rules. */ protected ParseNode Node (Rule r, int n) { return ((StackElement)stack.Peek(r.Length-n)).Node; } /** * This method replaces the node corresponding to the nth * symbol of the current handle (the rule r's RHS) on the * stack with the supplied ParseNode. */ protected void ReplaceStackNode (Rule r, int n, ParseNode node) { ((StackElement)stack.Peek(r.Length-n)).Node = node; } /** * These are global variables shared by the parsing methods. * They avoid passing so many arguments everywhere. */ protected State parseState; // latest state of the parse protected State previousState; // previous state of the parse protected Action parseAction; // parse action to perform protected Rule parseRule; // rule to use for reduction protected StackElement[] parseHandle; // recognized handle being reduced protected ParseNode tokenNode; // latest token read protected ParseNode parsedNode; // result returned by the action /** * This is the error manager. It defaults to a * DefaultErrorManager. */ private ErrorManager errorManager = new DefaultErrorManager(); public ErrorManager @ErrorManager { get { return errorManager; } set { errorManager = value; } } public ErrorManager GetErrorManager () { return errorManager; } public void SetErrorManager (ErrorManager errorManager) { this.errorManager = errorManager; } /* **************************************************************************** */ /* The following are local utilities... */ /** * This is parse state currently on top of the parser stack. */ protected State CurrentState { get { return ((StackElement)stack.Peek()).State; } } /** * Returns an error object labeled as a syntax error and * containing the specified message. */ public Error SyntaxError (string msg) { return new Error().SetLabel("Syntax Error: ").SetMessage(msg); } /** * Returns an error object labeled as a syntax error and * containing the specified message, and located at the specified * location extent. */ public Error SyntaxError (string msg, Locatable extent) { return SyntaxError(msg).SetExtent(extent); } /** * Returns an error object labeled as a fatal error and * containing the specified message. */ public Error FatalError (string msg) { return new Error().SetLabel("Fatal Error: ").SetMessage(msg).SetSee(" - aborting"); } /** * Returns an error object labeled as a fatal error and containing * the specified message, and located at the specified location * extent. */ public Error FatalError (string msg, Locatable extent) { return FatalError(msg).SetExtent(extent); } /** * Signals an error, and then attempts error recovery if this is * enabled, by rewinding the parser stack to the most recent * error-handling state. If this works, the parser action which * handles the canonical 'error' token in that state is * performed. Then, tokens are read and ignored until one is read * that can be handled by the current state. */ protected void RecoverFromError () // throws IOException { FindErrorCulprit(); RewindErrorStack(); PerformErrorAction(); SkipErrorTokens(); } /** * This is the node that caused the previous error, or null. */ protected ParseNode previousCulprit; /** * Identifies the error-causing token and puts the error manager * in quiet mode while in error recovery. */ private void FindErrorCulprit () // throws IOException { ParseNode culprit = LatestToken(); if (culprit.IsEOI) { errorManager.ReportError(SyntaxError("unexpected end of input",culprit)); Abort(); } string cause = (culprit.Symbol == _ERROR_SYMBOL) ? (culprit.SValue == null ? "garbage": culprit.SValue) : culprit.ToString(); if (errorManager.IsReportingErrors) { if (!(previousCulprit != null && previousCulprit.Equals(culprit))) errorManager.ReportError(SyntaxError("unexpected "+cause,culprit)); previousCulprit = culprit; errorManager.ReportErrors(false); } if (!errorManager.IsRecoveringErrors) Abort(); if (trace) err.WriteLine("*** Recovering from error..."); } /** * Keeps popping the stack until an error-handling state is found. */ private void RewindErrorStack () // throws IOException { tokenNode = Error(); while (!SymbolIsHandled(tokenNode.Symbol)) { if (SetParseState(CurrentState) == InitialState) { Error error = FatalError("unrecoverable syntax error"); errorManager.ReportError(error); Abort(); } stack.Pop(); } } /** * In an error-handling state, performs action to shift 'error'. *
* Note to myself: this is not safe as it is not guaranteed that * (1) the action is a shift, nor (2) that even performing all * actions until error-shifting will not put the parser in a strange * state. Fix: either handle all these cases, or enforce at parser * generation time that only shifts be allowed as error-handling * actions. *
* The current code works when error-handling rules are of the * form: *
* Foo : 'error' { semantic action; } 'token' ;
*
*/
private void PerformErrorAction () // throws IOException
{
SetParseState(CurrentState);
parseAction = parseState.GetAction((Terminal)tokenNode.Symbol);
PerformParseAction();
}
/**
* Reads and skips tokens until a legal post-error token is read.
*/
private void SkipErrorTokens () // throws IOException
{
do ReadToken();
while (!tokenNode.IsEOI && !SymbolIsHandled(tokenNode.Symbol));
if (tokenNode.IsEOI)
{
Error error = FatalError("can't recover past the end of input");
errorManager.ReportError(error);
Abort();
}
}
/**
* Returns true iff there is an action to perform upon the
* specified symbol in the current state.
*/
protected bool SymbolIsHandled (Symbol symbol)
{
return CurrentState.ActionTable.ContainsKey(symbol);
}
/**
* Sets the latest state of the parse to the specified state
* and returns it.
*/
protected State SetParseState (State state)
{
previousState = parseState;
return parseState = state;
}
/**
* Changes the parse state to the state specified in the
* current state's goto table for the most recently parsed
* node.
*/
protected void ChangeState () // throws IOException
{
SetParseState(CurrentState.GetGoto((NonTerminal)parsedNode.Symbol));
}
/**
* Performs a shift action.
*/
protected void Shift () // throws IOException
{
SetParseState(states[parseAction.Info]);
Push(TokenNode());
if (trace)
Trace(parseAction);
readTokenFlag = true;
}
/**
* Performs a reduce action.
*/
protected void Reduce () // throws IOException
{
parseRule = rules[parseAction.Info];
parsedNode = SemanticAction(parseRule);
PopHandle();
ChangeState();
Push(parsedNode);
if (trace)
Trace(parseAction);
}
/**
* Pops the n latest elements on the parser stack, where
* n is the length of the current rule's RHS. This may also
* build a parse tree as specified by parseTreeType.
*/
protected virtual void PopHandle ()
{
parseHandle = new StackElement[parseRule.Length];
for (int i = parseRule.Length-1; i >= 0; i--)
parseHandle[i] = (StackElement)stack.Pop();
if (parseHandle.Length > 0)
parsedNode.SetSpan(parseHandle);
else
parsedNode.SetSpan(tokenNode.GetStart(),tokenNode.GetStart());
if (parseTreeType != NO_TREE)
for (int i=0; i