//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // PLEASE DO NOT EDIT WITHOUT THE EXPLICIT CONSENT OF THE AUTHOR! \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ using System; using System.IO; using System.Text; using System.Collections; using Ilog.Language.IO; using Ilog.Language.Util; using Ilog.Language.Tools; namespace Ilog.Language.Syntax { using Stack = System.Collections.Stack; /** * This defines a class for processing a Jacc grammar * contained in a file and specified using the popular UNIX * yacc syntax for building a table-driven LR-parser. The * yacc syntax is adapted to handle Java code and extended with * additionnal functionalities. * *

An instance of this class can then be used to generate a C# * program implementing an LR parser for the specified grammar. * *

* It provides methods for: *

* * The most critical (i.e., complex) part of computation is, of * course, the propagation of the LALR(1) lookahead sets. Traditional * yacc implementations use the method developed by DeRemer * and Penello. I use an improved method due to Park, Choe, and Chang, * which substantially ameliorates the one by DeRemer and Penello. It * is the most efficient method as far as I know. * *

The format of the grammar file is essentially the same as that * required by yacc, with some minor differences, and a few * additional features. Not using the additional features makes it * essentially similar to the yacc format. For more details, * please read the description of the Jacc system and * grammar format assumed by this class and the rest of the * Ilog.Language.Syntax classes. * *

For detailed explanations of most constructions and algorithms * used by this package, please refer to the following: * *

*

* * @see Options * @see ParserGenerator * * @version Last modified on Wed Jun 01 18:40:11 2005 by hak * @author Hassan Aït-Kaci * @copyright © 2000 ILOG, S.A. */ public class Grammar { /** * The constructor of a Grammar object from the grammar * specification contained in the file specified through the * Options class. */ internal Grammar () // throws BadGrammarException { Initialize(); ReadGrammar(); // if (Options.DocOnly) // new Documentor(this); // else BuildGrammar(); } /** * Initializations. */ private void Initialize () { currentGrammar = this; _EMPTY = new Terminal("$EMPTY$"); _END_OF_INPUT = new Terminal("$E_O_I$"); _ERROR = new Terminal("error"); _START = new NonTerminal("$START$"); _ROOTS = new NonTerminal("$ROOTS$"); terminalTable["error"] = _ERROR; GrammarSymbol[] seq = new GrammarSymbol[] {_START, _ROOTS}; new Rule(seq); nodeClasses = new SetOf(nonterminals); superNodeClasses = new SetOf(nonterminals); FDomain = new SetOf(follows); FRange = new SetOf(follows); FRoots = new SetOf(follows); FInners = new SetOf(follows); } /** * The stream tokenizer from which this grammar is to be read. * This is initialized in the ReadGrammar() method. */ private StreamTokenizer st; /** * The stream where output is directed. Defaults to * System.Console.Out. * @see Options */ private static TextWriter _out = Options.Out; /** * The stream where errors are reported. Defaults to * System.Console.Error. * @see Options */ private static TextWriter err = Options.Error; /** * Determines how much to show during analysis. Possible * values are, in increasing order: * * * @see ../Util/Verbose * @see Options */ private int verbosity = Options.Verbosity; /** * The name of the file containing the grammar. Defaults to * "Grammar.grm". * @see Options */ internal string GrammarName { get { return Options.GrammarName; } } /** * When this flag is true, an incomplete grammar is * tolerated (although a warning is issued). 
The default is * false; in which case, an incomplete grammar causes * a BadGrammarException to be thrown after the warning. * @see Options */ internal bool IsPermissible { get { return Options.IsPermissible; } } /** * The grammar being currently processed. This is a static reference * to this instance itself as we assume that only one grammar is ever * processed at a time. It is used as a handle to access local data * such as symbols and rules from other classes in the namespace. */ internal static Grammar currentGrammar; /** The set of terminals */ private ArrayList terminals = new ArrayList(200); internal ArrayList Terminals { get { return terminals; } } /** The set of non-terminals */ private ArrayList nonterminals = new ArrayList(200); internal ArrayList Nonterminals { get { return nonterminals; } } /** The set of grammar rules */ private ArrayList rules = new ArrayList(500); internal ArrayList Rules { get { return rules; } } /** The set of grammar items */ private ArrayList items = new ArrayList(1000); internal ArrayList Items { get { return items; } } /** The set of grammar states */ private ArrayList states = new ArrayList(3000); internal ArrayList States { get { return states; } } /** The set of dynamic operators */ private ArrayList operators = new ArrayList(100); internal ArrayList Operators { get { return operators; } } /** When true sorting nonterminals gives rule order */ private bool rule_order_mode = false; internal bool RULE_ORDER_MODE { get { return rule_order_mode; } set { rule_order_mode = value; } } /** The number of terminals */ private int tcount = 0; internal int TCount { get { return tcount; } set { tcount = value; } } /** The number of non-terminals */ private int ncount = 0; internal int NCount { get { return ncount; } set { ncount = value; } } /** The number of rules */ private int rcount = 0; internal int RCount { get { return rcount; } set { rcount = value; } } /** The number of items */ private int icount = 0; internal int ICount { get 
{ return icount; } set { icount = value; } } /** The number of states */ private int scount = 0; internal int SCount { get { return scount; } set { scount = value; } } /** The number of dynamic operators */ private int ocount = 0; internal int OCount { get { return ocount; } set { ocount = value; } } /** * The number of Follow objects. */ private int fcount = 0; internal int FCount { get { return fcount; } set { fcount = value; } } /** A hash table for efficient retrieval of states */ private Hashtable stateTable = new Hashtable(500); /** A hash table for efficient retrieval of terminal symbols. */ private Hashtable terminalTable = new Hashtable(100); /** A hash table for efficient retrieval of nonterminal symbols. */ private Hashtable nonterminalTable = new Hashtable(100); /** Storage for a rule's symbol sequence as it is being read. */ private ArrayList ruleSequence = new ArrayList(20); /** * Returns the terminal symbol with the given name * @param index the name. * @return the desired symbol or null */ internal Terminal GetTerminal (string name) { return (Terminal)terminalTable[name]; } /** * Returns the nonterminal symbol with the given name * @param index the name. * @return the desired symbol or null */ internal NonTerminal GetNonTerminal (string name) { return (NonTerminal)nonterminalTable[name]; } /** * Returns the terminal symbol at a given index in the set * of terminals. * @param index the index. * @return the desired symbol or null */ internal Terminal GetTerminal (int index) { return (Terminal)terminals[index]; } /** * Returns the non-terminal symbol at a given index in the set * of non-terminals. * @param index the index. * @return the desired symbol or null */ internal NonTerminal GetNonTerminal (int index) { return (NonTerminal)nonterminals[index]; } /** * Returns the grammar rule at a given index in the set of rules. * @param index the index. 
* @return the rule at the given index.  NOTE(review): despite the
*         original wording ("or null"), an out-of-range index makes the
*         ArrayList indexer throw ArgumentOutOfRangeException.
*/
internal Rule GetRule (int index)
{
  return (Rule)rules[index];
}

/**
 * Returns the grammar item at a given index in the set of items.
 * @param index the index.
 * @return the item at the given index (an out-of-range index throws
 *         ArgumentOutOfRangeException; it does not return null).
 */
internal Item GetItem (int index)
{
  return (Item)items[index];
}

/**
 * Returns the grammar item for a given rule and mark.
 * @param rule the grammar rule.
 * @param mark the mark position in the rule (1-based, i.e., > 0).
 * @return the desired item.
 */
internal Item GetItem (Rule rule, int mark)
{
  return (Item)items[rule.Items[mark-1]];
}

/**
 * Returns the index in the set of all grammar items for the
 * item corresponding to the given rule and mark.
 * @param rule the grammar rule.
 * @param mark the mark position in the rule (1-based, i.e., > 0).
 * @return the desired index.
 */
internal int ItemIndex (Rule rule, int mark)
{
  return rule.Items[mark-1];
}

/**
 * Returns the grammar state at a given index in the set of states.
 * @param index the index.
 * @return the state at the given index (an out-of-range index throws
 *         ArgumentOutOfRangeException; it does not return null).
 */
internal State GetState (int index)
{
  return (State)states[index];
}

/**
 * Returns the dynamic operator at a given index in the set of
 * operators.
 * @param index the index.
 * @return the operator at the given index (an out-of-range index throws
 *         ArgumentOutOfRangeException; it does not return null).
 */
internal Operator GetOperator (int index)
{
  return (Operator)operators[index];
}

/* ********************************************************************* */

/**
 * This symbol denotes the artificial start symbol added
 * for LR analysis.
 */
private static NonTerminal _START;
internal static NonTerminal START
{
  get { return _START; }
}

/**
 * This symbol denotes epsilon, the empty symbol.
 */
private static Terminal _EMPTY;
internal static Terminal EMPTY
{
  get { return _EMPTY; }
}

/**
 * This symbol denotes the end of input marker.
*/
private static Terminal _END_OF_INPUT;
internal static Terminal END_OF_INPUT
{
  get { return _END_OF_INPUT; }
}

/**
 * This symbol denotes the artificial error token used
 * for error recovery.
 */
private static Terminal _ERROR;
internal static Terminal ERROR
{
  get { return _ERROR; }
}

/**
 * This symbol denotes the artificial roots symbol added
 * for the support of partial parsing.
 */
private static NonTerminal _ROOTS;
internal static NonTerminal ROOTS
{
  get { return _ROOTS; }
}

/**
 * This symbol denotes the actual (user-specified) start symbol
 * of the grammar.  It is always a root.
 */
private NonTerminal startSymbol;
internal NonTerminal StartSymbol
{
  get { return startSymbol; }
}

/**
 * This symbol denotes the first declared root, if any,
 * which may be used as the implicit start symbol if none
 * is declared explicitly.  If no root is declared either,
 * the implicit start symbol is defined as the first rule's LHS.
 */
private NonTerminal FIRST_ROOT;

/**
 * This contains the actual (user-specified) root symbols of the
 * grammar.  Keys are root nonterminals; the values are the artificial
 * switch tokens introduced to control the switch to a partial parse
 * (see DefineRoot).
 */
private Hashtable roots = new Hashtable();
internal Hashtable Roots
{
  get { return roots; }
}

/* ********************************************************************* */

/** A constant string denoting the empty semantic action. */
internal const string EMPTY_ACTION = "$empty$";
/** A constant string denoting the default semantic action. */
internal const string DEFAULT_ACTION = "$default$";
/** A constant denoting the start or end of an implicit token. */
internal const int SINGLE_QUOTE = '\'';
/** A constant denoting the start or end of an implicit token. */
internal const int DOUBLE_QUOTE = '"';
/** A constant denoting the start of a rule's body. */
internal const int RULE_NECK = ':';
/** A constant denoting rule disjunction. */
internal const int OR_RULE_BODY = '|';
/**
 * A constant denoting the start of a semantic action.
*/ internal const int BEGIN_SCOPE = '{'; /** A constant denoting the end of a semantic action. */ internal const int END_SCOPE = '}'; /** A constant denoting the start of grammar's command. */ internal const int COMMAND_START = '%'; /** A constant used to identify a pseudo-variable. */ internal const int PSEUDO_VAR = '$'; /** A constant denoting the end of a rule. */ internal const int END_OF_RULE = ';'; /** A constant used as a list separator. */ internal const int COMMA = ','; /** A constant used as a list separator. */ internal const int SPACE = ' '; /** A constant used as a list separator. */ internal const int NEWLINE = '\n'; /** A constant used as a list separator. */ internal const int TAB = '\t'; /** The prefix for generated node class names. */ private string nodePrefix = "_"; internal string NodePrefix { get { return nodePrefix; } set { nodePrefix = value; } } /** The suffix for generated node class names. */ private string nodeSuffix = "_"; internal string NodeSuffix { get { return nodeSuffix; } set { nodeSuffix = value; } } /**

READING THE GRAMMAR

*/ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // READING THE GRAMMAR \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /** * *

Constants encoding the command identifiers

*/ /** %start command */ private const string START_COMMAND = "start"; /** %root command */ private const string ROOT_COMMAND = "root"; /** %token command */ private const string TOKEN_COMMAND = "token"; /** %left command */ private const string LEFT_COMMAND = "left"; /** %right command */ private const string RIGHT_COMMAND = "right"; /** %nonassoc command */ private const string NONASSOC_COMMAND = "nonassoc"; /** %{ command */ private const string LBRACE_COMMAND = "{"; /** %} command */ private const string RBRACE_COMMAND = "}"; /** %usefile command */ private const string USEFILE_COMMAND = "usefile"; /** %% command */ private const string SECTION_COMMAND = "%"; /** %prec command */ private const string PREC_COMMAND = "prec"; /** %package command */ private const string PACKAGE_COMMAND = "package"; /** %import command */ private const string IMPORT_COMMAND = "import"; /** %precstep command */ private const string PRECSTEP_COMMAND = "precstep"; /** %nodeclass command */ private const string NODECLASS_COMMAND = "nodeclass"; /** %access command */ private const string ACCESS_COMMAND = "access"; /** %dynamic command */ private const string DYNAMIC_COMMAND = "dynamic"; /** %nodeprefix command */ private const string NODEPREFIX_COMMAND = "nodeprefix"; /** %nodesuffix command */ private const string NODESUFFIX_COMMAND = "nodesuffix"; /** %doc command */ private const string DOC_COMMAND = "doc"; /** %include command */ private const string INCLUDE_COMMAND = "include"; /** * A container for the commands. 
*/ private string[] commands = new string[] { START_COMMAND , ROOT_COMMAND , TOKEN_COMMAND , LEFT_COMMAND , RIGHT_COMMAND , NONASSOC_COMMAND , LBRACE_COMMAND , RBRACE_COMMAND , USEFILE_COMMAND , SECTION_COMMAND , PREC_COMMAND , PACKAGE_COMMAND , IMPORT_COMMAND , PRECSTEP_COMMAND , NODECLASS_COMMAND , ACCESS_COMMAND , DYNAMIC_COMMAND , NODEPREFIX_COMMAND , NODESUFFIX_COMMAND , DOC_COMMAND , INCLUDE_COMMAND } ; //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /** The parser class' modifier. */ private string accessTag = ""; internal string AccessTag { get { return accessTag; } } /** * A flag to indicate whether this grammar allows dynamic operators or * ambiguous tokens. */ private bool isDynamic = false; internal bool IsDynamic { get { return isDynamic; } } /** * An indicator as to whether this grammar allows dynamic operators. */ internal bool AdmitsOperators { get { return isDynamic && ocount > 0; } } /** * Operator category table that associates an operator category's * name to a set of all Operator objects in this * category. */ private Hashtable operatorCategoryTable = new Hashtable(20); internal Hashtable OperatorCategoryTable { get { return operatorCategoryTable; } } /** * Operator name table that associates a specific operator's name * to a set containing all Operator objects with this * name for all categories. */ private Hashtable operatorNameTable = new Hashtable(20); /** The latest operator category command read. */ private NonTerminal operatorCategory; /** The head symbol of the latest rule read. */ private GrammarSymbol currentLHS; /** This is true iff startSymbol has been defined. */ private bool StartIsDefined { get { return startSymbol != null; } } /** * True iff reading a section in which comments must be accumulated * as main documentation. */ private bool mainDocSection = true; /** * True iff reading a code section (i.e., between %{ * and %}. */ private bool codeSection = false; /** The package name, if any. 
*/ private string packageName = null; internal string PackageName { get { return packageName; } } /** The list of imports. */ private ArrayList imports = new ArrayList(); internal ArrayList Imports { get { return imports; } } /** The declarations to include as part of the parser class. */ private ArrayList parserDeclarations = new ArrayList(); internal ArrayList ParserDeclarations { get { return parserDeclarations; } } /** Other non public classes outside the parser class. */ private ArrayList ancillaryClasses = new ArrayList(); internal ArrayList AncillaryClasses { get { return ancillaryClasses; } } /** Other public classes outside the parser class. */ private Hashtable publicClasses = new Hashtable(); internal Hashtable PublicClasses { get { return publicClasses; } } /**
*/ /** * True iff reading the third section (i.e., past the second * %%). */ private bool readingRemainderSection = false; /** * A default size for the input string buffer (= 80 chars). */ private const int bufferSize = 80; /** * The latest semantic forward (or bottom-up) * action having been, or being, read. */ private string ruleAction = null; /** * The latest semantic backward (or undo) action * having been, or being, read. This is executed top-down * upon backtracking over ambiguous dynamic operators. */ private string ruleUndoAction = null; /** * Possible type casts for stack variables in ruleAction. */ private string ruleActionCast = ""; /** * Possible type casts for stack variables in ruleUndoAction. */ private string ruleUndoActionCast = ""; /** * The terminal symbol "tagging" the latest rule having been, or * being, read. This symbol is usually the last (rightmost) terminal * symbol that occurs in the RSH of the production rule), or * null if none exists, but may also be specified by * %prec. */ private Taggable ruleTag = null; /** * True iff the rule being read contains a symbol whose node type * has been refined by the %nodeclass<\tt> command. */ private bool nodeCast = false; /** * True iff the latest action that has been read contains a "$$" pseudo-variable. */ private bool containsHeadReference = false; /** * A counter to generate new symbols. */ private int newSymbolCount = 0; /** *

Methods used while reading the grammar.

*/ /** *

Associativity values

*/
/** A constant denoting left associativity. */
public const int LEFT_ASSOCIATIVE = 0;
/** A constant denoting right associativity. */
public const int RIGHT_ASSOCIATIVE = 1;
/** A constant denoting absence of associativity. */
public const int NON_ASSOCIATIVE = 2;
/** A constant denoting minimum (weakest) precedence. */
public const int MIN_PRECEDENCE = 1;
/**
 * A constant denoting maximum (strongest) precedence
 * (1200 to match Prolog's).
 */
public const int MAX_PRECEDENCE = 1200;
/** Current precedence value (set while reading a token declaration line). */
private static int currentPrecedence;
/** Current precedence level (advanced by NextPrecedenceLevel). */
private static int precedenceLevel = MIN_PRECEDENCE;
/** Precedence increment between successive declaration lines. */
private static int precedenceIncrement = 10;

/**
 * Checks whether the specified precedence level is within legal bounds.
 * If it is not so, this issues a warning and returns MIN_PRECEDENCE.
 * @param p the precedence level
 * @return p when within [MIN_PRECEDENCE, MAX_PRECEDENCE]; otherwise
 *         MIN_PRECEDENCE (after warning)
 */
public static int CheckPrecedenceLevel (int p)
{
  if (p < MIN_PRECEDENCE || p > MAX_PRECEDENCE)
    {
      Warning("Token precedence value out of range: " + p
              + " (precedence set to " + MIN_PRECEDENCE + ")");
      return MIN_PRECEDENCE;
    }
  return p;
}

/**
 * Converts yacc precedence level value into Prolog's binding tightness
 * measure (i.e., anti-gravity, or repulsion level).
 * @param p the yacc precedence level
 * @return MAX_PRECEDENCE - p + 1
 */
public static int PrologPrecedence (int p)
{
  return MAX_PRECEDENCE - p + 1;
}

/**
 * Creates and returns a new nonterminal symbol with a generated name
 * of the form $ACTIONn$, where n is a running counter.
 */
private NonTerminal NewSymbol ()
{
  return new NonTerminal("$ACTION"+(newSymbolCount++).ToString()+"$");
}

/**
 * Defines and records a new terminal.
 * @param token the terminal's name (also its key in terminalTable)
 */
private Terminal NewTerminal (string token)
{
  Terminal symbol = new Terminal(token);
  terminalTable[token] = symbol;
  return symbol;
}

/**
 * Defines and records a new operator terminal.
 * @param token the terminal's name (also its key in terminalTable)
 * @param isOperator passed through to the Terminal constructor
 */
private Terminal NewTerminal (string token, bool isOperator)
{
  Terminal symbol = new Terminal(token,isOperator);
  terminalTable[token] = symbol;
  return symbol;
}

/**
 * Defines and records a new nonterminal.
*/ private NonTerminal NewNonTerminal (string token) { NonTerminal symbol = new NonTerminal(token); nonterminalTable[token] = symbol; return symbol; } /** * Creates a new grammar rule for the current LHS using the * current rule sequence of grammar symbols, and resets all the * global parameters to read a new rule. Note the use of * String.Intern() for faster comparison in the ParserGenerator.java class. */ private void NewRule () { GrammarSymbol[] sequence = new GrammarSymbol[ruleSequence.Count]; for (int i=0; icurrentMode
. */
/**
 * Sets the tokenizer's parse mode.  RAW_MODE resets all syntax tables;
 * NORMAL_MODE restores the standard grammar syntax; EOL_MODE and
 * NO_EOL_MODE toggle whether end-of-line is reported as a token.
 * (The mode constants are declared earlier in this class — not visible
 * in this view.)
 * @param mode one of RAW_MODE, NORMAL_MODE, EOL_MODE, NO_EOL_MODE
 */
private void SetSyntax (int mode)
{
  switch (mode)
    {
    case RAW_MODE:
      st.ResetSyntax();
      isRAW_MODE = true;
      break;
    case NORMAL_MODE:
      SetGrammarSyntax();
      isRAW_MODE = false;
      break;
    case EOL_MODE:
      st.EolIsSignificant(true);
      isEOL_MODE = true;
      break;
    case NO_EOL_MODE:
      st.EolIsSignificant(false);
      isEOL_MODE = false;
      break;
    }
}

/**
 * Records previous parse mode (both the raw and the EOL flags).
 */
private void SaveSyntax ()
{
  wasRAW_MODE = isRAW_MODE;
  wasEOL_MODE = isEOL_MODE;
}

/**
 * Restores previous parse mode saved by SaveSyntax().
 */
private void RestoreSyntax ()
{
  SetSyntax(wasRAW_MODE ? RAW_MODE : NORMAL_MODE);
  SetSyntax(wasEOL_MODE ? EOL_MODE : NO_EOL_MODE);
}

/**
 * Returns the next token after checking for potential slash-star
 * comments.  These are ignored and skipped most of the time, except
 * when in documentation mode and the comment is a javadoc style
 * comment (i.e., starting with a slash immediately followed by two
 * stars).  Such comments are specially processed to generate
 * documentation.  Note that this recurses after a comment so the
 * caller only ever sees non-comment tokens.
 */
private int GetToken () // throws IOException
{
  int t = st.NextToken();
  if (t == '/' && st.Peek() == '*')
    {
      SaveSyntax(); // comment processing switches to RAW_MODE
      if (Options.DocOnly && !codeSection)
        ProcessSlashStarComment();
      else
        SkipSlashStarComment();
      RestoreSyntax();
      return GetToken();
    }
  return t;
}

/**
 * A GrammarSymbol to hold and accumulate the current grammar
 * symbol to which all documentation comments (entered as javadoc
 * comments) are to be attached.  This is reset explicitly with the
 * %doc command, or implicitly in the rules section of a grammar
 * with each reading of a new rule (whereby it is set to the next
 * nonterminal - the head of the next rule).  The comments are
 * accumulated in the doc attribute of a GrammarSymbol.
 */
private GrammarSymbol docSymbol;

/**
 * A StringBuilder to hold and accumulate the documentation
 * (i.e., javadoc) comments for the main page of the grammar's
 * documentation.
*/
private StringBuilder mainDoc;
internal StringBuilder MainDoc
{
  get { return mainDoc; }
}

/**
 * A StringBuilder to hold and accumulate the documentation
 * (i.e., javadoc) comments for the current grammar symbol's
 * documentation.
 */
private StringBuilder doc;

/**
 * Processes a comment; if this is a javadoc style comment (a slash
 * immediately followed by two stars), records its text: on the pending
 * doc symbol if one is set, otherwise on the main documentation buffer
 * (while in the declarations section) or on the per-symbol buffer.
 * Non-javadoc comments are simply skipped.
 */
private void ProcessSlashStarComment () // throws IOException
{
  st.SkipChar(); // skip the first star
  if (st.Peek() != '*')
    { // This is not a javadoc-style comment: skip it!
      SkipSlashStarComment();
      return;
    }
  // This is a javadoc-style comment: record it...
  // NOTE(review): the local buffer was renamed from 'doc' to 'buf';
  // it used to shadow the field 'doc', which is why the appends below
  // needed explicit 'this.doc' qualification.
  StringBuilder buf = new StringBuilder();
  SetSyntax(RAW_MODE);
  st.NextToken(); // consume the second star of the "/**" opener
  while (!(st.NextToken() == '*' && st.Peek() == '/'))
    buf.Append((char)st.TType);
  // Add a paragraph break between accumulated comments.
  // NOTE(review): reconstructed literal — the original source was
  // damaged here; "<p>" (an HTML paragraph break, consistent with the
  // javadoc-style output) is the presumed content — TODO confirm.
  buf.Append("\n<p>\n");
  st.NextToken(); // skip the closing slash
  if (docSymbol != null)
    { // attach the comment to the pending doc symbol, then clear it
      docSymbol.AddDoc(buf);
      docSymbol = null;
      return;
    }
  if (mainDocSection)
    {
      if (mainDoc == null)
        mainDoc = new StringBuilder();
      mainDoc.Append(buf);
    }
  else
    {
      if (doc == null)
        doc = new StringBuilder();
      doc.Append(buf);
    }
}

/**
 * Skips a C-style comment (everything up to the closing star-slash).
 */
private void SkipSlashStarComment () // throws IOException
{
  st.PushBack();
  SetSyntax(RAW_MODE);
  while (!(st.NextToken() == '*' && st.Peek() == '/'))
    ; // consume the comment's contents
  st.SkipChar(); // skip slash
}

/**
 * [Re]Initializes the configuration of the StreamTokenizer
 * reading the grammar to its default settings.
 */
private void SetGrammarSyntax ()
{
  st.SetDefaultSyntax();
  st.QuoteChar(SINGLE_QUOTE);
  st.QuoteChar(DOUBLE_QUOTE);
  st.SlashStarComments(false); // comments are handled by GetToken()
}

/**
 * The reader used to read the grammar files.
 */
private IncludeReader rd;

/**
 * This is a string recording the current location in the grammar
 * (file name and line number), used to prefix error messages.
 */
private string Location
{
  get
    {
      return "(file " + rd.FileName + ", line " + rd.LineNumber + ")";
    }
}

/**
 * This is the main reading method: opens the grammar file, then reads
 * the declarations section followed by the rules section.  Any failure
 * is rethrown as a BadGrammarException annotated with the location.
 */
private void ReadGrammar () // throws BadGrammarException
{
  ReportProgress();
  try
    {
      st = new StreamTokenizer(rd = new IncludeReader(GrammarName));
    }
  catch (Exception e)
    {
      throw new BadGrammarException("(file not found: "+e.Message+")");
    }
  SetSyntax(NORMAL_MODE);
  try
    {
      ReadDeclarations();
      ReadRules();
    }
  catch (Exception e)
    {
      throw new BadGrammarException(Location + " - " + e.Message);
    }
}

/**
 * Reads and processes the grammar's declarations until the section
 * marker is reached.
 */
private void ReadDeclarations () // throws Exception
{
  while (ReadDeclaration());
}

/**
 * Reads and processes a grammar declaration; returns false when
 * there is no more declaration to process, true otherwise.
*/
private bool ReadDeclaration () // throws Exception
{
  switch (GetToken())
    {
    case COMMAND_START:
      // a '%' starts a command; false is returned on the %% marker
      return ExecuteNextCommand();
    case StreamTokenizer.TT_EOF:
      throw new Exception("Premature end of file");
    case StreamTokenizer.TT_EOL:
      // blank line: keep reading
      return ReadDeclaration();
    default:
      throw new Exception
        ("Ill-formed syntax in grammar declarations section - "+Location);
    }
}

/**
 * Reads and processes a command and its arguments; returns false
 * when there is no more declaration to process (i.e., on the %%
 * section marker), true otherwise.
 */
private bool ExecuteNextCommand () // throws Exception
{
  string command = ReadCommand();
  switch (command)
    {
    case SECTION_COMMAND: // %%: end of the declarations section
      mainDocSection = false;
      return false;
    case START_COMMAND:
      DeclareStart();
      break;
    case ROOT_COMMAND:
      DeclareRoot();
      break;
    case LEFT_COMMAND:
      DeclareLeftAssoc();
      break;
    case RIGHT_COMMAND:
      DeclareRightAssoc();
      break;
    case TOKEN_COMMAND: // non-associative by default
    case NONASSOC_COMMAND:
      DeclareNonassoc();
      break;
    case LBRACE_COMMAND: // %{ ... %}: verbatim parser declarations
      codeSection = true;
      parserDeclarations.Add(Verbatim((char)COMMAND_START + RBRACE_COMMAND));
      codeSection = false;
      break;
    case USEFILE_COMMAND:
      DeclareList(parserDeclarations);
      break;
    case PACKAGE_COMMAND:
      DeclarePackage();
      break;
    case IMPORT_COMMAND:
      DeclareList(imports);
      break;
    case PRECSTEP_COMMAND:
      DeclarePrecstep();
      break;
    case NODECLASS_COMMAND:
      DeclareNodeClass();
      break;
    case NODEPREFIX_COMMAND:
      DeclareNodePrefix();
      break;
    case NODESUFFIX_COMMAND:
      DeclareNodeSuffix();
      break;
    case DOC_COMMAND:
      ProcessDoc();
      break;
    case INCLUDE_COMMAND:
      ProcessInclude();
      break;
    case DYNAMIC_COMMAND:
      DeclareDynamic();
      break;
    case ACCESS_COMMAND:
      DeclareAccess();
      break;
    default:
      // a user-declared dynamic operator category acts as a command
      if (operatorCategoryTable[command] != null)
        {
          operatorCategory = GetNonTerminal(command);
          DeclareOperator();
          break;
        }
      // BUG FIX(review): COMMAND_START is a const int, so the original
      // message concatenated its numeric value (37) instead of '%';
      // the cast matches every other use of COMMAND_START in messages.
      throw new Exception("Unknown command: " + (char)COMMAND_START + command);
    }
  return true;
}

/**
 * Reads and returns a grammar command.
*/
private string ReadCommand () // throws Exception
{
  CheckCommandSyntax();
  // A word token is always an acceptable command name.
  if (GetToken() == StreamTokenizer.TT_WORD)
    return String.Intern(st.SValue);
  // Otherwise only the single-character commands "{" and "%" qualify.
  string cmd = String.Intern(((char)st.TType).ToString());
  if (cmd == LBRACE_COMMAND || cmd == SECTION_COMMAND)
    return cmd;
  throw new Exception("Ill-formed grammar command: "+cmd);
}

/**
 * Checks whether what follows is a legitimate start of command.
 * Throws an exception if it is not.
 */
private void CheckCommandSyntax () // throws Exception
{
  if (ValidCommandStartCharacter(st.Peek()))
    return;
  throw new Exception("Invalid command start: "
                      + (char)COMMAND_START + (char)st.Peek() + "...");
}

/**
 * Checks whether the given character may legitimately start a command:
 * it must match the first character of a built-in command or of a
 * declared operator category.
 * @param c the candidate character
 * @return true iff c can start a command
 */
private bool ValidCommandStartCharacter (int c)
{
  char start = (char)c;
  foreach (String name in commands)
    if (start == name[0])
      return true;
  foreach (String category in operatorCategoryTable.Keys)
    if (start == category[0])
      return true;
  return false;
}

/**
 * Declares the next symbol to be read as the grammar's start symbol.
 * The start symbol must not already be defined, must not be a token,
 * and must not already be marked as the start symbol.
 */
private void DeclareStart () // throws Exception
{
  if (StartIsDefined)
    throw new Exception("Start symbol ("+startSymbol+") cannot be redefined");
  if (GetToken() != StreamTokenizer.TT_WORD)
    throw new Exception("Ill-formed start symbol declaration");
  string name = st.SValue;
  GrammarSymbol found = GetTerminal(name);
  if (found != null)
    throw new Exception("Token "+found+" cannot be the start symbol!");
  NonTerminal candidate = GetNonTerminal(name);
  if (candidate == null)
    { // a brand new symbol: create it and make it the start symbol
      DefineStart(NewNonTerminal(name));
      return;
    }
  if (candidate.IsStart)
    throw new Exception("Symbol "+candidate+" is already the start symbol!");
  DefineStart(candidate);
}

/**
 * Defines the specified symbol as the grammar's user-supplied start symbol.
*/
private void DefineStart (NonTerminal symbol)
{
  // record the start symbol, mark it, and add the rule START -> ROOTS
  (startSymbol = symbol).MakeStart();
  GrammarSymbol[] seq = new GrammarSymbol[] {_ROOTS, startSymbol};
  new Rule(seq);
  // the start symbol is always a root as well
  if (!symbol.IsRoot) DefineRoot(symbol);
}

/**
 * Declares the next symbol to be read as a root symbol.  The symbol
 * must not be a token and must not already be a root.
 */
private void DeclareRoot () // throws Exception
{
  if (GetToken() != StreamTokenizer.TT_WORD)
    throw new Exception("Ill-formed root symbol declaration");
  GrammarSymbol symbol = GetTerminal(st.SValue);
  if (symbol != null)
    throw new Exception("Token "+symbol+" cannot be a root symbol!");
  symbol = GetNonTerminal(st.SValue);
  if (symbol != null)
    if (((NonTerminal)symbol).IsRoot)
      throw new Exception("Symbol "+symbol+" is already a root symbol!");
    else
      DefineRoot((NonTerminal)symbol);
  else
    DefineRoot(NewNonTerminal(st.SValue));
}

/**
 * Defines the specified symbol as a grammar root symbol: marks it as a
 * root, creates the artificial switch token controlling the partial
 * parse for it, records it in roots, and adds the rule
 * ROOTS -> switch symbol (with its semantic action).  The first root
 * so defined is remembered in FIRST_ROOT.
 */
private void DefineRoot (NonTerminal symbol)
{
  symbol.MakeRoot();
  Terminal root_switch = NewTerminal("$"+symbol.Name+"_switch$");
  roots[symbol] = root_switch;
  GrammarSymbol[] seq = new GrammarSymbol[] {_ROOTS, root_switch, symbol};
  new Rule(seq,"_head_ = _head_.Copy(Node(_rule_,2));");
  if (FIRST_ROOT == null) FIRST_ROOT = symbol;
}

/**
 * Defines every string in the specified list as a token with the
 * specified associativity.
 * @param tokens a list of token name strings
 * @param associativity one of the *_ASSOCIATIVE constants
 */
private void DefineTokens (ArrayList tokens, int associativity) // throws Exception
{
  foreach (string symbol in tokens)
    DefineToken(symbol,associativity);
}

/**
 * Defines the specified string as a token with the specified
 * associativity.
*/
private void DefineToken (string token, int associativity) // throws Exception
{
  // a name may not denote both a nonterminal and a token
  GrammarSymbol symbol = GetNonTerminal(token);
  if (symbol != null)
    throw new Exception("Nonterminal "+symbol+" cannot be a token!");
  symbol = GetTerminal(token);
  if (symbol != null)
    throw new Exception("Duplicate token declaration: "+symbol);
  symbol = NewTerminal(token);
  // currentPrecedence is set as a side effect of TokensInLine()
  ((Terminal)symbol).Precedence = currentPrecedence;
  ((Terminal)symbol).Associativity = associativity;
}

/**
 * Reads all the tokens in the current line and returns an ArrayList
 * containing them.  As a side effect, sets currentPrecedence either
 * to the (single) numeric precedence found on the line, or to the
 * next automatic precedence level.
 */
private ArrayList TokensInLine () // throws Exception
{
  ArrayList tokens = new ArrayList(10);
  SetSyntax(EOL_MODE);
  currentPrecedence = -1; // -1 means "no explicit precedence seen yet"
  while (GetToken() != StreamTokenizer.TT_EOL)
    {
      if (st.TType == '\\' && st.Peek() == '\n')
        { // backslash-newline: line continuation
          st.SkipChar();
          GetToken();
        }
      switch (st.TType)
        {
        case SINGLE_QUOTE:
        case DOUBLE_QUOTE:
        case StreamTokenizer.TT_WORD:
          tokens.Add(st.SValue);
          break;
        case StreamTokenizer.TT_NUMBER:
          // a number on the line is the tokens' precedence (at most one)
          if (currentPrecedence != -1)
            throw new Exception
              ("Duplicate precedence specified in token declaration");
          currentPrecedence = CheckPrecedenceLevel(PrologPrecedence((int)st.NValue));
          break;
        case StreamTokenizer.TT_EOF:
          throw new Exception("Premature end of file");
        default:
          // any other single character is itself a token
          tokens.Add(((char)st.TType).ToString());
          break;
        }
    }
  if (tokens.Count == 0)
    throw new Exception("No token found in token declaration");
  if (currentPrecedence == -1)
    currentPrecedence = NextPrecedenceLevel();
  SetSyntax(NO_EOL_MODE);
  return tokens;
}

/**
 * Increases the current precedence level according to the current
 * precedence increment, and returns the (checked) level in effect
 * before the increase.
 */
private int NextPrecedenceLevel ()
{
  int p = precedenceLevel;
  precedenceLevel += precedenceIncrement;
  return CheckPrecedenceLevel(p);
}

/**
 * Declares the tokens in the current line to be left-associative.
 */
private void DeclareLeftAssoc () // throws Exception
{
  DefineTokens(TokensInLine(),LEFT_ASSOCIATIVE);
}

/**
 * Declares the tokens in the current line to be right-associative.
*/
  private void DeclareRightAssoc () // throws Exception
  {
    DefineTokens(TokensInLine(),RIGHT_ASSOCIATIVE);
  }

  /**
   * Declares the tokens in the current line to be non-associative.
   */
  private void DeclareNonassoc () // throws Exception
  {
    DefineTokens(TokensInLine(),NON_ASSOCIATIVE);
  }

  /**
   * Declares the command argument to be the prefix to add to
   * node class names for this grammar.
   */
  private void DeclareNodePrefix () // throws Exception
  {
    SetSyntax(EOL_MODE);
    switch (GetToken())
      {
      case StreamTokenizer.TT_WORD:
      case CC.SQT:
      case CC.DQT:
        nodePrefix = String.Intern(st.SValue);
        break;
      default:
        throw new Exception("Missing node prefix specifier");
      }
    SetSyntax(NO_EOL_MODE);
  }

  /**
   * Declares the command argument to be the suffix to add to
   * node class names for this grammar.
   */
  private void DeclareNodeSuffix () // throws Exception
  {
    SetSyntax(EOL_MODE);
    switch (GetToken())
      {
      case StreamTokenizer.TT_WORD:
      case CC.SQT:
      case CC.DQT:
        nodeSuffix = String.Intern(st.SValue);
        break;
      default:
        throw new Exception("Missing node suffix specifier");
      }
    SetSyntax(NO_EOL_MODE);
  }

  /**
   * Processes a %doc command, whose argument is assumed
   * to be a nonterminal symbol.
   */
  private void ProcessDoc () // throws Exception
  {
    SetSyntax(EOL_MODE);
    switch (GetToken())
      {
      case SINGLE_QUOTE:
      case DOUBLE_QUOTE:
        docSymbol = GetTerminal(st.SValue);
        if (docSymbol == null)
          docSymbol = NewTerminal(st.SValue);
        break;
      case StreamTokenizer.TT_WORD:
        docSymbol = GetTerminal(st.SValue);
        if (docSymbol == null)
          {
            // NOTE(review): when the word names a known nonterminal, a NEW
            // terminal with the same name is created here. That looks
            // inverted (one would expect the nonterminal itself to be used,
            // or a new terminal only when the word is unknown) -- confirm
            // against the original Jacc sources before changing.
            docSymbol = GetNonTerminal(st.SValue);
            if (docSymbol != null)
              docSymbol = NewTerminal(st.SValue);
          }
        break;
      default:
        throw new Exception("Missing symbol in doc command");
      }
    SetSyntax(NO_EOL_MODE);
  }

  /**
   * Declares the access modifier for the parser class.
*/
  private void DeclareAccess () // throws Exception
  {
    SetSyntax(EOL_MODE);
    switch (GetToken())
      {
      case StreamTokenizer.TT_WORD:
        accessTag = String.Intern(st.SValue);
        // Only the four C# access modifiers are legal values.
        if (!(accessTag == "public"
              || accessTag == "internal"
              || accessTag == "private"
              || accessTag == "protected"))
          throw new Exception("Bad access tag value ("+accessTag+")");
        // Append a separating space for direct splicing into generated code.
        accessTag += " ";
        break;
      default:
        throw new Exception("Missing tag in access command");
      }
    SetSyntax(NO_EOL_MODE);
  }

  /** The set of declared node classes. */
  private SetOf nodeClasses;

  /** The set of extended node classes. */
  private SetOf superNodeClasses;

  /**
   * Declares the next string to be read as a node class.
   */
  private void DeclareNodeClass () // throws Exception
  {
    if (GetToken() != StreamTokenizer.TT_WORD)
      throw new Exception("Ill-formed node class declaration");
    string publInfo = "";
    string word = String.Intern(st.SValue);
    // Optional leading 'public' modifier.
    if (word == "public")
      {
        publInfo = "public";
        if (GetToken() != StreamTokenizer.TT_WORD)
          throw new Exception("Ill-formed nodeclass declaration");
      }
    word = String.Intern(st.SValue);
    if (GetTerminal(word) != null)
      throw new Exception("Can't define a node class for a terminal ("+word+")");
    NonTerminal symbol = GetNonTerminal(word);
    if (symbol != null)
      {
        if (nodeClasses.Contains(symbol))
          throw new Exception("Duplicate node class declaration for symbol "+symbol);
      }
    else
      symbol = NewNonTerminal(word);
    nodeClasses.Add(symbol);
    // Optional 'extends' clause naming the superclass symbol.
    NonTerminal superSymbol = null;
    if (GetToken() == StreamTokenizer.TT_WORD
        && (String.Intern(st.SValue) == "extends"))
      {
        if (GetToken() != StreamTokenizer.TT_WORD
            || String.Intern(st.SValue) == "implements"
            || String.Intern(st.SValue) == "locates")
          throw new Exception
            ("Error in node class declaration: identifier missing after 'extends'");
        if (GetTerminal(st.SValue) != null)
          throw new Exception("Can't define node class " + symbol +
                              " as a subclass of a terminal (" + st.SValue + ")");
        if ((superSymbol = GetNonTerminal(st.SValue)) == null)
          superSymbol = NewNonTerminal(st.SValue);
        superNodeClasses.Add(superSymbol);
      }
    else
      st.PushBack();
    // Optional 'implements' clause: comma-separated interface names.
    string implInfo = "";
    if (GetToken() == StreamTokenizer.TT_WORD
        && String.Intern(st.SValue) == "implements")
      {
        implInfo = ", ";
        if (GetToken() != StreamTokenizer.TT_WORD
            || String.Intern(st.SValue) == "locates")
          throw new Exception
            ("Error in nodeclass declaration: identifier missing after 'implements'");
        do
          {
            implInfo += st.SValue;
            if (GetToken() == COMMA)
              implInfo += ", ";
            else
              break;
          }
        while (GetToken() == StreamTokenizer.TT_WORD
               && String.Intern(st.SValue) != "locates");
      }
    st.PushBack();
    // Optional 'locates' clause naming the field to receive location info.
    string locateInfo = null;
    if (GetToken() == StreamTokenizer.TT_WORD
        && String.Intern(st.SValue) == "locates")
      {
        if (GetToken() != StreamTokenizer.TT_WORD)
          throw new Exception
            ("Error in nodeclass declaration: identifier missing after 'locates'");
        locateInfo = st.SValue;
      }
    else
      st.PushBack();
    if (GetToken() != BEGIN_SCOPE)
      throw new Exception
        ("Missing '" + (char)BEGIN_SCOPE + "' in nodeclass declaration");
    // The class body is everything up to the matching closing brace.
    string body = BracedString();
    symbol.NodeType = nodePrefix + symbol.Name + nodeSuffix;
    string superClass = superSymbol == null
      ? "ParseNode"
      : nodePrefix + superSymbol.Name + nodeSuffix;
    // Assemble the generated C# class source for this node class.
    StringBuilder nodeClass = new StringBuilder(bufferSize);
    nodeClass
      .Append(publInfo)
      .Append(publInfo.Length > 0 ? " " : "")
      .Append("class ")
      .Append(symbol.NodeType)
      .Append(" : "+superClass+" ")
      .Append(implInfo)
      .Append("\n{\n ")
      .Append(publInfo)
      .Append(publInfo.Length > 0 ? " " : "")
      .Append(symbol.NodeType)
      .Append(" (ParseNode node) : base(node) {}\n\n ")
      .Append(body.Trim());
    // If a 'locates' field was given, generate a Locate() method that
    // copies this node's source extent into that field.
    if (locateInfo != null)
      nodeClass
        .Append("\n")
        .Append("\n public void Locate ()")
        .Append("\n {")
        .Append("\n if ("+locateInfo+" != null)")
        .Append("\n {")
        .Append("\n "+locateInfo+".SetStart(GetStart());")
        .Append("\n "+locateInfo+".SetEnd(GetEnd());")
        .Append("\n }")
        .Append("\n }");
    nodeClass.Append("\n}");
    // Public node classes go to their own files; others are ancillary.
    if (String.Intern(publInfo) != "public")
      ancillaryClasses.Add(nodeClass);
    else
      publicClasses[symbol.NodeType] = nodeClass;
  }

  /**
   * Returns the contents between "{" and "}" as a string, taking into
   * account possible nestings.
   */
  private string BracedString () // throws Exception
  {
    StringBuilder buffer = new StringBuilder(bufferSize);
    SetSyntax(RAW_MODE); // Make all characters ordinary
    int nesting = 1;     // Nesting of braces
    while (nesting > 0)
      {
        switch (GetToken())
          {
          case BEGIN_SCOPE:
            buffer.Append((char)BEGIN_SCOPE);
            nesting++;
            break;
          case END_SCOPE:
            // The outermost closing brace is consumed, not copied.
            if (nesting > 1)
              buffer.Append((char)END_SCOPE);
            nesting--;
            break;
          default:
            buffer.Append((char)st.TType);
            break;
          }
      }
    SetSyntax(NORMAL_MODE);
    return buffer.ToString();
  }

  /**
   * Declares a package (actually, a .NET namespace), for the parser
   * class.
   */
  private void DeclarePackage () // throws Exception
  {
    if (packageName != null)
      throw new Exception("Duplicate package declaration");
    if (GetToken() != StreamTokenizer.TT_WORD)
      throw new Exception("Bad package declaration");
    packageName = st.SValue;
    // Discard the rest of the line.
    SetSyntax(EOL_MODE);
    while (GetToken() != StreamTokenizer.TT_EOL);
    SetSyntax(NO_EOL_MODE);
  }

  /**
   * Reads a command's multiple arguments into the specified ArrayList.
*/
  private void DeclareList (ArrayList v) // throws Exception
  {
    SetSyntax(RAW_MODE);
    StringBuilder buffer = new StringBuilder(bufferSize);
    string argument = "";
    for (;;)
      switch (GetToken())
        {
        case StreamTokenizer.TT_EOF:
          // NOTE(review): inside the remainder section EOF merely breaks
          // the switch and the loop calls GetToken() again -- presumably
          // the tokenizer does not keep returning TT_EOF forever here;
          // confirm termination against the tokenizer's behavior.
          if (!readingRemainderSection)
            throw new Exception("Premature end of file");
          break;
        case COMMAND_START:
          // A new command terminates the list; leave it for the caller.
          st.PushBack();
          goto case END_OF_RULE;
        case END_OF_RULE:
          // Flush the last pending argument and exit.
          argument = buffer.ToString().Trim();
          if (argument.Length>0)
            v.Add(argument);
          goto EXIT;
        case COMMA:
        case SPACE:
        case NEWLINE:
        case TAB:
          // Separators: flush the accumulated argument, if any.
          argument = buffer.ToString().Trim();
          if (argument.Length > 0)
            {
              v.Add(argument);
              buffer = new StringBuilder(bufferSize);
            }
          break;
        default:
          buffer.Append((char)st.TType);
          break;
        }
    EXIT:
    if (!readingRemainderSection)
      SetSyntax(NORMAL_MODE);
  }

  /**
   * Processes a file inclusion command.
   */
  private void ProcessInclude () // throws Exception
  {
    SetSyntax(EOL_MODE);
    switch (GetToken())
      {
      case StreamTokenizer.TT_WORD:
      case CC.SQT:
      case CC.DQT:
        rd.Include(st.SValue);
        break;
      default:
        throw new Exception("Bad include argument");
      }
    SetSyntax(NO_EOL_MODE);
    if (verbosity > Verbose.QUIET)
      Say("*** Including file " + rd.FileName);
  }

  /**
   * Declares the precedence step increment to be the next number to
   * be read.
   */
  private void DeclarePrecstep () // throws Exception
  {
    if (GetToken() != StreamTokenizer.TT_NUMBER)
      throw new Exception("Precedence increment must be a number");
    precedenceIncrement = (int)st.NValue;
  }

  /**
   * Declares this grammar to allow dynamic parsing.
*/
  private void DeclareDynamic () // throws Exception
  {
    SetSyntax(EOL_MODE);
    string cat = null;
    switch (GetToken())
      {
      case StreamTokenizer.TT_EOF:
        throw new Exception("Premature end of file");
      case StreamTokenizer.TT_EOL:
        break;
      case StreamTokenizer.TT_WORD:
        cat = st.SValue;
        break;
      default:
        throw new Exception("Bad dynamic operator declaration");
      }
    isDynamic = true;
    if (cat == null)
      // simply allows dynamic tokenizing, no dynamic operators
      // NOTE(review): this early return skips the SetSyntax(NO_EOL_MODE)
      // below, leaving the tokenizer in EOL_MODE -- confirm this is
      // intended (TT_EOL was just consumed, but the mode is not restored).
      return;
    GrammarSymbol catSymbol = GetNonTerminal(cat);
    if (catSymbol == null)
      catSymbol = NewNonTerminal(cat);
    catSymbol.IsOperator = true;
    if (GetTerminal(cat) == null)
      {
        operatorCategoryTable[cat] = new ArrayList();
        cat = cat.ToUpper();
        // Generate the three fixity subcategories for this category
        // (prefix, infix, postfix), e.g. OP_, _OP_, _OP.
        DeclareSubCategory(catSymbol,cat+"_");
        DeclareSubCategory(catSymbol,"_"+cat+"_");
        DeclareSubCategory(catSymbol,"_"+cat);
      }
    else
      throw new Exception("Terminal "+cat+" cannot be used as an operator category!");
    SetSyntax(NO_EOL_MODE);
  }

  /**
   * Declares the specified string as a dynamic operator terminal
   * symbol in the operator category denoted by the specified
   * grammar symbol (necessarily a nonterminal).
   */
  private void DeclareSubCategory (GrammarSymbol catSymbol, string subCat)
  {
    GrammarSymbol subCatSymbol = GetTerminal(subCat);
    if (subCatSymbol == null)
      subCatSymbol = NewTerminal(subCat,true);
    else
      Warning("Operator subcategory "+subCat+" is multiply defined.");
    // Add the rule: category -> subcategory.
    GrammarSymbol[] sequence = new GrammarSymbol[] {catSymbol, subCatSymbol};
    new Rule(sequence,DEFAULT_ACTION);
  }

  /**
   * Declares the next argument to read as a dynamic operator in the
   * current operator category.
*/
  private void DeclareOperator () // throws Exception
  {
    string name = null;
    SetSyntax(EOL_MODE);
    ArrayList ops = (ArrayList)operatorCategoryTable[operatorCategory.Name];
    // First argument: the operator's name.
    switch (GetToken())
      {
      case StreamTokenizer.TT_EOF:
        throw new Exception("Premature end of file");
      case StreamTokenizer.TT_EOL:
        throw new Exception("Premature end of dynamic operator declaration");
      case StreamTokenizer.TT_WORD:
      case SINGLE_QUOTE:
      case DOUBLE_QUOTE:
        name = st.SValue;
        break;
      case StreamTokenizer.TT_NUMBER:
        throw new Exception("Bad dynamic operator declaration");
      default:
        name = ((char)st.TType).ToString();
        break;
      }
    if (GetNonTerminal(name) != null)
      throw new Exception("Nonterminal "+name+" cannot be used as an operator!");
    // Remaining arguments: fixity specifier and precedence, in either order.
    string specifier = null;
    int precedence = 0;
    switch (GetToken())
      {
      case StreamTokenizer.TT_EOF:
        throw new Exception("Premature end of file");
      case StreamTokenizer.TT_EOL:
        throw new Exception("Premature end of dynamic operator declaration");
      case StreamTokenizer.TT_WORD:
      case SINGLE_QUOTE:
      case DOUBLE_QUOTE:
        // Specifier first; precedence is explicit or defaulted at EOL.
        specifier = st.SValue;
        switch (GetToken())
          {
          case StreamTokenizer.TT_NUMBER:
            precedence = CheckPrecedenceLevel(PrologPrecedence((int)st.NValue));
            break;
          case StreamTokenizer.TT_EOL:
            precedence = NextPrecedenceLevel();
            st.PushBack();
            break;
          default:
            throw new Exception("Bad dynamic operator declaration");
          }
        break;
      case StreamTokenizer.TT_NUMBER:
        // Precedence first; specifier must follow.
        precedence = CheckPrecedenceLevel(PrologPrecedence((int)st.NValue));
        switch (GetToken())
          {
          case StreamTokenizer.TT_WORD:
          case SINGLE_QUOTE:
          case DOUBLE_QUOTE:
            specifier = st.SValue;
            break;
          default:
            throw new Exception("Bad dynamic operator declaration");
          }
        break;
      default:
        throw new Exception("Bad dynamic operator declaration");
      }
    Operator op = new Operator(name,operatorCategory,precedence,specifier);
    int i = ops.IndexOf(op);
    if (i == -1)
      {
        // New operator: register it in both the per-category and
        // per-name tables.
        op.Add();
        ocount++;
        ops.Add(op);
        ops = (ArrayList)operatorNameTable[name];
        if (ops == null)
          operatorNameTable[name] = ops = new ArrayList();
        ops.Add(op);
      }
    else
      // NOTE(review): the index i was computed on the per-category list
      // 'ops', but is used here to index the distinct 'operators'
      // collection -- this looks suspicious unless the two are kept in
      // lock-step; confirm against the original Jacc sources.
      ((Operator)operators[i]).Redefine(precedence,specifier);
    SetSyntax(NO_EOL_MODE);
  }

  /**
   * This reads from the input in raw mode (i.e., with all
   * characters made ordinary), accumulating what is read into a
   * string buffer until a string matching the specified end marker
   * is found - upon which the string buffer is returned.
   */
  private StringBuilder Verbatim (string endMarker) // throws Exception
  {
    StringBuilder buffer   // character accumulator
      = new StringBuilder(bufferSize);
    StringBuilder endMatch // matched prefix of end marker
      = new StringBuilder(bufferSize);
    SetSyntax(RAW_MODE);   // make all characters ordinary
    int fullMatch = endMarker.Length;
    if (fullMatch > 0)
      {
        int matchCount;
        char c = (char)GetToken();
        for (;;)
          {
            // Fast path: copy characters until a potential marker start.
            if (c != endMarker[0])
              {
                buffer.Append(c);
                c = (char)GetToken();
                continue;
              }
            // Try to match the whole end marker from here.
            matchCount = 0;
            endMatch.Length = 0;
            while (matchCount < fullMatch && c == endMarker[matchCount])
              {
                endMatch.Append(c);
                matchCount++;
                c = (char)GetToken();
              }
            if (matchCount == fullMatch)
              {
                // Full marker seen: push back the lookahead and stop.
                st.PushBack();
                break;
              }
            // Partial match: keep the matched prefix as ordinary text.
            buffer.Append(endMatch);
          }
      }
    SetSyntax(NORMAL_MODE);
    return buffer;
  }

  // Note for future work [by hak]:
  // After readRules, one may optionally partially evaluate the
  // grammar rules by replacing the LHS of a singular rule (i.e., one
  // such that there is only one rule for this LHS) by the rules RHS
  // in all the other rules, and delete the rule whenever the LHS
  // neither recursive nor $START$. NB: it's useless for a singular
  // recursive rule ...

  /**
   * Reads and defines the grammar rules.
*/
  private void ReadRules () // throws Exception
  {
    // Every symbol mentioned after 'extends' must itself have been
    // declared as a node class.
    if ((superNodeClasses.Minus(nodeClasses)).Count != 0)
      {
        Gripe("*** These non-terminals must be declared as node classes:\n");
        foreach (NonTerminal n in superNodeClasses)
          Gripe("\t"+n);
        Gripe();
        throw new Exception("Incomplete node class declarations");
      }
    while (ReadRule());
    if (!StartIsDefined)
      {
        string msg = "There are no rules in this grammar!";
        if (IsPermissible)
          Warning(msg);
        else
          throw new Exception(msg);
      }
    // NB: non-short-circuit '|' is deliberate here, so that BOTH kinds
    // of disconnected symbols get reported, not just the first kind.
    if (UndefinedSymbols() | UnreachableSymbols())
      {
        string msg = "This grammar has disconnected symbols.";
        if (IsPermissible)
          Warning(msg);
        else
          throw new Exception(msg);
      }
  }

  /**
   * Reads and processes a grammar rule, returning true iff there
   * are more rules to process.
   */
  private bool ReadRule () // throws Exception
  {
    if (!ReadRuleHead())
      return false;
    ReadRuleBody();
    return true;
  }

  /**
   * Reads the head symbol of a grammar rule, returning true iff
   * there is such a symbol to be read next.
   */
  private bool ReadRuleHead () // throws Exception
  {
    switch (GetToken())
      {
      case StreamTokenizer.TT_EOF:
        return false;
      case StreamTokenizer.TT_WORD:
        CheckSymbol(st.SValue,true);
        if (GetToken() != RULE_NECK)
          throw new Exception
            ("Missing RULE_NECK after left-hand side symbol in rule");
        break;
      case RULE_NECK:
        // Headless rule: reuse the previous rule's LHS (yacc style).
        if (currentLHS == null)
          throw new Exception("Missing left-hand side symbol in rule");
        break;
      case COMMAND_START:
        // A doubled COMMAND_START ends the rules section.
        SetSyntax(RAW_MODE);
        if (st.Peek() == COMMAND_START)
          return false;
        goto default;
      default:
        throw new Exception("Ill-formed left-hand side in rule");
      }
    return true;
  }

  /**
   * Reads and processes the body of a grammar rule.
*/
  private void ReadRuleBody () // throws Exception
  {
    for (;;)
      {
        switch (GetToken())
          {
          case StreamTokenizer.TT_EOF:
            throw new Exception("Premature end of file");
          case StreamTokenizer.TT_WORD:
            CheckSymbol(st.SValue,false);
            break;
          case SINGLE_QUOTE:
          case DOUBLE_QUOTE:
            // Quoted strings always denote terminals.
            CheckTerminal(st.SValue);
            break;
          case OR_RULE_BODY:
            // '|': close the current alternative as a rule and start a
            // new one with the same LHS.
            NewRule();
            ruleSequence.Add(currentLHS);
            break;
          case BEGIN_SCOPE:
            // '{': a semantic action.
            ReadAction();
            break;
          case COMMAND_START:
            // '%': only %prec is legal inside a rule body.
            ProcessRulePrec();
            break;
          case END_OF_RULE:
            NewRule();
            return;
          default:
            throw new Exception("Ill-formed syntax in rule body");
          }
      }
  }

  /**
   * Processes an in-rule %prec command.
   */
  private void ProcessRulePrec () // throws Exception
  {
    if (st.Peek() == PREC_COMMAND[0]) // e.g., in %prec, a 'p' must follow %
      {
        GetToken();
        if (String.Intern(st.SValue) == PREC_COMMAND)
          {
            if (ruleTag != null)
              throw new Exception("Duplicate %prec command in rule");
            ReadRuleTag();
            return;
          }
      }
    throw new Exception("Bad %prec command in grammar rule body");
  }

  /**
   * Reads and processes a grammar rule's tag declared by its
   * in-rule %prec command.
*/
  private void ReadRuleTag () // throws Exception
  {
    GetToken();
    // Form 1: %prec TOKEN -- the rule takes the token's precedence.
    if (st.TType == StreamTokenizer.TT_WORD
        || st.TType == SINGLE_QUOTE
        || st.TType == DOUBLE_QUOTE)
      {
        ruleTag = GetTerminal(st.SValue);
        if (ruleTag == null)
          throw new Exception("Unknown token as %prec argument "+st.SValue);
        if (ruleTag.IsOperator)
          throw new Exception
            ("Dynamic operators cannot be used as %prec argument "+st.SValue);
        ((Terminal)ruleTag).IsTag = true;
        return;
      }
    // Form 2: %prec NUMBER SPECIFIER -- explicit precedence/associativity.
    if (st.TType == StreamTokenizer.TT_NUMBER)
      {
        int precedence = CheckPrecedenceLevel(PrologPrecedence((int)st.NValue));
        GetToken();
        if (!(st.TType == StreamTokenizer.TT_WORD
              || st.TType == SINGLE_QUOTE
              || st.TType == DOUBLE_QUOTE))
          throw new Exception("Bad %prec associativity specifier");
        try
          {
            ruleTag = new RuleTag(precedence,st.SValue);
          }
        catch (NonFatalParseErrorException e)
          {
            // A bad specifier is recoverable: warn and ignore the command
            // (ruleTag stays null in that case).
            LoudWarning(e.Message + ": " + Location + "; %prec command ignored");
          }
      }
    else
      throw new Exception("Ill-formed %prec argument in grammar rule body");
  }

  /**
   * Returns true iff not all non-terminal symbols are
   * the LHS of at least one rule.
   */
  private bool UndefinedSymbols ()
  {
    ArrayList undefs = new ArrayList();
    foreach (NonTerminal n in nonterminals)
      if (n.Rules.Count == 0)
        undefs.Add(n);
    if (undefs.Count != 0)
      {
        if (undefs.Count > 1)
          Gripe("*** These non-terminal symbols have no rules:\n");
        else
          Gripe("*** This non-terminal symbol has no rules:\n");
        foreach (NonTerminal n in undefs)
          Gripe("\t"+n);
        Gripe();
        if (undefs.Count > 1)
          Gripe("*** Declare them as tokens or give rules for them.");
        else
          Gripe("*** Declare it as a token or give rules for it.");
        return true;
      }
    return false;
  }

  /**
   * Returns true iff the grammar contains symbols that are
   * not reachable from the grammar's start symbol. It traverses the
   * rules starting from the start symbol and recording all symbols it
   * meets in the dejaVu hash set. It then verifies that
   * all the known symbols are in dejaVu, returning
   * true iff any is missing...
*/
  private bool UnreachableSymbols ()
  {
    HashSet dejaVu = new HashSet();
    TraverseRules(_START,dejaVu);
    // Collect every known symbol that the traversal did not visit.
    HashSet unreached = new HashSet();
    foreach (GrammarSymbol s in nonterminals)
      if (!dejaVu[s])
        unreached.Add(s);
    foreach (Terminal s in terminals)
      {
        if (s.IsSpecial)
          continue; // special symbols (e.g., $EMPTY$) are exempt
        if (!dejaVu[s])
          unreached.Add(s);
      }
    if (unreached.Count != 0)
      {
        if (unreached.Count > 1)
          GripeSome("*** These symbols are ");
        else
          GripeSome("*** This symbol is ");
        Gripe("not reachable from any root:\n");
        foreach (GrammarSymbol s in unreached)
          Gripe("\t"+s);
        Gripe();
        Gripe("*** Check the grammar in File "+GrammarName+" and correct this.");
        return true;
      }
    return false;
  }

  /**
   * Traverses the rules starting at the specified root symbol and
   * recording all the grammar symbols it crosses on the way in the
   * provided hash set, until no new one is found.
   */
  private void TraverseRules (GrammarSymbol root, HashSet dejaVu)
  {
    // NOTE(review): the loop body below arrived damaged in the source we
    // received (text after "for (int i = 1; i" was lost in extraction).
    // The traversal is reconstructed from the documented contract: visit
    // every RHS symbol of every rule of a nonterminal, recursing only into
    // symbols not yet seen (so recursive grammars terminate). Sequence[0]
    // is the rule's head, hence i starts at 1. Confirm against the
    // original Jacc sources.
    dejaVu[root] = true;
    if (root is NonTerminal)
      foreach (Rule r in ((NonTerminal)root).Rules)
        for (int i = 1; i < r.Sequence.Length; i++)
          if (!dejaVu[r.Sequence[i]])
            TraverseRules(r.Sequence[i],dejaVu);
  }

  /**
   * This method checks whether the specified string token is a known
   * grammar symbol. If it is a terminal and isLHS is true, it reports
   * an error because a terminal may not be used as a LHS symbol. If not,
   * and it is neither a know nonterminal, it creates a new nonterminal
   * symbol for it and records it. It also sets the grammar's start symbol
   * if need be and takes care of recording doc comments associated with
   * this symbol. In all cases, it then checks whether this token follows
   * (what would be) an intermediate semantic action, which necessitates
   * creating an implicit empty rule.
   *
   * @param token a string token
   * @param isLHS true iff the token is the LHS of a rule
   */
  private void CheckSymbol (string token, bool isLHS) // throws Exception
  {
    GrammarSymbol symbol = GetTerminal(token);
    if (symbol != null) // this symbol is a terminal
      {
        if (isLHS)      // it cannot be a left-hand side!
          throw new Exception("Terminal "+symbol+" cannot be a left-hand side!");
      }
    else
      {
        if ((symbol = GetNonTerminal(token)) == null)
          symbol = NewNonTerminal(token);
        if (isLHS)
          {
            currentLHS = symbol;
            // The first LHS ever read defines the start symbol (or the
            // first declared root, when roots were declared explicitly).
            if (!StartIsDefined)
              DefineStart(FIRST_ROOT == null ? (NonTerminal)symbol : FIRST_ROOT);
            symbol.AddDoc(doc);
            doc = null;
          }
      }
    CheckAction(symbol);
  }

  /**
   * This method checks whether the specified string token is
   * a known terminal symbol. If not, creates it and records it in
   * the terminal table. In all cases, it then checks if this token
   * follows (what would be) an intermediate semantic action which
   * would necessitate creation of an implicit empty rule.
   *
   * @param token a string token
   */
  private void CheckTerminal (string token)
  {
    Terminal symbol = GetTerminal(token);
    if (symbol == null)
      symbol = NewTerminal(token);
    CheckAction(symbol);
  }

  /**
   * This method checks whether an intermediate semantic action has
   * been read immediately prior to the specified grammar symbol. If
   * so, it creates a new empty rule whose LHS is a new non-terminal
   * symbol corresponding to the action (as in Yacc), and resets action
   * reading. Otherwise, this simply adds the grammar symbol to the
   * symbol sequence for the rule being read.
   *
   * @param symbol a grammar symbol
   */
  private void CheckAction (GrammarSymbol symbol)
  {
    if (ruleAction != null) // Do we have an intermediate action?
      { // Yes - create new empty rule for it
        NonTerminal n = NewSymbol();
        GrammarSymbol[] sequence = new GrammarSymbol[1];
        int offset = ruleSequence.Count-1;
        sequence[0] = n;
        new Rule(sequence,
                 OffsetActionNodes(ruleAction,offset),
                 OffsetActionNodes(ruleUndoAction,offset),
                 nodeCast);
        ruleSequence.Add(n);
        // $$ makes no sense inside an intermediate action: warn.
        if (containsHeadReference)
          Warning("pseudo-variable $$ in intermediate action "+Location);
        ResetActionParameters();
      }
    ruleSequence.Add(symbol);
  }

  /**
   * Resets the global parameters pertaining to the current rule to
   * enable processing a new rule.
*/
  private void ResetRuleParameters ()
  {
    ruleTag = null;       // reset the tag to nil
    ruleSequence.Clear(); // reset the sequence of grammar symbols
    ResetActionParameters();
  }

  /**
   * Resets the global parameters pertaining to the current semantic
   * action to enable processing a new semantic action.
   */
  private void ResetActionParameters ()
  {
    ruleAction = null;             // reset the bottom-up (forward) semantic action
    ruleUndoAction = null;         // reset the top-down (backward) semantic action
    ruleActionCast = "";           // reset the bottom-up action's type cast
    ruleUndoActionCast = "";       // reset the top-down action's type cast
    nodeCast = false;              // reset the LHS's node's type cast to none
    containsHeadReference = false; // reset the head reference flag to false
  }

  /**
   * Returns a String which corresponds to the input String
   * action in which all pseudo-variables have been replaced by the
   * appropriate expressions denoting the corresponding stack reference offsets.
   * The offset input parameter is the length n of the rule's RHS:
   *
   * <pre>
   *          r[0] -> r[1], ..., r[n]
   * </pre>
   *
   * so that $i is mapped to the RHS's symbol r[i] which
   * will be located at the stack reference n-i at parse time.
   *
   * This scans the action for node reference patterns of the form
   * "(_rule_,i)" and replaces them with
   * "(_rule_,offset-i)". This works correctly
   * for calls such that offset == RHS.Count-1.
   *
   * NOTE(review): the code below actually emits i-offset (v-offset),
   * i.e. the negation of the "offset-i" stated above -- presumably the
   * comment's sign is loose and negative/zero offsets denote deep stack
   * references; confirm against the parse-stack layout before changing.
   *
   * @param action a string (program code)
   * @param offset an integer (rule's RHS's size)
   * @return the action where rule node references are now parse stack references.
   */
  private string OffsetActionNodes (string action, int offset)
  {
    if (action == null || action.Length == 0)
      return EMPTY_ACTION; // no action: get out!
    StringBuilder buffer // result accumulator
      = new StringBuilder(bufferSize);
    int length = action.Length;
    int i = 0;
    char c = action[i];
    for (;;)
      {
        if (c < '0' || c > '9')
          { // copy verbatim all chars but numerals
            buffer.Append(c);
            if (++i == length) break;
            c = action[i];
            continue;
          }
        if (NodeReference(action,i)) // Is this numeral following a node reference?
          { // yes: read off the node's number:
            int v = 0;
            do
              {
                v = v * 10 + (c - '0');
                if (++i == length) break;
                c = action[i];
              }
            while ('0' <= c && c <= '9');
            // and translate it as its stack offset:
            buffer.Append((v-offset).ToString());
          }
        else
          { // copy it, and all the numerals following it, verbatim:
            do
              {
                buffer.Append(c);
                if (++i == length) break;
                c = action[i];
              }
            while ('0' <= c && c <= '9');
          }
        if (i == length) break;
      }
    // return the string value of the result accumulator:
    return buffer.ToString();
  }

  /**
   * Returns true iff there is a node reference (of the form
   * "pattern") ending at index i in the input
   * string action.
   */
  private bool NodeReference (string action, string pattern, int i)
  {
    int width = pattern.Length;
    return (i > width-1 && action.Substring(i-width,width).Equals(pattern));
  }

  /**
   * Returns true iff there is a node reference (of the form
   * "(_rule_,") ending at index i in the input
   * string action.
*/
  private bool NodeReference (string action, int i)
  {
    return NodeReference(action,"(_rule_,",i);
  }

  /**
   * Reads a semantic action into either of this grammar's
   * ruleAction (resp., ruleUndoAction)
   * variable (a String).
   */
  private void ReadAction () // throws Exception
  {
    if (ruleAction == null)
      // No semantic action has been read:
      // Translate forward into the semantic action
      TranslateRuleAction(true);
    else
      // The forward semantic action has already been read:
      if (isDynamic && ruleUndoAction == null)
        // Undo actions are allowed but no undo action has been read:
        // Translate backward into the undo semantic action.
        TranslateRuleAction(false);
      else
        throw new Exception("Illegal action occurrence in rule");
  }

  /**
   * A container recording the known type casts for node references
   * in the current semantic action.
   */
  private HashSet knownCasts = new HashSet();

  /**
   * Translates the forward or backward semantic action and sets it
   * into the global variable Strings: ruleAction
   * and ruleUndoAction. Each of these is assumed equal to
   * null when translateRuleAction is called.
   *
   * @param forward the action's direction
   */
  private void TranslateRuleAction (bool forward) // throws Exception
  {
    SetSyntax(RAW_MODE); // Make all characters ordinary
    knownCasts.Clear();  // Clear the known type casts
    StringBuilder buffer = new StringBuilder(bufferSize);
    int nesting = 1;     // Nesting of braces
    while (nesting > 0)
      {
        switch (GetToken())
          {
          case BEGIN_SCOPE:
            buffer.Append((char)BEGIN_SCOPE);
            nesting++;
            break;
          case END_SCOPE:
            // The outermost closing brace ends the action; inner ones
            // are part of the action's code.
            if (nesting > 1)
              buffer.Append((char)END_SCOPE);
            nesting--;
            break;
          case PSEUDO_VAR:
            // '$': translate the pseudo-variable into a stack reference.
            buffer.Append(StackReference(forward));
            break;
          case NEWLINE:
            buffer.Append("\n ");
            break;
          case TAB:
            buffer.Append("\t ");
            break;
          default:
            buffer.Append((char)st.TType);
            break;
          }
      }
    SetSyntax(NORMAL_MODE);
    // Prepend any type-cast declarations generated by TypeCast.
    if (forward)
      ruleAction = ruleActionCast + buffer.ToString();
    else
      ruleUndoAction = ruleUndoActionCast + buffer.ToString();
  }

  /**
   * Reads and return an integer.
*/
  /**
   * Reads the decimal digits following the specified first digit
   * (already consumed, passed as c) and returns the resulting
   * nonnegative integer value. The first non-digit character read is
   * pushed back onto the stream.
   *
   * @param c the first digit character, as returned by GetToken()
   */
  private int ReadNumber (int c) // throws Exception
  {
    int v = 0;
    do
      {
        v = v * 10 + (c - '0');
        c = GetToken();
      }
    while ('0' <= c && c <= '9');
    st.PushBack();
    return v;
  }

  /**
   * Returns the String corresponding to the translation
   * into a stack reference of a pseudo-variable in a semantic action.
   *
   * @param forward is true if top-down action, false otherwise.
   */
  private string StackReference (bool forward) // throws Exception
  {
    string stref;
    int n; // the stack offset;
    int token = GetToken(); // NB: we read in ordinary mode (one char at a time)
    switch (token)
      {
      case PSEUDO_VAR:
        // This is the result (LHS) to push on the stack;
        // check if typecast is needed & translate as '_head_':
        stref = TypeCast(forward,0,"_head_");
        containsHeadReference = true;
        break;
      case '1': case '2': case '3': case '4': case '5': case '6':
      case '7': case '8': case '9':
        // translate as reference at offset n
        // after checking whether to typecast:
        n = ReadNumber(token);
        stref = TypeCast(forward,n,"Node(_rule_,"+n+")");
        break;
      case '0': // translate as deep reference
        stref = DeepStackReference(0,"Node(_rule_,0)");
        break;
      case '-': // translate as deep reference
        // BUG FIX: the original passed the '-' character itself into
        // ReadNumber, whose first iteration folded ('-' - '0') == -3 into
        // the accumulated value, corrupting every negative pseudo-variable
        // (e.g., $-1 yielded 29 instead of -1). ReadNumber expects its
        // argument to be the FIRST DIGIT, so consume the next token (the
        // digit following the minus sign) and negate the result.
        n = -ReadNumber(GetToken());
        stref = DeepStackReference(n,"Node(_rule_,"+n+")");
        break;
      default:
        throw new Exception("Ill-formed pseudovariable");
      }
    return stref;
  }

  /**
   * Returns the String corresponding to the translation of
   * a stack reference at given offset n by generating all
   * appropriate and necessary type adjustments in the case where
   * the parse node it references has been declared of a more
   * specific type by the %nodeclass command. In the
   * process, when a type cast must be generated, the action (or
   * undo action) is prepended with the appropriate local
   * declarations and type checking code.
   *
   * @param forward is true if top-down action, false otherwise.
* @param n the stack offset
 * @param reference the (partial) reference translation thus far
 */
  private string TypeCast (bool forward, int n, string reference) // throws Exception
  {
    StringBuilder buffer = new StringBuilder(bufferSize);
    if (n >= ruleSequence.Count)
      throw new Exception("Pseudovariable out of range: $"+n);
    GrammarSymbol symbol = (GrammarSymbol)ruleSequence[n];
    // Only nonterminals declared by %nodeclass carry a specific node type.
    string type = symbol is NonTerminal
      ? ((NonTerminal)symbol).NodeType
      : null;
    string rf;
    if (type == null)
      rf = reference;
    else
      {
        nodeCast = true;
        rf = "_node"+n+"_";
        // Generate the cast's local declaration only once per action.
        if (!knownCasts[rf])
          {
            buffer.Append(" ").Append(type).Append(" ").Append(rf);
            if (n == 0)
              // $$: wrap the head node in the declared type.
              buffer.Append(" = new ").Append(type).Append("(_head_);")
                .Append("\n _head_ = (").Append(type)
                .Append(")").Append(rf).Append(";\n");
            else
              // $n: cast if already the right type, otherwise wrap and
              // replace the node on the parse stack.
              buffer.Append(";\n if (").Append(reference)
                .Append(" is ").Append(type).Append(")")
                .Append("\n ").Append(rf).Append(" = (")
                .Append(type).Append(")").Append(reference).Append(";")
                .Append("\n else\n {")
                .Append("\n ").Append(rf).Append(" = new ")
                .Append(type).Append("(").Append(reference).Append(");")
                .Append("\n ReplaceStackNode(_rule_,")
                .Append(n).Append(",").Append(rf).Append(");")
                .Append("\n }\n");
            knownCasts[rf] = true;
          }
      }
    // Export the local buffer according to the direction:
    if (forward)
      ruleActionCast += buffer.ToString();
    else
      ruleUndoActionCast += buffer.ToString();
    // Return the translated referent:
    return rf;
  }

  /**
   * Returns the translation string for a deep stack reference given
   * the specified integer offset and node reference string, issuing
   * a warning in the process.
   */
  private string DeepStackReference (int n, string reference) // throws Exception
  {
    // Warn only once per action for each distinct deep reference.
    if (!knownCasts[reference])
      {
        knownCasts[reference] = true;
        Warning("Unsafe stack reference: "+n+" "+Location
                +" - no type cast was generated");
      }
    return reference;
  }

  /**
   * Copies the remainder of the grammar file into the specified
   * StreamWriter, which is the generated parser file.
Include
 * commands which start a line will be processed by copying the
 * contents of the included file, which may itself contain include
 * commands.
 */
  internal void CopyRemainder (StreamWriter p_out) // throws IOException
  {
    string include = (char)COMMAND_START + INCLUDE_COMMAND;
    for (string line = rd.ReadLine(); line != null; line = rd.ReadLine())
      if (line.StartsWith(include))
        {
          // An include command at start of line: switch the reader onto
          // the included file instead of copying the line.
          string file = GetInclude(line.Substring(include.Length+1));
          Say("*** Including file "+file);
          rd.Include(file);
        }
      else
        p_out.Write(line+"\n");
    rd.Close();
  }

  /**
   * Returns the file name to include specified in the given line.
   * If the first substring past white spaces is single-quoted or
   * double-quoted, then the name is what's between quotes (allowing
   * for backslashes escapes). Otherwise, it is the first substring
   * of the line not containing white spaces. Throws an IOException
   * if such a name cannot be extracted.
   */
  private string GetInclude (string line) // throws IOException
  {
    int start = 0;
    int end;
    // Skip white spaces:
    while (start < line.Length && Char.IsWhiteSpace(line[start]))
      start++;
    if (start == line.Length)
      throw new IOException("Bad argument to include in file "
                            + rd.FileName + ", line " + rd.LineNumber);
    if (line[start] != '"' && line[start] != '\'')
      { // Unquoted argument: extract everything until white space
        end = start;
        while (end < line.Length && !Char.IsWhiteSpace(line[end]))
          end++;
        return line.Substring(start,end-start);
      }
    // Quoted argument: extract what's between quotes
    // (a quote preceded by a backslash does not close the string).
    char quote = line[start];
    end = ++start;
    while (end < line.Length && (line[end] != quote || line[end-1] == '\\'))
      end++;
    if (end == line.Length)
      throw new IOException("Bad argument to include in file "
                            + rd.FileName + ", line " + rd.LineNumber);
    return line.Substring(start,end-start);
  }

  /**
   * Reads and process the remainder of the grammar specification
   * following the rules.
*/ private void ReadRemainder () // throws Exception { // all characters are ordinary at this point // and the next token to be read is COMMAND_START. GetToken(); // skip the COMMAND_START readingRemainderSection = true; StringBuilder buffer = new StringBuilder(bufferSize); for (;;) { switch (GetToken()) { case StreamTokenizer.TT_EOF: if (!IsVacuous(buffer)) ancillaryClasses.Add(buffer); return; case COMMAND_START: if (st.Peek() == USEFILE_COMMAND[0]) // e.g., a 'u' must follows the COMMAND_START (%usefile) { SetSyntax(NORMAL_MODE); GetToken(); if (String.Intern(st.SValue) == USEFILE_COMMAND) { if (!IsVacuous(buffer)) { ancillaryClasses.Add(buffer); buffer = new StringBuilder(bufferSize); } DeclareList(ancillaryClasses); } else buffer.Append(st.SValue); continue; } goto default; default: buffer.Append((char)st.TType); break; } } } /** * Returns true iff the specified buffer contains only white space. */ private static bool IsVacuous (StringBuilder buffer) { for (int i = 0; iBUILDING THE GRAMMAR */ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // BUILDING THE GRAMMAR \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /** * Initiates bulding the grammar. */ private void BuildGrammar () // throws BadGrammarException { ReportProgress(); PreprocessGrammar(); ReportProgress(); ComputeStates(); ReportProgress(); PropagateLookaheads(); ReportProgress(); } /**

ANALYZING THE GRAMMAR

 */
//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\
// ANALYZING THE GRAMMAR                                              \\
//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\

/**
 * Runs the three grammar preprocessing passes, in dependency order:
 * FIRST sets, then the L graph, then the PATH sets.
 */
private void PreprocessGrammar ()
{
  ComputeFirsts();
  ComputeLGraph();
  ComputePaths();
}

/**
 * This computes the FIRST set for all the symbols in the grammar.
 * This set is defined in the Dragon Book, page 189. This method
 * uses the fix-point algorithm described there. It also computes
 * the FIRST sets for all item suffixes for all the rules.
 */
private void ComputeFirsts ()
{
  // Initialize the FIRST set of all terminals to contain the
  // terminal itself.
  foreach (Terminal t in terminals)
    {
      t.First = new SetOf(terminals);
      if (t.IsEmpty)
        t.IsNullable = true;
      else
        t.First.Add(t);
    }

  // Initialize the FIRST set of all non terminals to the empty set.
  foreach (NonTerminal n in nonterminals)
    n.First = new SetOf(terminals);

  // Close all the FIRST sets by fixpoint iteration.
  int index;
  SetOf previous;
  bool progress;

  do
    {
      progress = false;
      foreach (Rule rule in rules)
        {
          NonTerminal head = rule.Head;
          index = 1;
          // Snapshot of the head's FIRST set to detect growth below:
          previous = new SetOf(head.First);
          // NOTE(review): source text is garbled here (HTML-escape damage
          // swallowed everything between "i" and "rule.NullableIndex") —
          // the inner loop accumulating FIRST over the rule's sequence is
          // missing and must be restored from the original source.
          for (int i=1; i rule.NullableIndex)
            {
              progress = true;
              rule.NullableIndex = index;
            }
          if (rule.NullableIndex == rule.Sequence.Length)
            {
              // The whole right-hand side is nullable, so the head is too:
              progress |= !head.IsNullable;
              head.IsNullable = true;
            }
        }
    } while (progress);

  CheckEmptyFirsts();

  // Compute FIRST sets for all item suffixes.
  foreach (Item item in items)
    item.ComputeSuffixFirst();
}

/**
 * Checks whether some non-terminals are groundless (i.e., never
 * derive any terminal), reporting a warning or throwing an exception
 * if the grammar is processed to be non-permissible.
*/ private void CheckEmptyFirsts () { ArrayList groundless = new ArrayList(); foreach (NonTerminal n in nonterminals) if (n.First.IsEmpty && !n.IsNullable) groundless.Add(n); if (groundless.Count != 0) { Warning("Groundless nonterminal symbols."); if (groundless.Count > 1) GripeSome("\n*** These non-terminal symbols do "); else GripeSome("\n*** This non-terminal symbol does "); Gripe("not derive any terminal:\n"); foreach (NonTerminal n in groundless) Gripe("\t"+n); Gripe(); if (!IsPermissible) { Gripe("*** Check the grammar in File "+GrammarName+ ": correct it, or use Jacc with the -i option."); throw new BadGrammarException("Groundless nonterminal symbols"); } } } //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /** * The domain of the L relation. It will contain all the nonterminals * X such that X L Y for some Y. */ private SetOf LDomain; /** * The range of the L relation. It will contain all the nonterminals * Y such that X L Y for some X. */ private SetOf LRange; /** * The roots of the L relation (i.e., LDomain-LRange). */ private SetOf LRoots; /** * The inner nodes of the L relation (i.e., in both LDomain * and LRange). */ private SetOf LInners; /** * A set containing all the L-related nonterminals in topological * order (w.r.t. a depth-first spanning tree of the L-graph). This * ordering used backwards allows to minimize the number of fixpoint * iterations when computing the transitive closure of the L relation * and in the computation of the PATH sets. */ private ArrayList LOrder = new ArrayList(); /** * This computes the "L graph" of the grammar. It is the reflexive * transitive closure of the L relation on nonterminals defined as: * A L B if A -> B ... for some production rule. It sets the LSet * field of all nonterminals to the set of their L-successors in 0 * or more steps. 
 */
private void ComputeLGraph ()
{
  // Initialize each nonterminal's LSet to contain that nonterminal
  // and the set of nonterminals that are the leftmost symbols in a
  // rule for that nonterminal. In other words, computes the reflexive
  // closure of the L relation. It also initializes the paths for
  // each nonterminal with an empty path to itself.
  LDomain = new SetOf(nonterminals);
  LRange = new SetOf(nonterminals);

  foreach (NonTerminal n in nonterminals)
    {
      n.InitPaths();
      n.LSet = new SetOf(nonterminals);
      n.LSet.Add(n);
      foreach (Rule rule in n.Rules)
        {
          GrammarSymbol symbol = rule.LeftMost;
          if (symbol is NonTerminal)
            {
              // One L edge: n L symbol, via this rule's leftmost position.
              n.LSet.Add(symbol);
              LDomain.Add(n);
              LRange.Add(symbol);
            }
        }
    }

  LRoots = SetOf.Minus(LDomain,LRange);
  LInners = SetOf.Intersection(LDomain,LRange);

  // Build the (generalized) topological ordering of nonterminals
  // using a depth-first traversal of the L-relation graph. Note
  // that we first push the inner nodes into the stack and then
  // the roots. This guarantees that all paths through the L-graph
  // will be explored, including cycles unreached from any root.
  Stack stack = new Stack();
  foreach (object o in LInners) stack.Push(o);
  foreach (object o in LRoots) stack.Push(o);

  SetOf deja_vu = new SetOf(nonterminals);
  while (stack.Count != 0)
    {
      NonTerminal n = (NonTerminal)stack.Pop();
      if (!deja_vu[n])
        {
          LOrder.Add(n);
          deja_vu[n] = true;
          foreach (object o in n.LSet) stack.Push(o);
        }
    }

  // Iterate backwards through the ordered L-related nonterminals
  // (i.e., from leaves to roots) closing the LSets until nothing
  // more is added to any LSet.
  bool progress;
  SetOf temp;

  do
    {
      progress = false;
      for (int i = LOrder.Count-1; i>=0; i--)
        {
          NonTerminal n = (NonTerminal)LOrder[i];
          // temp = LSet(n) union of all LSets of n's L-successors:
          temp = new SetOf(n.LSet);
          foreach (NonTerminal symbol in n.LSet)
            temp.Union(symbol.LSet);
          progress |= !n.LSet.IsEqualTo(temp);
          n.LSet = temp;
        }
    } while (progress);
}

//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\

/**
 * This computes the sets PATH(A,B) for nonterminals
 * A and B such that A L* B. It is
 * defined as the union of all FIRST(S_n...S_1) such
 * that A -> A_1 S_1, ..., A_(n-1) -> B S_n.
 *
 *
 * It is computed like the L* relation but now the gathered
 * information is finer; i.e., it is not enough to record that
 * X L Y, we also need to remember through which particular
 * rule this relation is realized. So, we need to compute the relation
 * A L B R which holds iff A L B with R = A -> B S.
 * Thus, we cannot rely only on just a set of nonterminals like
 * LSet, but need, for each nonterminal A in the
 * domain of L, a table associating a nonterminal B to a
 * set of rules R such that A L B R. However, it
 * is more convenient to have these edges backwards for calculating
 * the path sets. So this builds, for each nonterminal A in
 * the range of L, a table associating a nonterminal B to a
 * set of rules R such that B L A R. These reverse
 * links are recorded in a nonterminal's LTable.
 */
private void ComputePaths ()
{
  // Initialize LTables for nonterminals in the domain of L by
  // creating reverse rule-labeled links from each non-terminal
  // N to P whenever P L N.
  foreach (NonTerminal n in LDomain)
    {
      foreach (Rule rule in n.Rules)
        {
          GrammarSymbol symbol = rule.LeftMost;
          if (symbol is NonTerminal)
            ((NonTerminal)symbol).AddLRule(n,rule); // Note the reverse links!
        }
    }

  // Iterate backwards through the topologically ordered L-related
  // nonterminals (i.e., from leaves to roots) building the rule paths
  // until none needs to be added; that is, until FIRST sets along
  // all paths are stationary.
  bool progress;
  ArrayList newPaths = null;

  do
    {
      progress = false;
      // For all nonterminals in the L-Graph:
      for (int i = LOrder.Count-1; i>=0; i--)
        {
          NonTerminal n = (NonTerminal)LOrder[i];
          // If n is not a root
          if (!LRoots[n])
            {
              ArrayList paths = n.Paths;
              newPaths = new ArrayList();
              // For each path starting from n:
              foreach (RulePath path in paths)
                // For each L-predecessor pred of n:
                foreach (NonTerminal pred in n.LTable.Keys)
                  // For each rule linking pred to n:
                  foreach (Rule rule in n.GetLRules(pred))
                    // Record the path p.rule:
                    newPaths.Add(path.Prepend(rule));
              // Validate the new paths (AddPath reports whether the
              // path was actually new, driving the fixpoint):
              foreach (RulePath newpath in newPaths)
                progress |= newpath.Start.AddPath(newpath);
            }
        }
    } while (progress);
}

//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\

/**
 * A stack to store newly generated states before they can be
 * processed to generate their successors.
 */
private Stack new_states = new Stack();

/**
 * Adds the given state to the set of states, and records it
 * in the new_state stack.
 */
private void AddNewState (State state)
{
  state.Add();
  scount++;
  stateTable[state] = state;
  new_states.Push(state);
}

/**
 * Given a newly generated state, this checks whether it already
 * exists. If so, returns the previously recorded state. If not,
 * records this as a new state and returns it.
 */
internal State CheckNewState (State state)
{
  State old_state = (State)stateTable[state];
  if (old_state != null)
    return old_state;
  AddNewState(state);
  return state;
}

/**
 * The initial state of the parsing automaton.
 */
private State initState;

/**
 * This computes the LALR states. It generates the states using
 * the LR(0) set-of-items construction (see Dragon Book, pages
 * 223-225), and then separates the kernel items from the set of
 * items set in each state and computes the PRED relation defined
 * in the Park-Choe-Chang article by fixpoint iteration.
 */
private void ComputeStates ()
{
  // Seed the initial state with the initial rule's item,
  // close it, and record it as a new state:
  initState = new State(GetItem((Rule)(_START.Rules[0]),1));
  initState.Closure();
  AddNewState(initState);

  // For each recorded new state, compute its next states:
  do
    ((State)(new_states.Pop())).ComputeNextStates();
  while (new_states.Count != 0);

  // Compute the kernels and predecessors of each state:
  bool progress = false;
  // NOTE(review): a large span of source text was lost here (HTML-escape
  // damage): the kernel/PRED fixpoint loop, the end of this method, the
  // PropagateLookaheads driver, and the opening of the doc comment for
  // the `follows` field are missing — restore from the original source.
  for (int i=0; iSetOf(follows) objects.
 */
private ArrayList follows = new ArrayList();

// Read-only access to the list of follow sets.
internal ArrayList Follows { get { return follows; } }

/**
 * The domain of the Follow digraph.
 */
private SetOf FDomain;

/**
 * The range of the Follow digraph.
 */
private SetOf FRange;

/**
 * The set of roots of the Follow digraph (i.e., FDomain
 * - FRange).
 */
private SetOf FRoots;

/**
 * The set of inner nodes of the Follow digraph (i.e., in both
 * FDomain and FRange).
 */
private SetOf FInners;

/**
 * This builds the Follow digraph using the paths and the state kernels.
 * It also generates and initializes the set of all possible follow sets.
 * This is Algorithm ND1 in the Park-Choe-Chang article.
 */
private void BuildFollowGraph ()
{
  // // Put EOI in the follow set of the user-defined start symbol in state 0:
  // initState.getFollow(StartSymbol()).AddFollow(_END_OF_INPUT);

  // Put EOI in the Follow set of the ROOTS symbol in state 0:
  initState.GetFollow(_ROOTS).AddFollow(_END_OF_INPUT);

  // For each state:
  foreach (State state in states)
    // For each kernel item in the state:
    foreach (Item item in state.Kernels)
      // If the item has a nonterminal marker, let item = A -> PB.S
      if (item.MarkerIsNonTerminal)
        {
          NonTerminal marker = (NonTerminal)item.Marker;
          // Let f be FOLLOW(state,B)
          Follow f = state.GetFollow(marker);
          // Add FIRST(S) to FOLLOW(state,B)
          f.AddFollows(item.SuffixFirst);
          // If FIRST(S) derives the empty symbol
          if (item.IsNullable)
            // For each state pred in PRED(state,P)
            foreach (State pred in item.Pred(state))
              // For each kernel item pitem in pred
              foreach (Item pitem in pred.Kernels)
                // If pitem has a nonterminal marker - let pitem = C -> QD.T
                if (pitem.MarkerIsNonTerminal)
                  {
                    NonTerminal pmarker = (NonTerminal)pitem.Marker;
                    NonTerminal head = item.Rule.Head;
                    // if D L* A
                    if (pmarker.LSet[head])
                      {
                        SetOf paths = pmarker.Path(head);
                        // Add PATH(D,A) to FOLLOW(state,B)
                        f.AddFollows(paths);
                        Follow pf = pred.GetFollow(pmarker);
                        // A nullable path induces an edge in the
                        // Follow digraph (pf propagates into f):
                        if (pmarker.PathIsNullable(head))
                          {
                            pf.AddPred(f);
                            FDomain.Add(pf);
                            FRange.Add(f);
                          }
                      }
                  }
        }

  FRoots = SetOf.Minus(FDomain,FRange);
  FInners = SetOf.Intersection(FDomain,FRange);
}

/**
 * A list to record the topological ordering of Follow nodes.
 */
private ArrayList FOrder;

/**
 * This builds the topological ordering of a depth-first traversal of
 * the follow digraph.
 */
private void OrderFollowGraph ()
{
  Follow f;
  FOrder = new ArrayList(fcount);

  // Build the (generalized) topological ordering of Follow nodes
  // using a depth-first traversal of the follow digraph. Again,
  // we first push the inner nodes into the stack and then the
  // roots to guarantee that all paths through the digraph will
  // be explored, including cycles unreached from any root.
  Stack stack = new Stack();
  foreach (object o in FInners) stack.Push(o);
  foreach (object o in FRoots) stack.Push(o);

  SetOf deja_vu = new SetOf(follows);
  while (stack.Count != 0)
    {
      f = (Follow)stack.Pop();
      if (!deja_vu[f])
        {
          FOrder.Add(f);
          deja_vu[f] = true;
          foreach (object o in f.Preds) stack.Push(o);
        }
    }
}

/**
 * This traverses the digraph using the ordering and computes the
 * follow sets until there is no change.
 */
private void CloseFollowGraph ()
{
  bool progress;
  Follow f;

  do
    {
      progress = false;
      // NOTE(review): source text was lost here (HTML-escape damage): the
      // loop body closing the follow sets, the end of this method, and the
      // section banner are missing — restore from the original source.
      for (int i = 0; i SHOWING THE GRAMMAR */

//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\
// SHOWING THE GRAMMAR                                                \\
//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\

/**
 * Time values used to compute how long this grammar's processing takes.
 */
private DateTime startTime;
private DateTime readingStart;
private DateTime preprocessStart;
private DateTime buildingStart;
private DateTime propagationStart;

/**
 * This is the total time used to process this grammar.
 */
private double totalTime;
internal double TotalTime { get { return totalTime; } }

/**
 * Writes a message on the output stream ending with a new line.
 */
internal static void Say (string something)
{
  _out.Write(something);
  NewLine();
}

/**
 * Writes a message on the output stream.
 */
internal static void SaySome (string something)
{
  _out.Write(something);
}

/**
 * Writes a new line on the output stream.
 */
internal static void NewLine ()
{
  _out.Write("\n");
}

/**
 * Writes a message on the error stream.
 */
internal static void Gripe (string something)
{
  err.Write(something);
  err.Write("\n");
}

/**
 * Writes a partial message on the error stream.
 */
internal static void GripeSome (string something)
{
  err.Write(something);
}

/**
 * Writes a new line on the error stream.
 */
internal static void Gripe ()
{
  err.Write("\n");
}

/**
 * Reports a warning on the error stream.
 */
internal static void Warning (string msg)
{
  Gripe("*** WARNING: "+msg);
}

/**
 * Reports a warning on the error stream and beeps.
 */
public static void LoudWarning (string msg)
{
  Misc.Beep();
  Warning(msg);
}

/**
 * An indicator for reporting progress in processing this grammar.
 */
private int progress = 0;

/**
 * Reports progress in processing this grammar: each call advances to
 * the next of the five phase-reporting steps.
 */
private void ReportProgress ()
{
  switch (++progress)
    {
      case 1: ReportProgress_1(); break;
      case 2: ReportProgress_2(); break;
      case 3: ReportProgress_3(); break;
      case 4: ReportProgress_4(); break;
      case 5: ReportProgress_5(); break;
    }
}

/**
 * This is the current instant.
 */
private DateTime Now { get { return DateTime.Now; } }

/**
 * Returns the number of fractional milliseconds elapsed since
 * the specified time instance.
 */
private double Since (DateTime then)
{
  return (Now-then).TotalMilliseconds;
}

// Phase 1: reading the grammar file begins.
private void ReportProgress_1 ()
{
  startTime = Now;
  if (verbosity > Verbose.QUIET)
    Say("*** Reading grammar in file "+GrammarName+" ... ");
  readingStart = Now;
}

// Phase 2: reading done, analysis (preprocessing) begins.
private void ReportProgress_2 ()
{
  if (verbosity > Verbose.QUIET)
    {
      if (verbosity > Verbose.NORMAL)
        {
          Say("***\t... in "+Since(readingStart)+" ms");
          if (verbosity > Verbose.VERBOSE)
            ShowRules();
        }
      Say("*** Starting grammar analysis ... ");
      if (verbosity > Verbose.NORMAL)
        Say("***\tPreprocessing the grammar ... ");
    }
  preprocessStart = Now;
}

// Phase 3: preprocessing done, LR state construction begins.
private void ReportProgress_3 ()
{
  if (verbosity > Verbose.NORMAL)
    {
      Say("***\t... in "+Since(preprocessStart)+" ms");
      if (verbosity > Verbose.VERBOSE)
        ShowSymbols();
    }
  if (verbosity > Verbose.NORMAL)
    Say("***\tBuilding canonical LR states ... ");
  buildingStart = Now;
}

// Phase 4: state construction done, lookahead propagation begins.
private void ReportProgress_4 ()
{
  if (verbosity > Verbose.NORMAL)
    {
      Say("***\t ... in "+Since(buildingStart)+" ms");
      Say("***\tPropagating lookahead symbols ... ");
    }
  propagationStart = Now;
}

// Phase 5: propagation done; record the total processing time.
private void ReportProgress_5 ()
{
  if (verbosity > Verbose.NORMAL)
    Say("***\t ... in "+ Since(propagationStart)+" ms");
  if (verbosity > Verbose.QUIET)
    Say("*** Grammar analysis completed in "+ Since(preprocessStart)+" ms.");
  totalTime = Since(startTime);
}

/**
 * Displays this grammar's states.
 */
internal void ShowStates()
{
  NewLine();
  foreach (State state in states)
    state.Show();
}

/**
 * Displays this grammar's rules.
 */
private void ShowRules ()
{
  Say("\nRULES:\n");
  foreach (Rule rule in rules)
    Say(rule.ToString());
  NewLine();
}

/**
 * Displays this grammar's symbols: terminals (with precedence and
 * associativity), dynamic operators if any, and nonterminals (with
 * nullability, FIRST set, and LSet).
 */
private void ShowSymbols ()
{
  Say("\nTERMINALS:\n");
  Say("\t----------\t-------------\t--------");
  Say("\tPRECEDENCE\tASSOCIATIVITY\tTERMINAL");
  Say("\t----------\t-------------\t--------");
  foreach (Terminal t in terminals)
    Say("["+t.Index+"]"
        //+"\t"+PrologPrecedence(t.Precedence)
        +"\t"+t.Precedence
        +"\t\t"+Associativity(t)
        +"\t\t"+t
        );
  Say("\t----------------------------------------");
  if (operators.Count != 0)
    {
      Say("\nDYNAMIC OPERATORS:\n");
      Say("\t----------\t---------\t--------\t--------");
      Say("\tPRECEDENCE\tSPECIFIER\tOPERATOR\tCATEGORY");
      Say("\t----------\t---------\t--------\t--------");
      foreach (Operator o in operators)
        Say("["+o.Index+"]"
            //+"\t"+PrologPrecedence(o.Precedence)
            +"\t"+o.Precedence
            +"\t\t"+o.Specifier
            +"\t\t"+o.Name
            +"\t\t"+o.Category.Name
            );
      Say("\t---------------------------------------------------------");
    }
  Say("\nNON TERMINALS:\n");
  foreach (NonTerminal n in nonterminals)
    Say(" [" + n.Index + "]\t" + n
        + (n.IsNullable?"\t(nullable)":"")
        + "\n\tFIRST:\t " + n.First
        + "\n\tLSet:\t " + n.LSet
        + "\n"
        );
  NewLine();
}

/**
 * Returns a string description of the specified terminal's
 * associativity value.
 */
static string Associativity (Terminal t)
{
  // Dynamic operators carry their own associativity specifier:
  if (t.IsOperator)
    return "dynamic";
  switch (t.Associativity)
    {
      case LEFT_ASSOCIATIVE:
        return "left";
      case RIGHT_ASSOCIATIVE:
        return "right";
    }
  return "none";
}
}
}