//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // PLEASE DO NOT EDIT WITHOUT THE EXPLICIT CONSENT OF THE AUTHOR! \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ using System; using System.IO; using System.Text; using System.Collections; using Ilog.Language.Tools; using Ilog.Language.Util; namespace Ilog.Language.Syntax { /** * This is the class that uses the analysis provided by a grammar object * (see Grammar) and builds an LALR * parser's action and goto automata, reporting and * resolving conflicts as they may occur. It writes out a C# source file * which defines a class that extends either the class * Parsing.StaticParser or the class * Parsing.DynamicParser, both of which extend the class * Parsing.GenericParser, wherein all necessary data is * initialized for the parser's Parse() method to work. The * Parse() method assumes that the parser disposes of a class * implementing the Parsing.Tokenizer interface. This interface * defines the NextToken() method, which provides the stream of * tokens to be parsed. Such a tokenizer is necessary for the complete * generated parser to work. It typically looks like: * *

* *

   *             using Ilog.Language.Parsing;
   *
   *             public class MyTokenizer : Tokenizer
   *               {
   *                 public MyTokenizer ( ... )
   *                   { ...
   *                   }
   *                 public ParseNode NextToken()
   *                   { ...
   *                   }
   *               }
   * 
* *

* * For detailed explanations of all constructions and algorithms used * by this class, please refer to the following: *

*

* * @see Grammar * @see Options * @see Parsing.GenericParser * @see Parsing.StaticParser * @see Parsing.DynamicParser * * @version Last modified on Wed Jun 01 18:50:44 2005 by hak * @author Hassan Aït-Kaci * @copyright copy; 2000 ILOG, S.A. */ public class ParserGenerator { /** * Constucts a parser generator for the grammar specified by the * Options class. */ public ParserGenerator () { parserFile = parserPrefix+".cs"; BuildParser(); } /** * The grammar object. */ private Grammar grammar; /** * The output stream for messages while generating the parser. * Defaults to System.Console.Out. */ private TextWriter _out = Options.Out; /** * The output stream for messages while generating the parser. * Defaults to System.Console.Out. */ private TextWriter err = Options.Error; /** * The verbosity level for reporting progress messages. */ private int verbosity = Options.Verbosity; /** * The prefix of the generated parser class file name. Defaults to * "Parser". */ private string parserPrefix = Options.ParserPrefix; /** * The name of the generated parser class file name. Since it is a * C# program, its extension is ".cs". */ private string parserFile; /** * The output stream for the generated parser files. It is set by the * SetOuput method. */ private StreamWriter p_out; /** * Time values used to compute how long this parser generation takes. */ private DateTime startTime; private DateTime tableBuildingStart; private DateTime compressionStart; private double totalTime; /** * The default action. */ private string defaultAction = "_head_ = _head_.Copy(Node(_rule_,1));"; /** * Maximum index of parser table per initialization method. This is * to break down large size table initialization methods to help the * C# compiler digest them. Defaults to 1000 statements. */ private int initMethodSize = Options.InitMethodSize; /** * Continuation method name for parser table initialization. It is * used when the number of instructions in a method initializing a * table exceeds initMethodSize. */ private string initContinuation = null; /** * Continuation counter for parser table initialization continuations. */ private int initContinuationCount; /** * Test and (maybe) generate a continuation for a method initializing * a parser table at index i. */ private void TestInitContinuation (int i) // throws IOException { if (i>0 && i%initMethodSize == 0) { WL(); WL(" "+initContinuation+"_"+(++initContinuationCount)+"();"); WL(" }"); WL(); WL(" static void "+initContinuation+"_"+initContinuationCount+" ()"); WL(" {"); } } /***/ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // BUILDING THE PARSER \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /** * Builds the parser tables using the grammar's analysis data and * writes the parser file(s). */ private void BuildParser () { try { grammar = new Grammar(); if (!Options.DocOnly) { BuildTables(); WriteParser(); } } catch (Exception e) { Grammar.Warning("Parser generation aborted!"); if (!(e is BadGrammarException)) { WEL(e.ToString()); } Environment.Exit(1); // exit with non-zero status code } finally { Options.Out.Flush(); Options.Out.Close(); } } /** * Set to true if there are action conflicts. */ private bool conflict = false; /** * Number of unresolved S/R conflicts. */ private int usrCount; /** * Number of resolved S/R conflicts. */ private int rsrCount; /** * Number of R/R conflicts. */ private int rrCount; /** * This builds the action and goto tables from the information in * each state. This is a straightforward procedure (see, * e.g., Algorithm 4.10, Page 234, of the Dragon Book). */ private void BuildTables () { ReportProgress_1(); new Action(Action.ERROR).Add(); acount++; new Action(Action.ACCEPT).Add(); acount++; foreach (State state in grammar.States) foreach (StateTransition transition in state.Transitions.Values) { GrammarSymbol symbol = transition.Symbol; if (symbol is NonTerminal) state.SetGoto((NonTerminal)symbol,transition.Next); else if (symbol.IsEmpty) foreach (Item item in transition.Items) if (item.Rule.Head.IsSTART) state.SetAction(Grammar.END_OF_INPUT,AcceptAction); else foreach (Terminal lookahead in item.GetLookaheads(state)) conflict |= CheckConflict(state, lookahead, new Action(Action.REDUCE, item.Rule.Index)); else conflict |= CheckConflict(state, (Terminal)symbol, new Action(Action.SHIFT, transition.Next.Index)); } ReportProgress_2(); CompressTables(); ReportProgress_3(); } /** The set of action tables. */ private ArrayList ac_tables = new ArrayList(); /** The number of action tables. */ private int ac_count = 0; /** The number of eliminated action tables. */ private int ac_compression = 0; /** The set of goto tables. */ private ArrayList gt_tables = new ArrayList(); /** The number of goto tables. */ private int gt_count = 0; /** The number of eliminated goto tables. */ private int gt_compression = 0; /** * Compresses the parsing tables eliminiting empty and redundant * rows. */ private void CompressTables () { int index; ReportProgress_4(); foreach (State state in grammar.States) { index = ac_tables.IndexOf(state.ActionTable); if (index < 0) { ac_tables.Add(state.ActionTable); state.ActionTableIndex = ac_count++; } else { state.ActionTable = (Map)ac_tables[index]; state.ActionTableIndex = index; ac_compression++; } index = gt_tables.IndexOf(state.GotoTable); if (index < 0) { gt_tables.Add(state.GotoTable); state.GotoTableIndex = gt_count++; } else { state.GotoTable = (Map)gt_tables[index]; state.GotoTableIndex = index; gt_compression++; } } ReportProgress_5(); } /** * The set of actions. */ private static ArrayList actions = new ArrayList(); internal static ArrayList Actions { get { return actions; } } /** * A hash table for efficient retrieval of actions. */ private Hashtable actionTable = new Hashtable(); /** * The number of actions. */ private int acount = 0; /** * This is the canonical ERROR action. */ private Action ErrorAction { get { return (Action)actions[0]; } } /** * Returns the canonical ACCEPT action. */ private Action AcceptAction { get { return (Action)actions[1]; } } /** * Returns the canonical representative for this action. */ private Action CheckAction (Action action) { Action a = (Action)actionTable[action]; if (a != null) return a; action.Add(); acount++; actionTable[action] = action; return action; } /** * Set to true whenever a conflict is unresolved. */ private bool conflictIsUnresolved = false; /** * Checks if an action already exists for this symbol in this * state. If no previous action exists, this simply installs * action as a static action for this symbol in this * state. Otherwise, let old be the previous action. Note * that action, the new action to be checked, is always a * simple action - i.e., it is never a dynamic nor a choice * action. On the other hand, old may be of any type, * including DYNAMIC or CHOICE. * *

* * Let us first consider the simpler case where old is * not a choice action. Then, what must be done depends on * what the case is among six to consider: * *

    *
  1. * old is DYNAMIC and symbol or action * involves an operator: * * *
  2. * old is DYNAMIC and neither symbol nor * action involves an operator: * * *
  3. * old is an operator action and neither symbol nor action * involves an operator: *
  4. * old is an operator action and symbol or action involves * an operator: *
  5. * old is not an operator action and symbol or * action involves an operator: * * *
  6. * old is not an operator action and neither symbol nor * action involves an operator: * *
* *

* * Let us consider now the general case where the old action may * be a CHOICE action. Note that this affects only cases * (5) and (6) above. This gives two additional cases: * *

    * *
  1. * old is CHOICE and symbol or * action involves an operator: * * *
  2. * old is CHOICE and neither symbol nor * action involves an operator: * * *
* */ private bool CheckConflict (State state, Terminal symbol, Action action) { Action old = state.GetAction(symbol); if (old == null) { state.SetAction(symbol,CheckAction(action)); return false; } if (old.Type == Action.DYNAMIC) { ArrayList actions = (ArrayList)state.DynamicActions[old.Info]; if (symbol.IsOperator || IsOperator(action)) { // Case 1 if (!actions.Contains(action)) actions.Add(CheckAction(action)); } else { // Case 2 int i = FindContender(actions); if (i == -1) actions.Add(CheckAction(action)); else { Action contender = (Action)actions[i]; Action choice = ResolveConflict(contender,action,symbol); if (conflictIsUnresolved && Options.AllowChoiceActions) { actions.Add(CheckAction(action)); old.Type = Action.CHOICE; choice = old; conflictIsUnresolved = false; } else if (choice == action) actions[i] = action; else contender = action; TallyConflict(choice,contender,state,symbol); } } } else // old is not a dynamic action if (old.Type != Action.CHOICE) if (IsOperator(old) || symbol.IsOperator || IsOperator(action)) { // Cases 3, 4, and 5 ArrayList actions = new ArrayList(); actions.Add(old); actions.Add(CheckAction(action)); int index = state.DynamicActions.IndexOf(actions); if (index == -1) { index = state.DynamicActions.Count; state.DynamicActions.Add(actions); } state.SetAction(symbol, CheckAction(new Action(Action.DYNAMIC,index))); } else { // Case 6 Action choice = ResolveConflict(old,action,symbol); if (conflictIsUnresolved && Options.AllowChoiceActions) { ArrayList actions = new ArrayList(2); actions.Add(old); actions.Add(CheckAction(action)); int index = state.DynamicActions.IndexOf(actions); if (index == -1) { index = state.DynamicActions.Count; state.DynamicActions.Add(actions); } choice = new Action(Action.CHOICE,index); state.SetAction(symbol,CheckAction(choice)); conflictIsUnresolved = false; } else if (choice == action) state.SetAction(symbol,CheckAction(choice)); else old = action; TallyConflict(choice,old,state,symbol); } else // old is a choice action { ArrayList actions = (ArrayList)state.DynamicActions[old.Info]; if (symbol.IsOperator || IsOperator(action)) { // case 7 if (!actions.Contains(action)) actions.Add(CheckAction(action)); } else { // case 8 bool winAll = true; foreach (Action option in actions) if (IsOperator(option) || option == ResolveConflict(option,action,symbol)) { winAll = false; break; } if (winAll) state.SetAction(symbol,CheckAction(action)); else if (!actions.Contains(action)) actions.Add(CheckAction(action)); } } return conflictIsUnresolved; } /** * Records a message for reporting a conflict between the two * specified actions, in the given state, for the given symbol, as * appropriate according to optional conditions. */ private void TallyConflict (Action choice, Action contender, State state, Terminal symbol) { bool reportConflict = conflictIsUnresolved && !Options.AllowChoiceActions; string conflict = choice.Conflict(contender) + " conflict: choosing " + choice + "\tover " + contender; state.AddConflict((reportConflict ? "Unresolved " : "Resolved ") + conflict + ", \ton input "+symbol); if (reportConflict && verbosity > Verbose.NORMAL) ML("*** Unresolved "+conflict+",\tin state "+state +", \ton input "+symbol); } /** * Returns true iff the action is a reduction with a rule tagged * by a dynamic operator. */ private bool IsOperator (Action action) { return action.Type == Action.REDUCE && grammar.GetRule(action.Info).IsOperator; } /** * Returns the position of the non-operator action in this ArrayList * of dynamic actions, or -1 if none such exists. */ private int FindContender (ArrayList actions) { for (int i=0; itag. * A rule's tag is the last terminal that occurs in its body, or specified by * the %prec command. If a rule has no explicit tag, then it * defaults to the empty symbol (i.e., Grammar.EMPTY), whose * precedence is Grammar.MIN_PRECEDENCE and associativity is * Grammar.NON_ASSOCIATIVE. * *

* * N.B.: By default, R/R conflicts are resolved as yacc would * - namely, choosing the rule that appears first in the grammar. The * disadvantage of this is that rule order becomes significant. Thus, * another mode may be used that allows the rule with higher tag * precedence to win, and in case of equality, the earlier rule * wins. With this, one can use the %prec command to control * R/R conflict resolutions explicitly. The class Options * offers the static property Options.ResolveRRsWithPrecedence * to get and set for this convenience. Caveat Emptor: * Using this mode to resolve R/R conflict may interfere in subtle * ways with S/R conflict resolution which will quietly use the * specified rule precedence to choose between a shift and a reduce * action without reporting a conflict! One must thus be * acutely aware of all such interplays as they may give surprising * results. * *

* * Only implicitly resolved conflicts are reported as warnings - * namely, S/R conflicts that cannnot be resolved thanks to terminal * symbol's precedence and associativiy, and R/R conflicts resolved * in favor of an earlier rule (that is, if the yacc resolution mode * is used, or if not and the rules have equal precedences). * * @returns the winning action * @see Options * */ Action ResolveConflict (Action old, Action action, Terminal t) { conflictIsUnresolved = false; if (old.Type == Action.REDUCE) { if (action.Type == Action.REDUCE) { int old_prec = grammar.GetRule(old.Info).Precedence; int new_prec = grammar.GetRule(action.Info).Precedence; if (!Options.ResolveRRsWithPrecedence || old_prec == new_prec) { conflictIsUnresolved = true; rrCount++; // favor the earlier rule: return (old.Info < action.Info) ? old : action; } // favor the rule with higher precedence: return (old_prec > new_prec) ? old : action; } // So the new action is a shift: Rule r = grammar.GetRule(old.Info); rsrCount++; if (r.Precedence > t.Precedence) return old; // favor reduction if (r.Precedence < t.Precedence) return action; // favor shifting if (r.Associativity == Grammar.LEFT_ASSOCIATIVE) return old; // favor reduction if (r.Associativity == Grammar.RIGHT_ASSOCIATIVE) return action; // favor shifting if ((t.Associativity == Grammar.NON_ASSOCIATIVE) && (t == r.Tag)) { if (verbosity > Verbose.NORMAL) Grammar.Warning("Rule " + r.Index + " may compose the non-associative symbol: " + t); return ErrorAction; } rsrCount--; usrCount++; conflictIsUnresolved = true; return action; // otherwise, favor shifting } return ResolveConflict(action,old,t); } /***/ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // WRITING THE PARSER \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ /** * Outputs a newline in the generated parser file. */ private void WL () // throws IOException { p_out.Write('\n'); } /** * Outputs the specified string into the generated parser file ending * with a new line character. */ private void WL (string line) // throws IOException { p_out.Write(line+"\n"); } /** * Outputs the specified string into the generated parser file. */ private void W (string s) // throws IOException { p_out.Write(s); } /** * Outputs the specified character into the generated parser file. */ private void W (char c) // throws IOException { p_out.Write(c); } /** * Outputs a message on the error message stream. */ private void WEL (string msg) { err.Write(msg); err.Write("\n"); } /** * Outputs a message on the progress message output stream, ending * with a newline character. */ private void ML (string msg) { _out.Write(msg); _out.Write("\n"); } /** * Outputs a message on the progress message output stream. */ private void M (string msg) { _out.Write(msg); } /** * Sets the generated parser file to the specified file name. */ private void SetOutput (string fileName) // throws IOException { p_out = new StreamWriter(fileName); } /** * Outputs the preamble of the generated parser code. */ private void Preamble () // throws IOException { WL("// *******************************************************************"); WL("// This file has been automatically generated from the grammar in file"); WL("// "+Options.GrammarName+" by Ilog.Language.Syntax.ParserGenerator on"); WL("// "+DateTime.Now.ToLongDateString()+", at "+DateTime.Now.ToLongTimeString() +" --- !!! PLEASE DO NO EDIT !!!"); WL("// *******************************************************************"); WL(); if (grammar.PackageName != null) WL("namespace "+grammar.PackageName+"\n{"); WL("using System.IO;"); if (grammar.IsDynamic) WL("using Ilog.Language.Util;"); WL("using Ilog.Language.Parsing;"); foreach (object import in grammar.Imports) WL("using "+import+";"); WL(); } /** * Outputs each public class in a file on its own. NB: This is not * really needed in C# and is legacy from the Java version of this * program. Indeed, Java demands that there be no more than one * public class in each file, but not C# where we could output all * classes in the same file. But, since there is no harm using many * files in C#, we might as well keep it. */ private void PublicClasses () // throws IOException { foreach (DictionaryEntry entry in grammar.PublicClasses) { string className = (string)entry.Key; string classDef = entry.Value.ToString(); SetOutput(className+".cs"); Preamble(); WL(classDef); p_out.Close(); } } /** * The argument of a %usefile command. */ private string usefile; /** * Dumps the code recorded in the specified list into the generated * parser file, with the specifed banner string as a heading. */ private void DumpCode (ArrayList code, string banner) // throws IOException { if (code.Count != 0) { WL(banner); foreach (object element in code) { if (element is string) { usefile = (string)element; try { StreamReader rd = new StreamReader(usefile); ReportProgress_6(); WL("\n /* START OF CONTENTS OF FILE: "+usefile+" */\n"); for (string line = rd.ReadLine(); line != null; line = rd.ReadLine()) WL(line); rd.Close(); ReportProgress_7(); WL(" /* END OF CONTENTS OF FILE: "+usefile+" */\n"); } catch (FileNotFoundException nofile) { Grammar.Warning("Bad '%usefile' argument: "+nofile.Message); } catch (IOException io) { Grammar.Warning("Something wrong happened while reading: " +usefile+": "+io); } continue; } WL(element.ToString()); WL(); } } } /** * Writes the parser file. */ private void WriteParser () { if (Options.NoParser) return; ReportProgress_8(); Terminal t; NonTerminal n; Rule r; Action a; Map m; State s; string superClass = (grammar.IsDynamic ? "Dynamic" : "Static"); try { SetOutput(parserFile); Preamble(); WL(); WL("/* ************ */"); WL("/* PARSER CLASS */"); WL("/* ************ */"); WL(); WL(grammar.AccessTag+"class "+parserPrefix +" : "+superClass+"Parser\n{"); WL(" /* ************************ */"); WL(" /* PARSER CLASS CONSTRUCTOR */"); WL(" /* ************************ */"); WL(); WL(" public "+parserPrefix+" (Tokenizer t)\n {\n input = t;"); if (grammar.IsDynamic) { WL(" choiceStack = new FiniteStack("+Options.ChoiceHistory+");"); WL(" trailStack = new FiniteStack("+Options.TrailHistory+");"); WL(" resolveRRsWithPrecedence = "+ (Options.ResolveRRsWithPrecedence ? "true" : "false")+";"); } if (grammar.AdmitsOperators) { WL(); WL(" /* **************** */"); WL(" /* OPERATOR SYMBOLS */"); WL(" /* **************** */"); WL(); WL(" operators = new ArrayList(" +grammar.Operators.Count +");\n"); for (int i=0; i 0); M("\n["+i+"]"); for (j = 0; j 0) M("["+i+"]"); for (j = 0; j { "); for (int k = 0; k Verbose.QUIET) ML("*** Building parsing tables ... "); tableBuildingStart = Now; } private void ReportProgress_2 () { if (verbosity > Verbose.NORMAL) ML("***\t... in " + Since(tableBuildingStart) + " ms"); if (verbosity > Verbose.DETAILED) grammar.ShowStates(); if (!Options.AllowChoiceActions && usrCount + rrCount > 0) { string msg = "unresolved conflicts: "; if (usrCount > 0) { msg += usrCount + " shift/reduce"; if (rrCount >0) msg += "; "; } if (rrCount > 0) msg += rrCount + " reduce/reduce"; Grammar.Warning(msg); } } private void ReportProgress_3 () { if (verbosity > Verbose.DETAILED) ShowTables(); } private void ReportProgress_4 () { if (verbosity > Verbose.NORMAL) ML("*** Compressing parsing tables ... "); compressionStart = Now; } private void ReportProgress_5 () { if (verbosity > Verbose.NORMAL) { ML("***\t" + ac_compression + " rows eliminated in action table"); ML("***\t" + gt_compression + " rows eliminated in goto table"); ML("*** Table compression completed in " + Since(compressionStart) + " ms"); } } private void ReportProgress_6 () { if (verbosity > Verbose.QUIET) M("***\tIncluding file "+usefile+"... "); } private void ReportProgress_7 () { if (verbosity > Verbose.QUIET) ML("Done."); } private void ReportProgress_8 () { if (verbosity > Verbose.QUIET) ML("*** Writing parser file "+parserFile); } private void ReportProgress_9 () { totalTime = Since(startTime); if (verbosity > Verbose.QUIET) { ML("*** Parser generation completed in " + totalTime + " ms."); ML("*** Total processing time: " + (grammar.TotalTime+totalTime) + " ms.\n"); } } } }