//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // PLEASE DO NOT EDIT WITHOUT THE EXPLICIT CONSENT OF THE AUTHOR! \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ using System; using System.IO; using System.Text; using System.Collections; using Ilog.Language.IO; using Ilog.Language.Util; using Ilog.Language.Tools; namespace Ilog.Language.Syntax { using Stack = System.Collections.Stack; /** * This defines a class for processing a Jacc grammar * contained in a file and specified using the popular UNIX * yacc syntax for building a table-driven LR-parser. The * yacc syntax is adapted to handle Java code and extended with * additionnal functionalities. * *
An instance of this class can then be used to generate a C# * program implementing an LR parser for the specified grammar. * *
* It provides methods for: *
The format of the grammar file is essentially the same as that * required by yacc, with some minor differences, and a few * additional features. Not using the additional features makes it * essentially similar to the yacc format. For more details, * please read the description of the Jacc system and * grammar format assumed by this class and the rest of the * Ilog.Language.Syntax classes. * *
For detailed explanations of most constructions and algorithms * used by this package, please refer to the following: * *
*
\n"); // add a paragraph break
st.NextToken(); // skip the slash
if (docSymbol != null)
{
docSymbol.AddDoc(doc);
docSymbol = null;
doc = null;
return;
}
if (mainDocSection)
{
if (mainDoc == null)
mainDoc = new StringBuilder();
mainDoc.Append(doc);
}
else
{
if (this.doc == null)
this.doc = new StringBuilder();
this.doc.Append(doc);
}
}
/**
 * Skips a C-style (slash-star) comment. Assumes the opening star
 * has just been read; scans forward in raw mode until the closing
 * star-slash pair is found.
 * NOTE(review): this leaves the tokenizer in RAW_MODE on return;
 * callers are assumed to restore the mode — confirm.
 */
private void SkipSlashStarComment () // throws IOException
{
    st.PushBack();
    SetSyntax(RAW_MODE); // make every character an ordinary token
    // Consume characters until a '*' immediately followed by '/'.
    while (!(st.NextToken() == '*' && st.Peek() == '/'));
    st.SkipChar(); // skip slash
}
/**
 * [Re]Initializes the configuration of the StreamTokenizer
 * reading the grammar to its default settings: single and double
 * quotes act as quote characters, and slash-star comments are NOT
 * consumed by the tokenizer (they are handled explicitly; see
 * SkipSlashStarComment).
 */
private void SetGrammarSyntax ()
{
    st.SetDefaultSyntax();
    st.QuoteChar(SINGLE_QUOTE);
    st.QuoteChar(DOUBLE_QUOTE);
    st.SlashStarComments(false); // comments handled by this class, not the tokenizer
}
/**
 * The reader used to read the grammar files. An IncludeReader
 * supports nested file inclusion (see ProcessInclude) and exposes
 * the current file name and line number (see Location).
 */
private IncludeReader rd;
/**
 * A human-readable string describing the current position in the
 * grammar being read, of the form "(file NAME, line NUMBER)".
 * Used to prefix error messages.
 */
private string Location
{
    get
    {
        StringBuilder where = new StringBuilder("(file ");
        where.Append(rd.FileName)
             .Append(", line ")
             .Append(rd.LineNumber)
             .Append(')');
        return where.ToString();
    }
}
/**
 * This is the main reading method: opens the grammar file, builds
 * the tokenizer, then reads the declarations section followed by
 * the rules section. Any failure is rethrown as a
 * BadGrammarException; parse failures carry the current
 * file/line location.
 */
private void ReadGrammar () // throws BadGrammarException
{
    ReportProgress();
    try
    {
        st = new StreamTokenizer(rd = new IncludeReader(GrammarName));
    }
    catch (Exception e)
    {
        // Most likely cause: the grammar file could not be opened.
        throw new BadGrammarException("(file not found: "+e.Message+")");
    }
    SetSyntax(NORMAL_MODE);
    try
    {
        ReadDeclarations();
        ReadRules();
    }
    catch (Exception e)
    {
        // Prefix the underlying error with the location in the grammar.
        throw new BadGrammarException(Location + " - " + e.Message);
    }
}
/**
 * Reads and processes the grammar's declarations, one at a time,
 * until ReadDeclaration signals that the declarations section is over.
 */
private void ReadDeclarations () // throws Exception
{
    bool more = true;
    while (more)
        more = ReadDeclaration();
}
/**
 * Reads and processes a grammar declaration; returns false when
 * there is no more declaration to process, true otherwise.
 * Blank lines between declarations are skipped.
 */
private bool ReadDeclaration () // throws Exception
{
    // Skip end-of-line tokens iteratively. (The original recursed once
    // per EOL, which consumed a stack frame for every blank line.)
    for (;;)
    {
        switch (GetToken())
        {
            case COMMAND_START:
                return ExecuteNextCommand();
            case StreamTokenizer.TT_EOF:
                throw new Exception("Premature end of file");
            case StreamTokenizer.TT_EOL:
                continue; // blank line: keep looking for a declaration
            default:
                throw new Exception
                    ("Ill-formed syntax in grammar declarations section - "+Location);
        }
    }
}
/**
 * Reads and processes a command and its arguments; returns false
 * when there is no more declaration to process (i.e., on the
 * section separator command), true otherwise. Unknown commands are
 * accepted only if they name a previously declared dynamic
 * operator category (see DeclareDynamic).
 */
private bool ExecuteNextCommand () // throws Exception
{
    string command = ReadCommand();
    switch (command)
    {
        case SECTION_COMMAND:
            // End of the declarations section: what follows is no
            // longer the main documentation.
            mainDocSection = false;
            return false;
        case START_COMMAND:
            DeclareStart();
            break;
        case ROOT_COMMAND:
            DeclareRoot();
            break;
        case LEFT_COMMAND:
            DeclareLeftAssoc();
            break;
        case RIGHT_COMMAND:
            DeclareRightAssoc();
            break;
        case TOKEN_COMMAND: // non-associative by default
        case NONASSOC_COMMAND:
            DeclareNonassoc();
            break;
        case LBRACE_COMMAND:
            // Verbatim code section: copy everything up to the
            // matching close-brace command into the parser declarations.
            codeSection = true;
            parserDeclarations.Add(Verbatim((char)COMMAND_START + RBRACE_COMMAND));
            codeSection = false;
            break;
        case USEFILE_COMMAND:
            DeclareList(parserDeclarations);
            break;
        case PACKAGE_COMMAND:
            DeclarePackage();
            break;
        case IMPORT_COMMAND:
            DeclareList(imports);
            break;
        case PRECSTEP_COMMAND:
            DeclarePrecstep();
            break;
        case NODECLASS_COMMAND:
            DeclareNodeClass();
            break;
        case NODEPREFIX_COMMAND:
            DeclareNodePrefix();
            break;
        case NODESUFFIX_COMMAND:
            DeclareNodeSuffix();
            break;
        case DOC_COMMAND:
            ProcessDoc();
            break;
        case INCLUDE_COMMAND:
            ProcessInclude();
            break;
        case DYNAMIC_COMMAND:
            DeclareDynamic();
            break;
        case ACCESS_COMMAND:
            DeclareAccess();
            break;
        default:
            // Not a built-in command: accept it if it is the name of a
            // declared dynamic operator category.
            if (operatorCategoryTable[command] != null)
            {
                operatorCategory = GetNonTerminal(command);
                DeclareOperator();
                break;
            }
            throw new Exception("Unknown command: " + COMMAND_START + command);
    }
    return true;
}
/**
 * Reads and returns a grammar command: either the word following
 * COMMAND_START, or a single-character command. The only legal
 * single-character commands are LBRACE_COMMAND and SECTION_COMMAND.
 * The result is interned so it can be compared with '==' or 'switch'.
 */
private string ReadCommand () // throws Exception
{
    CheckCommandSyntax();
    string command = null;
    if (GetToken() == StreamTokenizer.TT_WORD)
        command = String.Intern(st.SValue);
    else
    {
        // Single ordinary character: its textual form is the command.
        command = String.Intern(((char)st.TType).ToString());
        if (command != LBRACE_COMMAND && command != SECTION_COMMAND)
            throw new Exception("Ill-formed grammar command: "+command);
    }
    return command;
}
/**
 * Checks whether what follows is a legitimate start of command
 * (without consuming it). Throws an exception if it is not.
 */
private void CheckCommandSyntax () // throws Exception
{
    int next = st.Peek(); // look ahead without consuming
    if (ValidCommandStartCharacter(next))
        return;
    throw new Exception("Invalid command start: " +
                        (char)COMMAND_START + (char)next + "...");
}
/**
 * Checks whether the specified character can start a command:
 * i.e., it is the first character of some built-in command or of
 * some declared dynamic operator category. Returns true iff it is ok.
 */
private bool ValidCommandStartCharacter (int c)
{
    char candidate = (char)c;
    foreach (String builtin in commands)
    {
        if (builtin[0] == candidate)
            return true;
    }
    foreach (String category in operatorCategoryTable.Keys)
    {
        if (category[0] == candidate)
            return true;
    }
    return false;
}
/**
 * Declares the next symbol to be read as the grammar's start symbol.
 * Fails if a start symbol is already defined, if the name denotes a
 * token, or if the symbol is already marked as the start symbol.
 * An unknown name is created as a fresh nonterminal.
 */
private void DeclareStart () // throws Exception
{
    if (StartIsDefined)
        throw new Exception("Start symbol ("+startSymbol+") cannot be redefined");
    if (GetToken() != StreamTokenizer.TT_WORD)
        throw new Exception("Ill-formed start symbol declaration");
    GrammarSymbol symbol = GetTerminal(st.SValue);
    if (symbol != null)
        throw new Exception("Token "+symbol+" cannot be the start symbol!");
    symbol = GetNonTerminal(st.SValue);
    if (symbol == null)
    {
        // Unknown name: create the nonterminal on the fly.
        DefineStart(NewNonTerminal(st.SValue));
        return;
    }
    NonTerminal known = (NonTerminal)symbol;
    if (known.IsStart)
        throw new Exception("Symbol "+symbol+" is already the start symbol!");
    DefineStart(known);
}
/**
 * Defines the specified symbol as the grammar's user-supplied start
 * symbol, adds the internal rule _ROOTS -> startSymbol, and makes
 * the symbol a root if it is not one already.
 */
private void DefineStart (NonTerminal symbol)
{
    (startSymbol = symbol).MakeStart();
    GrammarSymbol[] seq = new GrammarSymbol[] {_ROOTS, startSymbol};
    // NOTE(review): the Rule constructor appears to register the rule as
    // a side effect (the result is discarded) — confirm.
    new Rule(seq);
    if (!symbol.IsRoot)
        DefineRoot(symbol);
}
/**
 * Declares the next symbol to be read as a root symbol. Fails if
 * the name denotes a token or is already a root. An unknown name is
 * created as a fresh nonterminal.
 */
private void DeclareRoot () // throws Exception
{
    if (GetToken() != StreamTokenizer.TT_WORD)
        throw new Exception("Ill-formed root symbol declaration");
    GrammarSymbol symbol = GetTerminal(st.SValue);
    if (symbol != null)
        throw new Exception("Token "+symbol+" cannot be a root symbol!");
    symbol = GetNonTerminal(st.SValue);
    if (symbol == null)
    {
        // Unknown name: create the nonterminal on the fly.
        DefineRoot(NewNonTerminal(st.SValue));
        return;
    }
    NonTerminal known = (NonTerminal)symbol;
    if (known.IsRoot)
        throw new Exception("Symbol "+symbol+" is already a root symbol!");
    DefineRoot(known);
}
/**
 * Defines the specified symbol as a grammar root symbol. Creates a
 * dedicated "switch" terminal for the root, records it in the roots
 * table, and adds the internal rule
 * _ROOTS -> root_switch symbol. The first root defined is
 * remembered in FIRST_ROOT.
 */
private void DefineRoot (NonTerminal symbol)
{
    symbol.MakeRoot();
    Terminal root_switch = NewTerminal("$"+symbol.Name+"_switch$");
    roots[symbol] = root_switch;
    GrammarSymbol[] seq = new GrammarSymbol[] {_ROOTS, root_switch, symbol};
    // NOTE(review): the Rule constructor appears to register the rule as
    // a side effect (the result is discarded) — confirm.
    new Rule(seq,"_head_ = _head_.Copy(Node(_rule_,2));");
    if (FIRST_ROOT == null)
        FIRST_ROOT = symbol;
}
/**
 * Defines every string in the specified list as a token with the
 * specified associativity (see DefineToken).
 */
private void DefineTokens (ArrayList tokens, int associativity) // throws Exception
{
    for (int i = 0; i < tokens.Count; i++)
        DefineToken((string)tokens[i], associativity);
}
/**
 * Defines the specified string as a token with the specified
 * associativity and the current precedence level. Fails if the
 * name already denotes a nonterminal or a token.
 */
private void DefineToken (string token, int associativity) // throws Exception
{
    GrammarSymbol existing = GetNonTerminal(token);
    if (existing != null)
        throw new Exception("Nonterminal "+existing+" cannot be a token!");
    existing = GetTerminal(token);
    if (existing != null)
        throw new Exception("Duplicate token declaration: "+existing);
    Terminal created = (Terminal)NewTerminal(token);
    created.Precedence = currentPrecedence;
    created.Associativity = associativity;
}
/**
 * Reads all the tokens in the current line and returns an ArrayList
 * containing them (as strings). A number appearing anywhere in the
 * line sets the precedence level for the whole declaration (at most
 * one is allowed); otherwise the next default precedence level is
 * used. A backslash immediately before a newline continues the
 * declaration onto the next line.
 */
private ArrayList TokensInLine () // throws Exception
{
    ArrayList tokens = new ArrayList(10);
    SetSyntax(EOL_MODE); // make end-of-line a significant token
    currentPrecedence = -1; // -1 means: no explicit precedence seen yet
    while (GetToken() != StreamTokenizer.TT_EOL)
    {
        // Backslash-newline acts as a line continuation.
        if (st.TType == '\\' && st.Peek() == '\n')
        {
            st.SkipChar();
            GetToken();
        }
        switch (st.TType)
        {
            case SINGLE_QUOTE:
            case DOUBLE_QUOTE:
            case StreamTokenizer.TT_WORD:
                tokens.Add(st.SValue);
                break;
            case StreamTokenizer.TT_NUMBER:
                if (currentPrecedence != -1)
                    throw new Exception
                        ("Duplicate precedence specified in token declaration");
                currentPrecedence =
                    CheckPrecedenceLevel(PrologPrecedence((int)st.NValue));
                break;
            case StreamTokenizer.TT_EOF:
                throw new Exception("Premature end of file");
            default:
                // Any other single character is itself a token name.
                tokens.Add(((char)st.TType).ToString());
                break;
        }
    }
    if (tokens.Count == 0)
        throw new Exception("No token found in token declaration");
    if (currentPrecedence == -1)
        currentPrecedence = NextPrecedenceLevel(); // default precedence
    SetSyntax(NO_EOL_MODE);
    return tokens;
}
/**
 * Returns the current precedence level (after validity checking)
 * and advances it by the current precedence increment for the next
 * declaration.
 */
private int NextPrecedenceLevel ()
{
    int current = precedenceLevel;
    precedenceLevel = current + precedenceIncrement;
    return CheckPrecedenceLevel(current);
}
/**
 * Declares the tokens in the current line to be left-associative
 * (all at the same precedence level).
 */
private void DeclareLeftAssoc () // throws Exception
{
    DefineTokens(TokensInLine(),LEFT_ASSOCIATIVE);
}
/**
 * Declares the tokens in the current line to be right-associative
 * (all at the same precedence level).
 */
private void DeclareRightAssoc () // throws Exception
{
    DefineTokens(TokensInLine(),RIGHT_ASSOCIATIVE);
}
/**
 * Declares the tokens in the current line to be non-associative
 * (all at the same precedence level).
 */
private void DeclareNonassoc () // throws Exception
{
    DefineTokens(TokensInLine(),NON_ASSOCIATIVE);
}
/**
 * Declares the command argument to be the prefix to add to
 * node class names for this grammar. The argument may be a bare
 * word or a quoted string.
 */
private void DeclareNodePrefix () // throws Exception
{
    SetSyntax(EOL_MODE);
    switch (GetToken())
    {
        case StreamTokenizer.TT_WORD:
        case CC.SQT: case CC.DQT: // quoted forms also accepted
            nodePrefix = String.Intern(st.SValue);
            break;
        default:
            throw new Exception("Missing node prefix specifier");
    }
    SetSyntax(NO_EOL_MODE);
}
/**
 * Declares the command argument to be the suffix to add to
 * node class names for this grammar. The argument may be a bare
 * word or a quoted string.
 */
private void DeclareNodeSuffix () // throws Exception
{
    SetSyntax(EOL_MODE);
    int token = GetToken();
    bool acceptable = token == StreamTokenizer.TT_WORD
                   || token == CC.SQT
                   || token == CC.DQT;
    if (!acceptable)
        throw new Exception("Missing node suffix specifier");
    nodeSuffix = String.Intern(st.SValue);
    SetSyntax(NO_EOL_MODE);
}
/**
 * Processes a %doc command, whose argument names the symbol the
 * following documentation should be attached to. A quoted argument
 * always denotes a terminal (created if unknown). A bare word is
 * resolved as an existing terminal first, then as an existing
 * nonterminal; a completely unknown name is created as a terminal.
 */
private void ProcessDoc () // throws Exception
{
    SetSyntax(EOL_MODE);
    switch (GetToken())
    {
        case SINGLE_QUOTE:
        case DOUBLE_QUOTE:
            docSymbol = GetTerminal(st.SValue);
            if (docSymbol == null)
                docSymbol = NewTerminal(st.SValue);
            break;
        case StreamTokenizer.TT_WORD:
            // BUG FIX: the original tested 'docSymbol != null' after the
            // nonterminal lookup, which overwrote a found nonterminal with
            // a fresh terminal and left docSymbol null for unknown names
            // (so the doc text was silently lost). Mirror the quoted-case
            // logic instead: create a terminal only when nothing exists.
            docSymbol = GetTerminal(st.SValue);
            if (docSymbol == null)
            {
                docSymbol = GetNonTerminal(st.SValue);
                if (docSymbol == null)
                    docSymbol = NewTerminal(st.SValue);
            }
            break;
        default:
            throw new Exception("Missing symbol in doc command");
    }
    SetSyntax(NO_EOL_MODE);
}
/**
 * Declares the access modifier for the generated parser class.
 * Only "public", "internal", "private", and "protected" are
 * accepted; a trailing space is appended for direct emission.
 */
private void DeclareAccess () // throws Exception
{
    SetSyntax(EOL_MODE);
    if (GetToken() != StreamTokenizer.TT_WORD)
        throw new Exception("Missing tag in access command");
    accessTag = String.Intern(st.SValue);
    bool recognized = accessTag == "public"
                   || accessTag == "internal"
                   || accessTag == "private"
                   || accessTag == "protected";
    if (!recognized)
        throw new Exception("Bad access tag value ("+accessTag+")");
    accessTag += " ";
    SetSyntax(NO_EOL_MODE);
}
/** The set of declared node classes (see DeclareNodeClass). */
private SetOf nodeClasses;
/** The set of node classes named after 'extends' in a node class
    declaration; each must itself be declared (checked in ReadRules). */
private SetOf superNodeClasses;
/**
 * Declares the next string to be read as a node class. The full
 * declaration syntax handled here is:
 *
 *   [public] NAME [extends SUPER] [implements I1, I2, ...]
 *            [locates FIELD] { BODY }
 *
 * The generated class extends ParseNode (or the node class of the
 * 'extends' symbol), carries the configured node prefix/suffix, and
 * optionally gets a Locate() method propagating source positions to
 * the 'locates' field. Public classes are emitted separately from
 * ancillary (non-public) ones.
 */
private void DeclareNodeClass () // throws Exception
{
    if (GetToken() != StreamTokenizer.TT_WORD)
        throw new Exception("Ill-formed node class declaration");
    string publInfo = "";
    string word = String.Intern(st.SValue);
    // Optional 'public' modifier before the class name.
    if (word == "public")
    {
        publInfo = "public";
        if (GetToken() != StreamTokenizer.TT_WORD)
            throw new Exception("Ill-formed nodeclass declaration");
    }
    word = String.Intern(st.SValue);
    if (GetTerminal(word) != null)
        throw new Exception("Can't define a node class for a terminal ("+word+")");
    NonTerminal symbol = GetNonTerminal(word);
    if (symbol != null)
    {
        if (nodeClasses.Contains(symbol))
            throw new Exception("Duplicate node class declaration for symbol "+symbol);
    }
    else
        symbol = NewNonTerminal(word);
    nodeClasses.Add(symbol);
    // Optional 'extends SUPER' clause.
    NonTerminal superSymbol = null;
    if (GetToken() == StreamTokenizer.TT_WORD
        && (String.Intern(st.SValue) == "extends"))
    {
        // 'implements'/'locates' right after 'extends' means the
        // superclass name is missing.
        if (GetToken() != StreamTokenizer.TT_WORD
            || String.Intern(st.SValue) == "implements"
            || String.Intern(st.SValue) == "locates")
            throw new Exception
                ("Error in node class declaration: identifier missing after 'extends'");
        if (GetTerminal(st.SValue) != null)
            throw new Exception("Can't define node class " + symbol +
                                " as a subclass of a terminal (" + st.SValue + ")");
        if ((superSymbol = GetNonTerminal(st.SValue)) == null)
            superSymbol = NewNonTerminal(st.SValue);
        superNodeClasses.Add(superSymbol);
    }
    else
        st.PushBack(); // not 'extends': re-examine this token below
    // Optional 'implements I1, I2, ...' clause.
    string implInfo = "";
    if (GetToken() == StreamTokenizer.TT_WORD
        && String.Intern(st.SValue) == "implements")
    {
        implInfo = ", ";
        if (GetToken() != StreamTokenizer.TT_WORD
            || String.Intern(st.SValue) == "locates")
            throw new Exception
                ("Error in nodeclass declaration: identifier missing after 'implements'");
        // Accumulate comma-separated interface names until a
        // non-comma or the 'locates' keyword is reached.
        do
        {
            implInfo += st.SValue;
            if (GetToken() == COMMA)
                implInfo += ", ";
            else
                break;
        }
        while (GetToken() == StreamTokenizer.TT_WORD
               && String.Intern(st.SValue) != "locates");
    }
    st.PushBack(); // return the token that ended the implements scan
    // Optional 'locates FIELD' clause.
    string locateInfo = null;
    if (GetToken() == StreamTokenizer.TT_WORD
        && String.Intern(st.SValue) == "locates")
    {
        if (GetToken() != StreamTokenizer.TT_WORD)
            throw new Exception
                ("Error in nodeclass declaration: identifier missing after 'locates'");
        locateInfo = st.SValue;
    }
    else
        st.PushBack();
    if (GetToken() != BEGIN_SCOPE)
        throw new Exception
            ("Missing '" + (char)BEGIN_SCOPE + "' in nodeclass declaration");
    string body = BracedString();
    // Assemble the generated class source.
    symbol.NodeType = nodePrefix + symbol.Name + nodeSuffix;
    string superClass = superSymbol == null
        ? "ParseNode"
        : nodePrefix + superSymbol.Name + nodeSuffix;
    StringBuilder nodeClass = new StringBuilder(bufferSize);
    nodeClass
        .Append(publInfo)
        .Append(publInfo.Length > 0 ? " " : "")
        .Append("class ")
        .Append(symbol.NodeType)
        .Append(" : "+superClass+" ")
        .Append(implInfo)
        .Append("\n{\n  ")
        .Append(publInfo)
        .Append(publInfo.Length > 0 ? " " : "")
        .Append(symbol.NodeType)
        .Append(" (ParseNode node) : base(node) {}\n\n  ")
        .Append(body.Trim());
    // Emit a Locate() method when a 'locates' field was given.
    if (locateInfo != null)
        nodeClass
            .Append("\n")
            .Append("\n  public void Locate ()")
            .Append("\n  {")
            .Append("\n    if ("+locateInfo+" != null)")
            .Append("\n      {")
            .Append("\n        "+locateInfo+".SetStart(GetStart());")
            .Append("\n        "+locateInfo+".SetEnd(GetEnd());")
            .Append("\n      }")
            .Append("\n  }");
    nodeClass.Append("\n}");
    if (String.Intern(publInfo) != "public")
        ancillaryClasses.Add(nodeClass);
    else
        publicClasses[symbol.NodeType] = nodeClass;
}
/**
 * Returns the contents between "{" and "}" as a string, taking into
 * account possible nestings. The opening brace is assumed to have
 * been consumed already; the final closing brace is not included in
 * the result.
 */
private string BracedString () // throws Exception
{
    StringBuilder contents = new StringBuilder(bufferSize);
    SetSyntax(RAW_MODE); // every character comes back as an ordinary token
    int depth = 1;       // we are already inside one open brace
    while (depth > 0)
    {
        int token = GetToken();
        if (token == BEGIN_SCOPE)
        {
            depth++;
            contents.Append((char)BEGIN_SCOPE);
        }
        else if (token == END_SCOPE)
        {
            depth--;
            if (depth > 0) // keep inner closers, drop the outermost one
                contents.Append((char)END_SCOPE);
        }
        else
            contents.Append((char)st.TType);
    }
    SetSyntax(NORMAL_MODE);
    return contents.ToString();
}
/**
 * Declares a package (actually, a .NET namespace) for the parser
 * class. Only one package declaration is allowed; the rest of the
 * line is discarded.
 */
private void DeclarePackage () // throws Exception
{
    if (packageName != null)
        throw new Exception("Duplicate package declaration");
    if (GetToken() != StreamTokenizer.TT_WORD)
        throw new Exception("Bad package declaration");
    packageName = st.SValue;
    SetSyntax(EOL_MODE);
    // Consume and ignore everything up to the end of the line.
    while (GetToken() != StreamTokenizer.TT_EOL);
    SetSyntax(NO_EOL_MODE);
}
/**
 * Reads a command's multiple arguments into the specified ArrayList.
 * Arguments are separated by commas or whitespace; reading stops at
 * the end-of-rule character or at the start of the next command
 * (which is pushed back).
 */
private void DeclareList (ArrayList v) // throws Exception
{
    SetSyntax(RAW_MODE);
    StringBuilder buffer = new StringBuilder(bufferSize);
    string argument = "";
    for (;;)
        switch (GetToken())
        {
            case StreamTokenizer.TT_EOF:
                if (!readingRemainderSection)
                    throw new Exception("Premature end of file");
                // NOTE(review): in the remainder section this 'break' only
                // exits the switch, so repeated EOFs would loop forever
                // unless GetToken behaves otherwise at EOF — confirm.
                break;
            case COMMAND_START:
                // Start of the next command: leave it for the caller and
                // finish up exactly as for END_OF_RULE.
                st.PushBack();
                goto case END_OF_RULE;
            case END_OF_RULE:
                argument = buffer.ToString().Trim();
                if (argument.Length>0) v.Add(argument);
                goto EXIT;
            case COMMA: case SPACE: case NEWLINE: case TAB:
                // Separator: flush the pending argument, if any.
                argument = buffer.ToString().Trim();
                if (argument.Length > 0)
                {
                    v.Add(argument);
                    buffer = new StringBuilder(bufferSize);
                }
                break;
            default:
                buffer.Append((char)st.TType);
                break;
        }
    EXIT:
    if (!readingRemainderSection)
        SetSyntax(NORMAL_MODE);
}
/**
 * Processes a file inclusion command: pushes the named file (bare
 * word or quoted string) onto the include reader so its contents
 * are read next.
 */
private void ProcessInclude () // throws Exception
{
    SetSyntax(EOL_MODE);
    switch (GetToken())
    {
        case StreamTokenizer.TT_WORD:
        case CC.SQT: case CC.DQT: // quoted file names also accepted
            rd.Include(st.SValue);
            break;
        default:
            throw new Exception("Bad include argument");
    }
    SetSyntax(NO_EOL_MODE);
    if (verbosity > Verbose.QUIET)
        Say("*** Including file " + rd.FileName);
}
/**
 * Declares the precedence step increment to be the next number to
 * be read (used by NextPrecedenceLevel between declarations).
 */
private void DeclarePrecstep () // throws Exception
{
    bool isNumber = GetToken() == StreamTokenizer.TT_NUMBER;
    if (!isNumber)
        throw new Exception("Precedence increment must be a number");
    precedenceIncrement = (int)st.NValue;
}
/**
 * Declares this grammar to allow dynamic parsing. With no argument,
 * only dynamic tokenizing is enabled. With a category name, the
 * name becomes a dynamic operator category: it is made an operator
 * nonterminal and gets the three standard subcategories
 * (prefix CAT_, infix _CAT_, postfix _CAT).
 */
private void DeclareDynamic () // throws Exception
{
    SetSyntax(EOL_MODE);
    string cat = null;
    switch (GetToken())
    {
        case StreamTokenizer.TT_EOF:
            throw new Exception("Premature end of file");
        case StreamTokenizer.TT_EOL:
            break; // bare %dynamic: no operator category
        case StreamTokenizer.TT_WORD:
            cat = st.SValue;
            break;
        default:
            throw new Exception("Bad dynamic operator declaration");
    }
    isDynamic = true;
    if (cat == null) // simply allows dynamic tokenizing, no dynamic operators
    {
        // BUG FIX: restore the default syntax mode before returning;
        // the original left the tokenizer in EOL_MODE on this path,
        // unlike every other EOL_MODE user in this class.
        SetSyntax(NO_EOL_MODE);
        return;
    }
    GrammarSymbol catSymbol = GetNonTerminal(cat);
    if (catSymbol == null)
        catSymbol = NewNonTerminal(cat);
    catSymbol.IsOperator = true;
    if (GetTerminal(cat) != null)
        throw new Exception("Terminal "+cat+" cannot be used as an operator category!");
    operatorCategoryTable[cat] = new ArrayList();
    cat = cat.ToUpper();
    DeclareSubCategory(catSymbol,cat+"_");       // prefix form
    DeclareSubCategory(catSymbol,"_"+cat+"_");   // infix form
    DeclareSubCategory(catSymbol,"_"+cat);       // postfix form
    SetSyntax(NO_EOL_MODE);
}
/**
 * Declares the specified string as a dynamic operator terminal
 * symbol in the operator category denoted by the specified
 * grammar symbol (necessarily a nonterminal), and adds the rule
 * category -> subcategory-terminal with the default action.
 */
private void DeclareSubCategory (GrammarSymbol catSymbol, string subCat)
{
    GrammarSymbol subCatSymbol = GetTerminal(subCat);
    if (subCatSymbol == null)
        subCatSymbol = NewTerminal(subCat,true); // 'true' flags it as an operator terminal — TODO confirm
    else
        Warning("Operator subcategory "+subCat+" is multiply defined.");
    GrammarSymbol[] sequence = new GrammarSymbol[] {catSymbol, subCatSymbol};
    // NOTE(review): the Rule constructor appears to register the rule as
    // a side effect (the result is discarded) — confirm.
    new Rule(sequence,DEFAULT_ACTION);
}
/**
 * Declares the next argument to read as a dynamic operator in the
 * current operator category. The declaration gives an operator name
 * followed by a fixity specifier and a precedence, in either order;
 * when the precedence is omitted after the specifier the next
 * default level is used. Redeclaring an existing operator of the
 * category redefines its precedence and specifier.
 */
private void DeclareOperator () // throws Exception
{
    string name = null;
    SetSyntax(EOL_MODE);
    ArrayList ops = (ArrayList)operatorCategoryTable[operatorCategory.Name];
    // First argument: the operator's name (word, quoted, or any
    // single character other than a number).
    switch (GetToken())
    {
        case StreamTokenizer.TT_EOF:
            throw new Exception("Premature end of file");
        case StreamTokenizer.TT_EOL:
            throw new Exception("Premature end of dynamic operator declaration");
        case StreamTokenizer.TT_WORD:
        case SINGLE_QUOTE:
        case DOUBLE_QUOTE:
            name = st.SValue;
            break;
        case StreamTokenizer.TT_NUMBER:
            throw new Exception("Bad dynamic operator declaration");
        default:
            name = ((char)st.TType).ToString();
            break;
    }
    if (GetNonTerminal(name) != null)
        throw new Exception("Nonterminal "+name+" cannot be used as an operator!");
    // Remaining arguments: fixity specifier and precedence, in
    // either order.
    string specifier = null;
    int precedence = 0;
    switch (GetToken())
    {
        case StreamTokenizer.TT_EOF:
            throw new Exception("Premature end of file");
        case StreamTokenizer.TT_EOL:
            throw new Exception("Premature end of dynamic operator declaration");
        case StreamTokenizer.TT_WORD:
        case SINGLE_QUOTE:
        case DOUBLE_QUOTE:
            // Specifier first; precedence may follow or default.
            specifier = st.SValue;
            switch (GetToken())
            {
                case StreamTokenizer.TT_NUMBER:
                    precedence =
                        CheckPrecedenceLevel(PrologPrecedence((int)st.NValue));
                    break;
                case StreamTokenizer.TT_EOL:
                    // No explicit precedence: use the next default level
                    // and leave the EOL for the caller.
                    precedence = NextPrecedenceLevel();
                    st.PushBack();
                    break;
                default:
                    throw new Exception("Bad dynamic operator declaration");
            }
            break;
        case StreamTokenizer.TT_NUMBER:
            // Precedence first; the specifier must follow.
            precedence = CheckPrecedenceLevel(PrologPrecedence((int)st.NValue));
            switch (GetToken())
            {
                case StreamTokenizer.TT_WORD:
                case SINGLE_QUOTE:
                case DOUBLE_QUOTE:
                    specifier = st.SValue;
                    break;
                default:
                    throw new Exception("Bad dynamic operator declaration");
            }
            break;
        default:
            throw new Exception("Bad dynamic operator declaration");
    }
    Operator op = new Operator(name,operatorCategory,precedence,specifier);
    int i = ops.IndexOf(op);
    if (i == -1)
    {
        // New operator: register it in the category list and in the
        // per-name table.
        op.Add();
        ocount++;
        ops.Add(op);
        ops = (ArrayList)operatorNameTable[name];
        if (ops == null)
            operatorNameTable[name] = ops = new ArrayList();
        ops.Add(op);
    }
    else
        // NOTE(review): 'i' is an index into 'ops' (the category list)
        // but is used to index 'operators' here — looks like it should
        // be ops[i]; 'operators' is declared outside this view, confirm.
        ((Operator)operators[i]).Redefine(precedence,specifier);
    SetSyntax(NO_EOL_MODE);
}
/**
 * This reads from the input in raw mode (i.e., with all
 * characters made ordinary), accumulating what is read into a
 * string buffer until a string matching the specified end marker
 * is found - upon which the string buffer is returned. The end
 * marker itself is not included, and the character following it is
 * pushed back.
 * NOTE(review): if the end marker never appears, GetToken's EOF
 * value is appended like any other character and the loop does not
 * terminate on its own — confirm how GetToken behaves at EOF here.
 */
private StringBuilder Verbatim (string endMarker) // throws Exception
{
    StringBuilder buffer // character accumulator
        = new StringBuilder(bufferSize);
    StringBuilder endMatch // matched prefix of end marker
        = new StringBuilder(bufferSize);
    SetSyntax(RAW_MODE); // make all characters ordinary
    int fullMatch = endMarker.Length;
    if (fullMatch > 0)
    {
        int matchCount;
        char c = (char)GetToken();
        for (;;)
        {
            if (c != endMarker[0])
            {
                // Ordinary character: accumulate and keep scanning.
                buffer.Append(c);
                c = (char)GetToken();
                continue;
            }
            // Possible start of the end marker: match as far as possible.
            matchCount = 0;
            endMatch.Length = 0;
            while (matchCount < fullMatch && c == endMarker[matchCount])
            {
                endMatch.Append(c);
                matchCount++;
                c = (char)GetToken();
            }
            if (matchCount == fullMatch)
            {
                // Complete marker found: unread the lookahead and stop.
                st.PushBack();
                break;
            }
            // Partial match only: the matched prefix is ordinary text.
            buffer.Append(endMatch);
        }
    }
    SetSyntax(NORMAL_MODE);
    return buffer;
}
// Note for future work [by hak]:
// After readRules, one may optionally partially evaluate the
// grammar rules by replacing the LHS of a singular rule (i.e., one
// such that there is only one rule for this LHS) by the rule's RHS
// in all the other rules, and delete the rule whenever the LHS is
// neither recursive nor $START$. NB: it's useless for a singular
// recursive rule ...
/**
 * Reads and defines the grammar rules. Before reading, verifies
 * that every symbol used after 'extends' in a node class
 * declaration was itself declared as a node class. After reading,
 * checks that a start symbol exists and that the grammar has no
 * disconnected (undefined or unreachable) symbols; in permissible
 * mode these are warnings, otherwise errors.
 */
private void ReadRules () // throws Exception
{
    if ((superNodeClasses.Minus(nodeClasses)).Count != 0)
    {
        Gripe("*** These non-terminals must be declared as node classes:\n");
        foreach (NonTerminal n in superNodeClasses)
            Gripe("\t"+n);
        Gripe();
        throw new Exception("Incomplete node class declarations");
    }
    while (ReadRule());
    if (!StartIsDefined)
    {
        string msg = "There are no rules in this grammar!";
        if (IsPermissible)
            Warning(msg);
        else
            throw new Exception(msg);
    }
    // Deliberately non-short-circuit '|': both diagnostics must run
    // so that all problems are reported in one pass.
    if (UndefinedSymbols() | UnreachableSymbols())
    {
        string msg = "This grammar has disconnected symbols.";
        if (IsPermissible)
            Warning(msg);
        else
            throw new Exception(msg);
    }
}
/**
 * Reads and processes a single grammar rule, returning true iff
 * there are more rules to process (i.e., a rule head was found).
 */
private bool ReadRule () // throws Exception
{
    bool hasHead = ReadRuleHead();
    if (hasHead)
        ReadRuleBody();
    return hasHead;
}
/**
 * Reads the head symbol of a grammar rule, returning true iff
 * there is such a symbol to be read next. Returns false on end of
 * file, or when two consecutive COMMAND_START characters mark the
 * end of the rules section. A bare rule neck reuses the current
 * left-hand-side symbol.
 */
private bool ReadRuleHead () // throws Exception
{
    switch (GetToken())
    {
        case StreamTokenizer.TT_EOF:
            return false; // no more rules
        case StreamTokenizer.TT_WORD:
            // New LHS symbol; the rule neck separator must follow.
            CheckSymbol(st.SValue,true);
            if (GetToken() != RULE_NECK)
                throw new Exception
                    ("Missing RULE_NECK after left-hand side symbol in rule");
            break;
        case RULE_NECK:
            // Bare neck: another rule for the previous LHS.
            if (currentLHS == null)
                throw new Exception("Missing left-hand side symbol in rule");
            break;
        case COMMAND_START:
            // A doubled COMMAND_START character ends the rules section.
            SetSyntax(RAW_MODE);
            if (st.Peek() == COMMAND_START)
                return false;
            goto default;
        default:
            throw new Exception("Ill-formed left-hand side in rule");
    }
    return true;
}
/**
 * Reads and processes the body of a grammar rule: a sequence of
 * symbols possibly interleaved with semantic actions and a %prec
 * command, with '|' separating alternative right-hand sides, up to
 * the end-of-rule character.
 */
private void ReadRuleBody () // throws Exception
{
    for (;;)
    {
        switch (GetToken())
        {
            case StreamTokenizer.TT_EOF:
                throw new Exception("Premature end of file");
            case StreamTokenizer.TT_WORD:
                CheckSymbol(st.SValue,false); // RHS occurrence of a symbol
                break;
            case SINGLE_QUOTE:
            case DOUBLE_QUOTE:
                CheckTerminal(st.SValue); // quoted names are terminals
                break;
            case OR_RULE_BODY:
                // Alternative RHS: close the current rule and start a
                // new one for the same LHS.
                NewRule();
                ruleSequence.Add(currentLHS);
                break;
            case BEGIN_SCOPE:
                ReadAction(); // embedded semantic action
                break;
            case COMMAND_START:
                ProcessRulePrec(); // only %prec is legal inside a rule
                break;
            case END_OF_RULE:
                NewRule();
                return;
            default:
                throw new Exception("Ill-formed syntax in rule body");
        }
    }
}
/**
 * Processes an in-rule %prec command: verifies that the word
 * following COMMAND_START is exactly PREC_COMMAND, then reads the
 * rule's precedence tag. Only one %prec per rule is allowed.
 */
private void ProcessRulePrec () // throws Exception
{
    if (st.Peek() == PREC_COMMAND[0])
        // e.g., in %prec, a 'p' must follow %
    {
        GetToken();
        if (String.Intern(st.SValue) == PREC_COMMAND)
        {
            if (ruleTag != null)
                throw new Exception("Duplicate %prec command in rule");
            ReadRuleTag();
            return;
        }
    }
    throw new Exception("Bad %prec command in grammar rule body");
}
/**
 * Reads and processes a grammar rule's tag declared by its
 * in-rule %prec command. The argument is either the name of an
 * already-declared token (whose precedence/associativity the rule
 * inherits), or an explicit precedence number followed by an
 * associativity specifier.
 */
private void ReadRuleTag () // throws Exception
{
    GetToken();
    if (st.TType == StreamTokenizer.TT_WORD
        || st.TType == SINGLE_QUOTE
        || st.TType == DOUBLE_QUOTE)
    {
        // Token-name form: the token must already exist and must not
        // be a dynamic operator.
        ruleTag = GetTerminal(st.SValue);
        if (ruleTag == null)
            throw new Exception("Unknown token as %prec argument "+st.SValue);
        if (ruleTag.IsOperator)
            throw new Exception
                ("Dynamic operators cannot be used as %prec argument "+st.SValue);
        ((Terminal)ruleTag).IsTag = true;
        return;
    }
    if (st.TType == StreamTokenizer.TT_NUMBER)
    {
        // Explicit precedence + associativity specifier form.
        int precedence = CheckPrecedenceLevel(PrologPrecedence((int)st.NValue));
        GetToken();
        if (!(st.TType == StreamTokenizer.TT_WORD
              || st.TType == SINGLE_QUOTE
              || st.TType == DOUBLE_QUOTE))
            throw new Exception("Bad %prec associativity specifier");
        try
        {
            ruleTag = new RuleTag(precedence,st.SValue);
        }
        catch (NonFatalParseErrorException e)
        {
            // Bad specifier is non-fatal: warn and ignore the command
            // (ruleTag keeps its previous value, normally null).
            LoudWarning(e.Message + ": " + Location + "; %prec command ignored");
        }
    }
    else
        throw new Exception("Ill-formed %prec argument in grammar rule body");
}
/**
 * Returns true iff some non-terminal symbol is not the LHS of at
 * least one rule, printing a diagnostic listing the offending
 * symbols when any exist.
 */
private bool UndefinedSymbols ()
{
    ArrayList ruleless = new ArrayList();
    foreach (NonTerminal n in nonterminals)
    {
        if (n.Rules.Count == 0)
            ruleless.Add(n);
    }
    if (ruleless.Count == 0)
        return false;
    bool several = ruleless.Count > 1;
    Gripe(several
          ? "*** These non-terminal symbols have no rules:\n"
          : "*** This non-terminal symbol has no rules:\n");
    foreach (NonTerminal n in ruleless)
        Gripe("\t"+n);
    Gripe();
    Gripe(several
          ? "*** Declare them as tokens or give rules for them."
          : "*** Declare it as a token or give rules for it.");
    return true;
}
/**
 * Returns true iff the grammar contains symbols that are
 * not reachable from the grammar's start symbol. It traverses the
 * rules starting from the start symbol and records all symbols it
 * meets in the dejaVu hash set. It then verifies that
 * all the known symbols are in dejaVu, returning
 * true iff any is missing (special terminals are exempt).
 */
private bool UnreachableSymbols ()
{
    HashSet dejaVu = new HashSet();
    TraverseRules(_START,dejaVu);
    HashSet unreached = new HashSet();
    foreach (GrammarSymbol s in nonterminals)
        if (!dejaVu[s])
            unreached.Add(s);
    foreach (Terminal s in terminals)
    {
        if (s.IsSpecial)
            continue; // internal symbols need not be reachable
        if (!dejaVu[s])
            unreached.Add(s);
    }
    if (unreached.Count != 0)
    {
        // Report all unreachable symbols, with number agreement.
        if (unreached.Count > 1)
            GripeSome("*** These symbols are ");
        else
            GripeSome("*** This symbol is ");
        Gripe("not reachable from any root:\n");
        foreach (GrammarSymbol s in unreached)
            Gripe("\t"+s);
        Gripe();
        Gripe("*** Check the grammar in File "+GrammarName+" and correct this.");
        return true;
    }
    return false;
}
/**
* Traverses the rules starting at the specified root symbol and
* recording all the grammar symbols it crosses on the way in the
* provided hash set, until no new one is found.
*/
private void TraverseRules (GrammarSymbol root, HashSet dejaVu)
{
dejaVu[root] = true;
if (root is NonTerminal)
foreach (Rule r in ((NonTerminal)root).Rules)
for (int i = 1; i
* It is computed like the L* relation but now the gathered
* information is finer; i.e., it is not enough to record that
* X L Y, we also need to remember through which particular
* rule this relation is realized. So, we need to compute the relation
* A L B R which holds iff A L B with R = A -> B S.
* Thus, we cannot rely only on just a set of nonterminals like
* LSet, but need, for each nonterminal A in the
* domain of L, a table associating a nonterminal B to a
* set of rules R such that A L B R. However, it
* is more convenient to have these edges backwards for calculating
* the path sets. So this builds, for each nonterminal A in
* the range of L, a table associating a nonterminal B to a
* set of rules R such that B L A R. These reverse
* links are recorded in a nonterminal's LTable.
*/
/**
 * Builds, for each nonterminal in the range of the L relation, the
 * reverse rule-labeled links recorded in the nonterminal's LTable,
 * then iterates to a fixpoint propagating rule paths from leaves to
 * roots of the L-graph (see the garbled class comment above for the
 * underlying L/path construction).
 */
private void ComputePaths ()
{
    // Initialize LTables for nonterminals in the domain of L by
    // creating reverse rule-labeled links from each non-terminal
    // N to P whenever P L N.
    foreach (NonTerminal n in LDomain)
    {
        foreach (Rule rule in n.Rules)
        {
            GrammarSymbol symbol = rule.LeftMost;
            if (symbol is NonTerminal)
                ((NonTerminal)symbol).AddLRule(n,rule); // Note the reverse links!
        }
    }
    // Iterate backwards through the topologically ordered L-related
    // nonterminals (i.e., from leaves to roots) building the rule paths
    // until none needs to be added; that is, until FIRST sets along
    // all paths are stationary.
    bool progress;
    ArrayList newPaths = null;
    do
    {
        progress = false;
        // For all nonterminals in the L-Graph:
        for (int i = LOrder.Count-1; i>=0; i--)
        {
            NonTerminal n = (NonTerminal)LOrder[i];
            // If n is not a root
            if (!LRoots[n])
            {
                ArrayList paths = n.Paths;
                newPaths = new ArrayList();
                // For each path starting from n:
                foreach (RulePath path in paths)
                    // For each L-predecessor pred of n:
                    foreach (NonTerminal pred in n.LTable.Keys)
                        // For each rule linking pred to n:
                        foreach (Rule rule in n.GetLRules(pred))
                            // Record the path p.rule:
                            newPaths.Add(path.Prepend(rule));
                // Validate the new paths; AddPath reports whether the
                // path was actually new (drives the fixpoint).
                foreach (RulePath newpath in newPaths)
                    progress |= newpath.Start.AddPath(newpath);
            }
        }
    }
    while (progress);
}
//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\
/**
 * A stack to store newly generated states before they can be
 * processed to generate their successors (pushed by AddNewState,
 * drained by ComputeStates).
 */
private Stack new_states = new Stack();
/**
 * Adds the given state to the set of states, and records it
 * in the new_states stack so that its successor states get
 * computed later (see ComputeStates).
 */
private void AddNewState (State state)
{
    state.Add(); // register the state globally
    scount++; // running state count
    stateTable[state] = state; // enables duplicate detection in CheckNewState
    new_states.Push(state); // queue it for successor computation
}
/**
 * Given a newly generated state, this checks whether it already
 * exists. If so, returns the previously recorded state. If not,
 * records this as a new state and returns it.
 */
internal State CheckNewState (State state)
{
    object known = stateTable[state];
    if (known == null)
    {
        AddNewState(state);
        return state;
    }
    return (State)known;
}
/**
 * The initial state of the parsing automaton (seeded from the
 * first rule of _START in ComputeStates).
 */
private State initState;
/**
* This computes the LALR states. It generates the states using
* the LR(0) set-of-items construction (see Dragon Book, pages
* 223-225), and then separates the kernel items from the set of
* items set in each state and computes the PRED relation defined
* in the Park-Choe-Chang article by fixpoint iteration.
*/
private void ComputeStates ()
{
// Seed the initial state with the initial rule's item,
// close it, and record it as a new state:
initState = new State(GetItem((Rule)(_START.Rules[0]),1));
initState.Closure();
AddNewState(initState);
// For each recorded new state, compute its next states:
do
((State)(new_states.Pop())).ComputeNextStates();
while (new_states.Count != 0);
// Compute the kernels and predecessors of each state:
bool progress = false;
for (int i=0; i
* r[0] -> r[1], ..., r[n]
*
* so that $i is mapped to the RHS's symbol r[i] which
* will be located at the stack reference n-i at parse time.
*
* This scans the action for node reference patterns of the form
* "(_rule_,i)" and replaces them with
* "(_rule_,offset-i)". This works correctly
* for calls such that offset == RHS.Count-1.
*
* @param action a string (program code)
* @param offset an integer (rule's RHS's size)
* @return the action where rule node references are now parse stack references.
*/
private string OffsetActionNodes (string action, int offset)
{
    if (action == null || action.Length == 0)
        return EMPTY_ACTION; // no action: get out!
    StringBuilder buffer // result accumulator
        = new StringBuilder(bufferSize);
    int length = action.Length;
    int i = 0; // current scan position in action
    char c = action[i]; // character at the scan position
    for (;;)
    {
        if (c < '0' || c > '9')
        { // copy verbatim all chars but numerals
            buffer.Append(c);
            if (++i == length) break;
            c = action[i];
            continue;
        }
        if (NodeReference(action,i)) // Is this numeral following a node reference?
        { // yes: read off the node's number:
            int v = 0;
            do
            {
                v = v * 10 + (c - '0');
                if (++i == length)
                    break;
                c = action[i];
            }
            while ('0' <= c && c <= '9');
            // and translate it as its stack offset:
            // NOTE(review): the header doc says the reference becomes
            // "offset-i", but this emits v-offset (i.e., i-offset);
            // confirm the intended sign convention against the parser's
            // Node(_rule_,k) runtime stack indexing.
            buffer.Append((v-offset).ToString());
        }
        else
        { // copy it, and all the numerals following it, verbatim:
            do
            {
                buffer.Append(c);
                if (++i == length)
                    break;
                c = action[i];
            }
            while ('0' <= c && c <= '9');
        }
        // either numeral loop may have exhausted the input:
        if (i == length)
            break;
    }
    // return the string value of the result accumulator:
    return buffer.ToString();
}
/**
 * Returns true iff the given pattern occurs in the
 * input string action ending just before index i
 * (i.e., occupying the width characters preceding i).
 */
private bool NodeReference (string action, string pattern, int i)
{
    int width = pattern.Length;
    if (i < width)
        return false; // not enough characters before index i
    // Ordinal comparison of the width chars ending at i against pattern:
    return string.CompareOrdinal(action, i - width, pattern, 0, width) == 0;
}
/**
 * Returns true iff there is a node reference (of the form
 * "(_rule_,") ending at index i in the input
 * string action.
 */
private bool NodeReference (string action, int i)
{
    return NodeReference(action,"(_rule_,",i);
}
/**
 * Reads a semantic action, storing its translation into this
 * grammar's ruleAction variable - or, when the rule
 * already has a forward action and the grammar is dynamic, into
 * ruleUndoAction. Any further action occurrence is an
 * error.
 */
private void ReadAction () // throws Exception
{
    if (ruleAction == null)
    {
        // First action seen for this rule: it is the forward
        // semantic action.
        TranslateRuleAction(true);
        return;
    }
    if (isDynamic && ruleUndoAction == null)
    {
        // A second action is legal only as an undo action of a
        // dynamic grammar: translate it backward.
        TranslateRuleAction(false);
        return;
    }
    throw new Exception("Illegal action occurrence in rule");
}
/**
 * A container recording the known type casts for node references
 * in the current semantic action: cleared at the start of each
 * action translation (see TranslateRuleAction), and consulted by
 * TypeCast and DeepStackReference so that each cast (or warning)
 * is generated only once per action.
 */
private HashSet knownCasts = new HashSet();
/**
 * Translates the forward or backward semantic action and sets it
 * into the global variable Strings: ruleAction
 * and ruleUndoAction. Each of these is assumed equal to
 * null when TranslateRuleAction is called.
 *
 * @param forward the action's direction
 */
private void TranslateRuleAction (bool forward) // throws Exception
{
    SetSyntax(RAW_MODE); // Make all characters ordinary
    knownCasts.Clear(); // Clear the known type casts
    StringBuilder buffer = new StringBuilder(bufferSize);
    int nesting = 1; // Nesting of braces (the opening brace is already consumed)
    while (nesting > 0)
    {
        switch (GetToken())
        {
            case BEGIN_SCOPE:
                // Opening brace: copy it and track the deeper nesting.
                buffer.Append((char)BEGIN_SCOPE);
                nesting++;
                break;
            case END_SCOPE:
                // Closing brace: copied only when it does not terminate
                // the action itself.
                if (nesting > 1)
                    buffer.Append((char)END_SCOPE);
                nesting--;
                break;
            case PSEUDO_VAR:
                // Pseudo-variable: translate it into a parse stack reference.
                buffer.Append(StackReference(forward));
                break;
            case NEWLINE:
                // Preserve line structure (with trailing padding) in the
                // generated code:
                buffer.Append("\n ");
                break;
            case TAB:
                buffer.Append("\t ");
                break;
            default:
                // Any other character is copied verbatim.
                buffer.Append((char)st.TType);
                break;
        }
    }
    SetSyntax(NORMAL_MODE);
    // Prepend the cast declarations accumulated by TypeCast while
    // translating the pseudo-variables:
    if (forward)
        ruleAction = ruleActionCast + buffer.ToString();
    else
        ruleUndoAction = ruleUndoActionCast + buffer.ToString();
}
/**
 * Reads a base-ten number whose first (digit) character is
 * c, consuming tokens up to the first non-digit, which
 * is pushed back, and returns the number's integer value.
 */
private int ReadNumber (int c) // throws Exception
{
    int value = c - '0'; // seed with the digit already in hand
    for (c = GetToken(); '0' <= c && c <= '9'; c = GetToken())
        value = 10 * value + (c - '0');
    st.PushBack(); // the non-digit belongs to the next token
    return value;
}
/**
 * Returns the String corresponding to the translation
 * into a stack reference of a pseudo-variable ($$, $n, $0, or
 * $-n) in a semantic action.
 *
 * @param forward is true if top-down action, false otherwise.
 * @return the stack reference translation
 * @exception Exception on an ill-formed pseudo-variable
 */
private string StackReference (bool forward) // throws Exception
{
    string stref;
    int n; // the stack offset;
    int token = GetToken();
    // NB: we read in ordinary mode (one char at a time)
    switch (token)
    {
        case PSEUDO_VAR: // This is the result (LHS) to push on the stack;
            // check if typecast is needed & translate as '_head_':
            stref = TypeCast(forward,0,"_head_");
            containsHeadReference = true;
            break;
        case '1': case '2': case '3':
        case '4': case '5': case '6': // translate as reference at offset n
        case '7': case '8': case '9': // after checking whether to typecast:
            n = ReadNumber(token);
            stref = TypeCast(forward,n,"Node(_rule_,"+n+")");
            break;
        case '0': // translate as deep reference
            stref = DeepStackReference(0,"Node(_rule_,0)");
            break;
        case '-': // translate as deep reference at a negative offset.
            // BUG FIX: the number follows the minus sign, so it must be
            // read starting from the NEXT token; previously the '-'
            // character itself was passed to ReadNumber, folding
            // ('-' - '0') == -3 into the value (e.g., $-2 yielded 28).
            token = GetToken();
            if (token < '0' || token > '9')
                throw new Exception("Ill-formed pseudovariable");
            n = -ReadNumber(token);
            stref = DeepStackReference(n,"Node(_rule_,"+n+")");
            break;
        default:
            throw new Exception("Ill-formed pseudovariable");
    }
    return stref;
}
/**
 * Returns the String corresponding to the translation of
 * a stack reference at given offset n by generating all
 * appropriate and necessary type adjustments in the case where
 * the parse node it references has been declared of a more
 * specific type by the %nodeclass command. In the
 * process, when a type cast must be generated, the action (or
 * undo action) is prepended with the appropriate local
 * declarations and type checking code.
 *
 * @param forward is true if top-down action, false otherwise.
 * @param n the stack offset
 * @param reference the (partial) reference translation thus far
 * @exception Exception when n exceeds the current rule's RHS length
 */
private string TypeCast (bool forward, int n, string reference) // throws Exception
{
    StringBuilder buffer = new StringBuilder(bufferSize);
    // A pseudo-variable $n must refer to a symbol of the current rule:
    if (n >= ruleSequence.Count)
        throw new Exception("Pseudovariable out of range: $"+n);
    GrammarSymbol symbol = (GrammarSymbol)ruleSequence[n];
    // Only nonterminals can carry a %nodeclass-declared node type:
    string type = symbol is NonTerminal ? ((NonTerminal)symbol).NodeType : null;
    string rf;
    if (type == null)
        rf = reference; // no declared type: keep the plain reference
    else
    {
        nodeCast = true; // remember that this grammar generates casts
        rf = "_node"+n+"_"; // typed local standing for the reference
        if (!knownCasts[rf]) // emit the cast code only once per action
        {
            buffer.Append(" ").Append(type).Append(" ").Append(rf);
            if (n == 0)
                // $$ (the head): wrap _head_ in a new node of the declared
                // type and redirect _head_ to that wrapper:
                buffer.Append(" = new ").Append(type).Append("(_head_);")
                    .Append("\n _head_ = (").Append(type)
                    .Append(")").Append(rf).Append(";\n");
            else
                // $n (an RHS symbol): cast the stack node when it already
                // has the declared type; otherwise wrap it and replace the
                // node on the parse stack with the wrapper:
                buffer.Append(";\n if (").Append(reference)
                    .Append(" is ").Append(type).Append(")")
                    .Append("\n ").Append(rf).Append(" = (")
                    .Append(type).Append(")").Append(reference).Append(";")
                    .Append("\n else\n {")
                    .Append("\n ").Append(rf).Append(" = new ")
                    .Append(type).Append("(").Append(reference).Append(");")
                    .Append("\n ReplaceStackNode(_rule_,")
                    .Append(n).Append(",").Append(rf).Append(");")
                    .Append("\n }\n");
            knownCasts[rf] = true;
        }
    }
    // Export the local buffer according to the direction:
    if (forward)
        ruleActionCast += buffer.ToString();
    else
        ruleUndoActionCast += buffer.ToString();
    // Return the translated referent:
    return rf;
}
/**
 * Returns the translation string for a deep stack reference given
 * the specified integer offset and node reference string. Since no
 * type cast can be generated for such a reference, a warning is
 * issued - once per action for each distinct reference.
 */
private string DeepStackReference (int n, string reference) // throws Exception
{
    if (knownCasts[reference])
        return reference; // already warned about this reference
    knownCasts[reference] = true;
    Warning("Unsafe stack reference: "+n+" "+Location
            +" - no type cast was generated");
    return reference;
}
/**
 * Copies the remainder of the grammar file into the specified
 * StreamWriter, which is the generated parser file. Include
 * commands which start a line will be processed by copying the
 * contents of the included file, which may itself contain include
 * commands.
 */
internal void CopyRemainder (StreamWriter p_out) // throws IOException
{
    string include = (char)COMMAND_START + INCLUDE_COMMAND;
    for (string line = rd.ReadLine(); line != null; line = rd.ReadLine())
        if (line.StartsWith(include))
        {
            // Pass everything after the command itself: GetInclude skips
            // leading white space anyway, and using include.Length
            // (rather than include.Length+1 as before) avoids an
            // ArgumentOutOfRangeException when the line consists of the
            // bare command - GetInclude then reports the missing
            // argument as a proper IOException.
            string file = GetInclude(line.Substring(include.Length));
            Say("*** Including file "+file);
            rd.Include(file);
        }
        else
            p_out.Write(line+"\n"); // '\n' regardless of platform: keep output stable
    rd.Close();
}
/**
 * Extracts and returns the name of the file to include from the
 * given line. Leading white space is skipped; a single- or
 * double-quoted name runs up to the matching unescaped quote,
 * while an unquoted one runs up to the next white space (or the
 * end of the line). Throws an IOException when no such name can
 * be extracted.
 */
private string GetInclude (string line) // throws IOException
{
    int pos = 0;
    // Skip leading white space:
    while (pos < line.Length && Char.IsWhiteSpace(line[pos]))
        pos++;
    if (pos == line.Length)
        throw new IOException("Bad argument to include in file " +
                              rd.FileName + ", line " + rd.LineNumber);
    char delimiter = line[pos];
    if (delimiter != '"' && delimiter != '\'')
    {
        // Unquoted name: everything up to the next white space.
        int stop = pos;
        while (stop < line.Length && !Char.IsWhiteSpace(line[stop]))
            stop++;
        return line.Substring(pos,stop-pos);
    }
    // Quoted name: everything up to the matching unescaped quote
    // (a backslash before the quote keeps scanning).
    int first = ++pos;
    while (pos < line.Length &&
           (line[pos] != delimiter || line[pos-1] == '\\'))
        pos++;
    if (pos == line.Length)
        throw new IOException("Bad argument to include in file " +
                              rd.FileName + ", line " + rd.LineNumber);
    return line.Substring(first,pos-first);
}
/**
 * Reads and processes the remainder of the grammar specification
 * following the rules: all text up to end of file is accumulated
 * into ancillaryClasses, honoring %usefile
 * commands encountered along the way.
 */
private void ReadRemainder () // throws Exception
{
    // all characters are ordinary at this point
    // and the next token to be read is COMMAND_START.
    GetToken(); // skip the COMMAND_START
    readingRemainderSection = true;
    StringBuilder buffer = new StringBuilder(bufferSize);
    for (;;)
    {
        switch (GetToken())
        {
            case StreamTokenizer.TT_EOF:
                // End of grammar: keep whatever was accumulated (if any).
                if (!IsVacuous(buffer))
                    ancillaryClasses.Add(buffer);
                return;
            case COMMAND_START:
                if (st.Peek() == USEFILE_COMMAND[0])
                // e.g., a 'u' must follow the COMMAND_START (%usefile)
                {
                    SetSyntax(NORMAL_MODE); // switch to word tokens to read the command name
                    GetToken();
                    if (String.Intern(st.SValue) == USEFILE_COMMAND)
                    {
                        // Flush the text read so far as an ancillary class:
                        if (!IsVacuous(buffer))
                        {
                            ancillaryClasses.Add(buffer);
                            buffer = new StringBuilder(bufferSize);
                        }
                        // Handle the %usefile arguments (see DeclareList):
                        DeclareList(ancillaryClasses);
                    }
                    else
                        // Not %usefile after all: keep the word verbatim.
                        buffer.Append(st.SValue);
                    // NOTE(review): syntax is left in NORMAL_MODE on this
                    // path - confirm downstream tokens expect that.
                    continue;
                }
                // A COMMAND_START not followed by 'u' is ordinary text:
                goto default;
            default:
                // Any other character is accumulated verbatim.
                buffer.Append((char)st.TType);
                break;
        }
    }
}
/**
 * Returns true iff the specified buffer contains only white space
 * (an empty buffer is vacuously white).
 */
private static bool IsVacuous (StringBuilder buffer)
{
    // NOTE(review): body reconstructed from the documented contract -
    // the original text was garbled at this point in the file.
    for (int i = 0; i < buffer.Length; i++)
        if (!Char.IsWhiteSpace(buffer[i]))
            return false;
    return true;
}
//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\
// ANALYZING THE GRAMMAR \\
//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\
/**
 * Performs the grammar analyses required before constructing the
 * parsing automaton, in dependency order: FIRST sets first, then
 * the L-graph, then the rule paths along it.
 */
private void PreprocessGrammar ()
{
    ComputeFirsts(); // FIRST sets of all symbols (and item suffixes)
    ComputeLGraph(); // the L (leftmost-derivation) graph over nonterminals
    ComputePaths(); // rule paths built over the L-graph
}
/**
* This computes the FIRST set for all the symbols in the grammar.
* This set is defined in the Dragon Book, page 189. This method
* uses the fix-point algorithm described there. It also computes
* the FIRST sets for all item suffixes for all the rules.
*/
private void ComputeFirsts ()
{
// Initialize the FIRST set of all terminals to contain the
// terminal itself.
foreach (Terminal t in terminals)
{
t.First = new SetOf(terminals);
if (t.IsEmpty)
t.IsNullable = true;
else
t.First.Add(t);
}
// Initialize the FIRST set of all non terminals to the empty set.
foreach (NonTerminal n in nonterminals)
n.First = new SetOf(terminals);
// Close all the FIRST sets by fixpoint iteration.
int index;
SetOf previous;
bool progress;
do
{
progress = false;
foreach (Rule rule in rules)
{
NonTerminal head = rule.Head;
index = 1;
previous = new SetOf(head.First);
for (int i=1; i