//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // PLEASE DO NOT EDIT WITHOUT THE EXPLICIT CONSENT OF THE AUTHOR! \\ //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ using System; using System.IO; using System.Text; using Ilog.Language.Util; using Ilog.Language.Tools; namespace Ilog.Language.IO { /** * * There are two concrete subclasses of this abstract class: * ilog.language.io.StreamTokenizer and * ilog.language.io.LAStreamTokenizer. They provide the exact * same interface as that of the standard Java Core API's * java.io.StreamTokenizer but with a simpler (and IMHO * sounder) implementation. One may use both subclasses instead of * java.io.StreamTokenizer without change. However, they * support a more complete and more general set of methods. In * addition, ilog.language.io.LAStreamTokenizer provides a * method for arbitrary lookaheads: * *
* public string Lookahead (int n) // throws IOException ** which returns the string of n characters in the input stream * including and starting with the current character. *
* The syntax configuration used as the default setting of the tokenizer * is such that: *
* This happens whenever ParseNumbers() (or ParseNumbers(true)) * has been called (default) and ParseNumbers(false) has not yet been * called. Note that the characters '0' to '9' * and the characters '-', '+', and * '.' are always considered numeric when parsing numbers is on. *
* The format of numbers recognized is that of Java, including octal
* (0...) and hexadecimal (0x... or
* 0X...) integers, and complete floating point format. Type
* letter suffixes (i.e., 'l' or 'L' for
* integers, and 'f', 'F', 'd',
* 'D' for floating point numbers), are also recognized, but
* they are ignored since the value returned is always a double.
*/
/**
 * Token type reported when a number has been read; ToString() renders
 * such a token as INTEGER(...) or FLOAT(...).
 */
public const int TT_NUMBER = CC.NUM;
/**
 * Token type indicating that no token has been read yet. This is the
 * initial value of ttype.
 */
public const int TT_NOTHING = CC.NTG;
/**
 * Token type indicating that a special token has been returned.
 */
public const int TT_SPECIAL = CC.SPL;
/**
* This always contains the type of token just read. It is equal to:
*
* If the number starts with a '-' or '+'
* immediately followed by a decimal digit or a '.',
* then the sign is used as the number's. If the sign is immediately followed
* by a '.', which in turn is not immediately followed
* by a decimal digit, this is an error and a "Bad number format"
* exception is thrown. If the input being tokenized contains numbers with
* gaps between signs and numbers, then the sign is tokenized as indicated
* by its prevailing type.
*
* NB: The sign of an exponent in the floating point notation is always parsed
* as the exponent's sign regardless of the sign character's type. For example,
* 12.3e-4 is correctly parsed as 0.00123 even
* if '-' is ordinary.
*
* If the first digit is '0', the number is parsed as an octal if
* the next digit is not 'x' or 'X';
* otherwise, as a hexadecimal. In either case, the number must be an integer
* (i.e., it has no mantissa).
*
* Type letter suffixes (i.e., 'l' or 'L' for
* integers, and 'f', 'F', 'd',
* 'D' for floating point numbers) are also recognized, but
* ignored since the value returned is always a double.
* @see #CurrentType()
*/
// Reads a number starting at _currentChar into nval. An optional leading
// sign is consumed first; a leading '0' selects hexadecimal or octal
// parsing when _parsingNonDecimals is set, otherwise the number is read
// as a decimal. Throws IOException("Bad number format") when the first
// character after the optional sign cannot start a number.
protected void ReadNumber () // throws IOException
{
    isInteger = true;
    nval = 0;

    // Remember the sign before stepping over it.
    bool negate = false;
    if (_currentChar == '-')
    {
        negate = true;
        NextChar();
    }
    else if (_currentChar == '+')
    {
        NextChar();
    }

    if (_currentChar == '0')
    {
        // The character after a leading zero decides the radix.
        if (!_parsingNonDecimals)
        {
            ReadDecimal();
        }
        else if (_followingChar == 'x' || _followingChar == 'X')
        {
            ReadHexadecimal();
        }
        else if (IsOctalDigit(_followingChar))
        {
            ReadOctal();
        }
        else
        {
            ReadDecimal();
        }
    }
    else if (_currentChar == '.' || ('1' <= _currentChar && _currentChar <= '9'))
    {
        ReadDecimal();
    }
    else
    {
        // Not expected: callers are supposed to enter this method only
        // when _currentChar has been classified as numeric.
        throw new IOException("Bad number format");
    }

    if (negate)
    {
        nval = -nval;
    }
}
/**
 * Accumulates into nval the run of octal digits seen in _followingChar,
 * calling NextChar() after each one, then steps over an optional
 * 'l' / 'L' type suffix.
 */
protected void ReadOctal () // throws IOException
{
    for (; IsOctalDigit(_followingChar); NextChar())
    {
        nval = 8*nval + (_followingChar - '0');
    }

    bool hasLongSuffix = (_followingChar == 'l' || _followingChar == 'L');
    if (hasLongSuffix)
    {
        NextChar();
    }
}
/**
 * Accumulates into nval the run of hexadecimal digits that follows the
 * "0x" / "0X" prefix, then steps over an optional 'l' / 'L' type suffix.
 * A bare "0x" or "0X" with no digits leaves nval unchanged (0 when entered
 * from ReadNumber).
 */
protected void ReadHexadecimal () // throws IOException
{
    NextChar(); // skip the 'x' (NB: "0x" and "0X" are parsed as 0).

    for (; IsHexadecimalDigit(_followingChar); NextChar())
    {
        nval = 16*nval + HexValue(_followingChar);
    }

    bool hasLongSuffix = (_followingChar == 'l' || _followingChar == 'L');
    if (hasLongSuffix)
    {
        NextChar();
    }
}
/**
 * Returns the numeric value (0 to 15) of a hexadecimal digit character.
 * Both upper and lower case letters are accepted.
 *
 * @param digit the character code to convert
 * @return the digit's value, or 0 when the argument is not a
 *         hexadecimal digit
 */
protected int HexValue (int digit)
{
    if ('0' <= digit && digit <= '9')
    {
        return digit - '0';
    }
    if ('A' <= digit && digit <= 'F')
    {
        return digit - 'A' + 10;
    }
    if ('a' <= digit && digit <= 'f')
    {
        return digit - 'a' + 10;
    }
    return 0;
}
/* ======================================================================= */
/**
 * Tells whether the given character is a decimal digit or one of the
 * letters e, E, d, D, f, F.
 *
 * @param c the character code to test
 */
protected bool CanFollowDecimalDot (int c)
{
    if (IsDecimalDigit(c))
    {
        return true;
    }

    switch (c)
    {
        case 'e': case 'E':
        case 'd': case 'D':
        case 'f': case 'F':
            return true;
        default:
            return false;
    }
}
/**
 * Sets nval to the value of the decimal digit in _currentChar, then
 * extends it with every decimal digit found in _followingChar, calling
 * NextChar() after each one.
 */
protected void ReadInteger () // throws IOException
{
    nval = _currentChar - '0';
    for (; IsDecimalDigit(_followingChar); NextChar())
    {
        nval = 10*nval + (_followingChar - '0');
    }
}
/**
 * Reads the run of decimal digits found in _followingChar as a fractional
 * part and adds it to nval.
 *
 * The divisor is kept as a double. An int divisor multiplied by 10 per
 * digit wraps around after 9 digits (and becomes 0 after 32), which
 * produced wrong or non-finite values for long fractions.
 *
 * @param hasDigits true if digits have already been seen for this number;
 *        when false, at least one digit must be read here
 * @throws IOException ("Bad number format") if no digit was seen at all
 */
protected void ReadMantissa (bool hasDigits) // throws IOException
{
    double mantissa = 0;
    double scale = 1;
    while (IsDecimalDigit(_followingChar))
    {
        hasDigits = true;
        // Stop accumulating (but keep consuming) once the scale nears the
        // top of the double range; otherwise mantissa/scale would end up
        // as Infinity/Infinity = NaN. Digits beyond this point are below
        // 1e-300 and are dropped.
        if (scale < 1e300)
        {
            mantissa = 10*mantissa + (_followingChar - '0');
            scale *= 10;
        }
        NextChar();
    }
    if (!hasDigits)
    {
        throw new IOException("Bad number format");
    }
    nval += mantissa/scale;
}
/**
 * Reads an optionally signed decimal exponent from _followingChar and
 * scales nval by the corresponding power of ten, using repeated
 * multiplication or division by 10.
 *
 * The exponent is saturated instead of being allowed to overflow int,
 * and scaling stops as soon as nval can no longer change. An overflowed
 * exponent used to go negative and silently skip the scaling, and a
 * large one could spin for up to about 2^31 iterations.
 *
 * @throws IOException ("Bad number format") if no exponent digit is found
 */
protected void ReadExponent () // throws IOException
{
    // Any exponent this large already drives a double to 0 or Infinity,
    // so saturating here cannot change the result.
    const int maxExponent = 100000;

    bool hasDigits = false;
    bool isFractional = false;
    int exponent = 0;

    // NOTE(review): presumably steps onto the exponent letter - confirm
    // against the caller.
    NextChar();
    if (_followingChar == '-' || _followingChar == '+')
    {
        isFractional = (_followingChar == '-');
        NextChar();
    }

    while (IsDecimalDigit(_followingChar))
    {
        hasDigits = true;
        if (exponent < maxExponent)
        {
            exponent = 10*exponent + (_followingChar - '0');
        }
        NextChar();
    }

    if (!hasDigits)
    {
        throw new IOException("Bad number format");
    }

    for (int i = 1; i <= exponent; i++)
    {
        // 0, Infinity and NaN are fixed points of both operations below.
        if (nval == 0 || double.IsInfinity(nval) || double.IsNaN(nval))
        {
            break;
        }
        if (isFractional)
        {
            nval /= 10;
        }
        else
        {
            nval *= 10;
        }
    }
}
/**
 * Reads characters up to (and not including) the closing quote rightQuote
 * into a fresh wordBuffer, and stores the interned result in sval.
 * A character equal to the escape character configured for leftQuote is
 * handed to ReadEscapedChar().
 *
 * An escape value of 0 means "no escape for this quote", so it is never
 * matched against the input. Previously a NUL character inside the quotes
 * was treated as an escape.
 *
 * @throws EndOfStreamException if CC.EOF is met before the closing quote
 */
protected void ReadQuotedWord () // throws IOException
{
    wordBuffer = new StringBuilder();
    for (;;)
    {
        NextChar();
        if (_currentChar == rightQuote)
        {
            break;
        }

        int escape = charInfo[leftQuote].escape;
        if (escape != 0 && _currentChar == escape)
        {
            ReadEscapedChar();
        }

        if (_currentChar == CC.EOF)
        {
            throw new EndOfStreamException
                ("End of file encountered while reading a quoted string");
        }

        wordBuffer.Append((char)_currentChar);
    }
    sval = String.Intern(wordBuffer.ToString());
}
/**
 * Replaces _currentChar with the character denoted by the escape sequence
 * whose first character is in _followingChar:
 * an octal digit starts an octal code (see ReadOctalCode), 'u' starts a
 * unicode code (see ReadUnicode), the letters n, t, b, r, f map to
 * CC.EOL, CC.TAB, CC.BSP, CC.CRT, CC.FFD, and any other character stands
 * for itself.
 */
protected void ReadEscapedChar () // throws IOException
{
    int escaped = _followingChar;

    // Octal codes read their own digits and do no trailing read.
    if (IsOctalDigit(escaped))
    {
        ReadOctalCode();
        return;
    }

    if (escaped == 'u')
    {
        ReadUnicode();
    }
    else if (escaped == 'n')
    {
        _currentChar = CC.EOL;
    }
    else if (escaped == 't')
    {
        _currentChar = CC.TAB;
    }
    else if (escaped == 'b')
    {
        _currentChar = CC.BSP;
    }
    else if (escaped == 'r')
    {
        _currentChar = CC.CRT;
    }
    else if (escaped == 'f')
    {
        _currentChar = CC.FFD;
    }
    else
    {
        // interpret the next char literally
        _currentChar = escaped;
    }

    ReadFollowingChar();
}
/**
 * Sets _currentChar to the value of an octal character code made of at
 * most three octal digits taken from _followingChar. Reading stops at
 * the first non-octal character.
 */
protected void ReadOctalCode () // throws IOException
{
    _currentChar = 0;
    for (int remaining = 3;
         remaining > 0 && IsOctalDigit(_followingChar);
         remaining--)
    {
        _currentChar = 8*_currentChar + (_followingChar - '0');
        ReadFollowingChar();
    }
}
/**
 * Sets _currentChar to the value of a unicode escape: any number of 'u'
 * characters followed by exactly four hexadecimal digits. The fourth
 * digit is left in _followingChar (no read is done after it).
 *
 * @throws IOException if one of the four characters is not a
 *         hexadecimal digit
 */
protected void ReadUnicode () // throws IOException
{
    while (_followingChar == 'u')
    {
        ReadFollowingChar();
    }

    _currentChar = 0;
    for (int remaining = 4; remaining > 0; remaining--)
    {
        if (!IsHexadecimalDigit(_followingChar))
        {
            throw new IOException
                ("Non-hexadecimal digit in unicode ("+Location()+")");
        }
        _currentChar = 16*_currentChar + HexValue(_followingChar);

        // No read after the last digit.
        if (remaining > 1)
        {
            ReadFollowingChar();
        }
    }
}
/**
 * Tells whether the given character code is one of '0' to '7'.
 */
protected bool IsOctalDigit (int c)
{
    return c >= '0' && c <= '7';
}
/**
 * Tells whether the given character code is one of '0' to '9'.
 */
protected bool IsDecimalDigit (int c)
{
    return c >= '0' && c <= '9';
}
/**
 * Tells whether the given character code is a hexadecimal digit:
 * '0' to '9', 'A' to 'F', or 'a' to 'f'.
 */
protected bool IsHexadecimalDigit (int c)
{
    if (c >= '0' && c <= '9')
    {
        return true;
    }
    if (c >= 'A' && c <= 'F')
    {
        return true;
    }
    return c >= 'a' && c <= 'f';
}
}
//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\
/**
 * The type of the entries stored in the character information table
 * charInfo. The constructor sets the type and the escape character;
 * the closing quote is left at its default (0) until assigned.
 */
public class CharInfo
{
    /**
     * The type of character.
     */
    public byte type;

    /**
     * If the type is QUOTE_TYPE, this is the closing quote char.
     */
    public int right;

    /**
     * If the type is QUOTE_TYPE, this is the escape character used;
     * when equal to 0, no escape may be used for this quote.
     */
    public int escape;

    /**
     * Builds an entry with the given type and escape character.
     */
    public CharInfo(byte type, int escape)
    {
        this.escape = escape;
        this.type = type;
    }
}
}
*
*/
// Type of the token just read; starts out as TT_NOTHING.
protected int ttype = TT_NOTHING;
// Read-only access to ttype.
public int TType { get { return ttype; } }
/**
 * This contains the token string when a word, or a quoted word, has been read.
 */
protected String sval;
// Read-only access to sval.
public String SValue { get { return sval; } }
/**
 * This contains the token value whenever a number has been read.
 */
protected double nval;
// Read-only access to nval.
public double NValue { get { return nval; } }
/**
 * This is set to true whenever an integer number has been read.
 */
protected bool isInteger;
// Read-only access to isInteger.
public bool IsInteger { get { return isInteger; } }
/* ======================================================================= */
/**
 * The underlying reader.
 */
protected TextReader reader;
/**
 * The location of the character starting the current token.
 * Assigned by SetStartLocation() and updated in place by
 * ResetStartLocation().
 */
protected Location _tokenStart;
/**
 * The location of the character starting the previous token.
 * SetStartLocation() saves the old _tokenStart here.
 */
protected Location _previousTokenStart;
/**
 * Returns the location of the start of the token at hand: the previous
 * token's start when a token has been pushed back, the current token's
 * start otherwise.
 */
public Location TokenStart ()
{
    return pushedBack ? _previousTokenStart : _tokenStart;
}
/**
 * The location of the character ending the current token.
 * Assigned by SetEndLocation() and updated in place by
 * ResetEndLocation().
 */
protected Location _tokenEnd;
/**
 * The location of the character ending the previous token.
 * SetEndLocation() saves the old _tokenEnd here.
 */
protected Location _previousTokenEnd;
/**
 * Returns the location of the end of the token at hand: the previous
 * token's end when a token has been pushed back, the current token's
 * end otherwise.
 */
public Location TokenEnd ()
{
    return pushedBack ? _previousTokenEnd : _tokenEnd;
}
/**
 * Returns a new Location built from the file, line and column recorded
 * for the current character.
 */
public Location CurrentCharLocation ()
{
return new Location(_currentCharFile,_currentCharLine,_currentCharCol);
}
/**
 * Saves the current token start as the previous one, then records the
 * current character's location as the new token start.
 */
protected void SetStartLocation ()
{
_previousTokenStart = _tokenStart;
_tokenStart = CurrentCharLocation();
}
/**
 * Saves the current token end as the previous one, then records the
 * current character's location as the new token end.
 */
protected void SetEndLocation ()
{
_previousTokenEnd = _tokenEnd;
_tokenEnd = CurrentCharLocation();
}
/**
 * Overwrites the file, line and column of the existing _tokenStart with
 * those of the current character. Unlike SetStartLocation(), this does
 * not touch _previousTokenStart and does not create a new Location.
 */
protected void ResetStartLocation ()
{
_tokenStart.SetFile(_currentCharFile);
_tokenStart.SetLine(_currentCharLine);
_tokenStart.SetColumn(_currentCharCol);
}
/**
 * Overwrites the file, line and column of the existing _tokenEnd with
 * those of the current character. Unlike SetEndLocation(), this does
 * not touch _previousTokenEnd and does not create a new Location.
 */
protected void ResetEndLocation ()
{
_tokenEnd.SetFile(_currentCharFile);
_tokenEnd.SetLine(_currentCharLine);
_tokenEnd.SetColumn(_currentCharCol);
}
/**
 * Returns the current line number, i.e. the line recorded for the
 * current character (_currentCharLine).
 */
public int GetLineNumber ()
{
return _currentCharLine;
}
/**
 * Returns the current line number; simply forwards to GetLineNumber().
 * Provided for compatibility with java.io.StreamTokenizer.
 */
public int LineNo ()
{
return GetLineNumber();
}
/**
 * Returns the current file if any, or null. This is the file name
 * recorded for the current character (_currentCharFile).
 */
public String GetFile ()
{
return _currentCharFile;
}
/**
 * Sets the current filename to the specified string. Both the current
 * character's and the following character's file names are updated.
 *
 * @param file the file name to record
 */
public void SetFile (String file)
{
_currentCharFile = file;
_followingCharFile = file;
}
/**
 * Returns a printable description of the current position:
 * "line N" when no file is set, "file F, line N" otherwise.
 */
public String Location ()
{
    String lineInfo = "line "+LineNo();
    String fileName = GetFile();
    return (fileName != null)
        ? "file "+fileName+", "+lineInfo
        : lineInfo;
}
/**
 * Makes all characters between the first and second arguments inclusive
 * ordinary exclusively. Forwards to ResetCharRange with ORDINARY_TYPE.
 *
 * @param from first character of the range
 * @param to   last character of the range
 */
public void OrdinaryChars (int from, int to)
{
ResetCharRange(ORDINARY_TYPE, from, to);
}
/**
 * Makes all characters between the first and second arguments inclusive
 * word characters exclusively. Forwards to ResetCharRange with WORD_TYPE.
 *
 * @param from first character of the range
 * @param to   last character of the range
 */
public void WordChars (int from, int to)
{
ResetCharRange(WORD_TYPE, from, to);
}
/**
 * Makes all characters between the first and second arguments inclusive
 * white space characters exclusively. Forwards to ResetCharRange with
 * WHITESPACE_TYPE.
 *
 * @param from first character of the range
 * @param to   last character of the range
 */
public void WhiteSpaceChars (int from, int to)
{
ResetCharRange(WHITESPACE_TYPE, from, to);
}
/**
 * Makes the specified character a quote character exclusively. The
 * character serves as both opening and closing quote, with ESCAPE_CHAR
 * as its escape character.
 *
 * @param c the quote character
 */
public void QuoteChar (int c)
{
QuotePair(c,c,ESCAPE_CHAR);
}
/**
 * Makes the specified character a quote character exclusively,
 * and specifies its escape character. The character serves as both
 * opening and closing quote.
 *
 * @param c the quote character
 * @param e the escape character to use within these quotes
 */
public void QuoteChar (int c, int e)
{
QuotePair(c,c,e);
}
/**
 * Makes the specified character ordinary exclusively: its type in
 * charInfo is replaced by ORDINARY_TYPE. Characters outside the
 * table are ignored.
 *
 * @param c the character to reclassify
 */
public void OrdinaryChar (int c)
{
    if (c < 0 || c >= charInfo.Length)
    {
        return;
    }
    charInfo[c].type = ORDINARY_TYPE;
}
/* ======================================================================= */
/**
 * The end-of-line is recognized as a token iff the argument is true.
 * Stores the flag in _eolIsSignificant.
 *
 * @param flag whether end-of-line should be a token
 */
public void EolIsSignificant (bool flag)
{
_eolIsSignificant = flag;
}
/**
 * White spaces are recognized as tokens iff the argument is true.
 * Stores the flag in _spaceIsSignificant.
 *
 * @param flag whether white space should be a token
 */
public void SpaceIsSignificant (bool flag)
{
_spaceIsSignificant = flag;
}
/**
 * Specifies that numbers should be parsed. Equivalent to
 * ParseNumbers(true).
 */
public void ParseNumbers ()
{
ParseNumbers(true);
}
/**
 * Specifies that octal and hexadecimal numbers should be parsed.
 * Equivalent to ParseNonDecimals(true).
 */
public void ParseNonDecimals ()
{
ParseNonDecimals(true);
}
/**
 * Specifies that octal and hexadecimal numbers should not be parsed.
 * Equivalent to ParseNonDecimals(false).
 */
public void IgnoreNonDecimals ()
{
ParseNonDecimals(false);
}
/**
 * Turns number parsing on or off. The flag is stored in _parsingNumbers.
 * When turning it on, the characters '0' to '9' and "+-." receive the
 * NUMERIC type and parsing of octal and hexadecimal numbers is switched
 * on as well; when turning it off, the NUMERIC type is removed from
 * those characters.
 *
 * @param flag true to parse numbers, false to stop parsing them
 */
public void ParseNumbers (bool flag)
{
    _parsingNumbers = flag;

    if (!flag)
    {
        UnsetType('0','9',NUMERIC);
        UnsetType("+-.",NUMERIC);
        return;
    }

    SetType('0','9',NUMERIC);
    SetType("+-.",NUMERIC);
    ParseNonDecimals();
}
/**
 * Turns parsing of octal and hexadecimal numbers on or off by storing
 * the flag in _parsingNonDecimals (consulted by ReadNumber() when a
 * number starts with '0').
 *
 * @param flag true to parse octal and hexadecimal numbers
 */
public void ParseNonDecimals (bool flag)
{
_parsingNonDecimals = flag;
}
/* ======================================================================= */
/**
 * If called, the next call to NextToken() will return the same
 * token again. Does nothing when no token has been read yet
 * (ttype is TT_NOTHING).
 */
public void PushBack ()
{
    if (ttype == TT_NOTHING)
    {
        return;
    }
    pushedBack = true;
}
/* ======================================================================= */
/**
 * Returns a printable value of the current state of this
 * StreamTokenizer: the span from _tokenStart to _tokenEnd followed by a
 * description of the current token (EOF, EOL, WORD(...), INTEGER(...),
 * FLOAT(...), NOTHING, QUOTE(...), or the token character between
 * CC.SQT characters).
 *
 * Integer tokens are printed through a long rather than an int, so that
 * values above int.MaxValue (e.g. 0xFFFFFFFF, or literals carrying the
 * accepted 'L' suffix) are not garbled by the narrowing conversion.
 * Values beyond the long range are still not representable this way.
 */
public override String ToString ()
{
    String str;
    switch (ttype)
    {
        case TT_EOF:
            str = "EOF";
            break;
        case TT_EOL:
            str = "EOL";
            break;
        case TT_WORD:
            str = "WORD(" + sval + ")";
            break;
        case TT_NUMBER:
            if (isInteger)
            {
                str = "INTEGER(" + (long)nval + ")";
            }
            else
            {
                str = "FLOAT(" + nval + ")";
            }
            break;
        case TT_NOTHING:
            str = "NOTHING";
            break;
        default:
        {
            if (ttype == leftQuote)
            {
                str = "QUOTE(" + (char)leftQuote + sval + (char)rightQuote + ")";
                break;
            }
            // Any other token is a single character, shown between CC.SQT.
            char[] s = new char[3];
            s[0] = s[2] = CC.SQT;
            s[1] = (char)ttype;
            str = new String(s);
            break;
        }
    }
    return new Span(_tokenStart,_tokenEnd) + ": " + str;
}
//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//
// The following are additional public utilities that I find useful. NB: Using
// these will make your application INCOMPATIBLE with java.io.StreamTokenizer.
//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//
/**
 * Defines a pair of characters as a pair of left and right quotes.
 * This allows tokenizing words quoted by two different characters.
 * Only the left quote is categorized as QUOTE_TYPE, and that type
 * replaces whatever type it had. The third argument specifies the escape
 * character: if 0, no escape is allowed within these quotes.
 * Nothing happens when the left quote is outside the charInfo table.
 *
 * @param left   the opening quote character
 * @param right  the closing quote character
 * @param escape the escape character, or 0 for none
 */
public void QuotePair (int left, int right, int escape)
{
    if (left < 0 || left >= charInfo.Length)
    {
        return;
    }

    charInfo[left].type = QUOTE_TYPE;
    charInfo[left].right = right;
    charInfo[left].escape = escape;
}
/**
 * Defines a pair of characters as left and right quotes, exclusively,
 * using the default escape character ESCAPE_CHAR.
 *
 * @param left  the opening quote character
 * @param right the closing quote character
 */
public void QuotePair (int left, int right)
{
QuotePair(left,right,ESCAPE_CHAR);
}
/**
 * Same as QuotePair but left's QUOTE_TYPE is non-exclusive: the type is
 * applied through SetType instead of replacing the character's type.
 * Nothing happens when the left quote is outside the charInfo table.
 *
 * @param left   the opening quote character
 * @param right  the closing quote character
 * @param escape the escape character, or 0 for none
 */
public void SetQuotePair (int left, int right, int escape)
{
    if (left < 0 || left >= charInfo.Length)
    {
        return;
    }

    SetType(left,QUOTE_TYPE);
    charInfo[left].right = right;
    charInfo[left].escape = escape;
}
/**
 * Sets this character to be a quote character, non-exclusively, with
 * the default escape character ESCAPE_CHAR. The character serves as
 * both opening and closing quote.
 *
 * @param c the quote character
 */
public void SetQuoteChar (int c)
{
SetQuotePair(c,c,ESCAPE_CHAR);
}
/**
 * Sets this character to be a quote character, non-exclusively,
 * and specifies its escape character. The character serves as both
 * opening and closing quote.
 *
 * @param c the quote character
 * @param e the escape character to use within these quotes
 */
public void SetQuoteChar (int c, int e)
{
SetQuotePair(c,c,e);
}
/**
 * Sets this character to be an ordinary character, non-exclusively
 * (through SetType). Characters outside the charInfo table are ignored.
 *
 * @param c the character to mark as ordinary
 */
public void SetOrdinaryChar (int c)
{
    if (c < 0 || c >= charInfo.Length)
    {
        return;
    }
    SetType(c,ORDINARY_TYPE);
}
/**
 * Sets this character to be a word character, exclusively: its type in
 * charInfo is replaced by WORD_TYPE. Characters outside the table are
 * ignored.
 *
 * @param c the character to reclassify
 */
public void WordChar (int c)
{
    if (c < 0 || c >= charInfo.Length)
    {
        return;
    }
    charInfo[c].type = WORD_TYPE;
}
/**
 * Sets this character to be a word character, non-exclusively
 * (through SetType). Characters outside the charInfo table are ignored.
 *
 * @param c the character to mark as a word character
 */
public void SetWordChar (int c)
{
    if (c < 0 || c >= charInfo.Length)
    {
        return;
    }
    SetType(c,WORD_TYPE);
}
/**
 * Sets all characters in this string to be word characters, exclusively.
 * Forwards to ResetCharString with WORD_TYPE.
 *
 * @param chars the characters to reclassify
 */
public void WordChars (String chars)
{
ResetCharString(WORD_TYPE, chars);
}
/**
 * Sets all characters in this string to be word characters, non-exclusively.
 * Forwards to SetCharString with WORD_TYPE.
 *
 * @param chars the characters to mark as word characters
 */
public void SetWordChars (String chars)
{
SetCharString(WORD_TYPE, chars);
}
/**
 * Sets this character to be a whitespace character, exclusively: its
 * type in charInfo is replaced by WHITESPACE_TYPE. Characters outside
 * the table are ignored.
 *
 * @param c the character to reclassify
 */
public void WhiteSpaceChar (int c)
{
    if (c < 0 || c >= charInfo.Length)
    {
        return;
    }
    charInfo[c].type = WHITESPACE_TYPE;
}
/**
 * Sets this character to be a whitespace character, non-exclusively
 * (through SetType). Characters outside the charInfo table are ignored.
 *
 * @param c the character to mark as whitespace
 */
public void SetWhiteSpaceChar (int c)
{
    if (c < 0 || c >= charInfo.Length)
    {
        return;
    }
    SetType(c,WHITESPACE_TYPE);
}
/**
 * Sets all characters in this string to be whitespace characters, exclusively.
 * Forwards to ResetCharString with WHITESPACE_TYPE.
 *
 * @param chars the characters to reclassify
 */
public void WhiteSpaceChars (String chars)
{
ResetCharString(WHITESPACE_TYPE, chars);
}
/**
 * Sets all characters in this string to be whitespace characters, non-exclusively.
 * Forwards to SetCharString with WHITESPACE_TYPE.
 *
 * @param chars the characters to mark as whitespace
 */
public void SetWhiteSpaceChars (String chars)
{
SetCharString(WHITESPACE_TYPE, chars);
}
/**
 * Sets all characters in this string to be ordinary characters, exclusively.
 * Forwards to ResetCharString with ORDINARY_TYPE.
 *
 * @param chars the characters to reclassify
 */
public void OrdinaryChars (String chars)
{
ResetCharString(ORDINARY_TYPE, chars);
}
/**
 * Sets all characters in this string to be ordinary characters, non-exclusively.
 * Forwards to SetCharString with ORDINARY_TYPE.
 *
 * @param chars the characters to mark as ordinary
 */
public void SetOrdinaryChars (String chars)
{
SetCharString(ORDINARY_TYPE, chars);
}
/**
 * Checks whether this character's type is ordinary, among other types.
 * Characters outside the charInfo table are never ordinary.
 *
 * @param c the character to test
 */
public static bool IsOrdinaryChar (int c)
{
    if (c < 0 || c >= charInfo.Length)
    {
        return false;
    }
    return HasType(c,ORDINARY_TYPE);
}
/**
 * Checks whether this character's type is numeric, among other types.
 * Characters outside the charInfo table are never numeric.
 *
 * @param c the character to test
 */
public static bool IsNumericChar (int c)
{
    if (c < 0 || c >= charInfo.Length)
    {
        return false;
    }
    return HasType(c,NUMERIC_TYPE);
}
/**
 * Checks whether this character's type is word, among other types.
 * Unlike the other Is...Char tests, any character at or beyond the end
 * of the charInfo table counts as a word character, while 0 and
 * negative values never do.
 *
 * @param c the character to test
 */
public static bool IsWordChar (int c)
{
    if (c >= charInfo.Length)
    {
        return true;
    }
    if (c <= 0)
    {
        return false;
    }
    return HasType(c,WORD_TYPE);
}
/**
 * Checks whether this character's type is whitespace, among other types.
 * Characters outside the charInfo table are never whitespace.
 *
 * @param c the character to test
 */
public static bool IsWhiteSpaceChar (int c)
{
    if (c < 0 || c >= charInfo.Length)
    {
        return false;
    }
    return HasType(c,WHITESPACE_TYPE);
}
/* ======================================================================= */
/**
 * Checks whether this character's type is quote, among other types.
 * Characters outside the charInfo table are never quotes.
 *
 * @param c the character to test
 */
public static bool IsQuoteChar (int c)
{
    if (c < 0 || c >= charInfo.Length)
    {
        return false;
    }
    return HasType(c,QUOTE_TYPE);
}
/**
 * Returns the next character to be read (_followingChar), without
 * consuming it.
 */
public int Peek ()
{
return _followingChar;
}
/**
 * Returns the previous character that was read (_previousChar).
 */
public int PeekBack ()
{
return _previousChar;
}
/**
 * A public method for skipping the current character
 * in the input. Simply calls NextChar().
 */
public void SkipChar () // throws IOException
{
NextChar();
}
/**
 * Skips the current character like SkipChar(), then moves one end of
 * the current token's span to the new current character: the end
 * location when the flag is true, the start location when it is false.
 *
 * @param flag true to reset the end location, false to reset the start
 */
public void SkipChar (bool flag) // throws IOException
{
    NextChar();

    if (!flag)
    {
        ResetStartLocation();
        return;
    }
    ResetEndLocation();
}
/* ======================================================================= */
/**
* Set the type of this character to be the one specified, non-exclusively.
*/
public static void SetType (int c, int type)
{
SetType(c,(byte)(1<