// FILE. . . . . /home/hak/ilt/src/ilog/rif/Tokenizer.java
// EDIT BY . . . Hassan Ait-Kaci
// ON MACHINE. . 4j4zn71
// STARTED ON. . Mon Apr 07 14:05:49 2008

package ilog.rif.bld;

/**
 * @version     Last modified on Fri May 16 14:51:58 2008 by hak
 * @author      <a href="mailto:hak@ilog.com">Hassan A&iuml;t-Kaci</a>
 * @copyright   &copy; 2006 <a href="http://www.ilog.com/">ILOG, Inc.</a>
 *
 * This is a skeleton tokenizer for building a java parser from the Jacc
 * grammar specified in <a href="BLD.html"><tt>BLD.grm</tt></a>. It is
 * minimal in that it relies only on enough information to make a basic
 * working lexer. The code below is rather self-explanatory.
 *
 * <p>
 *
 * The tokens are:
 *
 * <p>
 *
 * <font color="green">
 * <pre>
 * EQUAL     '='
 * MEMBER    '#'
 * SUBCLASS  '##'
 * IF        ':-'
 * ARROW     '->' 
 * LEXSPACE  '^^'
 * OPENPAR   '('
 * CLOSEPAR  ')'
 * OPENBRA   '['
 * CLOSEBRA  ']' 
 * AND       'And'
 * OR        'Or'
 * EXISTS    'Exists'
 * FORALL    'Forall'
 * GROUP     'Group'
 * EXTERNAL  'External'
 * VARIABLE  any identifier starting with a '?'
 * IDENTIFIER   any maximal length of non-special chars
 * STRING    any double-quoted string
 * </pre>
 * </font>
 *
 * Note that a <font color="green"><tt>SymSpace</tt></font> is just
 * parsed as an <font color="green"><tt>IDENTIFIER:IDENTIFIER</tt></font> and
 * <i>not</i> as an actual <a
 * href="http://www.ietf.org/rfc/rfc3987.txt">IRI</a>; <i>i.e.</i>, the
 * full IRI syntax is not checked, nor is the colon (<font
 * color="green"><tt>':'</tt></font>) character given any special
 * meaning.
 *
 */

import java.io.Reader;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.IOException;

import ilog.language.io.StreamTokenizer;
import ilog.language.syntax.*;

/**
 * Lexer that wraps an <tt>ilog.language.io.StreamTokenizer</tt> and turns
 * its raw tokens into located <tt>ParseNode</tt>s built through the
 * generated <tt>Parser</tt> (<tt>symbolToken</tt>, <tt>literalToken</tt>
 * and <tt>eoi</tt>).
 *
 * <p>
 *
 * A reader must be installed with {@link #setReader(Reader)} or
 * {@link #setFile(String)} before {@link #nextToken()} is called: the
 * underlying stream tokenizer is only created there.
 */
public class Tokenizer implements FileTokenizer
{
  /** Line number reported by the stream tokenizer after the last token read. */
  int lineno;
  /** Name recorded by {@link #setFileName(String)}; stamped on every token. */
  String file;
  /** The reader most recently given to {@link #setReader(Reader)}. */
  Reader reader;
  /** The stream tokenizer built over <tt>reader</tt>. */
  StreamTokenizer st;
  /** Not read or written by this class; kept because it is package-visible. */
  boolean prompt;

  /**
   * Creates a tokenizer with no input; a reader must be set before use.
   */
  public Tokenizer ()
    {
    }

  /**
   * Creates a tokenizer reading from the named file.
   *
   * @param file the name of the file to read
   * @throws IOException if the file cannot be opened for reading
   */
  public Tokenizer (String file) throws IOException
    {
      setFile(file);
    }

  /**
   * Records the file name, then opens the file through a buffered reader
   * and installs that reader as the input.
   *
   * @param file the name of the file to read
   * @throws IOException if the file cannot be opened for reading
   */
  public void setFile (String file) throws IOException
    {
      setFileName(file);
      setReader(new BufferedReader(new FileReader(file)));
    }

  /**
   * @return the line number recorded by the last call to {@link #nextToken()}
   */
  public final int lineNumber()
    {
      return lineno;
    }

  /**
   * Installs the given reader as the input and builds a fresh stream
   * tokenizer over it, configured for this grammar: number parsing is
   * off, the punctuation characters are ordinary (single-character)
   * tokens, and the double quote delimits strings.
   *
   * @param rd the reader to tokenize
   */
  public final void setReader (Reader rd)
    {
      reader = rd;
      st = new StreamTokenizer(reader);
      st.parseNumbers(false);
      st.ordinaryChars(":->=#^()[]");
      st.quoteChar('"');
      // the following are considered letters since they may appear in
      // any of the "IDENTIFIER" parts of an IRI
      st.wordChars("?~`'!@$%&+*|_;<>;,.|/\\");
    }

  /**
   * @return the reader most recently installed, or <tt>null</tt> if none was
   */
  public final Reader getReader ()
    {
      return reader;
    }

  /**
   * @return the file name most recently recorded, or <tt>null</tt> if none was
   */
  public final String fileName ()
    {
      return file;
    }

  /**
   * Records the file name that is stamped on the tokens produced; this
   * does not open or change the input.
   *
   * @param file the file name to record
   */
  public final void setFileName (String file)
    {
      this.file = file;
    }

  /**
   * Stamps the node with the stream tokenizer's current token start and
   * end, and with the recorded file name.
   *
   * @param node the node to locate
   * @return the located node
   */
  final ParseNode locate (ParseNode node)
    {
      ParseNode spanned = (ParseNode)node.setStart(st.tokenStart())
                                         .setEnd(st.tokenEnd());
      return spanned.setFile(file);
    }

  /**
   * @return a located <tt>STRING</tt> token whose value is the current
   * string value wrapped back in double quotes
   */
  final ParseNode string ()
    {
      return locate(Parser.symbolToken("STRING","\""+st.sval+"\""));
    }

  /**
   * @param symbol the variable's text, including its one-character prefix
   * @return a located <tt>VARIABLE</tt> token whose value is
   * <tt>symbol</tt> without its first character
   */
  final ParseNode variable (String symbol)
    {
      return locate(Parser.symbolToken("VARIABLE",symbol.substring(1)));
    }

  /**
   * @return a located <tt>IDENTIFIER</tt> token whose value is the
   * current string value
   */
  final ParseNode identifier ()
    {
      return locate(Parser.symbolToken("IDENTIFIER",st.sval));
    }

  /**
   * @param s the name handed to <tt>Parser.literalToken</tt>
   * @return the located literal token for <tt>s</tt>
   */
  final ParseNode literal (String s)
    {
      return locate(Parser.literalToken(s));
    }

  /**
   * Builds the token for a punctuation character that may be the first
   * half of a two-character operator. When the lookahead character
   * completes the operator it is consumed <i>before</i> the token is
   * built, so that the token is located after the skip.
   *
   * @param nextChar the lookahead character
   * @param second   the character that completes the two-character operator
   * @param single   the literal name to use when the operator is not completed
   * @param pair     the literal name of the two-character operator
   * @return the located literal token
   * @throws IOException if consuming the lookahead character fails
   */
  private ParseNode oneOrTwo (int nextChar, char second, String single, String pair)
    throws IOException
    {
      if (nextChar != second)
        return literal(single);

      st.skipChar(true);
      return literal(pair);
    }

  /**
   * Reads the next token from the input, skipping any
   * <tt>StreamTokenizer.TT_SPECIAL</tt> tokens, and records the current
   * line number.
   *
   * @return the located token; the end-of-input token at end of file
   * @throws IOException if reading from the underlying reader fails
   */
  public final ParseNode nextToken () throws IOException
    {
      int tk;
      int nextChar;

      // Skip special tokens with a loop rather than by recursion, so a
      // long run of them cannot grow the call stack.
      do
        {
          tk = st.nextToken();
          nextChar = st.peek();
          lineno = st.lineno();
        }
      while (tk == StreamTokenizer.TT_SPECIAL);

      switch (tk)
        {
        case StreamTokenizer.TT_EOF:
          return locate(Parser.eoi());
        case '(':
          return literal("OPENPAR");
        case ')':
          return literal("CLOSEPAR");
        case '[':
          return literal("OPENBRA");
        case ']':
          return literal("CLOSEBRA");
        case '=':
          return literal("EQUAL");
        case '#':
          // '#' alone is MEMBER; '##' is SUBCLASS
          return oneOrTwo(nextChar,'#',"MEMBER","SUBCLASS");
        case '^':
          return oneOrTwo(nextChar,'^',"^","LEXSPACE");
        case '-':
          return oneOrTwo(nextChar,'>',"-","ARROW");
        case ':':
          return oneOrTwo(nextChar,'-',"COLON","IF");
        case '"':
          return string();
        case StreamTokenizer.TT_WORD:
          if (Lexicon.isVariable(st.sval))
            return variable(st.sval);
          if (Lexicon.isReserved(st.sval))
            // Fixed locale: the default one may map letters unexpectedly
            // (e.g., Turkish turns 'i' into a dotted capital I), which
            // would corrupt the literal's name.
            return literal(st.sval.toUpperCase(java.util.Locale.ENGLISH));
          return identifier();
        default:
          // any other character is a literal named by the character itself
          return literal(String.valueOf((char)tk));
        }
    }
}

