// FILE. . . . . d:/hak/hlt/src/hlt/fot/syntax/sources/FOTTokenizer.java
// EDIT BY . . . Hassan Ait-Kaci
// ON MACHINE. . Hak-Laptop
// STARTED ON. . Sat Jul 14 10:27:53 2018

// Last modified on Sat Jul 14 10:28:04 2018 by hak

/**
 * This defines a tokenizer for classical Prolog terms. It
 * differentiates logical variables (capitalized or starting with an
 * underscore character "<tt>_</tt>"), symbols, and punctuation. It
 * tokenizes a number as a string (its decimal notation). Although the
 * notation <tt>[]</tt> is tokenized as an atomic symbol, Prolog list
 * notation consisting of bracketed bar-separated pairs (such as
 * <tt>[...|...]</tt>) or bracketed comma-separated sequences (such as
 * <tt>[..., ..., ...]</tt>) is not tokenized or parsed as in Prolog.
 *
 * As usual, the details are in the method <tt>nextToken()</tt>.
 *
 * @version     Last modified on Wed Jun 20 13:51:44 2012 by hak
 * @author      <a href="mailto:hak@acm.org">Hassan A&iuml;t-Kaci</a>
 * @copyright   &copy; <a href="http://www.hassan-ait-kaci.net/">by the author</a>
 */

import java.io.*;
import java.util.Date;
import hlt.language.syntax.*;
import hlt.language.util.Location;
import hlt.language.io.StreamTokenizer;

/**
 * Tokenizer producing <tt>ParseNode</tt> tokens from a character stream:
 * the literal <tt>exit</tt>, <tt>PRAGMA</tt>, <tt>VARIABLE</tt> and
 * <tt>FUNCTOR</tt> symbols, the punctuation literals <tt>( ) , . =</tt>,
 * and the end-of-input token. Numbers are returned as <tt>FUNCTOR</tt>
 * symbols spelled in decimal notation.
 */
public class FOTTokenizer implements Tokenizer
{
  /** Buffered reader wrapping the character source being tokenized. */
  BufferedReader reader;
  /** Low-level tokenizer reading from <tt>reader</tt>. */
  StreamTokenizer input;
  /** Source name recorded on every token; "stdin" unless built from a file. */
  String file = "stdin";

  /**
   * Creates an interactive tokenizer reading <tt>System.in</tt>; prints
   * the banner and the first prompt.
   */
  FOTTokenizer () throws IOException
    {
      setupReader(new InputStreamReader(System.in));
      interactive = true;
      banner();
      prompt();
    }

  /**
   * Creates a non-interactive tokenizer reading the given file; prints
   * the banner and the name of the file being parsed.
   *
   * @param file the file to tokenize
   * @throws IOException if the file cannot be opened
   */
  FOTTokenizer (File file) throws IOException
    {
      setupReader(new FileReader(file));
      this.file = file.toString();
      interactive = false;
      banner();
      System.out.println("*** Parsing file: "+file);
    }

  /**
   * Installs the reader and configures the underlying stream tokenizer.
   */
  private void setupReader (Reader rd)
    {
      setReader(rd);
      input = new StreamTokenizer(reader);
      input.ordinaryChars("+-./");
      input.wordChar('#'); // to identify pragma strings
      input.quoteChar('\'',0); // no escape allowed
    }

  /**
   * Returns the line number reported by the underlying stream tokenizer.
   */
  public final int lineNumber()
    {
      return input.getLineNumber();
    }

  /**
   * Wraps the given reader in a <tt>BufferedReader</tt> and keeps it as
   * this tokenizer's reader. Note that this alone does not rebuild
   * <tt>input</tt>.
   */
  public final void setReader (Reader rd)
    {
      reader = new BufferedReader(rd);
    }

  /**
   * Returns the buffered reader currently held by this tokenizer.
   */
  public final Reader getReader ()
    {
      return reader;
    }

  /** When true, <tt>prompt()</tt> prints the prompt string. */
  public static boolean interactive;
  /** Prompt string printed in interactive mode. */
  static String prompt = "FOT > ";

  /**
   * Prints the prompt (without a newline) when in interactive mode.
   */
  public static final void prompt ()
    {
      if (interactive)
        {
          System.out.print(prompt);
        }
    }

  /**
   * Replaces the prompt string.
   */
  static public final void setPrompt(String p)
    {
      prompt = p;
    }

  /** Banner text; the date shown is the time this class was initialized. */
  static String banner = "*** This is a Canonical First-Order Term Parser\n"+
                         "*** Version of "+ (new Date())+
                         "\n*** Type a canonical fot followed by '.<CR>' or type 'exit.<CR>' to quit.";

  /**
   * Prints the banner on standard output.
   */
  public static final void banner ()
    {
      System.out.println(banner);
    }

  /**
   * Returns true iff <tt>c</tt> is an ordinary character of the
   * underlying tokenizer that is neither whitespace nor one of
   * <tt>( ) , .</tt> - i.e., a character that may extend a symbol made
   * of "other" characters.
   */
  final boolean isOtherChar (int c)
    {
      return input.isOrdinaryChar(c)
        && !(input.isWhitespaceChar(c) || c == '(' || c == ')' || c == ',' || c == '.');
    }

  /**
   * Stamps the node with the start and end of the latest token and with
   * the source name, and returns it.
   */
  final ParseNode locate (ParseNode node)
    {
      return ((ParseNode)node.setStart(input.tokenStart()).setEnd(input.tokenEnd()))
        .setFile(file);
    }

  /**
   * Stamps the node with the given start, the end of the latest token,
   * and the source name, and returns it.
   */
  final ParseNode locate (ParseNode node, Location start)
    {
      return ((ParseNode)node.setStart(start).setEnd(input.tokenEnd()))
        .setFile(file);
    }

  /**
   * Returns true iff the symbol is non-empty and starts with '#'.
   */
  private boolean isPragma (String symbol)
  {
    // An empty string (e.g., from an empty quoted atom) is not a pragma.
    return symbol.length() > 0 && symbol.charAt(0) == '#';
  }

  /**
   * Returns true iff the symbol is non-empty and starts with an
   * upper-case letter or an underscore.
   */
  private boolean isVariable (String symbol)
  {
    if (symbol.length() == 0)
      return false;

    char start = symbol.charAt(0);
    return Character.isUpperCase(start) || start == '_';
  }

  /**
   * Builds the (not yet located) token for a word or quoted string:
   * the literal <tt>exit</tt> for "exit", "quit" or "halt"; a
   * <tt>PRAGMA</tt> (leading '#' stripped); a <tt>VARIABLE</tt>; or
   * else a <tt>FUNCTOR</tt>.
   */
  private ParseNode wordToken (String word)
    {
      // Compare contents, not references: the string need not be interned.
      if ("exit".equals(word) || "quit".equals(word) || "halt".equals(word))
        return GenericParser.literalToken("exit");

      if (isPragma(word))
        return GenericParser.symbolToken("PRAGMA",word.substring(1));

      if (isVariable(word))
        return GenericParser.symbolToken("VARIABLE",word);

      return GenericParser.symbolToken("FUNCTOR",word);
    }

  /**
   * Reads and returns the next token, stamped with its location.
   * On end of input the reader is closed and the end-of-input token
   * is returned.
   *
   * @return the next token
   * @throws IOException if reading from the underlying stream fails
   */
  public ParseNode nextToken () throws IOException
    {
      ParseNode t = null;

      switch (input.nextToken())
        {
        case StreamTokenizer.TT_EOF:
          reader.close();
          locate(t = GenericParser.E_O_I);
          break;
        case '\'': case '"':
        case StreamTokenizer.TT_WORD:
          locate(t = wordToken(input.sval));
          break;
        case StreamTokenizer.TT_NUMBER:
          if (input.isInteger)
            // Go through long rather than int: an int cast would silently
            // clamp integers beyond the int range; in-range values print
            // exactly as before.
            t = GenericParser.symbolToken("FUNCTOR",Long.toString((long)input.nval));
          else
            t = GenericParser.symbolToken("FUNCTOR",Double.toString(input.nval));
          locate(t);
          break;
        case '(': case ')': case ',': case '.': case '=':
          locate(t = GenericParser.literalToken(String.valueOf((char)input.ttype)));
          break;
        default: // read the longest possible token and return it as a symbol
          Location start = input.tokenStart();
          StringBuilder functor = new StringBuilder().append((char)input.ttype);
          // NOTE(review): presumably this makes whitespace visible to peek()
          // so that the run of symbol characters stops at a blank - confirm.
          input.spaceIsSignificant(true);
          try
            {
              while (isOtherChar(input.peek()))
                functor.append((char)input.nextToken());
            }
          finally
            {
              // Restore the setting even if reading fails midway.
              input.spaceIsSignificant(false);
            }
          t = GenericParser.symbolToken("FUNCTOR",functor.toString());
          locate(t,start);
          break;
        }
      // System.out.println(">>> Read token: "+t);
      return t;
    }
}
