|
FOTTokenizer.java
|
// FILE. . . . . d:/hak/hlt/src/hlt/fot/syntax/sources/FOTTokenizer.java // EDIT BY . . . Hassan Ait-Kaci // ON MACHINE. . Hak-Laptop // STARTED ON. . Sat Jul 14 10:27:53 2018 // Last modified on Sat Jul 14 10:28:04 2018 by hak
This defines a tokenizer for the classical Prolog terms. It
differentiates logical variables (capitalized or starting with an
underscore character '_'), symbols, and punctuation. It
tokenizes a number as a string (its decimal notation). Although the
notation [] is tokenized as an atomic symbol, Prolog list
notation consisting of bracketed bar-separated pairs (such as
[...|...]) or bracketed comma-separated sequences (such as
[..., ..., ...]) is not tokenized or parsed as in Prolog.
As usual, the details are in the method nextToken().
|
import java.io.*; import java.util.Date; import hlt.language.syntax.*; import hlt.language.util.Location; import hlt.language.io.StreamTokenizer; public class FOTTokenizer implements Tokenizer { BufferedReader reader; StreamTokenizer input; String file = "stdin"; FOTTokenizer () throws IOException { setupReader(new InputStreamReader(System.in)); interactive = true; banner(); prompt(); } FOTTokenizer (File file) throws IOException { setupReader(new FileReader(file)); this.file = file.toString(); interactive = false; banner(); System.out.println("*** Parsing file: "+file); } private void setupReader (Reader rd) { setReader(rd); input = new StreamTokenizer(reader); input.ordinaryChars("+-./"); input.wordChar('#'); // to identify pragma strings input.quoteChar('\'',0); // no escape allowed } public final int lineNumber() { return input.getLineNumber(); } public final void setReader (Reader rd) { reader = new BufferedReader(rd); } public final Reader getReader () { return reader; } public static boolean interactive; static String prompt = "FOT > "; public static final void prompt () { if (interactive) { System.out.print(prompt); } } static public final void setPrompt(String p) { prompt = p; } static String banner = "*** This is a Canonical First-Order Term Parser\n"+ "*** Version of "+ (new Date())+ "\n*** Type a canonical fot followed by '.<CR>' or type 'exit.<CR>' to quit."; public static final void banner () { System.out.println(banner); } final boolean isOtherChar (int c) { return input.isOrdinaryChar(c) && !(input.isWhitespaceChar(c) || c == '(' || c == ')' || c == ',' || c == '.'); } final ParseNode locate (ParseNode node) { return ((ParseNode)node.setStart(input.tokenStart()).setEnd(input.tokenEnd())) .setFile(file); } final ParseNode locate (ParseNode node, Location start) { return ((ParseNode)node.setStart(start).setEnd(input.tokenEnd())) .setFile(file); } private boolean isPragma (String symbol) { return symbol.charAt(0) == '#'; } private boolean isVariable 
(String symbol) { char start = symbol.charAt(0); if (Character.isUpperCase(start) || start == '_') return true; return false; } public ParseNode nextToken () throws IOException { ParseNode t = null; switch (input.nextToken()) { case StreamTokenizer.TT_EOF: reader.close(); locate(t = GenericParser.E_O_I); break; case '\'': case '"': case StreamTokenizer.TT_WORD: if (input.sval == "exit" || input.sval == "quit" || input.sval == "halt") t = GenericParser.literalToken("exit"); else if (isPragma(input.sval)) t = GenericParser.symbolToken("PRAGMA",input.sval.substring(1)); else if (isVariable(input.sval)) t = GenericParser.symbolToken("VARIABLE",input.sval); else t = GenericParser.symbolToken("FUNCTOR",input.sval); locate(t); break; case StreamTokenizer.TT_NUMBER: if (input.isInteger) // t = GenericParser.numberToken("NUMBER",(int)input.nval); t = GenericParser.symbolToken("FUNCTOR",Integer.toString((int)input.nval)); else // t = GenericParser.numberToken("NUMBER",input.nval); t = GenericParser.symbolToken("FUNCTOR",Double.toString(input.nval)); locate(t); break; case '(': case ')': case ',': case '.': case '=': locate(t = GenericParser.literalToken(String.valueOf((char)input.ttype))); break; default: // read the longest possible token and return it as a symbol Location start = input.tokenStart(); StringBuffer functor = new StringBuffer(String.valueOf((char)input.ttype)); input.spaceIsSignificant(true); while (isOtherChar(input.peek())) functor.append(String.valueOf((char)input.nextToken())); input.spaceIsSignificant(false); t = GenericParser.symbolToken("FUNCTOR",functor.toString()); locate(t,start); break; } // System.out.println(">>> Read token: "+t); return t; } }
This file was generated on Sun Jul 15 08:21:59 CEST 2018 from file FOTTokenizer.java
by the hlt.language.tools.Hilite Java tool written by Hassan Aït-Kaci