XmlAnnotationTokenizer.java

// FILE. . . . . /home/hak/hlt/src/hlt/language/syntax/XmlAnnotationTokenizer.java
// EDIT BY . . . Hassan Ait-Kaci
// ON MACHINE. . 4j4zn71
// STARTED ON. . Fri Mar 16 18:09:23 2007

package hlt.language.syntax.xml;



This is a tokenizer for the XML annotation parser from the Jacc grammar in XmlAnnotation.grm. It is rather self-explanatory: simply read the comments and the code below.

Copyright:  © by the author
Author:  Hassan Aït-Kaci
Version:  Last modified on Wed Jun 20 13:51:44 2012 by hak



import java.io.Reader;
import java.io.StringReader;
import java.io.IOException;

import hlt.language.io.IO;
import hlt.language.io.StreamTokenizer;
import hlt.language.syntax.*;

public class XmlAnnotationTokenizer implements Tokenizer
{
  

A constant label used for designating the keyword for the XML namespace prefix used by the annotation language.

WARNING: Changing this value will change the syntax of the annotation language, which will then take the new given string value as the new corresponding keyword. As a result, old annotations using the previous keyword will be unprocessable by any XML annotation parser generated from a Jacc grammar for the XML annotation notation using this tokenizer.


  public static final String nsPrefixKey = "nsprefix";
  

A constant label used for designating the keyword for the XML local name used by the annotation language.

WARNING: Changing this value will change the syntax of the annotation language, which will then take the new given string value as the new corresponding keyword. As a result, old annotations using the previous keyword will be unprocessable by any XML annotation parser generated from a Jacc grammar for the XML annotation notation using this tokenizer.


  public static final String localNameKey = "localname";
  

A constant label used for designating the keyword for the XML attributes name used by the annotation language.

WARNING: Changing this value will change the syntax of the annotation language, which will then take the new given string value as the new corresponding keyword. As a result, old annotations using the previous keyword will be unprocessable by any XML annotation parser generated from a Jacc grammar for the XML annotation notation using this tokenizer.


  public static final String attributesKey = "attributes";
  

A constant label used for designating the keyword for the XML children used by the annotation language.

WARNING: Changing this value will change the syntax of the annotation language, which will then take the new given string value as the new corresponding keyword. As a result, old annotations using the previous keyword will be unprocessable by any XML annotation parser generated from a Jacc grammar for the XML annotation notation using this tokenizer.


  public static final String childrenKey = "children";

  //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\

  

An int to store the line number. This is not actually used for now, but will be needed eventually. (See comment for this class' locate method.)


  int lineno;
  

The Reader to read from.


  Reader reader;
  

The StreamTokenizer reading off the reader.


  StreamTokenizer st;

  

Constructs an XmlAnnotationTokenizer with the specified annotation string.


  public XmlAnnotationTokenizer (String annotation)
    {
      setInput(annotation);
    }

  

Sets the input reader to read from the specified annotation string.


  public final void setInput (String annotation)
    {
      setReader(new StringReader(annotation));
    }

  

Sets up the reader and the stream tokenizer, and configures the stream tokenizer as follow:
  • it considers all the following characters ordinary:
    ? $ + - . ( ) { } [ ] < > : ; , = ! & | * % /
  • it recognizes quoted strings using either single or double quotes;
  • it considers the characters: _@ as part of word identifiers (i.e., as letters);
  • it recognizes unsigned whole numbers (but no floating points).


  public final void setReader (Reader rd)
    {
      reader = rd;
      st = new StreamTokenizer(reader);
      // parse unsigned whole numbers only:
      st.parseDigitsOnly();
      // make the specified characters ordinary:
      st.ordinaryChars("?$+-.(){}[]<>:;,=!&|*%/");
      // make the single quote a string quote character:
      st.quoteChar('\'');
      // make the double quote a string quote character:
      st.quoteChar('"');
      // make the specified characters letter word characters:
      st.wordChars("_@");
    }

  public final Reader getReader ()
    {
      return reader;
    }

  

This method is required for this class. It returns lineno. It is not actually used for now, but will be needed eventually. (See comment for this class' locate method.)


  public final int lineNumber()
    {
      return lineno;
    }

  

This is the identity function: for now, we can't really locate any node without the contextual information coming from the hlt.language.syntax.Grammar class. Todo for later.


  final ParseNode locate (ParseNode node)
    {
      return node;
    }

  final ParseNode position ()
    {
      return locate(XmlAnnotationParser.numberToken("POSITION",(int)st.nval));
    }

  final ParseNode symbol (String symbol)
    {
      return locate(XmlAnnotationParser.symbolToken("SYMBOL",symbol));
    }

  final ParseNode keyValSep ()
    {
      return locate(XmlAnnotationParser.literalToken("KEYVALSEP"));
    }

  final ParseNode listSep ()
    {
      return locate(XmlAnnotationParser.literalToken("LISTSEP"));
    }

  final ParseNode dot ()
    {
      return locate(XmlAnnotationParser.literalToken("DOT"));
    }

  final ParseNode star ()
    {
      return locate(XmlAnnotationParser.literalToken("STAR"));
    }

  final ParseNode slash ()
    {
      return locate(XmlAnnotationParser.literalToken("SLASH"));
    }

  final ParseNode value ()
    {
      return locate(XmlAnnotationParser.literalToken("VALUE"));
    }

  final ParseNode text ()
    {
      return locate(XmlAnnotationParser.literalToken("TEXT"));
    }

  boolean hasNsPrefix = false;
  boolean hasLocalName = false;
  boolean hasAttributes = false;
  boolean hasChildren = false;

  final ParseNode keywordOrSymbol (String symbol)
    {
      if (!hasNsPrefix && isPrefixOf(symbol,nsPrefixKey))
	{
	  hasNsPrefix = true;
	  return literal("NSPREFIX");
	}

      if (!hasLocalName && isPrefixOf(symbol,localNameKey))
	{
	  hasLocalName = true;
	  return literal("LOCALNAME");
	}

      if (!hasAttributes && isPrefixOf(symbol,attributesKey))
	{
	  hasAttributes = true;
	  return literal("ATTRIBUTES");
	}

      if (!hasChildren && isPrefixOf(symbol,childrenKey))
	{
	  hasChildren = true;
	  return literal("CHILDREN");
	}

      return symbol(symbol);
    }

  final boolean isPrefixOf (String prefix, String word)
    {
      return prefix == word
	  || prefix.length() != 0
	     && word.toLowerCase().startsWith(prefix.toLowerCase());
    }
  
  final ParseNode literal (int c)
    {
      return locate(XmlAnnotationParser.literalToken(String.valueOf((char)c)));
    }

  final ParseNode literal (String s)
    {
      return locate(XmlAnnotationParser.literalToken(s));
    }

  public final ParseNode nextToken () throws IOException
    {
      ParseNode t = null;
      int tk = st.nextToken();
      int nextChar = st.peek();
      lineno = st.lineno();

      switch (tk)
        {
        case StreamTokenizer.TT_SPECIAL:
          return nextToken();
        case StreamTokenizer.TT_EOF:
          t = locate(XmlAnnotationParser.eoi());
          break;
        case StreamTokenizer.TT_NUMBER:
          t = position();
          break;
        case StreamTokenizer.TT_WORD:
	  t = keywordOrSymbol(st.sval);
	  break;
        case IO.SQT: case IO.DQT:
	  t = symbol(st.sval);
          break;
	case '$':
	  if (st.nextToken() == StreamTokenizer.TT_WORD)
	    if (st.sval == "value" || st.sval == "VALUE")
	      t = value();
	    else
	      if (st.sval == "text" || st.sval == "TEXT")
		t = text();
	  else
	    {
	      st.pushBack();
	      t = literal(tk);
	    }
	  break;
        case ':':
	  t = keyValSep();
	  break;	  
        case '=':
	  if (st.peek() == '>')
	    {
	      t = keyValSep();
	      st.nextToken();
	    }
	  else
	    t = keyValSep();
	  break;	  
        case '-':
	  if (st.peek() == '>')
	    {
	      t = keyValSep();
	      st.nextToken();
	      break;
	    }
        case '.':
	  t = dot();
	  break;
        case '*':
	  t = star();
	  break;
        case '/':
	  t = slash();
	  break;
        case ',': case ';':
	  t = listSep();
	  break;
        default:
	  t = literal(tk);
        }
//       System.out.println("Reading token: "+t);
      return t;
    }
}



This file was generated on Tue Apr 30 07:44:50 CEST 2019 from file XmlAnnotationTokenizer.java
by the hlt.language.tools.Hilite Java tool written by Hassan Aït-Kaci