package de.fzi.wim.trie.extractor.character;

import de.fzi.wim.trie.extractor.*;
import java.util.Vector;

/**
 * A token representing a single textual character.
 *
 * Two character tokens are considered equal when they hold the same
 * character; see {@link #equals(Object)} and {@link #hashCode()}.
 *
 * NOTE(review): this class is Serializable but declares no explicit
 * serialVersionUID, so the default one is derived from the class shape.
 * Confirm whether previously serialized instances must stay readable
 * before changing the set of members.
 *
 * @author <a href=zach@fzi.de>Valentin Zacharias</a>
 */
public class CharacterToken extends Token implements java.io.Serializable{

    /** The character of this token; lower-cased on construction when {@link #IGNORE_CASE} is set. */
    public char c;
    private boolean whitespace = false; //everything that separates two words
    private boolean unknown = false;

    /** Characters that are treated as word separators. */
    private static final char[] WHITESPACE_CHARACTERS = {' ','\t','\n','(',')',',',';','[',']','.'};
    /** Character that marks a token as "unknown". */
    private static final char UNKNOWN_CHARACTER = '_';

    /**
     * If case should be ignored. Default is true. To change it, set the
     * system property de.fzi.wim.obse.IGNORE_CASE to an integer before this
     * class is loaded: 0 means false, any other integer means true.
     */
    public static boolean IGNORE_CASE = true;

    // Reads the de.fzi.wim.obse.IGNORE_CASE system property once at class
    // load time. Must stay below the IGNORE_CASE declaration so that the
    // default assigned there is not applied after this block has run.
    static {
        String configured = System.getProperty("de.fzi.wim.obse.IGNORE_CASE");
        if (configured != null) {
            try {
                IGNORE_CASE = Integer.parseInt(configured) != 0;
            }
            catch (NumberFormatException ignored) {
                // The property is not an integer: deliberately keep the
                // default rather than fail class initialization.
            }
        }
    }

    /**
     * Creates a token for the given character at the given position.
     * Begin and end passed to the superclass are both set to index.
     *
     * @param c the character; stored lower-cased if {@link #IGNORE_CASE} is true
     * @param index value used as both begin and end of the token
     */
    public CharacterToken(char c, int index) {
        super(index,index);
        if (IGNORE_CASE) {
            this.c = Character.toLowerCase(c);
        }
        else {
            this.c = c;
        }
        // The unknown marker is checked against the character as passed in.
        if (c == UNKNOWN_CHARACTER) {
            unknown = true;
        }
        testWhitespace();
    }

    /**
     * Takes the String and creates a character token for each character (and
     * places it in the tokens vector). All tokens are created to have
     * length 0 (begin = end = index of the character in the text).
     *
     * @param tokens the vector the new tokens are appended to
     * @param text the text to split into character tokens
     */
    public static void createCharacterTokens(Vector tokens, String text) {
        char[] content = text.toCharArray();
        for (int i=0;i<content.length;i++) {
            tokens.addElement(new CharacterToken(content[i],i));
        }
    }

    /** @return true if the character is one of the word separating characters */
    public boolean isWhitespace() { return whitespace; }

    /** @return true if the token was created from the unknown marker character '_' */
    public boolean isUnknown() { return unknown; }

    /** Sets the whitespace flag if the stored character is a separator. */
    private void testWhitespace() {
        for (int i=0;i<WHITESPACE_CHARACTERS.length;i++) {
            if (c == WHITESPACE_CHARACTERS[i]) {
                whitespace = true;
                return;
            }
        }
    }

    /**
     * Two character tokens are equal if they hold the same character.
     * Only the field c is compared.
     *
     * @param o the object to compare with
     * @return true if o is a CharacterToken with the same character
     */
    public boolean equals(Object o) {
        return (o instanceof CharacterToken) && c == ((CharacterToken) o).c;
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: tokens holding the
     * same character yield the same hash code, so equal tokens behave
     * correctly in hash based collections.
     *
     * @return the numeric value of the character
     */
    public int hashCode() {
        return c;
    }

    /** @return a readable description containing the character and the token's begin and end */
    public String toString() {
        return "  Character Token: '" + c +"' from "+getBegin()+" to "+getEnd();
    }

}
