/*
 * Decompiled with CFR 0.152.
 */
package weka.core.tokenizers;

import java.util.Enumeration;
import java.util.LinkedList;
import java.util.Vector;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.tokenizers.CharacterDelimitedTokenizer;

/**
 * Tokenizer that splits a string on the configured delimiter characters and
 * then enumerates word n-grams over the resulting tokens.
 *
 * <p>N-grams are produced from the largest size down to the smallest: all
 * n-grams of the starting size (left to right), then all of size - 1, and so
 * on until the minimum size has been emitted. The words inside an n-gram are
 * joined with a single space.
 *
 * <p>Options handled by this class (in addition to those of the superclass):
 * <ul>
 *   <li>{@code -max <int>} - maximum n-gram size (default 3)</li>
 *   <li>{@code -min <int>} - minimum n-gram size (default 1)</li>
 * </ul>
 *
 * <p>No check is made that min &lt;= max; if the minimum exceeds the maximum
 * (or the number of tokens in the input), the enumeration is empty.
 */
public class NGramTokenizer
extends CharacterDelimitedTokenizer {
    private static final long serialVersionUID = -2181896254171647219L;

    /** Largest n-gram size to generate. */
    protected int m_NMax = 3;

    /** Smallest n-gram size to generate. */
    protected int m_NMin = 1;

    /** Size of the n-grams currently being emitted; counts down towards {@link #m_NMin}. */
    protected int m_N;

    /** Number of tokens in {@link #m_SplitString}. */
    protected int m_MaxPosition;

    /** Index of the first token of the next n-gram to emit. */
    protected int m_CurrentPosition;

    /** Non-empty tokens of the string passed to the last {@link #tokenize(String)} call. */
    protected String[] m_SplitString;

    /**
     * Returns a short description of this tokenizer.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Splits a string into an n-gram with min and max grams.";
    }

    /**
     * Lists the superclass options followed by the {@code -max} and
     * {@code -min} options of this tokenizer.
     *
     * @return an enumeration of {@link Option} objects
     */
    public Enumeration listOptions() {
        Vector<Option> result = new Vector<Option>();
        Enumeration enm = super.listOptions();
        while (enm.hasMoreElements()) {
            result.addElement((Option)enm.nextElement());
        }
        result.addElement(new Option("\tThe max size of the Ngram (default = 3).", "max", 1, "-max <int>"));
        result.addElement(new Option("\tThe min size of the Ngram (default = 1).", "min", 1, "-min <int>"));
        return result.elements();
    }

    /**
     * Returns the current configuration as an option array: the superclass
     * options followed by {@code -max <n>} and {@code -min <n>}.
     *
     * @return the option strings
     */
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        for (String option : super.getOptions()) {
            result.add(option);
        }
        result.add("-max");
        result.add("" + this.getNGramMaxSize());
        result.add("-min");
        result.add("" + this.getNGramMinSize());
        return result.toArray(new String[result.size()]);
    }

    /**
     * Parses the given options. The superclass consumes its own options
     * first; a missing {@code -max} resets the maximum size to 3 and a
     * missing {@code -min} resets the minimum size to 1.
     *
     * @param options the option array to parse
     * @throws Exception if the superclass rejects an option or a size value
     *         is not a valid integer
     */
    public void setOptions(String[] options) throws Exception {
        super.setOptions(options);

        String value = Utils.getOption("max", options);
        this.setNGramMaxSize(value.length() != 0 ? Integer.parseInt(value) : 3);

        value = Utils.getOption("min", options);
        this.setNGramMinSize(value.length() != 0 ? Integer.parseInt(value) : 1);
    }

    /**
     * Returns the maximum n-gram size.
     *
     * @return the maximum size
     */
    public int getNGramMaxSize() {
        return this.m_NMax;
    }

    /**
     * Sets the maximum n-gram size. Values below 1 are raised to 1.
     *
     * @param value the new maximum size
     */
    public void setNGramMaxSize(int value) {
        this.m_NMax = Math.max(1, value);
    }

    /**
     * Returns the GUI tip text for the maximum size property.
     *
     * @return the tip text
     */
    public String NGramMaxSizeTipText() {
        return "The max N of the NGram.";
    }

    /**
     * Sets the minimum n-gram size. Values below 1 are raised to 1.
     *
     * @param value the new minimum size
     */
    public void setNGramMinSize(int value) {
        this.m_NMin = Math.max(1, value);
    }

    /**
     * Returns the minimum n-gram size.
     *
     * @return the minimum size
     */
    public int getNGramMinSize() {
        return this.m_NMin;
    }

    /**
     * Returns the GUI tip text for the minimum size property.
     *
     * @return the tip text
     */
    public String NGramMinSizeTipText() {
        return "The min N of the NGram.";
    }

    /**
     * Tests whether another n-gram is available.
     *
     * @return true if an n-gram of the current size fits at the current
     *         position and the current size has not dropped below the minimum
     */
    public boolean hasMoreElements() {
        return this.m_CurrentPosition < this.m_MaxPosition
            && this.m_N - 1 + this.m_CurrentPosition < this.m_MaxPosition
            && this.m_N >= this.m_NMin;
    }

    /**
     * Returns the next n-gram and advances the enumeration. Once the last
     * n-gram of the current size has been returned, the position is reset to
     * the first token and the size is decreased by one.
     *
     * <p>Callers are expected to check {@link #hasMoreElements()} first; this
     * method does not verify that an element is available.
     *
     * @return the next n-gram as a {@link String}, words separated by a single space
     */
    public Object nextElement() {
        StringBuilder ngram = new StringBuilder();
        for (int i = 0; i < this.m_N && i + this.m_CurrentPosition < this.m_MaxPosition; ++i) {
            ngram.append(' ').append(this.m_SplitString[this.m_CurrentPosition + i]);
        }
        ++this.m_CurrentPosition;
        // The window of the current size has just slid past the last token:
        // restart from the first token with the next smaller size.
        if (this.m_CurrentPosition + this.m_N - 1 == this.m_MaxPosition) {
            this.m_CurrentPosition = 0;
            --this.m_N;
        }
        return ngram.toString().trim();
    }

    /**
     * Replaces {@link #m_SplitString} with a copy that contains only its
     * non-empty elements, preserving their order.
     */
    protected void filterOutEmptyStrings() {
        LinkedList<String> clean = new LinkedList<String>();
        for (String token : this.m_SplitString) {
            if (token.length() != 0) {
                clean.add(token);
            }
        }
        this.m_SplitString = clean.toArray(new String[clean.size()]);
    }

    /**
     * Prepares the enumeration of n-grams for the given string. The string
     * is split with a regular-expression character class built from
     * {@code getDelimiters()}, empty tokens are discarded, and the
     * enumeration is reset to the first token.
     *
     * <p>The starting n-gram size is the maximum size, capped at the number
     * of tokens. Without the cap an input with fewer tokens than the maximum
     * size would yield no n-grams at all, because no window of the maximum
     * size ever fits and the size would therefore never be decreased.
     *
     * @param s the string to tokenize
     */
    public void tokenize(String s) {
        this.m_N = this.m_NMax;
        this.m_SplitString = s.split("[" + this.getDelimiters() + "]");
        this.filterOutEmptyStrings();
        this.m_CurrentPosition = 0;
        this.m_MaxPosition = this.m_SplitString.length;
        if (this.m_MaxPosition < this.m_N) {
            this.m_N = this.m_MaxPosition;
        }
    }

    /**
     * Returns the revision string.
     *
     * @return the revision extracted from the embedded revision keyword
     */
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 5953 $");
    }

    /**
     * Runs the tokenizer with the given command-line arguments.
     *
     * @param args the command-line options and strings to tokenize
     */
    public static void main(String[] args) {
        NGramTokenizer.runTokenizer(new NGramTokenizer(), args);
    }
}

