/*
 * Decompiled with CFR 0.152.
 */
package opennlp.grok.preprocess.sentdetect;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import opennlp.common.preprocess.SentenceDetector;
import opennlp.common.util.Pair;
import opennlp.common.xml.NLPDocument;
import opennlp.common.xml.XmlUtils;
import opennlp.grok.preprocess.sentdetect.DefaultEndOfSentenceScanner;
import opennlp.grok.preprocess.sentdetect.EndOfSentenceScanner;
import opennlp.grok.preprocess.sentdetect.SDContextGenerator;
import opennlp.grok.preprocess.sentdetect.SDEventStream;
import opennlp.maxent.ContextGenerator;
import opennlp.maxent.DataStream;
import opennlp.maxent.EventStream;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.maxent.IntegerPool;
import opennlp.maxent.MaxentModel;
import opennlp.maxent.PlainTextByLineDataStream;
import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
import org.jdom.Element;

/**
 * Sentence detector backed by a maximum entropy model.
 *
 * <p>Candidate end-of-sentence positions are obtained from an
 * {@link EndOfSentenceScanner}; for each candidate a context is built by a
 * {@link ContextGenerator} and evaluated by the {@link MaxentModel}. A candidate
 * is treated as a sentence break when the model's best outcome is {@code "T"}
 * and {@link #isAcceptableBreak(String, int, int)} agrees.
 */
public class SentenceDetectorME
implements SentenceDetector {
    /** Outcome label that marks a candidate position as a sentence break. */
    private static final String BREAK_OUTCOME = "T";

    /** Pool used to obtain {@link Integer} instances for sentence positions. */
    private static final IntegerPool INT_POOL = new IntegerPool(100);

    private final MaxentModel model;
    private final ContextGenerator cgen;
    private final EndOfSentenceScanner scanner;

    /** Index of the {@code "T"} outcome in the probability array returned by the model. */
    private final int _trueIndex;

    /**
     * Creates a detector using an {@link SDContextGenerator} and a
     * {@link DefaultEndOfSentenceScanner}.
     *
     * @param m the model used to evaluate candidate positions
     */
    public SentenceDetectorME(MaxentModel m) {
        this(m, new SDContextGenerator(), new DefaultEndOfSentenceScanner());
    }

    /**
     * Creates a detector using the given context generator and a
     * {@link DefaultEndOfSentenceScanner}.
     *
     * @param m  the model used to evaluate candidate positions
     * @param cg the generator that turns a candidate into model context
     */
    public SentenceDetectorME(MaxentModel m, ContextGenerator cg) {
        this(m, cg, new DefaultEndOfSentenceScanner());
    }

    /**
     * Creates a detector from explicitly supplied collaborators.
     *
     * @param m  the model used to evaluate candidate positions
     * @param cg the generator that turns a candidate into model context
     * @param s  the scanner that proposes candidate end-of-sentence positions
     */
    public SentenceDetectorME(MaxentModel m, ContextGenerator cg, EndOfSentenceScanner s) {
        this.model = m;
        this._trueIndex = this.model.getIndex(BREAK_OUTCOME);
        this.cgen = cg;
        this.scanner = s;
    }

    /**
     * Splits the sentence elements of {@code doc} at the positions the model
     * considers sentence breaks, replacing each affected sentence element with
     * the newly built {@code s} elements.
     *
     * @param doc the document whose sentence elements are examined and rewritten
     */
    public void process(NLPDocument doc) {
        Iterator sentIt = doc.sentenceIterator();
        while (sentIt.hasNext()) {
            Element sentEl = (Element)sentIt.next();
            List wordEls = doc.getWordElements(sentEl);

            List<Element> breakWords = new ArrayList<Element>();
            List<Integer> breakOffsets = new ArrayList<Integer>();
            this.collectBreaks(wordEls, breakWords, breakOffsets);

            int size = breakWords.size();
            if (size == 0) {
                // Nothing to split here; move on to the next sentence rather than
                // abandoning the remaining sentences of the document.
                continue;
            }

            List oldSentToks = doc.getTokenElements(sentEl);
            int numOfOldToks = oldSentToks.size();
            List<Element> sents = new ArrayList<Element>();
            List<Element> toks = new ArrayList<Element>();
            int tokenIndex = 0;

            for (int b = 0; b < size; ++b) {
                Element breaker = breakWords.get(b);
                Element breakerToken = breaker.getParent();
                boolean found = false;
                // Move old tokens into the current sentence until the token that
                // holds this break is reached.
                while (tokenIndex < numOfOldToks && !found) {
                    Element oldTok = (Element)oldSentToks.get(tokenIndex);
                    if (oldTok == breakerToken) {
                        found = true;
                        int pos = breakOffsets.get(b).intValue();
                        String word = breaker.getText();
                        // Text before the break stays in the word; the break
                        // character becomes a token of its own and closes the sentence.
                        breaker.setText(word.substring(0, pos));
                        Element enderToken = NLPDocument.createTOK(word.substring(pos, pos + 1));
                        toks.add(breakerToken.detach());
                        toks.add(enderToken);
                        sents.add(new Element("s").setChildren(toks));
                        toks = new ArrayList<Element>();
                        if (pos < word.length() - 1) {
                            // Anything after the break character opens the next sentence.
                            toks.add(NLPDocument.createTOK(word.substring(pos + 1)));
                        }
                    } else if (tokenIndex == numOfOldToks - 1) {
                        this.addLastTokenOfSentence(oldTok, toks);
                    } else {
                        toks.add(oldTok.detach());
                    }
                    ++tokenIndex;
                }
            }

            if (tokenIndex < numOfOldToks) {
                // Tokens remain after the final break (possibly just one): they
                // form the trailing sentence.
                while (tokenIndex < numOfOldToks - 1) {
                    toks.add(((Element)oldSentToks.get(tokenIndex)).detach());
                    ++tokenIndex;
                }
                this.addLastTokenOfSentence((Element)oldSentToks.get(numOfOldToks - 1), toks);
                sents.add(new Element("s").setChildren(toks));
            } else if (!toks.isEmpty()) {
                // All old tokens were consumed but some collected tokens (e.g. the
                // suffix after a break inside the last token) were never flushed.
                sents.add(new Element("s").setChildren(toks));
            }
            XmlUtils.replace(sentEl, sents);
        }
    }

    /**
     * Scans the word elements of one sentence and records, for each word that
     * contains a position the model accepts as a break, the word element and
     * the accepted position within it.
     *
     * @param wordEls      the word elements of the sentence, in order
     * @param breakWords   receives the word elements that contain a break
     * @param breakOffsets receives, in parallel, the break position inside each word
     */
    private void collectBreaks(List wordEls, List<Element> breakWords, List<Integer> breakOffsets) {
        int lastWordIndex = wordEls.size() - 1;
        for (int i = 0; i <= lastWordIndex; ++i) {
            Element current = (Element)wordEls.get(i);
            // NOTE(review): scanning stops at the first word whose parent carries a
            // "type" attribute, so later words are never examined. Kept as-is;
            // confirm whether skipping just that word was intended.
            if (current.getParent().getAttributeValue("type") != null) {
                break;
            }
            String word = current.getText();
            List enders = this.scanner.getPositions(word);
            if (enders.isEmpty()) {
                continue;
            }
            String previous = i > 0 ? ((Element)wordEls.get(i - 1)).getText() : "";
            String next = i < lastWordIndex ? ((Element)wordEls.get(i + 1)).getText() : "";
            Integer mostLikely = this.mostLikelyBreak(previous, word, next, enders);
            if (mostLikely != null) {
                breakWords.add(current);
                breakOffsets.add(mostLikely);
            }
        }
    }

    /**
     * Picks, among the candidate positions of a single word, the one with the
     * highest break probability whose best outcome is a break.
     *
     * @return the winning position, or {@code null} when no candidate is a break
     */
    private Integer mostLikelyBreak(String previous, String word, String next, List enders) {
        Integer mostLikely = null;
        double highest = 0.0;
        for (int j = 0; j < enders.size(); ++j) {
            String[] info = new String[]{previous, word, next};
            Integer position = (Integer)enders.get(j);
            double[] probs = this.model.eval(this.cgen.getContext(new Pair(info, position)));
            if (BREAK_OUTCOME.equals(this.model.getBestOutcome(probs)) && probs[this._trueIndex] > highest) {
                highest = probs[this._trueIndex];
                mostLikely = position;
            }
        }
        return mostLikely;
    }

    /**
     * Adds the last token of a sentence to {@code toks}. When the final character
     * of the token's {@code w} text is not a Unicode identifier part, that
     * character is removed from the word and appended as a separate token.
     *
     * @param lastToken the final token element of the sentence
     * @param toks      the token list being assembled for the current sentence
     */
    private void addLastTokenOfSentence(Element lastToken, List toks) {
        toks.add(lastToken.detach());
        String word = lastToken.getChildText("w");
        if (word == null || word.length() == 0) {
            // No word text to inspect; nothing to split off.
            return;
        }
        int lastIndex = word.length() - 1;
        if (!Character.isUnicodeIdentifierPart(word.charAt(lastIndex))) {
            lastToken.getChild("w").setText(word.substring(0, lastIndex));
            toks.add(NLPDocument.createTOK(word.substring(lastIndex)));
        }
    }

    /**
     * Returns an empty set.
     *
     * @return an empty set
     */
    public Set requires() {
        return Collections.emptySet();
    }

    /**
     * Splits a string into trimmed, non-empty sentences.
     *
     * @param s the text to split
     * @return the detected sentences in order of appearance; text after the last
     *         accepted break is returned as a final sentence when non-empty
     */
    public String[] sentDetect(String s) {
        StringBuffer sb = new StringBuffer(s);
        List enders = this.scanner.getPositions(sb);
        List<String> sents = new ArrayList<String>();
        int index = 0;
        for (int i = 0; i < enders.size(); ++i) {
            if (!this.isSentenceEnd(s, sb, enders, i, index)) {
                continue;
            }
            int cint = ((Integer)enders.get(i)).intValue();
            String sent = sb.substring(index, cint + 1).trim();
            if (sent.length() > 0) {
                sents.add(sent);
            }
            index = cint + 1;
        }
        if (index < sb.length()) {
            String rest = sb.substring(index).trim();
            if (rest.length() > 0) {
                sents.add(rest);
            }
        }
        return sents.toArray(new String[sents.size()]);
    }

    /**
     * Returns the index of the first non-whitespace character of {@code s} at or
     * after {@code pos}, or {@code s.length()} when only whitespace remains.
     */
    private int getFirstNonWS(String s, int pos) {
        while (pos < s.length() && Character.isWhitespace(s.charAt(pos))) {
            ++pos;
        }
        return pos;
    }

    /**
     * Returns the start offsets of the sentences in {@code s}.
     *
     * <p>The first entry is always the offset of the first non-whitespace
     * character of {@code s}. Every candidate position is evaluated, including
     * the last one; a start offset is only recorded after a break when
     * non-whitespace text actually follows it.
     *
     * @param s the text to analyse
     * @return the sentence start offsets in ascending order
     */
    public int[] sentPosDetect(String s) {
        StringBuffer sb = new StringBuffer(s);
        List enders = this.scanner.getPositions(s);
        List<Integer> positions = new ArrayList<Integer>(enders.size() + 1);
        positions.add(INT_POOL.get(this.getFirstNonWS(s, 0)));
        int index = 0;
        for (int i = 0; i < enders.size(); ++i) {
            if (!this.isSentenceEnd(s, sb, enders, i, index)) {
                continue;
            }
            int cint = ((Integer)enders.get(i)).intValue();
            if (index != cint) {
                int nextStart = this.getFirstNonWS(s, cint + 1);
                // A break at the very end of the text does not start a new sentence.
                if (nextStart < s.length()) {
                    positions.add(INT_POOL.get(nextStart));
                }
            }
            index = cint + 1;
        }
        int[] sentPositions = new int[positions.size()];
        for (int k = 0; k < sentPositions.length; ++k) {
            sentPositions[k] = positions.get(k).intValue();
        }
        return sentPositions;
    }

    /**
     * Decides whether the {@code i}-th candidate is a sentence break.
     *
     * <p>A candidate immediately followed by another candidate (contiguous
     * delimiters) is never a break; only the last of such a run is evaluated.
     *
     * @param s         the original text, passed to {@link #isAcceptableBreak}
     * @param sb        the text as handed to the context generator
     * @param enders    candidate positions as {@link Integer}s
     * @param i         index into {@code enders}
     * @param fromIndex start of the sentence currently being assembled
     */
    private boolean isSentenceEnd(String s, StringBuffer sb, List enders, int i, int fromIndex) {
        Integer candidate = (Integer)enders.get(i);
        int cint = candidate.intValue();
        if (i + 1 < enders.size() && ((Integer)enders.get(i + 1)).intValue() == cint + 1) {
            return false;
        }
        double[] probs = this.model.eval(this.cgen.getContext(new Pair(sb, candidate)));
        return BREAK_OUTCOME.equals(this.model.getBestOutcome(probs))
            && this.isAcceptableBreak(s, fromIndex, cint);
    }

    /**
     * Hook that lets subclasses veto a break the model proposed. This
     * implementation accepts every break.
     *
     * @param s              the text being split
     * @param fromIndex      start of the sentence currently being assembled
     * @param candidateIndex position of the proposed break
     * @return {@code true} if the break may be used
     */
    protected boolean isAcceptableBreak(String s, int fromIndex, int candidateIndex) {
        return true;
    }

    /**
     * Trains a model from an event stream by delegating to {@link GIS#trainModel}.
     *
     * @param es         the training events
     * @param iterations passed through to the trainer
     * @param cut        passed through to the trainer
     * @return the trained model
     * @throws IOException declared for callers; propagated from the trainer if thrown
     */
    public static GISModel train(EventStream es, int iterations, int cut) throws IOException {
        return GIS.trainModel(es, iterations, cut);
    }

    /**
     * Trains a model from a text file read line by line. The file is closed
     * before this method returns, whether training succeeds or fails.
     *
     * @param inFile     the training text
     * @param iterations passed through to the trainer
     * @param cut        passed through to the trainer
     * @param scanner    the scanner used to build training events
     * @return the trained model
     * @throws IOException if the file cannot be opened or closed
     */
    public static GISModel train(File inFile, int iterations, int cut, EndOfSentenceScanner scanner) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(inFile));
        try {
            PlainTextByLineDataStream ds = new PlainTextByLineDataStream(reader);
            SDEventStream es = new SDEventStream(ds, scanner);
            return GIS.trainModel(es, iterations, cut);
        } finally {
            reader.close();
        }
    }

    /**
     * Command-line trainer.
     *
     * <p>Arguments: {@code training_file model_file [iterations cutoff]}. When
     * iterations and cutoff are not both given, 100 and 5 are used. Failures are
     * reported by printing the stack trace.
     *
     * @param args command-line arguments
     * @throws IOException declared for compatibility; failures are caught and printed
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            System.err.println("Usage: SentenceDetectorME training_file model_file [iterations cutoff]");
            return;
        }
        try {
            File inFile = new File(args[0]);
            File outFile = new File(args[1]);
            GISModel mod;
            FileReader in = new FileReader(inFile);
            try {
                SDEventStream es = new SDEventStream(new PlainTextByLineDataStream(in));
                mod = args.length > 3
                    ? SentenceDetectorME.train(es, Integer.parseInt(args[2]), Integer.parseInt(args[3]))
                    : SentenceDetectorME.train(es, 100, 5);
            } finally {
                in.close();
            }
            System.out.println("Saving the model as: " + args[1]);
            new SuffixSensitiveGISModelWriter(mod, outFile).persist();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}

