/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.sentdetect;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import opennlp.maxent.EventStream;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.maxent.IntegerPool;
import opennlp.maxent.MaxentModel;
import opennlp.maxent.PlainTextByLineDataStream;
import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
import opennlp.tools.lang.thai.SentenceContextGenerator;
import opennlp.tools.sentdetect.AbstractEndOfSentenceScanner;
import opennlp.tools.sentdetect.DefaultSDContextGenerator;
import opennlp.tools.sentdetect.EndOfSentenceScanner;
import opennlp.tools.sentdetect.SDContextGenerator;
import opennlp.tools.sentdetect.SDEventStream;
import opennlp.tools.sentdetect.SentenceDetector;

/**
 * Sentence detector that asks a maximum-entropy model whether each candidate
 * end-of-sentence character found by an {@link EndOfSentenceScanner} really
 * terminates a sentence.
 *
 * <p>The probabilities of the breaks found by the most recent call to
 * {@link #sentPosDetect(String)} are kept in an instance field, so a single
 * instance should not be used from several threads at once without external
 * synchronization.
 */
public class SentenceDetectorME implements SentenceDetector {
    /** Model evaluated for every candidate position. */
    private final MaxentModel model;
    /** Produces the model features for a candidate position. */
    private final SDContextGenerator cgen;
    /** Locates the candidate end-of-sentence characters in the input. */
    private final EndOfSentenceScanner scanner;
    private static final IntegerPool INT_POOL = new IntegerPool(100);
    /** Probabilities of the breaks accepted by the last {@link #sentPosDetect(String)} call. */
    private final List<Double> sentProbs;
    /**
     * When {@code true}, a break is placed after the whitespace that follows the
     * token containing the candidate character; when {@code false}, it is placed
     * at the first non-whitespace character at or after the candidate itself.
     */
    protected boolean useTokenEnd;

    /**
     * Creates a detector that uses the English end-of-sentence scanner and a
     * {@link DefaultSDContextGenerator} built from the English end-of-sentence characters.
     *
     * @param m the model used to classify candidate positions
     */
    public SentenceDetectorME(MaxentModel m) {
        this(m, new DefaultSDContextGenerator(opennlp.tools.lang.english.EndOfSentenceScanner.eosCharacters), new opennlp.tools.lang.english.EndOfSentenceScanner());
    }

    /**
     * Creates a detector that uses the English end-of-sentence scanner and the
     * supplied context generator.
     *
     * @param m  the model used to classify candidate positions
     * @param cg the generator of model features for a candidate position
     */
    public SentenceDetectorME(MaxentModel m, SDContextGenerator cg) {
        this(m, cg, new opennlp.tools.lang.english.EndOfSentenceScanner());
    }

    /**
     * Creates a detector from explicit components. {@link #useTokenEnd} is
     * initialised to {@code true}.
     *
     * @param m  the model used to classify candidate positions
     * @param cg the generator of model features for a candidate position
     * @param s  the scanner that finds candidate end-of-sentence characters
     */
    public SentenceDetectorME(MaxentModel m, SDContextGenerator cg, EndOfSentenceScanner s) {
        this.model = m;
        this.cgen = cg;
        this.scanner = s;
        this.sentProbs = new ArrayList<Double>(50);
        this.useTokenEnd = true;
    }

    /**
     * Splits the text at the offsets returned by {@link #sentPosDetect(String)}.
     *
     * @param s the text to split
     * @return the pieces of {@code s} between consecutive offsets; a single-element
     *         array holding {@code s} when no offset was found. When
     *         {@link #useTokenEnd} is {@code true} and the last offset is not the
     *         end of the text, the remaining text is returned as a final element.
     */
    public String[] sentDetect(String s) {
        int[] starts = this.sentPosDetect(s);
        if (starts.length == 0) {
            return new String[]{s};
        }
        int last = starts[starts.length - 1];
        boolean leftover = last != s.length() && this.useTokenEnd;
        String[] sents = new String[leftover ? starts.length + 1 : starts.length];
        sents[0] = s.substring(0, starts[0]);
        for (int si = 1; si < starts.length; ++si) {
            sents[si] = s.substring(starts[si - 1], starts[si]);
        }
        if (leftover) {
            sents[sents.length - 1] = s.substring(last);
        }
        return sents;
    }

    /** Returns the index of the first whitespace character at or after {@code pos}, or {@code s.length()}. */
    private int getFirstWS(String s, int pos) {
        while (pos < s.length() && !Character.isWhitespace(s.charAt(pos))) {
            ++pos;
        }
        return pos;
    }

    /** Returns the index of the first non-whitespace character at or after {@code pos}, or {@code s.length()}. */
    private int getFirstNonWS(String s, int pos) {
        while (pos < s.length() && Character.isWhitespace(s.charAt(pos))) {
            ++pos;
        }
        return pos;
    }

    /**
     * Finds the offsets at which the text should be split into sentences.
     *
     * <p>As a side effect the probabilities returned by
     * {@link #getSentenceProbabilities()} are replaced with those of the
     * offsets found by this call.
     *
     * @param s the text to analyse
     * @return the split offsets in increasing scan order; empty when no break was accepted
     */
    public int[] sentPosDetect(String s) {
        this.sentProbs.clear();
        StringBuffer sb = new StringBuffer(s);
        List<?> enders = this.scanner.getPositions(s);
        int end = enders.size();
        List<Integer> positions = new ArrayList<Integer>(end);
        // Offset just past the previously accepted break (0 before the first one).
        int index = 0;
        for (int i = 0; i < end; ++i) {
            Integer candidate = (Integer) enders.get(i);
            int cint = candidate.intValue();
            int fws = this.getFirstWS(s, cint + 1);
            // Skip this candidate when another one follows before the next
            // whitespace: only the last candidate of a token is evaluated.
            if (i + 1 < end && ((Integer) enders.get(i + 1)).intValue() < fws) {
                continue;
            }
            double[] probs = this.model.eval(this.cgen.getContext(sb, candidate));
            String bestOutcome = this.model.getBestOutcome(probs);
            // "T" is the outcome that marks a sentence break.
            if (!bestOutcome.equals("T") || !this.isAcceptableBreak(s, index, cint)) {
                continue;
            }
            // A candidate sitting exactly at the previous break start is not recorded again.
            if (index != cint) {
                int breakStart = this.useTokenEnd
                        ? this.getFirstNonWS(s, fws)
                        : this.getFirstNonWS(s, cint);
                positions.add(INT_POOL.get(breakStart));
                this.sentProbs.add(Double.valueOf(probs[this.model.getIndex(bestOutcome)]));
            }
            index = cint + 1;
        }
        int[] sentPositions = new int[positions.size()];
        for (int i = 0; i < sentPositions.length; ++i) {
            sentPositions[i] = positions.get(i).intValue();
        }
        return sentPositions;
    }

    /**
     * Returns the model probability of each break found by the most recent
     * call to {@link #sentPosDetect(String)} (or {@link #sentDetect(String)}).
     *
     * @return a new array with one probability per detected offset, in the same order
     */
    public double[] getSentenceProbabilities() {
        double[] sentProbArray = new double[this.sentProbs.size()];
        for (int i = 0; i < sentProbArray.length; ++i) {
            sentProbArray[i] = this.sentProbs.get(i).doubleValue();
        }
        return sentProbArray;
    }

    /**
     * Hook that lets subclasses veto a break the model proposed. This
     * implementation accepts every break.
     *
     * @param s              the text being analysed
     * @param fromIndex      the offset just past the previously accepted break
     * @param candidateIndex the offset of the candidate end-of-sentence character
     * @return {@code true} if a break at {@code candidateIndex} is acceptable
     */
    protected boolean isAcceptableBreak(String s, int fromIndex, int candidateIndex) {
        return true;
    }

    /**
     * Trains a model from an event stream by delegating to {@link GIS#trainModel}.
     *
     * @param es         the training events
     * @param iterations the number of training iterations
     * @param cut        the feature cutoff passed to the trainer
     * @return the trained model
     * @throws IOException declared for callers; propagated from the trainer if it raises one
     */
    public static GISModel train(EventStream es, int iterations, int cut) throws IOException {
        return GIS.trainModel(es, iterations, cut);
    }

    /**
     * Trains a model from a text file read line by line with the platform
     * default charset. The file is closed before this method returns.
     *
     * @param inFile     the training text
     * @param iterations the number of training iterations
     * @param cut        the feature cutoff passed to the trainer
     * @param scanner    the scanner used to find candidate positions in the training text
     * @return the trained model
     * @throws IOException if the file cannot be opened, read or closed
     */
    public static GISModel train(File inFile, int iterations, int cut, EndOfSentenceScanner scanner) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(inFile));
        try {
            PlainTextByLineDataStream ds = new PlainTextByLineDataStream(reader);
            SDEventStream es = new SDEventStream(ds, scanner);
            return GIS.trainModel(es, iterations, cut);
        } finally {
            // Training has consumed the stream by now; release the file handle.
            reader.close();
        }
    }

    /** Prints the command-line synopsis to standard error and terminates the JVM with status 1. */
    private static void usage() {
        System.err.println("Usage: SentenceDetectorME [-encoding charset] [-lang language] trainData modelName");
        System.err.println("-encoding charset specifies the encoding which should be used ");
        System.err.println("                  for reading and writing text.");
        System.err.println("-lang language    specifies the language (english|spanish|thai) which ");
        System.err.println("                  is being processed.");
        System.exit(1);
    }

    /**
     * Command-line trainer:
     * {@code [-encoding charset] [-lang language] trainData modelName [iterations cutoff]}.
     * Without the trailing pair, 100 iterations and a cutoff of 5 are used.
     * Failures during training or saving are reported with a stack trace.
     *
     * @param args the command-line arguments
     * @throws IOException if closing the training file fails
     */
    public static void main(String[] args) throws IOException {
        int ai = 0;
        String encoding = null;
        String lang = null;
        if (args.length == 0) {
            SentenceDetectorME.usage();
        }
        // The bounds check keeps an argument list made only of options from
        // running off the end of the array.
        while (ai < args.length && args[ai].startsWith("-")) {
            String option = args[ai++];
            boolean known = option.equals("-encoding") || option.equals("-lang");
            if (!known || ai >= args.length) {
                // Unknown option, or a known option without its value.
                SentenceDetectorME.usage();
            }
            if (option.equals("-encoding")) {
                encoding = args[ai++];
            } else {
                lang = args[ai++];
            }
        }
        // trainData and modelName are mandatory; iterations and cutoff are
        // only accepted as a pair.
        int remaining = args.length - ai;
        if (remaining < 2 || remaining == 3) {
            SentenceDetectorME.usage();
        }
        File inFile = new File(args[ai++]);
        File outFile = new File(args[ai++]);
        InputStream in = null;
        try {
            AbstractEndOfSentenceScanner scanner = null;
            DefaultSDContextGenerator cg = null;
            if (lang == null || lang.equals("english") || lang.equals("spanish")) {
                scanner = new opennlp.tools.lang.english.EndOfSentenceScanner();
                cg = new DefaultSDContextGenerator(scanner.getEndOfSentenceCharacters());
            } else if (lang.equals("thai")) {
                scanner = new opennlp.tools.lang.thai.EndOfSentenceScanner();
                cg = new SentenceContextGenerator();
            } else {
                SentenceDetectorME.usage();
            }
            in = new FileInputStream(inFile);
            // InputStreamReader rejects a null charset name, so fall back to the
            // platform default when no -encoding option was given.
            InputStreamReader reader = encoding == null
                    ? new InputStreamReader(in)
                    : new InputStreamReader(in, encoding);
            SDEventStream es = new SDEventStream(new PlainTextByLineDataStream(reader), scanner, cg);
            int iterations = 100;
            int cut = 5;
            if (args.length > ai) {
                iterations = Integer.parseInt(args[ai++]);
                cut = Integer.parseInt(args[ai++]);
            }
            GISModel mod = SentenceDetectorME.train(es, iterations, cut);
            System.out.println("Saving the model as: " + outFile);
            new SuffixSensitiveGISModelWriter(mod, outFile).persist();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            if (in != null) {
                in.close();
            }
        }
    }
}

