/*
 * Decompiled with CFR 0.152.
 */
package opennlp.uima.tokenize;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.tools.tokenize.TokSpanEventStream;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.util.Span;
import opennlp.uima.util.CasConsumerUtil;
import opennlp.uima.util.ContainingConstraint;
import opennlp.uima.util.OpennlpUtil;
import opennlp.uima.util.UimaUtil;
import org.apache.uima.UimaContext;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.collection.CasConsumer_ImplBase;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.ProcessTrace;

/**
 * UIMA CAS consumer that collects tokenizer training events from annotated
 * CASes and, once the collection has been processed, trains a tokenizer model
 * and writes it to a file below the resource manager's data path.
 *
 * <p>For every sentence annotation in a CAS, the token annotations selected by
 * a {@link ContainingConstraint} built from that sentence are converted into
 * sentence-relative {@link Span}s and added to a {@link TokSpanEventStream}.
 * The consumer keeps that event stream between calls and therefore reports
 * itself as not stateless.
 */
public final class TokenizerTrainer
extends CasConsumer_ImplBase {
    /** Name of the optional boolean parameter forwarded to the event stream. */
    private static final String IS_SKIP_ALPHA_NUMERICS_PARAMETER = "opennlp.uima.tokenizer.IsSkipAlphaNumerics";
    /** Accumulates the training events of all processed sentences. */
    private TokSpanEventStream mEventStream;
    /** Context obtained in {@link #initialize()}; used to read parameters. */
    private UimaContext mContext;
    /** Annotation type whose instances are treated as sentences. */
    private Type mSentenceType;
    /** Annotation type whose instances are treated as tokens. */
    private Type mTokenType;
    /** File name of the model, relative to the resource manager's data path. */
    private String mModelName;
    /** Logger obtained from the UIMA context. */
    private Logger mLogger;

    /**
     * Reads the configuration parameters and creates the event stream.
     *
     * @throws ResourceInitializationException declared by the overridden
     *         method; presumably raised by the parameter helpers when the
     *         required model parameter is missing - TODO confirm
     */
    public void initialize() throws ResourceInitializationException {
        this.mContext = this.getUimaContext();
        this.mLogger = this.mContext.getLogger();
        if (this.mLogger.isLoggable(Level.INFO)) {
            this.mLogger.log(Level.INFO, "Initializing the OpenNLP Tokenizer trainer.");
        }
        this.mModelName = CasConsumerUtil.getRequiredStringParameter(this.mContext, UimaUtil.MODEL_PARAMETER);
        Boolean isSkipAlphaNumerics = CasConsumerUtil.getOptionalBooleanParameter(this.mContext, IS_SKIP_ALPHA_NUMERICS_PARAMETER);
        // NOTE(review): the optional parameter helper presumably returns null
        // when the parameter is not configured - confirm. Fall back to false
        // so that auto-unboxing of a null Boolean cannot throw a
        // NullPointerException here.
        boolean skipAlphaNumerics = isSkipAlphaNumerics != null && isSkipAlphaNumerics.booleanValue();
        this.mEventStream = new TokSpanEventStream(skipAlphaNumerics);
    }

    /**
     * Resolves the sentence and token types named by the configuration
     * parameters against the given type system.
     *
     * @param typeSystem the type system to look the configured types up in
     * @throws ResourceInitializationException declared by the overridden
     *         method; presumably raised by the helpers when a parameter or
     *         type cannot be resolved - TODO confirm
     */
    public void typeSystemInit(TypeSystem typeSystem) throws ResourceInitializationException {
        String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(this.mContext, UimaUtil.SENTENCE_TYPE_PARAMETER);
        this.mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
        String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(this.mContext, "opennlp.uima.TokenType");
        this.mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
    }

    /**
     * Adds training events for every sentence annotation found in the CAS.
     *
     * @param cas the CAS holding the sentence and token annotations
     */
    public void processCas(CAS cas) {
        AnnotationIndex<AnnotationFS> sentenceAnnotations = cas.getAnnotationIndex(this.mSentenceType);
        for (AnnotationFS sentence : sentenceAnnotations) {
            this.process(cas, sentence);
        }
    }

    /**
     * Converts the tokens selected for one sentence into sorted,
     * sentence-relative spans and adds them to the event stream together with
     * the sentence's covered text.
     *
     * @param tcas the CAS the sentence belongs to
     * @param sentence the sentence annotation whose tokens are collected
     */
    private void process(CAS tcas, AnnotationFS sentence) {
        AnnotationIndex<AnnotationFS> allTokens = tcas.getAnnotationIndex(this.mTokenType);
        ContainingConstraint containingConstraint = new ContainingConstraint(sentence);
        FSIterator containingTokens = tcas.createFilteredIterator(allTokens.iterator(), containingConstraint);
        LinkedList<Span> openNLPSpans = new LinkedList<Span>();
        // Token offsets are document-relative; shift them by the sentence
        // start because the spans are paired with the sentence text only.
        int sentenceBegin = sentence.getBegin();
        while (containingTokens.hasNext()) {
            AnnotationFS tokenAnnotation = (AnnotationFS)containingTokens.next();
            openNLPSpans.add(new Span(tokenAnnotation.getBegin() - sentenceBegin, tokenAnnotation.getEnd() - sentenceBegin));
        }
        Span[] spans = openNLPSpans.toArray(new Span[openNLPSpans.size()]);
        Arrays.sort(spans);
        this.mEventStream.addEvents(spans, sentence.getCoveredText());
    }

    /**
     * Trains the tokenizer model from the collected events and serializes it
     * to the model file below the resource manager's data path.
     *
     * @param arg0 process trace supplied by the framework; not used here
     * @throws ResourceProcessException declared by the overridden method
     * @throws IOException if the model file cannot be opened, written or closed
     */
    public void collectionProcessComplete(ProcessTrace arg0) throws ResourceProcessException, IOException {
        GIS.PRINT_MESSAGES = false;
        GISModel tokenModel = TokenizerME.train(this.mEventStream);
        // The events are no longer needed once the model has been trained.
        this.mEventStream = null;
        File modelFile = new File(this.getUimaContextAdmin().getResourceManager().getDataPath() + File.separatorChar + this.mModelName);
        FileOutputStream modelOut = new FileOutputStream(modelFile);
        try {
            OpennlpUtil.serialize(tokenModel, modelOut);
        } finally {
            // Always release the file handle, even when serialization fails.
            modelOut.close();
        }
    }

    /**
     * @return always {@code false}; events are accumulated across CASes
     */
    public boolean isStateless() {
        return false;
    }

    /**
     * Drops the reference to the collected events.
     */
    public void destroy() {
        this.mEventStream = null;
    }
}

