/*
 * Decompiled with CFR 0.152.
 */
package de.berlin.hu.uima.cc.banner.trainer;

import banner.tagging.CRFTagger;
import banner.tagging.FeatureSet;
import banner.tagging.TagFormat;
import banner.types.EntityType;
import banner.types.Mention;
import banner.types.Sentence;
import de.berlin.hu.banner.featuresets.KlingerLikeFeatureSet;
import de.berlin.hu.banner.util.ConfigUtil;
import de.berlin.hu.uima.util.Util;
import dragon.nlp.tool.Tagger;
import dragon.nlp.tool.lemmatiser.EngLemmatiser;
import java.io.File;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.XMLConfiguration;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CasConsumer_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.u_compare.shared.semantic.NamedEntity;
import org.u_compare.shared.syntactic.Token;
import org.uimafit.util.JCasUtil;

/**
 * UIMA CAS consumer that collects BANNER training sentences from every processed CAS and,
 * when the consumer is destroyed, trains a {@link CRFTagger} on them and writes the model
 * to the configured output file.
 *
 * <p>Two configuration parameters are read in {@link #initialize()}:
 * {@code BannerModelOutputFile} (where the trained model is written) and
 * {@code BannerConfigFile} (XML configuration handed to {@link ConfigUtil}).
 */
public class BannerTrainer
extends CasConsumer_ImplBase {
    private static final String BANNER_MODEL_OUTPUT_FILE_PARAM = "BannerModelOutputFile";
    private static final String BANNER_CONFIG_FILE_PARAM = "BannerConfigFile";
    private File bannerModelOutputFile;
    private File bannerConfigFile;
    private HierarchicalConfiguration config;
    private FeatureSet featureSet;
    private TagFormat tagFormat;
    /** Training sentences accumulated across all calls to {@link #processCas(CAS)}. */
    private Set<Sentence> bannerSentences;
    /** Number of CASes processed so far; used as the document id of new sentences. */
    private int documentCounter;
    /** Number of mentions added to the training sentences so far. */
    private int numberOfEntities;
    private int crfOrder;

    /**
     * Reads the configuration parameters, loads the BANNER XML configuration and builds the
     * feature set used for training.
     *
     * @throws ResourceInitializationException if a required configuration parameter is absent
     *         or the BANNER configuration file cannot be loaded
     */
    @Override
    public void initialize() throws ResourceInitializationException {
        super.initialize();
        this.bannerSentences = new HashSet<Sentence>();
        this.bannerModelOutputFile = new File(this.requireParameter(BANNER_MODEL_OUTPUT_FILE_PARAM));
        this.bannerConfigFile = new File(this.requireParameter(BANNER_CONFIG_FILE_PARAM));
        try {
            this.config = new XMLConfiguration(this.bannerConfigFile);
        }
        catch (ConfigurationException e) {
            // The cause is preserved in the rethrown exception; no separate stack trace dump needed.
            throw new ResourceInitializationException(e);
        }
        this.tagFormat = ConfigUtil.getTagFormat(this.config);
        EngLemmatiser lemmatiser = ConfigUtil.getLemmatiser(this.config);
        Tagger posTagger = ConfigUtil.getPosTagger(this.config);
        Set<Mention.MentionType> mentionTypes = ConfigUtil.getMentionTypes(this.config);
        Sentence.OverlapOption sameTypeOverlapOption = ConfigUtil.getSameTypeOverlapOption(this.config);
        Sentence.OverlapOption differentTypeOverlapOption = ConfigUtil.getDifferentTypeOverlapOption(this.config);
        this.crfOrder = ConfigUtil.getCRFOrder(this.config);
        this.featureSet = new KlingerLikeFeatureSet(this.tagFormat, lemmatiser, posTagger, null, mentionTypes, sameTypeOverlapOption, differentTypeOverlapOption);
        this.documentCounter = 0;
        this.numberOfEntities = 0;
    }

    /**
     * Returns the string form of a configuration parameter, failing with a descriptive
     * initialization error instead of a bare {@link NullPointerException} when it is not set.
     */
    private String requireParameter(String parameterName) throws ResourceInitializationException {
        Object value = this.getConfigParameterValue(parameterName);
        if (value == null) {
            throw new ResourceInitializationException(
                    new IllegalArgumentException("Missing required configuration parameter: " + parameterName));
        }
        return value.toString();
    }

    /**
     * Converts every sentence of the CAS into a BANNER {@link Sentence}, attaches one
     * {@link Mention} per named entity that is not nested inside a previously kept entity,
     * and stores the sentence for training.
     *
     * @param aCas the CAS to read sentences, tokens and named entities from
     * @throws ResourceProcessException if the JCas view cannot be obtained
     * @throws IllegalArgumentException if an entity boundary does not fall on any token
     */
    @Override
    public void processCas(CAS aCas) throws ResourceProcessException {
        final JCas aJCas;
        try {
            aJCas = aCas.getJCas();
        }
        catch (CASException e) {
            throw new ResourceProcessException(e);
        }
        Iterator<org.u_compare.shared.syntactic.Sentence> sentenceIterator = JCasUtil.iterator(aJCas, org.u_compare.shared.syntactic.Sentence.class);
        int sentenceCounter = 0;
        while (sentenceIterator.hasNext()) {
            org.u_compare.shared.syntactic.Sentence sentence = sentenceIterator.next();
            Sentence bannerSentence = new Sentence(String.valueOf(sentenceCounter), String.valueOf(this.documentCounter), sentence.getCoveredText());
            int sentenceBegin = sentence.getBegin();
            int sentenceEnd = sentence.getEnd();
            List<Token> tokensInSentence = Util.getTokens(aJCas, sentenceBegin, sentenceEnd);
            Util.tokenizeBannerSentence(bannerSentence, tokensInSentence);
            assert (tokensInSentence.size() == bannerSentence.getTokens().size());
            Iterator<NamedEntity> entityIterator = JCasUtil.iterator(sentence, NamedEntity.class, true, true);
            // Last entity that was NOT reported as nested; nested entities must not replace it,
            // otherwise a later sibling nested in the same outer entity would slip through.
            // NOTE(review): assumes the iterator yields entities ordered by begin offset with
            // longer spans first -- confirm against the annotation index ordering.
            NamedEntity lastEntity = null;
            while (entityIterator.hasNext()) {
                NamedEntity currentEntity = entityIterator.next();
                if (this.overlaps(lastEntity, currentEntity)) {
                    System.out.println("Probable annotation error: " + lastEntity.getCoveredText() + " overlaps " + currentEntity.getCoveredText());
                    continue;
                }
                int currentEntityBegin = currentEntity.getBegin();
                int currentEntityEnd = currentEntity.getEnd();
                if (currentEntityBegin >= sentenceEnd || currentEntityEnd > sentenceEnd) break;
                int tokenPositionBegin = this.getTokenPositionBegin(currentEntityBegin, tokensInSentence);
                int tokenPositionEnd = this.getTokenPositionEnd(currentEntityEnd, tokensInSentence);
                // The token end index is inclusive here; Mention is given end + 1.
                Mention mention = new Mention(bannerSentence, tokenPositionBegin, tokenPositionEnd + 1, EntityType.getType(currentEntity.getEntityType()), Mention.MentionType.Required);
                bannerSentence.addMention(mention);
                ++this.numberOfEntities;
                lastEntity = currentEntity;
            }
            this.bannerSentences.add(bannerSentence);
            ++sentenceCounter;
        }
        ++this.documentCounter;
    }

    /**
     * Tells whether {@code currentEntity} lies completely inside the span of {@code lastEntity}.
     *
     * @return {@code false} when {@code lastEntity} is {@code null} or the current span is not
     *         fully contained in the last one
     */
    private boolean overlaps(NamedEntity lastEntity, NamedEntity currentEntity) {
        return lastEntity != null && currentEntity.getBegin() >= lastEntity.getBegin() && currentEntity.getEnd() <= lastEntity.getEnd();
    }

    /**
     * Finds the index of the token that contains the given begin offset
     * ({@code token.begin <= offset < token.end}).
     *
     * @throws IllegalArgumentException if no token contains the offset
     */
    private int getTokenPositionBegin(int currentEntityBegin, List<Token> tokensInSentence) {
        for (int i = 0; i < tokensInSentence.size(); ++i) {
            Token token = tokensInSentence.get(i);
            if (token.getBegin() <= currentEntityBegin && currentEntityBegin < token.getEnd()) {
                return i;
            }
        }
        throw new IllegalArgumentException("No token contains entity begin offset " + currentEntityBegin
                + "; token spans: " + describeTokenSpans(tokensInSentence));
    }

    /**
     * Finds the index of the token that contains the given end offset
     * ({@code token.begin < offset <= token.end}).
     *
     * @throws IllegalArgumentException if no token contains the offset
     */
    private int getTokenPositionEnd(int currentNamedEnd, List<Token> tokensInSentence) {
        for (int i = 0; i < tokensInSentence.size(); ++i) {
            Token token = tokensInSentence.get(i);
            if (token.getBegin() < currentNamedEnd && currentNamedEnd <= token.getEnd()) {
                return i;
            }
        }
        throw new IllegalArgumentException("No token contains entity end offset " + currentNamedEnd
                + "; token spans: " + describeTokenSpans(tokensInSentence));
    }

    /** Renders the begin/end offsets of all tokens as {@code [b-e, b-e, ...]} for error messages. */
    private static String describeTokenSpans(List<Token> tokens) {
        StringBuilder spans = new StringBuilder("[");
        for (Token token : tokens) {
            if (spans.length() > 1) {
                spans.append(", ");
            }
            spans.append(token.getBegin()).append('-').append(token.getEnd());
        }
        return spans.append(']').toString();
    }

    /**
     * Trains the CRF tagger on all collected sentences, writes a description to
     * {@code model_describe.txt} and the model to the configured output file.
     *
     * <p>Failures are reported on standard error and not rethrown, since this method
     * declares no checked exceptions.
     */
    @Override
    public void destroy() {
        System.out.println("Number of training sentences: " + this.bannerSentences.size());
        System.out.println("Number of entities: " + this.numberOfEntities);
        System.out.println("Training data loaded, starting training");
        try {
            CRFTagger tagger = CRFTagger.train(this.bannerSentences, this.crfOrder, this.tagFormat, this.featureSet);
            System.out.println("Training complete, saving model");
            tagger.describe("model_describe.txt");
            tagger.write(this.bannerModelOutputFile);
        }
        catch (Throwable e) {
            // Deliberately best-effort: report with context instead of propagating.
            System.err.println("Training or saving the BANNER model failed (output file: " + this.bannerModelOutputFile + ")");
            e.printStackTrace();
        }
    }
}

