/*
 * Decompiled with CFR 0.152.
 */
package de.berlin.hu.uima.ae.tagger.banner;

import banner.tagging.CRFTagger;
import banner.types.Mention;
import banner.types.Sentence;
import de.berlin.hu.uima.ae.tagger.banner.CRFWrapper;
import de.berlin.hu.uima.util.Util;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.XMLConfiguration;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.u_compare.shared.semantic.NamedEntity;
import org.u_compare.shared.syntactic.Token;

/**
 * UIMA annotator that runs a BANNER CRF tagger over every sentence annotation
 * of a CAS and adds a {@link NamedEntity} annotation for each mention whose
 * probability reaches the configured threshold.
 *
 * <p>Configuration parameters read in {@link #initialize(UimaContext)}:
 * {@code BannerModelFile} (parsed as a URL), {@code BannerConfigFile} (a file
 * path loaded as an XML configuration) and {@code Threshold} (parsed as a
 * double).
 */
public class BannerTagger
extends JCasAnnotator_ImplBase {
    private static final String BANNER_MODEL_FILE_PARAM = "BannerModelFile";
    private static final String BANNER_CONFIG_FILE_PARAM = "BannerConfigFile";
    private static final String THRESHOLD_PARAM = "Threshold";
    // NOTE(review): USE_RESOURCE and N are not referenced anywhere in this class.
    private static final boolean USE_RESOURCE = false;
    private CRFTagger tagger;
    private URL bannerModelFile;
    private File bannerConfigFile;
    /** Minimum mention probability required for an annotation to be created. */
    private double threshold;
    private static final int N = 10;
    /** Number of CASes processed so far; used as the document id of BANNER sentences. */
    private int documentCounter;
    private XMLConfiguration config;

    /**
     * Reads the configuration parameters, loads the BANNER XML configuration
     * and the CRF model, and resets the document counter.
     *
     * @param aContext the UIMA context supplying the configuration parameters
     * @throws ResourceInitializationException if a required parameter is absent,
     *         the threshold is not a valid number, the model URL is malformed,
     *         or the configuration or model cannot be loaded
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        // Resolve every parameter up front so that a missing one is reported by
        // name instead of surfacing as a bare NullPointerException.
        String pathToModelFile = BannerTagger.requireParameter(aContext, BANNER_MODEL_FILE_PARAM);
        String pathToConfigFile = BannerTagger.requireParameter(aContext, BANNER_CONFIG_FILE_PARAM);
        String thresholdText = BannerTagger.requireParameter(aContext, THRESHOLD_PARAM);
        try {
            this.bannerModelFile = new URL(pathToModelFile);
            this.bannerConfigFile = new File(pathToConfigFile);
            this.config = new XMLConfiguration(this.bannerConfigFile);
            this.threshold = Double.parseDouble(thresholdText);
            this.tagger = CRFWrapper.load(this.bannerModelFile, null, null, null);
        }
        catch (ConfigurationException e) {
            throw new ResourceInitializationException(e);
        }
        catch (NumberFormatException e) {
            // A malformed threshold is a configuration error, not a programming error.
            throw new ResourceInitializationException(e);
        }
        catch (IOException e) {
            throw new ResourceInitializationException(e);
        }
        this.documentCounter = 0;
    }

    /**
     * Returns the string form of a configuration parameter, failing with a
     * descriptive exception when the parameter has no value.
     *
     * @param aContext the UIMA context to read from
     * @param name the configuration parameter name
     * @return the parameter value converted with {@code toString()}
     * @throws ResourceInitializationException if the parameter value is {@code null}
     */
    private static String requireParameter(UimaContext aContext, String name) throws ResourceInitializationException {
        Object value = aContext.getConfigParameterValue(name);
        if (value == null) {
            throw new ResourceInitializationException(new IllegalArgumentException("Missing required configuration parameter: " + name));
        }
        return value.toString();
    }

    /**
     * Tags every sentence annotation of the CAS with the CRF tagger and adds
     * the resulting named-entity annotations to the CAS indexes.
     *
     * <p>If tagging a sentence throws {@link ArrayIndexOutOfBoundsException},
     * the failure is reported on {@code System.err} and processing continues
     * with whatever mentions the BANNER sentence holds at that point.
     *
     * @param aJCas the CAS to annotate
     * @throws AnalysisEngineProcessException declared by the UIMA contract
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        AnnotationIndex<Annotation> sentenceIndex = aJCas.getAnnotationIndex(org.u_compare.shared.syntactic.Sentence.type);
        FSIterator<Annotation> sentenceIterator = sentenceIndex.iterator();
        int sentenceCounter = 0;
        while (sentenceIterator.hasNext()) {
            org.u_compare.shared.syntactic.Sentence sentence = (org.u_compare.shared.syntactic.Sentence)sentenceIterator.next();
            Sentence bannerSentence = new Sentence(String.valueOf(sentenceCounter), String.valueOf(this.documentCounter), sentence.getCoveredText());
            List<Token> tokensInSentence = Util.getTokens(aJCas, sentence);
            Util.tokenizeBannerSentence(bannerSentence, tokensInSentence);
            assert (sentence.getCoveredText().equals(bannerSentence.getText()));
            assert (tokensInSentence.size() == bannerSentence.getTokens().size()) : String.valueOf(tokensInSentence.size()) + " != " + bannerSentence.getTokens().size();
            try {
                this.tagger.tag(bannerSentence);
            }
            catch (ArrayIndexOutOfBoundsException e) {
                // Best effort: one failing sentence must not abort the whole document.
                System.err.println("ERROR!");
                System.err.println("Corpus:\t" + sentence.getCoveredText());
                System.err.println("BANNER:\t" + bannerSentence.getText());
                System.err.println("Cause:\t" + e);
            }
            this.createAnnotations(aJCas, sentence.getBegin(), bannerSentence);
            ++sentenceCounter;
        }
        ++this.documentCounter;
    }

    /**
     * Selects the mentions of a tagged sentence that reach the threshold,
     * resolves overlaps between consecutive candidates in favour of the more
     * probable one, and adds a {@link NamedEntity} annotation for each
     * selected mention.
     *
     * @param aJCas the CAS receiving the annotations
     * @param offset value added to the sentence-relative token offsets; the
     *        caller passes the begin offset of the sentence annotation
     * @param bannerSentence the tagged BANNER sentence
     * @return the mentions for which annotations were created
     */
    private Set<Mention> createAnnotations(JCas aJCas, int offset, Sentence bannerSentence) {
        List<Mention> mentions = bannerSentence.getMentions();
        HashSet<Mention> mentionsToAdd = new HashSet<Mention>();
        Mention lastMention = null;
        for (Mention mention : mentions) {
            // Written as a negated >= so that a NaN probability is rejected as well.
            if (!(mention.getProbability() >= this.threshold)) continue;
            if (lastMention != null && mention.overlaps(lastMention)) {
                // Keep only the more probable of two overlapping candidates.
                if (!(mention.getProbability() > lastMention.getProbability())) continue;
                mentionsToAdd.remove(lastMention);
                mentionsToAdd.add(mention);
                lastMention = mention;
                continue;
            }
            mentionsToAdd.add(mention);
            lastMention = mention;
        }
        for (Mention mention : mentionsToAdd) {
            NamedEntity entity = new NamedEntity(aJCas);
            // The mention spans tokens [getStart(), getEnd()); the annotation runs
            // from the start of the first token to the end of the last one.
            int startOffset = bannerSentence.getTokens().get(mention.getStart()).getStart();
            int endOffset = bannerSentence.getTokens().get(mention.getEnd() - 1).getEnd();
            entity.setBegin(offset + startOffset);
            entity.setEnd(offset + endOffset);
            entity.setEntityType(mention.getEntityType().getText());
            entity.setConfidence(mention.getProbability());
            entity.setSource("crf");
            entity.addToIndexes();
            assert (entity.getCoveredText().equals(mention.getText()));
        }
        return mentionsToAdd;
    }
}

