/*
 * Decompiled with CFR 0.152.
 */
package de.berlin.hu.uima.ae;

import de.berlin.hu.chemspot.ChemSpotConfiguration;
import de.berlin.hu.types.PubmedDocument;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.u_compare.shared.semantic.NamedEntity;
import org.u_compare.shared.semantic.chemical.Chemical;
import org.u_compare.shared.syntactic.Sentence;
import org.uimafit.util.JCasUtil;

/**
 * UIMA annotator that reconciles the {@link NamedEntity} annotations found in a CAS
 * and converts the surviving ones into {@link Chemical} annotations.
 *
 * <p>Entities whose source is {@code "goldstandard"} are ignored. The remaining entities
 * are processed in order of their begin offset, sentence by sentence. Abbreviations
 * (source {@code "ABBREV"}) are kept only if the text stored in their id matches some
 * non-abbreviation entity; overlapping entities are resolved by a source-based precedence;
 * entities separated by exactly one space are merged; and very short
 * {@code "dictionary"} / {@code "drug tagger"} matches are dropped.
 */
public class AnnotationMergerAE
extends JCasAnnotator_ImplBase {
    /**
     * Maximum span length (end - begin) at which a "dictionary" or "drug tagger" entity is
     * discarded. Taken from {@link ChemSpotConfiguration}; falls back to 2 when the
     * configuration reports -1.
     */
    private static final int DEFAULT_DICTIONARY_FILTER_LENGTH = ChemSpotConfiguration.getDictionaryFilterLength() != -1 ? ChemSpotConfiguration.getDictionaryFilterLength() : 2;

    /** Orders entities by ascending begin offset. */
    private static final Comparator<NamedEntity> BY_BEGIN = new Comparator<NamedEntity>(){

        @Override
        public int compare(NamedEntity m1, NamedEntity m2) {
            return Integer.compare(m1.getBegin(), m2.getBegin());
        }
    };

    /**
     * Merges the named entities of every {@link PubmedDocument} in the CAS. When the CAS
     * contains no {@link PubmedDocument}, the whole CAS is processed as a single unit.
     *
     * @param aJCas the CAS whose entity annotations are merged in place
     * @throws AnalysisEngineProcessException declared by the UIMA contract
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        List<PubmedDocument> documents = new ArrayList<PubmedDocument>();
        for (PubmedDocument doc : JCasUtil.iterate(aJCas, PubmedDocument.class)) {
            documents.add(doc);
        }
        if (documents.isEmpty()) {
            // null is the marker for "no document annotation: use the whole CAS".
            documents.add(null);
        }
        for (PubmedDocument document : documents) {
            this.mergeDocument(aJCas, document);
        }
    }

    /**
     * Runs the merge for one document.
     *
     * @param aJCas    the CAS being processed
     * @param document the document annotation restricting the scope, or {@code null} to
     *                 consider every entity and sentence in the CAS
     */
    private void mergeDocument(JCas aJCas, PubmedDocument document) {
        Iterator<NamedEntity> entityIterator = document != null ? JCasUtil.iterator(document, NamedEntity.class, true, true) : JCasUtil.iterator(aJCas, NamedEntity.class);
        List<NamedEntity> entities = new ArrayList<NamedEntity>();
        // Lower-cased, trimmed covered text -> all (non-goldstandard) entities with that text.
        HashMap<String, List<NamedEntity>> chemicalsMap = new HashMap<String, List<NamedEntity>>();
        while (entityIterator.hasNext()) {
            NamedEntity namedEntity = entityIterator.next();
            if ("goldstandard".equals(namedEntity.getSource())) {
                continue;
            }
            entities.add(namedEntity);
            String chemName = namedEntity.getCoveredText().trim().toLowerCase();
            List<NamedEntity> sameName = chemicalsMap.get(chemName);
            if (sameName == null) {
                sameName = new ArrayList<NamedEntity>();
                chemicalsMap.put(chemName, sameName);
            }
            sameName.add(namedEntity);
        }
        Collections.sort(entities, BY_BEGIN);

        List<NamedEntity> chemicals = new ArrayList<NamedEntity>();
        // Abbreviation texts already accepted / rejected earlier in this document.
        List<String> abbreviations = new ArrayList<String>();
        List<String> nonChemicalAbbreviations = new ArrayList<String>();
        NamedEntity lastEntity = null;
        Iterator<Sentence> sentenceIterator = document != null ? JCasUtil.iterator(document, Sentence.class, true, true) : JCasUtil.iterator(aJCas, Sentence.class);
        Sentence sentence = sentenceIterator.hasNext() ? sentenceIterator.next() : null;

        for (NamedEntity entity : entities) {
            if (sentence != null) {
                // Advance to the sentence reaching the entity; entities are never
                // compared with a predecessor from an earlier sentence.
                while (sentence.getEnd() < entity.getBegin() && sentenceIterator.hasNext()) {
                    sentence = sentenceIterator.next();
                    lastEntity = null;
                }
            }
            String entityText = entity.getCoveredText().trim();
            String entityKey = entityText.toLowerCase();
            boolean filtered = false;

            if (nonChemicalAbbreviations.contains(entityKey)) {
                entity.removeFromIndexes(aJCas);
                filtered = true;
            }

            boolean isChemAbbreviation = abbreviations.contains(entityKey);
            if (!isChemAbbreviation && !filtered && "ABBREV".equals(entity.getSource())) {
                // The id of an abbreviation entity is used as a lookup key into the
                // covered texts of the other entities and is cleared afterwards.
                String name = null;
                if (entity.getId() != null) {
                    name = entity.getId().trim().toLowerCase();
                    entity.setId(null);
                }
                List<NamedEntity> chems = name != null ? chemicalsMap.get(name) : null;
                if (chems != null) {
                    for (NamedEntity c : chems) {
                        if (!"ABBREV".equals(c.getSource())) {
                            isChemAbbreviation = true;
                            break;
                        }
                    }
                }
                if (isChemAbbreviation) {
                    abbreviations.add(entityKey);
                } else {
                    entity.removeFromIndexes(aJCas);
                    filtered = true;
                    nonChemicalAbbreviations.add(entityKey);
                    // Retract previously accepted entities carrying the same text.
                    Iterator<NamedEntity> accepted = chemicals.iterator();
                    while (accepted.hasNext()) {
                        NamedEntity e = accepted.next();
                        if (entityText.equalsIgnoreCase(e.getCoveredText().trim())) {
                            e.removeFromIndexes();
                            accepted.remove();
                        }
                    }
                }
            }

            if (!filtered && lastEntity != null && this.crosses(lastEntity, entity)) {
                filtered = this.resolveOverlap(aJCas, chemicals, lastEntity, entity, isChemAbbreviation);
            }

            // Two non-overlapping entities separated by exactly one space are fused into
            // the current entity. (The distance guard short-circuits before substring().)
            if (lastEntity != null && !filtered && !this.crosses(lastEntity, entity) && entity.getBegin() - lastEntity.getEnd() < 10 && " ".equals(entity.getCAS().getDocumentText().substring(lastEntity.getEnd(), entity.getBegin()))) {
                // begin is a key of the annotation index: take the entity out of the
                // indexes while changing it so the index cannot be corrupted.
                entity.removeFromIndexes(aJCas);
                entity.setBegin(lastEntity.getBegin());
                entity.addToIndexes(aJCas);
                lastEntity.removeFromIndexes();
                chemicals.remove(lastEntity);
            }

            if (!filtered && ("dictionary".equals(entity.getSource()) || "drug tagger".equals(entity.getSource())) && entity.getEnd() - entity.getBegin() <= DEFAULT_DICTIONARY_FILTER_LENGTH) {
                entity.removeFromIndexes(aJCas);
                filtered = true;
            }

            if (!filtered) {
                chemicals.add(entity);
                lastEntity = entity;
            }
        }
        this.convertNamedEntitiesToChemicals(aJCas, chemicals);
    }

    /**
     * Decides which of two overlapping entities survives and removes the loser from the
     * CAS indexes (and, for the previous entity, from {@code chemicals}).
     *
     * <p>Precedence, first match wins: abbreviation rules; non-"dictionary" beats
     * "dictionary"; non-"sum_tagger" beats "sum_tagger"; a "crf" current entity beats the
     * previous one; otherwise the strictly longer covered text of the previous entity
     * wins, and in all remaining cases the current entity wins.
     *
     * @param aJCas              the CAS being processed
     * @param chemicals          entities accepted so far
     * @param lastEntity         the previously accepted entity (non-null)
     * @param entity             the current entity
     * @param isChemAbbreviation whether the current entity's text is an accepted abbreviation
     * @return {@code true} if the current entity must be treated as filtered
     */
    private boolean resolveOverlap(JCas aJCas, List<NamedEntity> chemicals, NamedEntity lastEntity, NamedEntity entity, boolean isChemAbbreviation) {
        String lastSource = lastEntity.getSource();
        String source = entity.getSource();
        boolean dropEntity;
        if ("ABBREV".equals(lastSource)) {
            dropEntity = this.isReplaceByAbbreviation(lastEntity, entity) && (!"ABBREV".equals(source) || isChemAbbreviation);
        } else if ("ABBREV".equals(source)) {
            if (!isChemAbbreviation) {
                // The previous entity is dropped and the current one is only flagged as
                // filtered; it is not removed from the indexes on this path.
                this.discard(aJCas, chemicals, lastEntity);
                return true;
            }
            if (this.isReplaceByAbbreviation(entity, lastEntity)) {
                this.discard(aJCas, chemicals, lastEntity);
            }
            return false;
        } else if ("dictionary".equals(lastSource) && !"dictionary".equals(source)) {
            dropEntity = false;
        } else if (!"dictionary".equals(lastSource) && "dictionary".equals(source)) {
            dropEntity = true;
        } else if ("sum_tagger".equals(lastSource) && !"sum_tagger".equals(source)) {
            dropEntity = false;
        } else if (!"sum_tagger".equals(lastSource) && "sum_tagger".equals(source)) {
            dropEntity = true;
        } else if ("crf".equals(source)) {
            dropEntity = false;
        } else {
            dropEntity = lastEntity.getCoveredText().length() > entity.getCoveredText().length();
        }
        if (dropEntity) {
            entity.removeFromIndexes(aJCas);
            return true;
        }
        this.discard(aJCas, chemicals, lastEntity);
        return false;
    }

    /** Removes {@code entity} from the CAS indexes and from the accepted list. */
    private void discard(JCas aJCas, List<NamedEntity> chemicals, NamedEntity entity) {
        entity.removeFromIndexes(aJCas);
        chemicals.remove(entity);
    }

    /**
     * Replaces each given entity with a {@link Chemical} annotation covering the same span
     * and carrying the same source, id, confidence and entity type.
     *
     * @param aJCas     the CAS receiving the new annotations
     * @param chemicals the entities to convert; each one is removed from the indexes
     */
    private void convertNamedEntitiesToChemicals(JCas aJCas, List<NamedEntity> chemicals) {
        for (NamedEntity entity : chemicals) {
            Chemical chemical = new Chemical(aJCas, entity.getBegin(), entity.getEnd());
            chemical.setSource(entity.getSource());
            chemical.setId(entity.getId());
            chemical.setConfidence(entity.getConfidence());
            chemical.setEntityType(entity.getEntityType());
            chemical.addToIndexes();
            entity.removeFromIndexes(aJCas);
        }
    }

    /**
     * Tests whether two entities overlap: one contains the other, or {@code entity}
     * begins inside {@code lastEntity} (boundaries inclusive). An entity that begins
     * before {@code lastEntity} and ends inside it is not reported as crossing.
     *
     * @param lastEntity the earlier entity, may be {@code null}
     * @param entity     the later entity
     * @return {@code true} if the spans overlap as described; {@code false} when
     *         {@code lastEntity} is {@code null}
     */
    private boolean crosses(NamedEntity lastEntity, NamedEntity entity) {
        if (lastEntity == null) {
            return false;
        }
        if (lastEntity.getBegin() <= entity.getBegin() && entity.getEnd() <= lastEntity.getEnd()) {
            return true;
        }
        if (entity.getBegin() <= lastEntity.getBegin() && lastEntity.getEnd() <= entity.getEnd()) {
            return true;
        }
        return lastEntity.getBegin() <= entity.getBegin() && entity.getBegin() <= lastEntity.getEnd();
    }

    /**
     * Decides whether an abbreviation entity takes precedence over another entity.
     *
     * @param abbr        the abbreviation entity
     * @param otherEntity the competing entity, may be {@code null}
     * @return {@code true} if {@code otherEntity} is {@code null}; if both are
     *         abbreviations and {@code abbr} has the longer text; if the other text is the
     *         abbreviation followed by {@code -} and one or more lower-case letters or
     *         hyphens; or if both cover exactly the same span
     */
    private boolean isReplaceByAbbreviation(NamedEntity abbr, NamedEntity otherEntity) {
        if (otherEntity == null) {
            return true;
        }
        String abbrText = abbr.getCoveredText();
        String othText = otherEntity.getCoveredText();
        if ("ABBREV".equals(otherEntity.getSource())) {
            return abbrText.length() > othText.length();
        }
        if (othText.matches(Pattern.quote(abbrText) + "-[a-z\\-]+")) {
            return true;
        }
        return abbr.getBegin() == otherEntity.getBegin() && abbr.getEnd() == otherEntity.getEnd();
    }
}

