/*
 * Decompiled with CFR 0.152.
 */
package de.berlin.hu.uima.ae.feature;

import de.berlin.hu.chemspot.Mention;
import de.berlin.hu.uima.ae.feature.FeatureToken;
import de.berlin.hu.util.Constants;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.u_compare.shared.semantic.NamedEntity;
import org.u_compare.shared.syntactic.Sentence;
import org.u_compare.shared.syntactic.Token;
import org.uimafit.util.JCasUtil;

public class FeatureTokenGenerator {
    private Map<Integer, List<FeatureToken>> tokens = null;
    private Map<String, Integer> chebiMinDepth = null;
    private Map<String, Integer> chebiAvgDepth = null;
    private Map<String, Integer> chebiMaxDepth = null;
    private Map<String, Integer> nrChildNodes = null;
    private List<String> prefixes = null;
    private List<String> suffixes = null;
    private Map<List<String>, String> phareData = null;
    private Map<String, String> whoAtcList = null;

    /**
     * Loads per-ChEBI-id ontology statistics from a tab-separated classpath resource.
     *
     * <p>The first line of the resource is skipped. Every following line must provide at least
     * three tab-separated columns: the ChEBI id, the number of child nodes, and a comma-separated
     * list of depths. For each id the minimum, rounded average and maximum depth as well as the
     * child count are stored in the corresponding lookup maps. The maps are re-created on every
     * call, before the resource is opened.
     *
     * @param file classpath location of the resource, resolved relative to this class
     * @throws IOException if the resource cannot be found or an I/O error occurs while reading
     * @throws NumberFormatException if a child count or depth is not a valid integer
     */
    private void loadChebiData(String file) throws IOException {
        this.chebiMinDepth = new HashMap<String, Integer>();
        this.chebiAvgDepth = new HashMap<String, Integer>();
        this.chebiMaxDepth = new HashMap<String, Integer>();
        this.nrChildNodes = new HashMap<String, Integer>();
        System.out.print("Loading chebi data from resource " + file + "... ");
        InputStream stream = this.getClass().getResourceAsStream(file);
        if (stream == null) {
            // getResourceAsStream signals a missing resource with null; report it as an
            // IOException so callers get a meaningful message instead of a bare NPE.
            throw new IOException("Resource not found on classpath: " + file);
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
        try {
            // The first line is not data and is discarded.
            reader.readLine();
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                String chebiId = columns[0];
                int childCount = Integer.parseInt(columns[1]);
                String[] depths = columns[2].split(",");
                int minDepth = Integer.MAX_VALUE;
                int maxDepth = 0;
                int depthSum = 0;
                for (String depthString : depths) {
                    int depth = Integer.parseInt(depthString.trim());
                    minDepth = Math.min(minDepth, depth);
                    maxDepth = Math.max(maxDepth, depth);
                    depthSum += depth;
                }
                int avgDepth = Math.round((float) depthSum / (float) depths.length);
                this.chebiMinDepth.put(chebiId, minDepth);
                this.chebiAvgDepth.put(chebiId, avgDepth);
                this.chebiMaxDepth.put(chebiId, maxDepth);
                this.nrChildNodes.put(chebiId, childCount);
            }
            System.out.println("Done.");
        } finally {
            // Release the stream even when a malformed line aborts parsing.
            reader.close();
        }
    }

    /**
     * Loads the chemical prefix and suffix lists from two classpath resources.
     *
     * <p>Reads {@code prefixes.txt} and {@code suffixes-filtered.txt} below {@code path} and
     * stores the first tab-separated column of every line in {@code prefixes} and
     * {@code suffixes} respectively. Both lists are re-created on every call.
     *
     * @param path classpath directory (including the trailing separator) holding both files
     * @throws IOException if a resource cannot be found or an I/O error occurs while reading
     */
    private void loadPrefixesSuffixes(String path) throws IOException {
        System.out.print("Loading prefixes and suffixes from resource directory " + path + "... ");
        this.prefixes = new ArrayList<String>();
        this.suffixes = new ArrayList<String>();
        this.readFirstColumn(path + "prefixes.txt", this.prefixes);
        this.readFirstColumn(path + "suffixes-filtered.txt", this.suffixes);
        System.out.println("Done.");
    }

    /**
     * Appends the first tab-separated column of every line of a classpath resource to a list.
     *
     * @param resource classpath location of the resource, resolved relative to this class
     * @param target list receiving one entry per line, in file order
     * @throws IOException if the resource cannot be found or an I/O error occurs while reading
     */
    private void readFirstColumn(String resource, List<String> target) throws IOException {
        InputStream stream = this.getClass().getResourceAsStream(resource);
        if (stream == null) {
            // A missing resource is reported as null; surface it as an IOException.
            throw new IOException("Resource not found on classpath: " + resource);
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                target.add(line.split("\t")[0]);
            }
        } finally {
            // Close the reader on both the normal and the exceptional path.
            reader.close();
        }
    }

    /**
     * Loads the PHARE term lists from a tab-separated classpath resource.
     *
     * <p>Each line must contain a label in the first column and a {@code |}-separated list of
     * terms in the second column. The term list becomes the key and the label the value of an
     * entry in {@code phareData}, which is re-created on every call.
     *
     * @param file classpath location of the resource, resolved relative to this class
     * @throws IOException if the resource cannot be found or an I/O error occurs while reading
     */
    private void loadPhareData(String file) throws IOException {
        System.out.print("Loading pharmagenomics relationship ontology data from resource " + file + "... ");
        this.phareData = new HashMap<List<String>, String>();
        InputStream stream = this.getClass().getResourceAsStream(file);
        if (stream == null) {
            // A missing resource is reported as null; surface it as an IOException.
            throw new IOException("Resource not found on classpath: " + file);
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                String label = columns[0];
                String[] rawTerms = columns[1].split("\\|");
                // The list is used as a map key, so it must not be modified after insertion.
                List<String> terms = new ArrayList<String>(rawTerms.length);
                for (String term : rawTerms) {
                    terms.add(term);
                }
                this.phareData.put(terms, label);
            }
        } finally {
            // Close the reader even when a malformed line aborts parsing.
            reader.close();
        }
        System.out.println("Done.");
    }

    /**
     * Loads the WHO ATC term list from a classpath resource.
     *
     * <p>Every line is trimmed first. A line containing {@code " - "} starts a new entry: the
     * text before the separator is the identifier and the text after it the term. Any other line
     * is stored as an additional term of the most recent identifier under the key
     * {@code identifier/n}, where {@code n} counts such lines since the identifier was read.
     * For every term containing a comma, the text before the first comma is additionally stored
     * under the key suffixed with {@code /C}. The map is re-created on every call.
     *
     * @param path classpath location of the resource, resolved relative to this class
     * @throws IOException if the resource cannot be found or an I/O error occurs while reading
     */
    private void loadWHOATCData(String path) throws IOException {
        System.out.print("Loading WHO ATC list from resource directory " + path + "... ");
        this.whoAtcList = new HashMap<String, String>();
        InputStream stream = this.getClass().getResourceAsStream(path);
        if (stream == null) {
            // A missing resource is reported as null; surface it as an IOException.
            throw new IOException("Resource not found on classpath: " + path);
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
        try {
            String identifier = null;
            int continuation = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                String term;
                if (line.contains(" - ")) {
                    String[] parts = line.split(" - ");
                    identifier = parts[0].trim();
                    term = parts[1].trim();
                    continuation = 0;
                } else {
                    term = line;
                    ++continuation;
                }
                String mapIdentifier = identifier + (continuation > 0 ? "/" + continuation : "");
                this.whoAtcList.put(mapIdentifier, term);
                if (term.contains(",")) {
                    // Also index the part before the first comma as a shortened form.
                    this.whoAtcList.put(mapIdentifier + "/C", term.replaceAll(",.*", ""));
                }
            }
        } finally {
            // Close the reader on both the normal and the exceptional path.
            reader.close();
        }
        System.out.println("Done.");
    }

    /**
     * Creates a generator and loads all lookup resources from the classpath.
     *
     * <p>Progress is reported on standard output. An {@link IOException} raised by one of the
     * loaders is reported on standard output together with its stack trace and does not abort
     * construction; the remaining resources are still loaded.
     */
    public FeatureTokenGenerator() {
        System.out.println();
        System.out.println("Initializing feature generator.");
        this.tokens = new HashMap<Integer, List<FeatureToken>>();

        // Each resource is loaded only while its backing field is still unset.
        if (this.chebiMinDepth == null) {
            try {
                this.loadChebiData("/resources/chebi/chebi_ontology_fulldepth.txt");
            } catch (IOException chebiFailure) {
                System.out.println("Error while loading chebi data");
                chebiFailure.printStackTrace();
            }
        }

        if (this.prefixes == null) {
            try {
                this.loadPrefixesSuffixes("/resources/");
            } catch (IOException affixFailure) {
                System.out.println("Error while loading prefixes and suffixes");
                affixFailure.printStackTrace();
            }
        }

        if (this.phareData == null) {
            try {
                this.loadPhareData("/resources/phare.txt");
            } catch (IOException phareFailure) {
                System.out.println("Error while loading pharmagenomics relationship ontology data");
                phareFailure.printStackTrace();
            }
        }

        if (this.whoAtcList == null) {
            try {
                this.loadWHOATCData("/resources/who/WHO-ATC.txt");
            } catch (IOException whoAtcFailure) {
                System.out.println("Error while loading WHO ATC list");
                whoAtcFailure.printStackTrace();
            }
        }

        System.out.println("Feature generator initialized.");
        System.out.println();
    }

    /**
     * Runs the feature checks belonging to one processing phase on a document.
     *
     * <ul>
     *   <li>{@code PHASE1}: registers a fresh token list for the document, generates the feature
     *       tokens and adds normalization features.</li>
     *   <li>{@code PHASE2}: marks tokens added by match expansion.</li>
     *   <li>{@code PHASE3}: marks stopword tokens.</li>
     *   <li>{@code PHASE4}: adds normalization, prefix/suffix, PHARE and WHO ATC features.</li>
     * </ul>
     *
     * <p>Token lists are keyed by the hash code of the document text, so {@code PHASE1} replaces
     * any list previously registered for a document whose text has the same hash code.
     *
     * @param aJCas the document to process
     * @param phase the phase whose checks are to be executed
     * @throws AnalysisEngineProcessException declared for callers; not thrown by this method itself
     */
    public void process(JCas aJCas, Feature_Phase phase) throws AnalysisEngineProcessException {
        switch (phase) {
            case PHASE1:
                this.tokens.put(aJCas.getDocumentText().hashCode(), new ArrayList<FeatureToken>());
                this.generateFeatureTokens(aJCas);
                this.checkNormalization(aJCas);
                break;
            case PHASE2:
                this.checkExpandedMentions(aJCas);
                break;
            case PHASE3:
                this.checkStopwords(aJCas);
                break;
            case PHASE4:
                this.checkNormalization(aJCas);
                this.checkPrefixesSuffixes(aJCas);
                this.checkPhareData(aJCas);
                this.checkWHOATC(aJCas);
                break;
            default:
                // No other phases exist; nothing to do.
                break;
        }
    }

    /**
     * Discards the feature-token lists of every document registered so far.
     */
    public void clearFeatureTokens() {
        tokens.clear();
    }

    /**
     * Creates one feature token per {@link Token} of the document and tags the tokens covered by
     * each non-gold-standard named entity with the feature named after the entity's source.
     *
     * <p>The source is upper-cased independently of the default locale and resolved once per
     * entity. Entities without a source, with the source {@code "goldstandard"}, or with a source
     * that does not name a {@link ChemSpot_Feature} constant contribute no feature.
     *
     * <p>The token list of the document must already be registered (see {@code process}).
     *
     * @param aJCas the document whose tokens are generated
     */
    private void generateFeatureTokens(JCas aJCas) {
        List<FeatureToken> documentTokens = this.getFeatureTokens(aJCas);
        for (Token token : JCasUtil.iterate(aJCas, Token.class)) {
            documentTokens.add(new FeatureToken(aJCas, token.getBegin(), token.getEnd()));
        }
        for (NamedEntity ne : JCasUtil.iterate(aJCas, NamedEntity.class)) {
            String source = ne.getSource();
            if (source == null || "goldstandard".equals(source)) {
                continue;
            }
            ChemSpot_Feature sourceFeature;
            try {
                // Locale.ROOT keeps the lookup stable under locales with special casing rules.
                sourceFeature = ChemSpot_Feature.valueOf(source.toUpperCase(Locale.ROOT));
            } catch (IllegalArgumentException ignored) {
                // The source does not correspond to a feature constant; nothing to tag.
                continue;
            }
            for (FeatureToken covered : this.getFeatureTokens(aJCas, ne)) {
                covered.addFeature(sourceFeature);
            }
        }
    }

    /**
     * Returns the feature-token list registered for a document.
     *
     * <p>The list is looked up by the hash code of the document text.
     *
     * @param aJCas the document whose tokens are requested
     * @return the registered list, or {@code null} if none is registered under that hash code
     */
    public List<FeatureToken> getFeatureTokens(JCas aJCas) {
        int documentKey = aJCas.getDocumentText().hashCode();
        return this.tokens.get(documentKey);
    }

    /**
     * Unregisters and returns the feature-token list of a document.
     *
     * <p>The list is looked up by the hash code of the document text.
     *
     * @param aJCas the document whose tokens are to be removed
     * @return the list that was registered, or {@code null} if there was none
     */
    public List<FeatureToken> removeFeatureTokens(JCas aJCas) {
        int documentKey = aJCas.getDocumentText().hashCode();
        return this.tokens.remove(documentKey);
    }

    /**
     * Collects the feature tokens of a document that lie completely inside an annotation.
     *
     * <p>Iteration stops at the first token that begins after the end of the annotation, so
     * tokens behind such a token are never considered.
     *
     * @param aJCas the document whose tokens are inspected
     * @param container the annotation that must cover the returned tokens
     * @return a new list with the covered tokens in document-list order
     */
    public List<FeatureToken> getFeatureTokens(JCas aJCas, Annotation container) {
        ArrayList<FeatureToken> covered = new ArrayList<FeatureToken>();
        for (FeatureToken candidate : this.getFeatureTokens(aJCas)) {
            if (candidate.getBegin() > container.getEnd()) {
                // First token starting past the container ends the scan.
                break;
            }
            if (candidate.getBegin() >= container.getBegin() && candidate.getEnd() <= container.getEnd()) {
                covered.add(candidate);
            }
        }
        return covered;
    }

    /**
     * Marks tokens that became part of a named entity through match expansion.
     *
     * <p>For every non-gold-standard entity whose source names a {@link ChemSpot_Feature}
     * constant that has a matching {@code _ME} constant: each covered token lacking the source
     * feature receives the {@code _ME} feature and {@code MATCH_EXPANSION}. If at least one token
     * was marked this way, every covered token that does not yet carry the {@code _ME} feature
     * name receives both features as well.
     *
     * <p>The source is upper-cased independently of the default locale, and both constants are
     * resolved once per entity. Entities without a source or without matching constants are
     * skipped.
     *
     * @param aJCas the document to process
     */
    private void checkExpandedMentions(JCas aJCas) {
        for (NamedEntity ne : JCasUtil.iterate(aJCas, NamedEntity.class)) {
            String source = ne.getSource();
            if (source == null || "goldstandard".equals(source)) {
                continue;
            }
            ChemSpot_Feature sourceFeature;
            ChemSpot_Feature expandedFeature;
            try {
                // Locale.ROOT keeps the lookup stable under locales with special casing rules.
                sourceFeature = ChemSpot_Feature.valueOf(source.toUpperCase(Locale.ROOT));
                expandedFeature = ChemSpot_Feature.valueOf(sourceFeature.name() + "_ME");
            } catch (IllegalArgumentException ignored) {
                // Either the source or its _ME counterpart is not a feature constant.
                continue;
            }
            String expandedName = expandedFeature.name();
            List<FeatureToken> covered = this.getFeatureTokens(aJCas, ne);

            // First pass: tokens that were not tagged by the source itself were added by expansion.
            boolean wasExpanded = false;
            for (FeatureToken token : covered) {
                if (token.hasFeature(sourceFeature)) {
                    continue;
                }
                token.addFeature(expandedFeature);
                token.addFeature(ChemSpot_Feature.MATCH_EXPANSION);
                wasExpanded = true;
            }
            if (!wasExpanded) {
                continue;
            }

            // Second pass: propagate the expansion marker to the remaining tokens of the entity.
            for (FeatureToken token : covered) {
                if (token.hasFeature(expandedName)) {
                    continue;
                }
                token.addFeature(expandedFeature);
                token.addFeature(ChemSpot_Feature.MATCH_EXPANSION);
            }
        }
    }

    /**
     * Adds the {@code STOPWORD} feature to tokens that carry at least one feature but are not
     * covered by any non-gold-standard named entity.
     *
     * @param aJCas the document to process
     */
    private void checkStopwords(JCas aJCas) {
        // Start from a copy of all tokens and strike out those still covered by an entity.
        List<FeatureToken> uncovered = new ArrayList<FeatureToken>(this.getFeatureTokens(aJCas));
        for (NamedEntity ne : JCasUtil.iterate(aJCas, NamedEntity.class)) {
            if (!"goldstandard".equals(ne.getSource())) {
                for (FeatureToken inside : this.getFeatureTokens(aJCas, ne)) {
                    uncovered.remove(inside);
                }
            }
        }
        for (FeatureToken candidate : uncovered) {
            if (!candidate.getFeatures().isEmpty()) {
                candidate.addFeature(ChemSpot_Feature.STOPWORD);
            }
        }
    }

    /**
     * Adds normalization features to the tokens of every non-gold-standard named entity.
     *
     * <p>Each covered token receives {@code CHEMSPOT}, one feature per non-empty id of the
     * mention (named after the {@code Constants.ChemicalID} constant at the same index), and, if
     * the mention has a ChEBI id, the average/minimum/maximum depth and child-count features for
     * which that id has an entry in the lookup maps.
     *
     * @param aJCas the document to process
     */
    private void checkNormalization(JCas aJCas) {
        for (NamedEntity ne : JCasUtil.iterate(aJCas, NamedEntity.class)) {
            if ("goldstandard".equals(ne.getSource())) {
                continue;
            }
            Mention mention = new Mention(ne);
            String[] ids = mention.getIds();
            for (FeatureToken covered : this.getFeatureTokens(aJCas, ne)) {
                covered.addFeature(ChemSpot_Feature.CHEMSPOT);
                for (int idx = 0; idx < ids.length; ++idx) {
                    if (ids[idx] != null && !ids[idx].isEmpty()) {
                        covered.addFeature(Constants.ChemicalID.values()[idx].toString());
                    }
                }
                String chebiId = mention.getCHEB();
                if (chebiId != null) {
                    addLookupFeature(covered, ChemSpot_Feature.CHEB_AVG_DEPTH, this.chebiAvgDepth, chebiId);
                    addLookupFeature(covered, ChemSpot_Feature.CHEB_MIN_DEPTH, this.chebiMinDepth, chebiId);
                    addLookupFeature(covered, ChemSpot_Feature.CHEB_MAX_DEPTH, this.chebiMaxDepth, chebiId);
                    addLookupFeature(covered, ChemSpot_Feature.CHEB_CHILDREN, this.nrChildNodes, chebiId);
                }
            }
        }
    }

    /**
     * Adds the feature {@code PREFIX_value} to a token if the lookup map has an entry for the key.
     *
     * @param token the token receiving the feature
     * @param prefix the feature constant whose name forms the first part of the feature
     * @param lookup the map providing the value appended after the underscore
     * @param key the key to look up
     */
    private static void addLookupFeature(FeatureToken token, ChemSpot_Feature prefix, Map<String, Integer> lookup, String key) {
        if (lookup.containsKey(key)) {
            token.addFeature(prefix + "_" + lookup.get(key));
        }
    }

    /**
     * Adds prefix and suffix features to every token of the document.
     *
     * <p>The token text is lower-cased once, independently of the default locale. For every
     * loaded prefix the text starts with, the token receives {@code CHEMICAL_PREFIX} and
     * {@code CHEMICAL_PREFIX_} followed by the upper-cased prefix; suffixes are handled
     * analogously with {@code CHEMICAL_SUFFIX}.
     *
     * @param aJCas the document to process
     */
    private void checkPrefixesSuffixes(JCas aJCas) {
        for (FeatureToken token : this.getFeatureTokens(aJCas)) {
            // Lower-case once per token instead of once per prefix/suffix comparison.
            String lowerText = token.getCoveredText().toLowerCase(Locale.ROOT);
            for (String prefix : this.prefixes) {
                if (lowerText.startsWith(prefix)) {
                    token.addFeature(ChemSpot_Feature.CHEMICAL_PREFIX);
                    token.addFeature(ChemSpot_Feature.CHEMICAL_PREFIX + "_" + prefix.toUpperCase(Locale.ROOT));
                }
            }
            for (String suffix : this.suffixes) {
                if (lowerText.endsWith(suffix)) {
                    token.addFeature(ChemSpot_Feature.CHEMICAL_SUFFIX);
                    token.addFeature(ChemSpot_Feature.CHEMICAL_SUFFIX + "_" + suffix.toUpperCase(Locale.ROOT));
                }
            }
        }
    }

    /**
     * Tags tokens that lie inside an occurrence of a PHARE term with the term's label.
     *
     * <p>Every sentence is searched case-insensitively for every term. An occurrence counts only
     * if it is neither directly preceded nor directly followed by a letter. Tokens lying
     * completely inside a counted occurrence receive the label as a feature, with whitespace runs
     * replaced by underscores and upper-cased.
     *
     * <p>Empty terms are skipped: searching for an empty string never advances, so such a term
     * would otherwise keep this method looping forever.
     *
     * <p>NOTE(review): offsets found in the lower-cased sentence are applied to document offsets,
     * which assumes lower-casing does not change the text length — confirm for the expected input.
     *
     * @param aJCas the document to process
     */
    private void checkPhareData(JCas aJCas) {
        for (Sentence sentence : JCasUtil.iterate(aJCas, Sentence.class)) {
            String sentenceText = sentence.getCoveredText().toLowerCase(Locale.ROOT);
            int sentenceBegin = sentence.getBegin();
            // Collected lazily on the first counted occurrence and reused for the whole sentence.
            List<FeatureToken> sentenceTokens = null;
            for (Map.Entry<List<String>, String> entry : this.phareData.entrySet()) {
                // Built lazily on the first counted occurrence of any term of this entry.
                String labelFeature = null;
                for (String term : entry.getKey()) {
                    if (term.isEmpty()) {
                        continue;
                    }
                    String needle = term.toLowerCase(Locale.ROOT);
                    int termLength = term.length();
                    int index = sentenceText.indexOf(needle);
                    while (index != -1) {
                        int matchEnd = index + termLength;
                        boolean letterBefore = index > 0
                                && Character.isLetter(sentenceText.charAt(index - 1));
                        boolean letterAfter = matchEnd < sentenceText.length()
                                && Character.isLetter(sentenceText.charAt(matchEnd));
                        if (!letterBefore && !letterAfter) {
                            if (sentenceTokens == null) {
                                sentenceTokens = this.getFeatureTokens(aJCas, sentence);
                            }
                            if (labelFeature == null) {
                                labelFeature = entry.getValue().replaceAll("\\s+", "_").toUpperCase(Locale.ROOT);
                            }
                            for (FeatureToken token : sentenceTokens) {
                                if (token.getBegin() >= sentenceBegin + index
                                        && token.getEnd() <= sentenceBegin + matchEnd) {
                                    token.addFeature(labelFeature);
                                }
                            }
                        }
                        index = sentenceText.indexOf(needle, matchEnd);
                    }
                }
            }
        }
    }

    /**
     * Tags tokens that lie inside an occurrence of a WHO ATC term.
     *
     * <p>Every sentence is searched case-insensitively for every loaded term. An occurrence
     * counts only if it is neither directly preceded nor directly followed by a letter. Tokens
     * lying completely inside a counted occurrence receive a {@code WHO-ATC-identifier:TERM}
     * feature for the matched entry and for every entry whose identifier is a prefix of the
     * matched identifier.
     *
     * <p>Empty terms are skipped: searching for an empty string never advances, so such a term
     * would otherwise keep this method looping forever.
     *
     * <p>NOTE(review): offsets found in the lower-cased sentence are applied to document offsets,
     * which assumes lower-casing does not change the text length — confirm for the expected input.
     *
     * @param aJCas the document to process
     */
    private void checkWHOATC(JCas aJCas) {
        for (Sentence sentence : JCasUtil.iterate(aJCas, Sentence.class)) {
            String sentenceText = sentence.getCoveredText().toLowerCase(Locale.ROOT);
            int sentenceBegin = sentence.getBegin();
            // Collected lazily on the first counted occurrence and reused for the whole sentence.
            List<FeatureToken> sentenceTokens = null;
            for (Map.Entry<String, String> entry : this.whoAtcList.entrySet()) {
                String identifier = entry.getKey();
                String term = entry.getValue();
                if (term.isEmpty()) {
                    continue;
                }
                String needle = term.toLowerCase(Locale.ROOT);
                int termLength = term.length();
                // Built lazily; identical for every occurrence of this entry.
                List<String> whoAtcFeatures = null;
                int index = sentenceText.indexOf(needle);
                while (index != -1) {
                    int matchEnd = index + termLength;
                    boolean letterBefore = index > 0
                            && Character.isLetter(sentenceText.charAt(index - 1));
                    boolean letterAfter = matchEnd < sentenceText.length()
                            && Character.isLetter(sentenceText.charAt(matchEnd));
                    if (!letterBefore && !letterAfter) {
                        if (sentenceTokens == null) {
                            sentenceTokens = this.getFeatureTokens(aJCas, sentence);
                        }
                        if (whoAtcFeatures == null) {
                            whoAtcFeatures = this.collectWhoAtcFeatures(identifier, term);
                        }
                        for (FeatureToken token : sentenceTokens) {
                            if (token.getBegin() >= sentenceBegin + index
                                    && token.getEnd() <= sentenceBegin + matchEnd) {
                                token.getFeatures().addAll(whoAtcFeatures);
                            }
                        }
                    }
                    index = sentenceText.indexOf(needle, matchEnd);
                }
            }
        }
    }

    /**
     * Builds the feature names for a matched WHO ATC entry.
     *
     * <p>The list starts with the feature of the matched entry, followed by the feature of every
     * loaded entry whose identifier is a prefix of the matched identifier. Because an identifier
     * is a prefix of itself, the matched entry's feature is contained twice; this is kept so the
     * collection handed to the tokens is unchanged.
     *
     * @param identifier identifier of the matched entry
     * @param term term of the matched entry
     * @return the feature names in the order described above
     */
    private List<String> collectWhoAtcFeatures(String identifier, String term) {
        List<String> features = new ArrayList<String>();
        features.add("WHO-ATC-" + identifier + ":" + term.replaceAll("\\s+", "_").toUpperCase(Locale.ROOT));
        for (Map.Entry<String, String> other : this.whoAtcList.entrySet()) {
            if (identifier.startsWith(other.getKey())) {
                features.add("WHO-ATC-" + other.getKey() + ":"
                        + other.getValue().replaceAll("\\s+", "_").toUpperCase(Locale.ROOT));
            }
        }
        return features;
    }

    /**
     * Prints the feature tokens of a document to standard output.
     *
     * <p>Tokens without features are not printed. Before the first token covered by a
     * non-gold-standard named entity, an empty line and the entity's text are printed; each
     * entity is printed at most once.
     *
     * @param aJCas the document whose tokens are printed
     */
    public void printFeatureTokens(JCas aJCas) {
        // Queue of entities that have not been printed or skipped yet, gold standard excluded.
        List<NamedEntity> pending = new ArrayList<NamedEntity>();
        for (NamedEntity ne : JCasUtil.select(aJCas, NamedEntity.class)) {
            if (!"goldstandard".equals(ne.getSource())) {
                pending.add(ne);
            }
        }
        for (FeatureToken token : this.getFeatureTokens(aJCas)) {
            // Drop entities that end before the current token.
            while (!pending.isEmpty() && pending.get(0).getEnd() < token.getBegin()) {
                pending.remove(0);
            }
            if (!pending.isEmpty()
                    && pending.get(0).getBegin() <= token.getBegin()
                    && pending.get(0).getEnd() >= token.getEnd()) {
                NamedEntity head = pending.remove(0);
                System.out.println();
                System.out.println(head.getCoveredText());
            }
            if (!token.getFeatures().isEmpty()) {
                System.out.println("  " + token.getCoveredText() + " -> " + token.getFeatures());
            }
        }
    }

    /**
     * Names of the features this generator attaches to feature tokens.
     *
     * <p>Constants are also resolved by name at runtime: the upper-cased source of a named entity
     * is looked up with {@code valueOf}, and the match-expansion variant is looked up by
     * appending {@code _ME} to that name. Renaming a constant therefore changes which entity
     * sources are recognized.
     */
    public static enum ChemSpot_Feature {
        // Looked up from the upper-cased named-entity source.
        // NOTE(review): presumably one constant per tagger component — confirm against the taggers.
        CRF,
        DICTIONARY,
        SUM_TAGGER,
        ABBREV,
        // Added to every token covered by a non-gold-standard entity during normalization.
        CHEMSPOT,
        // Added together with a *_ME constant to tokens marked as match expansion.
        MATCH_EXPANSION,
        // Match-expansion counterparts, resolved as "<source constant>_ME".
        CRF_ME,
        DICTIONARY_ME,
        SUM_TAGGER_ME,
        ABBREV_ME,
        // Added to feature-carrying tokens outside every non-gold-standard entity.
        STOPWORD,
        // Not referenced by name in this class.
        // NOTE(review): presumably mirror the Constants.ChemicalID names — confirm.
        CHID,
        CHEB,
        CAS,
        PUBC,
        PUBS,
        INCH,
        DRUG,
        HMBD,
        KEGG,
        KEGD,
        MESH,
        // Used as name prefixes: the feature added is "<constant>_<value from the ChEBI maps>".
        CHEB_MIN_DEPTH,
        CHEB_AVG_DEPTH,
        CHEB_MAX_DEPTH,
        CHEB_CHILDREN,
        // Added as-is and as name prefix: "<constant>_<upper-cased prefix or suffix>".
        CHEMICAL_PREFIX,
        CHEMICAL_SUFFIX;

    }

    /**
     * Processing phases accepted by {@code process}; each selects a fixed set of checks.
     */
    public static enum Feature_Phase {
        // Registers the token list, generates feature tokens, adds normalization features.
        PHASE1,
        // Marks tokens added by match expansion.
        PHASE2,
        // Marks stopword tokens.
        PHASE3,
        // Adds normalization, prefix/suffix, PHARE and WHO ATC features.
        PHASE4;

    }
}

