/*
 * Decompiled with CFR 0.152.
 */
package de.berlin.hu.uima.ae.normalizer;

import de.berlin.hu.chemspot.ChemSpotConfiguration;
import de.berlin.hu.uima.ae.normalizer.StringComparator;
import de.berlin.hu.util.Constants;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.u_compare.shared.semantic.NamedEntity;
import org.uimafit.util.JCasUtil;
import uk.ac.cam.ch.wwmm.opsin.NameToInchi;
import uk.ac.cam.ch.wwmm.opsin.NameToStructureException;

/**
 * UIMA annotator that sets the id of every non-gold-standard {@link NamedEntity}
 * to the string form of an identifier array indexed by
 * {@link Constants.ChemicalID#ordinal()}.
 *
 * <p>The identifier array is assembled from three sources: the dictionary loaded
 * from the file named by the {@code PathToIDs} configuration parameter, an InChI
 * produced by OPSIN (only when that component is enabled) and the FDA table
 * loaded from the classpath.</p>
 *
 * <p>The dictionaries and all counters are static and therefore shared by every
 * instance of this annotator; no synchronization is performed here.</p>
 */
public class Normalizer
extends JCasAnnotator_ImplBase {
    /** Lower-cased chemical name -> identifier columns as read from the ids file. */
    private static Map<String, String[]> ids = new HashMap<String, String[]>();
    /** Identifiers read from zip entries whose name contains "normalized". */
    private static Map<String, String[]> normalizedIds = new HashMap<String, String[]>();
    /** OPSIN name-to-InChI converter; stays {@code null} when OPSIN is disabled or failed to start. */
    private NameToInchi nameToInChi;
    /** Name of the UIMA configuration parameter holding the path of the ids file. */
    private static final String PATH_TO_IDS = "PathToIDs";
    /** Length of the name prefixes counted by {@link #writePrefixSuffixLists()}. */
    private static final int PREFIX_LENGTH = 3;
    /** Length of the name suffixes counted by {@link #writePrefixSuffixLists()}. */
    private static final int SUFFIX_LENGTH = 3;
    /** Drug name -> FDA id; {@code null} until the FDA table was loaded successfully. */
    private Map<String, String> fdaIds = null;
    /** FDA id -> date column of the FDA table; {@code null} until the table was loaded. */
    private Map<String, String> fdaDates = null;
    // Counters reported by printChemHitsStatistic(); nothing in this class increments them.
    private static int chemHitsDifferent = 0;
    private static int chemHitsEqual = 0;
    private static int chemHitsIdFound = 0;
    private static int chemHitsIdFoundExclusively = 0;
    private static int chemHitsIdNotFoundExclusively = 0;
    private static int chemHitsIdFoundBoth = 0;
    private static int chemHitsIdNotFound = 0;
    private static int chemHitsdifferentIdFound = 0;
    /** Number of entities processed (gold-standard entities excluded). */
    private static int nE = 0;
    /** Number of processed entities that got a dictionary hit or an InChI. */
    private static int nN = 0;
    /** Number of processed entities found in the FDA table. */
    private static int fda = 0;
    /** Number of entities consisting of a single alphabetic token. */
    private static int one = 0;
    /** Number of entities consisting of two alphabetic tokens separated by one space. */
    private static int two = 0;
    /** Number of entities consisting of one or two alphanumeric tokens. */
    private static int twoAll = 0;

    /**
     * Orders strings by the count stored for them in a map (ascending).
     * Every compared string must be a key of that map.
     */
    private static final class CountComparator implements Comparator<String> {
        private final Map<String, Integer> counts;

        CountComparator(Map<String, Integer> counts) {
            this.counts = counts;
        }

        @Override
        public int compare(String o1, String o2) {
            return Integer.compare(this.counts.get(o1), this.counts.get(o2));
        }
    }

    /**
     * Loads the tab-separated FDA table (columns: id, drug, date) from the classpath.
     * The instance maps are only replaced once the whole resource was read.
     *
     * @param pathToFile classpath location of the table
     * @throws IOException if the resource does not exist or cannot be read
     */
    private void loadFDAData(String pathToFile) throws IOException {
        InputStream resource = this.getClass().getResourceAsStream(pathToFile);
        if (resource == null) {
            // Report a missing resource as an I/O problem so initialize() can treat it as best effort.
            throw new IOException("FDA resource not found on classpath: " + pathToFile);
        }
        Map<String, String> loadedIds = new HashMap<String, String>();
        Map<String, String> loadedDates = new HashMap<String, String>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(resource))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                if (columns.length < 3) {
                    // Blank or truncated row: skip it instead of failing on a missing column.
                    continue;
                }
                String id = columns[0];
                // NOTE(review): process() looks names up in lower case, while the drug column is
                // stored as-is here -- presumably the table is already lower-cased; confirm.
                loadedIds.put(columns[1], id);
                loadedDates.put(id, columns[2]);
            }
        }
        this.fdaIds = loadedIds;
        this.fdaDates = loadedDates;
    }

    /** Adds one to the count stored for {@code key}, starting at zero for unseen keys. */
    private static void increment(Map<String, Integer> counts, String key) {
        Integer current = counts.get(key);
        counts.put(key, current == null ? 1 : current + 1);
    }

    /**
     * Writes one {@code key<TAB>count} line per key to {@code fileName}.
     *
     * @param fileName file to create or overwrite
     * @param keys keys in output order
     * @param counts count of every key
     * @param filterRegex when non-null, only keys fully matching this regex are written
     * @throws IOException if the file cannot be written
     */
    private static void writeCounts(String fileName, Iterable<String> keys, Map<String, Integer> counts, String filterRegex) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) {
            for (String key : keys) {
                if (filterRegex != null && !key.matches(filterRegex)) {
                    continue;
                }
                writer.write(String.format("%s\t%d%n", key, counts.get(key)));
            }
        }
    }

    /**
     * Counts the leading and trailing character triples of all dictionary names
     * (one enclosing parenthesis stripped on each side) and writes them, most
     * frequent first, to {@code prefixes.txt}, {@code suffixes.txt} and
     * {@code suffixes-filtered.txt} (suffixes made of lower-case letters only)
     * in the working directory.
     *
     * @throws IOException if one of the files cannot be written
     */
    private void writePrefixSuffixLists() throws IOException {
        Map<String, Integer> prefixCounts = new HashMap<String, Integer>();
        Map<String, Integer> suffixCounts = new HashMap<String, Integer>();
        System.out.println("Writing prefix and suffix lists...");
        for (String name : ids.keySet()) {
            String chemical = name;
            if (chemical.startsWith("(")) {
                chemical = chemical.substring(1);
            }
            if (chemical.endsWith(")")) {
                chemical = chemical.substring(0, chemical.length() - 1);
            }
            if (chemical.length() >= PREFIX_LENGTH) {
                increment(prefixCounts, chemical.substring(0, PREFIX_LENGTH));
            }
            if (chemical.length() >= SUFFIX_LENGTH) {
                increment(suffixCounts, chemical.substring(chemical.length() - SUFFIX_LENGTH));
            }
        }
        ArrayList<String> prefixList = new ArrayList<String>(prefixCounts.keySet());
        ArrayList<String> suffixList = new ArrayList<String>(suffixCounts.keySet());
        Collections.sort(prefixList, Collections.reverseOrder(new CountComparator(prefixCounts)));
        Collections.sort(suffixList, Collections.reverseOrder(new CountComparator(suffixCounts)));
        writeCounts("prefixes.txt", prefixList, prefixCounts, null);
        writeCounts("suffixes.txt", suffixList, suffixCounts, null);
        writeCounts("suffixes-filtered.txt", suffixList, suffixCounts, String.format("[a-z]{%d}", SUFFIX_LENGTH));
        System.out.println("Done.");
    }

    /**
     * Parses an ids stream in which every line is {@code name<TAB>id1<TAB>id2...}.
     * Names are lower-cased; lines without a tab (e.g. blank lines) are ignored.
     * The stream is not closed here; that is left to the caller.
     *
     * @param in stream to read
     * @return map from lower-cased name to its identifier columns
     * @throws IOException if reading fails
     */
    public static Map<String, String[]> readIdsFile(InputStream in) throws IOException {
        Map<String, String[]> result = new HashMap<String, String[]>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        String line;
        while ((line = reader.readLine()) != null) {
            int tab = line.indexOf('\t');
            if (tab < 0) {
                // No name/identifier separator: nothing usable on this line.
                continue;
            }
            String chem = line.substring(0, tab).toLowerCase();
            result.put(chem, line.substring(tab + 1).split("\t"));
        }
        return result;
    }

    /**
     * Loads identifiers from a plain ids file or, when the name ends with
     * {@code .zip}, from every entry of that archive. Later entries overwrite
     * earlier ones for the same name.
     *
     * @param file path of the ids file or zip archive
     * @return map from lower-cased name to its identifier columns
     * @throws IOException if the file cannot be opened or read
     */
    public static Map<String, String[]> loadIdsFromFile(String file) throws IOException {
        Map<String, String[]> loaded = new HashMap<String, String[]>();
        if (file.endsWith(".zip")) {
            try (ZipFile zipFile = new ZipFile(file)) {
                Enumeration<? extends ZipEntry> entries = zipFile.entries();
                while (entries.hasMoreElements()) {
                    ZipEntry entry = entries.nextElement();
                    if (entry.isDirectory()) {
                        continue;
                    }
                    try (InputStream in = zipFile.getInputStream(entry)) {
                        loaded.putAll(Normalizer.readIdsFile(in));
                    }
                }
            }
        } else {
            try (InputStream in = new FileInputStream(file)) {
                loaded.putAll(Normalizer.readIdsFile(in));
            }
        }
        return loaded;
    }

    /**
     * Writes {@code ids} in the format understood by {@link #readIdsFile(InputStream)}:
     * the name followed by one tab-prefixed column per {@link Constants.ChemicalID}
     * value. Missing or {@code null} identifiers are written as empty columns.
     *
     * @param pathToFile file to create or overwrite
     * @param ids map from name to identifier array indexed by {@code ChemicalID} ordinal
     * @throws IOException if the file cannot be written
     */
    public static void writeIDs(String pathToFile, Map<String, String[]> ids) throws IOException {
        Constants.ChemicalID[] idTypes = Constants.ChemicalID.values();
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(pathToFile))) {
            for (Map.Entry<String, String[]> entry : ids.entrySet()) {
                String[] chemIds = entry.getValue();
                StringBuilder row = new StringBuilder(String.valueOf(entry.getKey()));
                for (Constants.ChemicalID type : idTypes) {
                    row.append('\t');
                    int column = type.ordinal();
                    if (chemIds != null && column < chemIds.length && chemIds[column] != null) {
                        row.append(chemIds[column]);
                    }
                }
                writer.write(row.toString());
                writer.newLine();
            }
        }
    }

    /**
     * @return the shared, mutable dictionary from lower-cased name to identifier columns
     */
    public static Map<String, String[]> getIds() {
        return ids;
    }

    /**
     * Loads the FDA table (best effort), the identifier dictionaries named by the
     * {@code PathToIDs} parameter and, when enabled, the OPSIN converter.
     *
     * @param aContext UIMA context providing the configuration parameters
     * @throws ResourceInitializationException if {@code PathToIDs} is not configured
     *         or the ids file cannot be read
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        System.out.println("Initializing normalizer...");
        try {
            this.loadFDAData("/resources/fda/approved_drugs.tsv");
        } catch (IOException e) {
            // FDA data is optional: process() skips the FDA lookup while fdaIds is null.
            e.printStackTrace();
        }
        Object configuredPath = aContext.getConfigParameterValue(PATH_TO_IDS);
        if (configuredPath == null) {
            throw new ResourceInitializationException(
                    new IllegalArgumentException("Missing configuration parameter '" + PATH_TO_IDS + "'"));
        }
        String idsFile = configuredPath.toString();
        try {
            if (idsFile.endsWith(".zip")) {
                loadDictionariesFromZip(idsFile);
            } else {
                ids = Normalizer.loadIdsFromFile(idsFile);
            }
        } catch (IOException e) {
            throw new ResourceInitializationException(e);
        }
        if (ChemSpotConfiguration.useComponent(ChemSpotConfiguration.Component.OPSIN)) {
            try {
                this.nameToInChi = new NameToInchi();
            } catch (NameToStructureException e) {
                // Without OPSIN the annotator still works; it just produces no InChIs.
                e.printStackTrace();
            }
        }
    }

    /**
     * Adds the entries of a zip archive to the shared dictionaries: entries whose
     * name contains "normalized" go to the ChemHits dictionary (if that component
     * is enabled), all others to the regular dictionary (if the normalizer
     * component is enabled).
     */
    private static void loadDictionariesFromZip(String zipPath) throws IOException {
        try (ZipFile zipFile = new ZipFile(zipPath)) {
            Enumeration<? extends ZipEntry> entries = zipFile.entries();
            while (entries.hasMoreElements()) {
                ZipEntry entry = entries.nextElement();
                try (InputStream in = zipFile.getInputStream(entry)) {
                    if (entry.getName().contains("normalized")) {
                        if (ChemSpotConfiguration.useComponent(ChemSpotConfiguration.Component.CHEMHITS)) {
                            System.out.print("  Loading ChemHits normalized ids... ");
                            normalizedIds.putAll(Normalizer.readIdsFile(in));
                            System.out.println("Done.");
                        }
                    } else if (ChemSpotConfiguration.useComponent(ChemSpotConfiguration.Component.NORMALIZER)) {
                        System.out.print("  Loading ids... ");
                        ids.putAll(Normalizer.readIdsFile(in));
                        System.out.println("Done.");
                    }
                }
            }
        }
    }

    /**
     * Finds the identifiers of the dictionary name most similar to {@code chemical}.
     * The name with the highest bigram Dice coefficient wins if its score exceeds
     * 0.7; otherwise the longest dictionary name contained in {@code chemical} is
     * used, provided it is longer than three characters.
     *
     * @param chemical name to look up
     * @param ids dictionary to search
     * @return identifiers of the best match, or {@code null} if nothing qualifies
     */
    private String[] getBestMatch(String chemical, Map<String, String[]> ids) {
        String bestDiceMatch = null;
        float bestScore = 0.0f;
        String longestSubstringMatch = null;
        int examined = 0;
        for (String key : ids.keySet()) {
            float score = StringComparator.diceCoefficient(StringComparator.getNGrams(chemical, 2), StringComparator.getNGrams(key, 2));
            if (score > bestScore) {
                bestDiceMatch = key;
                bestScore = score;
            }
            // Strict '>' keeps the first of several equally long matches.
            if (chemical.contains(key)
                    && (longestSubstringMatch == null || key.length() > longestSubstringMatch.length())) {
                longestSubstringMatch = key;
            }
            if (++examined % 10000 == 0) {
                // Progress indicator: one dot per 10000 dictionary names.
                System.out.print(".");
            }
        }
        if (bestScore > 0.7) {
            return ids.get(bestDiceMatch);
        }
        if (longestSubstringMatch != null && longestSubstringMatch.length() > 3) {
            // Return the substring match itself, not the rejected low-score Dice match.
            return ids.get(longestSubstringMatch);
        }
        return null;
    }

    /**
     * Sets the id of every entity whose source is not "goldstandard" to
     * {@code Arrays.toString} of its identifier array, or to {@code null} when no
     * identifier is known. Token-shape counters are updated for all entities.
     *
     * <p>NOTE: when the dictionary row has an empty InChI column, the OPSIN result
     * is written into that shared row, so it is visible through {@link #getIds()}.</p>
     *
     * @param jCas CAS whose {@link NamedEntity} annotations are normalized
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        final int inchiIndex = Constants.ChemicalID.INCH.ordinal();
        Iterator<NamedEntity> entities = JCasUtil.iterator(jCas, NamedEntity.class);
        while (entities.hasNext()) {
            NamedEntity entity = entities.next();
            String text = entity.getCoveredText();
            if (text.matches("[a-zA-Z]+")) {
                ++one;
            }
            if (text.matches("[a-zA-Z]+ [a-zA-Z]+")) {
                ++two;
            }
            if (text.matches("[a-zA-Z0-9]+( [a-zA-Z0-9]+)?")) {
                ++twoAll;
            }
            if ("goldstandard".equals(entity.getSource())) {
                continue;
            }
            ++nE;
            // Parse only entities that are actually normalized; the result was unused for skipped ones.
            String inchi = this.nameToInChi != null ? this.nameToInChi.parseToStdInchi(text) : null;
            String lookupKey = text.toLowerCase();
            String[] normalized = ids.get(lookupKey);
            if (normalized != null) {
                if (normalized.length > inchiIndex) {
                    String knownInchi = normalized[inchiIndex];
                    if ((knownInchi == null || knownInchi.isEmpty()) && inchi != null) {
                        normalized[inchiIndex] = inchi;
                    }
                } else if (inchi != null) {
                    // Row is too short to hold an InChI: work on an enlarged copy.
                    normalized = Arrays.copyOf(normalized, inchiIndex + 1);
                    normalized[inchiIndex] = inchi;
                }
                ++nN;
            } else if (inchi != null) {
                normalized = new String[inchiIndex + 1];
                normalized[inchiIndex] = inchi;
                ++nN;
            }
            if (this.fdaIds != null && this.fdaIds.containsKey(lookupKey)) {
                ++fda;
                int idCount = Constants.ChemicalID.values().length;
                normalized = normalized == null ? new String[idCount] : Arrays.copyOf(normalized, idCount);
                String fdaId = this.fdaIds.get(lookupKey);
                normalized[Constants.ChemicalID.FDA.ordinal()] = fdaId;
                if (this.fdaDates.containsKey(fdaId)) {
                    normalized[Constants.ChemicalID.FDA_DATE.ordinal()] = this.fdaDates.get(fdaId);
                }
            }
            entity.setId(normalized != null ? Arrays.toString(normalized) : null);
        }
    }

    /** Prints the ChemHits counters and the percentages derived from them to standard output. */
    private void printChemHitsStatistic() {
        int compared = chemHitsDifferent + chemHitsEqual;
        float newTermsPercent = compared > 0 ? (float)chemHitsDifferent / (float)compared * 100.0f : 0.0f;
        float differentPercent = chemHitsIdFoundBoth > 0 ? (float)chemHitsdifferentIdFound / (float)chemHitsIdFoundBoth * 100.0f : 0.0f;
        System.out.printf("%nChemHits statistics:%n  identifed %d new terms after normalization (of %d / %.2f %%)%n", chemHitsDifferent, compared, newTermsPercent);
        System.out.printf("  found only by ChemHits: %d, only by ChemSpot: %d, by neither: %d, by both: %d (%d of those differently / %.2f %%)%n%n", chemHitsIdFoundExclusively, chemHitsIdNotFoundExclusively, chemHitsIdNotFound, chemHitsIdFoundBoth, chemHitsdifferentIdFound, differentPercent);
    }
}

