/*
 * Decompiled with CFR 0.152.
 */
package de.berlin.hu.ppi.mediator;

import de.berlin.hu.ppi.PpiToolkit;
import de.berlin.hu.ppi.db.DbService;
import de.berlin.hu.ppi.mediator.LoaderException;
import de.berlin.hu.ppi.mediator.ProteinChecker;
import de.berlin.hu.ppi.mediator.ProteinResolver;
import de.berlin.hu.ppi.mediator.dbx.DBConstants;
import de.berlin.hu.ppi.mediator.dbx.DBLookupProtein;
import de.berlin.hu.ppi.mediator.dbx.DBLookupProteinService;
import de.berlin.hu.ppi.mediator.dbx.DBProtein;
import de.berlin.hu.ppi.mediator.dbx.DBProteinService;
import de.berlin.hu.ppi.wrapper.Protein;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.log4j.Logger;
import uk.ac.ebi.kraken.interfaces.uniprot.DatabaseCrossReference;
import uk.ac.ebi.kraken.interfaces.uniprot.DatabaseType;
import uk.ac.ebi.kraken.interfaces.uniprot.Gene;
import uk.ac.ebi.kraken.interfaces.uniprot.NcbiTaxonomyId;
import uk.ac.ebi.kraken.interfaces.uniprot.SecondaryUniProtAccession;
import uk.ac.ebi.kraken.interfaces.uniprot.UniProtEntry;
import uk.ac.ebi.kraken.interfaces.uniprot.dbx.geneid.GeneId;
import uk.ac.ebi.kraken.interfaces.uniprot.dbx.go.Go;
import uk.ac.ebi.kraken.interfaces.uniprot.dbx.interpro.InterPro;
import uk.ac.ebi.kraken.interfaces.uniprot.dbx.reactome.Reactome;
import uk.ac.ebi.kraken.interfaces.uniprot.dbx.refseq.RefSeq;
import uk.ac.ebi.kraken.interfaces.uniprot.description.Field;
import uk.ac.ebi.kraken.interfaces.uniprot.description.FieldType;
import uk.ac.ebi.kraken.uuw.services.remoting.EntryRetrievalService;
import uk.ac.ebi.kraken.uuw.services.remoting.UniProtJAPI;

public class ProteinLoader
implements DBConstants {
    static Logger log = Logger.getLogger(ProteinLoader.class);
    ProteinResolver proteinResolver;
    DbService db_service;
    DBLookupProteinService dbLookupProteinService;
    DBProteinService dbProteinService;
    EntryRetrievalService entryRetrievalService;
    protected int no_mappings_count = 0;
    private UniProtEntry entry;

    public ProteinLoader(Connection con) {
        this.proteinResolver = new ProteinResolver(con);
        this.dbLookupProteinService = new DBLookupProteinService(con);
        this.dbProteinService = new DBProteinService(con);
        this.entryRetrievalService = UniProtJAPI.factory.getEntryRetrievalService();
    }

    public DBProtein[] load(Protein protein) throws LoaderException {
        Map<String, String> ids = protein.getAllIds();
        String uid = null;
        uid = ids.get("uniprotkb");
        if (!(uid == null && (uid = ids.get("uniprot")) == null && (uid = ids.get("uniprot knowledge base")) == null || "-".equals(uid))) {
            log.trace("processing uniprotid " + uid);
        } else {
            log.trace("processing protein w/o uniprotid, trying to resolve ...");
            uid = this.proteinResolver.resolve(protein);
        }
        if (uid == null) {
            Map<String, String> idmap = protein.getAllIds();
            Set<String> keySet = idmap.keySet();
            if (keySet.size() > 0) {
                StringBuilder b = new StringBuilder();
                b.append("giving up, available Ids are: ");
                for (String mapkey : keySet) {
                    b.append(mapkey).append(" : ").append(idmap.get(mapkey)).append(", ");
                }
                log.warn(b.toString());
            }
            ++this.no_mappings_count;
            return null;
        }
        log.trace("resolved successfully");
        uid = this.cleanUniprotId(uid);
        if (uid == null) {
            return null;
        }
        DBProtein[] result = this.load(uid, protein.getExperimentalRole());
        return result;
    }

    public DBProtein[] load(String uid, String experimentalRole) throws LoaderException {
        DBLookupProtein[] loadedProteins;
        try {
            loadedProteins = this.dbLookupProteinService.find(uid);
        }
        catch (SQLException e) {
            throw new LoaderException("SQLException caugth: " + e);
        }
        if (loadedProteins != null) {
            log.trace("... loaded Protein locally");
            return this.checkProteins(loadedProteins);
        }
        Properties[] uniprotProteins = this.getUniprotProteins(uid);
        if (uniprotProteins == null) {
            return null;
        }
        loadedProteins = new DBLookupProtein[uniprotProteins.length];
        for (int i = 0; i < uniprotProteins.length; ++i) {
            try {
                DBLookupProtein newLookupProtein = this.dbLookupProteinService.newDBLookupProtein(uniprotProteins[i]);
                if (PpiToolkit.isCachingEnabled()) {
                    newLookupProtein.insert();
                }
                newLookupProtein.setExperimentaRole(experimentalRole);
                loadedProteins[i] = newLookupProtein;
                continue;
            }
            catch (SQLException e) {
                throw new LoaderException(e);
            }
        }
        DBProtein[] checkProteins = this.checkProteins(loadedProteins);
        return checkProteins;
    }

    private String cleanUniprotId(String uid) {
        String uidOld = uid;
        if (!(uid = uid.trim().toLowerCase()).matches("\\w\\w\\w\\w\\w\\w")) {
            String[] splitted;
            for (String e : splitted = uid.split("[ _\\-;/,]")) {
                if (!e.matches("\\w\\w\\w\\w\\w\\w")) continue;
                return e;
            }
        } else {
            return uid;
        }
        log.warn("Could not find/clean uniprot id: " + uidOld);
        return null;
    }

    public DBProtein[] checkProteins(DBLookupProtein[] proteins) {
        DBProtein[] checkedProteins = new DBProtein[proteins.length];
        int checked_ok_count = 0;
        for (int i = 0; i < proteins.length; ++i) {
            if (!ProteinChecker.check(proteins[i])) continue;
            checkedProteins[checked_ok_count++] = this.dbProteinService.newDBProtein(proteins[i]);
        }
        return Arrays.copyOf(checkedProteins, checked_ok_count);
    }

    public void getUniprotProteinsNew(String uniprotId, List<UniProtEntry> results) throws SQLException {
        log.trace("... retrieving from UniProt");
        this.entry = this.entryRetrievalService.getUniProtEntry(uniprotId);
        if (this.entry == null) {
            log.trace("Couldn't retrieve protein from UniProt!");
            log.trace("... checking for 'demerged'");
            ArrayList<String> newIds = new ArrayList<String>();
            this.dbLookupProteinService.findDemergedNew(uniprotId, newIds);
            if (newIds.isEmpty()) {
                String newId = null;
                newId = this.httpFetchNewUniProtId(uniprotId);
                if (newId != null) {
                    newIds.add(newId);
                }
            }
            for (String newId : newIds) {
                this.entry = this.entryRetrievalService.getUniProtEntry(newId);
                if (this.entry == null) continue;
                results.add(this.entry);
            }
        } else {
            results.add(this.entry);
        }
    }

    private String httpFetchNewUniProtId(String uniprotId) {
        String newId = null;
        try {
            log.trace("..... demerged lookup unsuccessfull. Checking up directly");
            String location = "http://www.uniprot.org/uniprot/" + uniprotId.toUpperCase();
            HttpClient client = new HttpClient();
            GetMethod getmethod = new GetMethod(location);
            getmethod.setFollowRedirects(false);
            int status = client.executeMethod(getmethod);
            if (status == 301) {
                location = getmethod.getResponseHeader("Location").getValue();
                newId = location.split("/")[2];
                newId = newId.toLowerCase();
                log.trace("..... trying for new id " + newId);
                getmethod.releaseConnection();
            } else {
                log.trace("..... no success");
                log.debug(status);
                log.debug(location);
            }
            if (uniprotId.equals(newId)) {
                log.trace("..... no success");
                newId = null;
            }
        }
        catch (Exception exception) {
            // empty catch block
        }
        return newId;
    }

    public Properties[] getUniprotProteins(String uniprotId) {
        log.trace("... retrieving from UniProt");
        Properties proteinProperties = this.retrieveUniprotEntry(uniprotId);
        if (proteinProperties != null) {
            Properties[] fetchedProteins = new Properties[]{proteinProperties};
            return fetchedProteins;
        }
        log.trace("Couldn't retrieve protein from UniProt!");
        log.trace("... checking for 'demerged'");
        String[] new_ids = null;
        try {
            new_ids = this.dbLookupProteinService.findDemerged(uniprotId);
        }
        catch (SQLException e) {
            log.error("demerged lookup failed: " + e);
            return null;
        }
        if (new_ids == null) {
            log.trace("..... demerged lookup unsuccessfull. Checking up directly");
            String location = "http://www.uniprot.org/uniprot/" + uniprotId.toUpperCase();
            try {
                String new_id;
                HttpClient client = new HttpClient();
                GetMethod getmethod = new GetMethod(location);
                getmethod.setFollowRedirects(false);
                int status = client.executeMethod(getmethod);
                if (status == 301) {
                    location = getmethod.getResponseHeader("Location").getValue();
                    new_id = location.split("/")[2];
                    if ((new_id = new_id.toLowerCase()) == uniprotId) {
                        log.trace("..... no success");
                        return null;
                    }
                } else {
                    log.trace("..... no success");
                    log.debug(status);
                    log.debug(location);
                    return null;
                }
                log.trace("..... trying for new id " + new_id);
                new_ids = new String[]{new_id};
                getmethod.releaseConnection();
            }
            catch (IOException e) {
                log.error(e);
                return null;
            }
        }
        int new_count = 0;
        Properties[] fetchedProteins = new Properties[new_ids.length];
        for (int i = 0; i < new_ids.length; ++i) {
            proteinProperties = this.retrieveUniprotEntry(new_ids[i]);
            if (proteinProperties == null) continue;
            fetchedProteins[new_count++] = proteinProperties;
        }
        if (new_count > 0) {
            return Arrays.copyOf(fetchedProteins, new_count);
        }
        return null;
    }

    public Properties retrieveUniprotEntry(String acc) {
        Properties protein;
        block46: {
            protein = new Properties();
            try {
                this.entry = this.entryRetrievalService.getUniProtEntry(acc);
                if (this.entry != null) {
                    List crlist;
                    int tax_id;
                    String shortName;
                    List<Gene> genes;
                    if (this.entry.getProteinDescription().hasRecommendedName()) {
                        for (Field f : this.entry.getProteinDescription().getRecommendedName().getFields()) {
                            if (f.getType() != FieldType.FULL) continue;
                            protein.setProperty("protein_name", f.getValue());
                            break;
                        }
                    }
                    if ((genes = this.entry.getGenes()) != null && genes.size() >= 1 && (shortName = genes.get(0).getGeneName().getValue()).length() > 0) {
                        protein.setProperty("protein_short_name", shortName);
                    }
                    protein.setProperty("sequence", this.entry.getSequence().getValue().toLowerCase());
                    Iterator<NcbiTaxonomyId> i$ = this.entry.getNcbiTaxonomyIds().iterator();
                    if (i$.hasNext()) {
                        NcbiTaxonomyId ti = i$.next();
                        protein.setProperty("species", ti.getValue());
                    }
                    protein.setProperty("primary_uniprot_id", this.entry.getPrimaryUniProtAccession().getValue());
                    String secacc = "";
                    for (SecondaryUniProtAccession sec : this.entry.getSecondaryUniProtAccessions()) {
                        secacc = secacc + sec.getValue() + ";";
                    }
                    protein.setProperty("secondary_uniprot_ids", secacc);
                    List idlist = this.entry.getDatabaseCrossReferences(DatabaseType.GENEID);
                    for (GeneId geneid : idlist) {
                        if (!geneid.hasGeneIdAccessionNumber()) continue;
                        protein.setProperty("gene_id", geneid.getGeneIdAccessionNumber().getValue());
                        break;
                    }
                    List seqlist = this.entry.getDatabaseCrossReferences(DatabaseType.REFSEQ);
                    for (RefSeq item : seqlist) {
                        if (!item.hasRefSeqAccessionNumber()) continue;
                        protein.setProperty("refseq", item.getRefSeqAccessionNumber().getValue());
                        break;
                    }
                    if ((tax_id = Integer.parseInt(protein.getProperty("species"))) == 4932) {
                        crlist = this.entry.getDatabaseCrossReferences(DatabaseType.SGD);
                        for (DatabaseCrossReference item : crlist) {
                            if (!item.hasSgdAccessionNumber()) continue;
                            protein.setProperty("org_specific_id", item.getSgdAccessionNumber().getValue());
                            break;
                        }
                    } else if (tax_id == 6239) {
                        crlist = this.entry.getDatabaseCrossReferences(DatabaseType.WORMBASE);
                        for (DatabaseCrossReference item : crlist) {
                            if (!item.hasWormBaseAccessionNumber()) continue;
                            String value = item.getWormBaseAccessionNumber().getValue();
                            System.out.println(value);
                            protein.setProperty("org_specific_id", value);
                            break;
                        }
                    } else if (tax_id == 7227) {
                        crlist = this.entry.getDatabaseCrossReferences(DatabaseType.FLYBASE);
                        for (DatabaseCrossReference item : crlist) {
                            if (!item.hasFlyBaseAccessionNumber()) continue;
                            protein.setProperty("org_specific_id", item.getFlyBaseAccessionNumber().getValue());
                            break;
                        }
                    } else if (tax_id == 9606) {
                        crlist = this.entry.getDatabaseCrossReferences(DatabaseType.MIM);
                        StringBuilder builder = new StringBuilder();
                        for (DatabaseCrossReference item : crlist) {
                            String description;
                            if (!item.hasMimAccessionNumber() || !(description = item.getMimDescription().getValue()).startsWith("gene")) continue;
                            builder.append(item.getMimAccessionNumber().getValue());
                            builder.append(";");
                        }
                        String omimIds = builder.toString();
                        if (omimIds.length() > 0) {
                            protein.setProperty("omim_genotype", builder.toString());
                        }
                    } else if (tax_id == 10090) {
                        crlist = this.entry.getDatabaseCrossReferences(DatabaseType.MGI);
                        for (DatabaseCrossReference item : crlist) {
                            if (!item.hasMgiAccessionNumber()) continue;
                            protein.setProperty("org_specific_id", item.getMgiAccessionNumber().getValue());
                            break;
                        }
                    } else if (tax_id == 10116) {
                        crlist = this.entry.getDatabaseCrossReferences(DatabaseType.RGD);
                        for (DatabaseCrossReference item : crlist) {
                            if (!item.hasRgdAccessionNumber()) continue;
                            protein.setProperty("org_specific_id", item.getRgdAccessionNumber().getValue());
                            break;
                        }
                    }
                    List kegglist = this.entry.getDatabaseCrossReferences(DatabaseType.KEGG);
                    StringBuilder keggIds = new StringBuilder();
                    for (DatabaseCrossReference item : kegglist) {
                        if (!item.hasKeggAccessionNumber()) continue;
                        keggIds.append(item.getKeggAccessionNumber().getValue());
                        keggIds.append(";");
                        log.debug("Kegg: " + item.getKeggAccessionNumber().getValue());
                    }
                    String keggIdsString = keggIds.toString();
                    if (keggIdsString.length() > 0) {
                        protein.setProperty("kegg_ids", keggIdsString);
                    }
                    List rctlist = this.entry.getDatabaseCrossReferences(DatabaseType.REACTOME);
                    String reactomestring = "";
                    for (Reactome item : rctlist) {
                        if (!item.hasReactomeAccessionNumber()) continue;
                        reactomestring = reactomestring + item.getReactomeAccessionNumber().getValue() + ";";
                    }
                    if (reactomestring.compareTo("") != 0) {
                        protein.setProperty("reactome_ids", reactomestring);
                    }
                    for (String ec : this.entry.getProteinDescription().getEcNumbers()) {
                        protein.setProperty("ec_number", ec);
                    }
                    List iplist = this.entry.getDatabaseCrossReferences(DatabaseType.INTERPRO);
                    String ipstring = "";
                    for (InterPro item : iplist) {
                        if (!item.hasInterProId()) continue;
                        ipstring = ipstring + item.getInterProId().getValue() + ";";
                    }
                    if (ipstring.compareTo("") != 0) {
                        protein.setProperty("interpro_ids", ipstring);
                    }
                    List<Go> golist = this.entry.getGoTerms();
                    String gostring = "";
                    String directfetch = null;
                    for (Go item : golist) {
                        String go_id;
                        if (!item.hasGoId() || gostring.indexOf(go_id = item.getGoId().getValue()) != -1) continue;
                        String evidence = item.getGoEvidenceType().getValue();
                        if (evidence.equals("")) {
                            String[] parts;
                            block45: {
                                log.trace("..... fetching go evidence from ebi directly");
                                if (directfetch == null) {
                                    String uniprotid = protein.getProperty("primary_uniprot_id");
                                    String location = "http://www.ebi.ac.uk/ego/GAnnotation?protein=" + uniprotid.toUpperCase() + "&format=tsv";
                                    try {
                                        HttpClient client = new HttpClient();
                                        GetMethod getmethod = new GetMethod(location);
                                        getmethod.setFollowRedirects(false);
                                        int status = client.executeMethod(getmethod);
                                        if (status == 200) {
                                            directfetch = getmethod.getResponseBodyAsString();
                                            break block45;
                                        }
                                        log.trace("..... failed");
                                        log.debug(status);
                                        log.debug(location);
                                        continue;
                                    }
                                    catch (IOException e) {
                                        log.error(e);
                                        return null;
                                    }
                                }
                            }
                            if ((parts = directfetch.split(go_id + "\\t")) != null && parts.length > 1) {
                                evidence = parts[1].split("\\t")[2];
                                log.trace("..... done");
                            } else {
                                log.trace("..... failed: not found");
                            }
                        }
                        gostring = gostring + go_id + "(" + evidence + ")" + ";";
                    }
                    if (gostring.compareTo("") != 0) {
                        protein.setProperty("go_ids", gostring);
                    }
                    break block46;
                }
                log.trace("Entry is null for [" + acc + "]");
                return null;
            }
            catch (Exception e) {
                log.error("Could not retrieve uniprot entry for " + acc, e);
                return null;
            }
        }
        log.info("Retrieved uniprot entry for " + acc);
        return protein;
    }
}

