/*
 * Decompiled with CFR 0.152.
 */
package de.berlin.hu.uima.cr.ddi.parser;

import de.berlin.hu.types.PubmedDocument;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.tcas.Annotation;
import org.u_compare.shared.semantic.NamedEntity;
import org.uimafit.util.JCasUtil;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import sprint.uima.types.CorpusDocument;
import sprint.uima.types.Entity;
import sprint.uima.types.Pair;
import sprint.uima.types.Sentence;
import sprint.uima.types.Token;

/**
 * SAX {@link ContentHandler} that converts a DDI corpus XML stream into
 * annotations on a {@link JCas}.
 *
 * <p>The handler reacts to the following elements (names are matched
 * case-insensitively against the qualified name):
 * <ul>
 *   <li>{@code document} - starts a new text buffer; on close the buffered text
 *       becomes the CAS document text and a {@link CorpusDocument} plus a
 *       {@link PubmedDocument} spanning the whole text are indexed;</li>
 *   <li>{@code sentence} - its {@code text} attribute plus one separator blank
 *       is appended to the buffer and a {@link Sentence} is indexed;</li>
 *   <li>{@code entity} - an {@link Entity} and a {@link NamedEntity} are
 *       indexed at the sentence-relative {@code charOffset};</li>
 *   <li>{@code token} - a {@link Token} is indexed, but only inside a
 *       {@code tokenization} element whose {@code tokenizer} attribute equals
 *       {@code "Charniak-Lease"}.</li>
 * </ul>
 *
 * <p>Character data is ignored entirely; all text comes from attributes.
 */
public class DDICorpusContentHandlerImpl implements ContentHandler {
    private static final String CORPUS_TAG = "corpus";
    private static final String DOCUMENT_TAG = "document";
    private static final String SENTENCE_TAG = "sentence";
    private static final String SENTENCEANALYSES_TAG = "sentenceanalyses";
    private static final String TOKENIZATIONS_TAG = "tokenizations";
    private static final String TOKENIZATION_TAG = "tokenization";
    private static final String ENTITY_TAG = "entity";
    private static final String PAIR_TAG = "pair";
    private static final String TOKEN_TAG = "token";
    private static final String CHAR_OFFSET_ATTR = "charOffset";
    private static final String ID_ATTR = "id";
    private static final String TEXT_ATTR = "text";
    private static final String ENTITY_TYPE_ATTR = "type";
    private static final String ENTITY_1_ATTR = "e1";
    private static final String ENTITY_2_ATTR = "e2";
    private static final String INTERACTION_ATTR = "interaction";
    private static final String TOKENIZER_ATTR = "tokenizer";
    private static final String POS_ATTR = "POS";
    /** The only tokenization whose tokens are turned into annotations. */
    private static final String ACCEPTED_TOKENIZER = "Charniak-Lease";
    /** Separator used inside a {@code charOffset} attribute value. */
    private static final String OFFSET_SEPARATOR = "-";

    /** Kind of the most recently opened structural element; {@code null} until the first one. */
    private ElementType currentElementType;
    private final JCas jcas;
    /** Accumulates the sentence texts of the document currently being parsed. */
    private StringBuilder documentTextStringBuffer;
    /** Offset of the current sentence within the accumulated document text. */
    private int sentenceOffset;
    /** Offset at which the next sentence will start. */
    private int nextSentenceOffset;
    private String documentId;
    private Sentence currentSentence;
    private ArrayList<Pair> currentPairs;
    /** {@code false} from the start of a document until its first entity has been processed. */
    private boolean firstEntitySeen;

    /**
     * @param jcas the CAS that receives the document text and all annotations
     */
    public DDICorpusContentHandlerImpl(JCas jcas) {
        this.jcas = jcas;
    }

    /**
     * Dispatches on the element name and creates the matching annotation or
     * updates the parser state.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        if (CORPUS_TAG.equalsIgnoreCase(qName)) {
            this.currentElementType = ElementType.Corpus;
        } else if (DOCUMENT_TAG.equalsIgnoreCase(qName)) {
            this.createDocumentAnnotation(atts);
            this.firstEntitySeen = false;
        } else if (SENTENCE_TAG.equalsIgnoreCase(qName)) {
            this.createSentenceAnnotation(atts);
        } else if (ENTITY_TAG.equalsIgnoreCase(qName)) {
            this.createEntityAnnotation(atts);
            this.firstEntitySeen = true;
        } else if (PAIR_TAG.equalsIgnoreCase(qName)) {
            // NOTE(review): pair elements are skipped on purpose here, so
            // createPairAnnotation is never invoked and every sentence ends up
            // with an empty pairs array. Confirm whether this is intended
            // before enabling it.
        } else if (SENTENCEANALYSES_TAG.equalsIgnoreCase(qName)) {
            this.currentElementType = ElementType.Sentenceanalyses;
        } else if (TOKENIZATIONS_TAG.equalsIgnoreCase(qName)) {
            this.currentElementType = ElementType.Tokenizations;
        } else if (TOKENIZATION_TAG.equalsIgnoreCase(qName)) {
            // Literal-first comparison: a missing tokenizer attribute simply
            // leaves the state untouched instead of raising an NPE.
            if (ACCEPTED_TOKENIZER.equals(atts.getValue(TOKENIZER_ATTR))) {
                this.currentElementType = ElementType.Tokenization;
            }
        } else if (TOKEN_TAG.equalsIgnoreCase(qName)) {
            // Enum identity comparison is null-safe; tokens outside an accepted
            // tokenization (or before any state was set) are ignored.
            if (this.currentElementType == ElementType.Tokenization) {
                this.createTokenAnnotation(atts);
            }
        } else {
            this.currentElementType = ElementType.Ignore;
        }
    }

    /**
     * Resets the per-document state (text buffer, offsets) and remembers the
     * document id. The actual document annotations are created in
     * {@link #endElement} once the full text is known.
     */
    private void createDocumentAnnotation(Attributes atts) {
        this.currentElementType = ElementType.Document;
        this.documentTextStringBuffer = new StringBuilder();
        this.sentenceOffset = 0;
        this.nextSentenceOffset = 0;
        this.documentId = atts.getValue(ID_ATTR);
    }

    /**
     * Appends the sentence text (followed by a single blank) to the document
     * buffer and indexes a {@link Sentence} that covers the text without the
     * trailing blank.
     */
    private void createSentenceAnnotation(Attributes atts) {
        this.currentElementType = ElementType.Sentence;
        // String concatenation renders a missing text attribute as "null",
        // which matches the previous behaviour.
        String sentenceText = atts.getValue(TEXT_ATTR) + " ";
        this.documentTextStringBuffer.append(sentenceText);
        this.sentenceOffset = this.nextSentenceOffset;
        this.nextSentenceOffset += sentenceText.length();
        // "- 1" excludes the separator blank from the sentence span.
        Sentence sentence = new Sentence(this.jcas, this.sentenceOffset, this.nextSentenceOffset - 1);
        sentence.setID(atts.getValue(ID_ATTR));
        sentence.addToIndexes();
        this.currentSentence = sentence;
        this.currentPairs = new ArrayList<Pair>();
    }

    /**
     * Indexes an {@link Entity} and a gold-standard {@link NamedEntity} for an
     * {@code entity} element. The span runs from the first to the last number
     * of the {@code charOffset} attribute, shifted by the sentence offset.
     */
    private void createEntityAnnotation(Attributes atts) {
        String charOffset = atts.getValue(CHAR_OFFSET_ATTR);
        String[] boundaries = charOffset.split(OFFSET_SEPARATOR);
        int begin = this.sentenceOffset + Integer.parseInt(boundaries[0]);
        int end = this.sentenceOffset + Integer.parseInt(boundaries[boundaries.length - 1]);
        // NOTE(review): only the first entity of each document gets its end
        // moved by one, whereas tokens always get "+ 1". This asymmetry is kept
        // as found; confirm against the corpus whether it is intentional.
        if (!this.firstEntitySeen) {
            ++end;
        }
        String id = atts.getValue(ID_ATTR);
        String type = atts.getValue(ENTITY_TYPE_ATTR);

        Entity entity = new Entity(this.jcas, begin, end);
        entity.setID(id);
        entity.setEntityType(type);
        entity.setCharOffset(charOffset);
        entity.addToIndexes();

        NamedEntity namedEntity = new NamedEntity(this.jcas, begin, end);
        namedEntity.setId(id);
        namedEntity.setEntityType(type);
        namedEntity.setSource("goldstandard");
        namedEntity.addToIndexes();
    }

    /**
     * Builds a {@link Pair} for a {@code pair} element by looking up the two
     * referenced entities among the entities covered by the current sentence.
     * The pair is indexed and collected for the sentence only when both
     * entities are found; otherwise it is dropped because no span can be
     * computed for it.
     *
     * <p>Currently not called from {@link #startElement}.
     */
    private void createPairAnnotation(Attributes atts) {
        String entity1ID = atts.getValue(ENTITY_1_ATTR);
        String entity2ID = atts.getValue(ENTITY_2_ATTR);
        if (entity1ID == null || entity2ID == null) {
            return;
        }

        Entity entity1 = null;
        Entity entity2 = null;
        for (Entity candidate : JCasUtil.iterate(this.jcas, Entity.class, this.currentSentence)) {
            String candidateId = candidate.getID();
            // e1 takes precedence when both ids match the same entity.
            if (entity1ID.equals(candidateId)) {
                entity1 = candidate;
            } else if (entity2ID.equals(candidateId)) {
                entity2 = candidate;
            }
        }
        if (entity1 == null || entity2 == null) {
            // Previously this case ended in a NullPointerException.
            return;
        }

        Pair pair = new Pair(this.jcas);
        pair.setID(atts.getValue(ID_ATTR));
        pair.setInteraction(Boolean.parseBoolean(atts.getValue(INTERACTION_ATTR)));
        pair.setEntity1(entity1);
        pair.setEntity2(entity2);
        // The pair spans from the begin of the earlier entity to the end of the other one.
        if (entity1.getBegin() < entity2.getBegin()) {
            pair.setBegin(entity1.getBegin());
            pair.setEnd(entity2.getEnd());
        } else {
            pair.setBegin(entity2.getBegin());
            pair.setEnd(entity1.getEnd());
        }
        pair.addToIndexes();
        this.currentPairs.add(pair);
    }

    /**
     * Indexes a {@link Token} for a {@code token} element. The second number
     * of {@code charOffset} is incremented by one to form the annotation end.
     */
    private void createTokenAnnotation(Attributes atts) {
        String[] boundaries = atts.getValue(CHAR_OFFSET_ATTR).split(OFFSET_SEPARATOR);
        int begin = this.sentenceOffset + Integer.parseInt(boundaries[0]);
        int end = this.sentenceOffset + Integer.parseInt(boundaries[1]) + 1;
        Token token = new Token(this.jcas, begin, end);
        token.setID(atts.getValue(ID_ATTR));
        token.setPOS(atts.getValue(POS_ATTR));
        token.addToIndexes();
    }

    /**
     * Finalises the element that is being closed: a document publishes its
     * text and document-level annotations, a sentence receives its collected
     * pairs, and a tokenization resets the state so that tokens of a following
     * tokenization are not accepted by accident.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (DOCUMENT_TAG.equalsIgnoreCase(qName)) {
            String documentText = this.documentTextStringBuffer.toString();
            int documentLength = documentText.length();
            this.jcas.setDocumentText(documentText);

            CorpusDocument corpusDocument = new CorpusDocument(this.jcas, 0, documentLength);
            corpusDocument.setID(this.documentId);
            corpusDocument.addToIndexes();

            PubmedDocument pubmedDocument = new PubmedDocument(this.jcas, 0, documentLength);
            pubmedDocument.setPmid(this.documentId);
            pubmedDocument.addToIndexes();
        } else if (SENTENCE_TAG.equalsIgnoreCase(qName)) {
            FSArray pairs = new FSArray(this.jcas, this.currentPairs.size());
            for (int i = 0; i < pairs.size(); ++i) {
                pairs.set(i, this.currentPairs.get(i));
            }
            this.currentSentence.setPairs(pairs);
        } else if (TOKENIZATION_TAG.equalsIgnoreCase(qName)) {
            this.currentElementType = ElementType.Sentenceanalyses;
        }
    }

    @Override
    public void setDocumentLocator(Locator locator) {
    }

    @Override
    public void startDocument() throws SAXException {
    }

    @Override
    public void endDocument() throws SAXException {
    }

    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
    }

    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
    }

    @Override
    public void processingInstruction(String target, String data2) throws SAXException {
    }

    @Override
    public void skippedEntity(String name) throws SAXException {
    }

    /** Parser state derived from the most recently opened structural element. */
    private static enum ElementType {
        Ignore,
        Corpus,
        Document,
        Sentence,
        Sentenceanalyses,
        Tokenizations,
        Tokenization;

    }
}

