/*
 * Decompiled with CFR 0.152.
 */
package de.berlin.hu.uima.cr.xml;

import de.berlin.hu.types.PubmedDocument;
import de.berlin.hu.uima.cr.xml.XMLCollectionReader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.examples.SourceDocumentInformation;
import org.apache.uima.jcas.JCas;
import org.u_compare.shared.semantic.NamedEntity;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Collection reader that turns XML documents into CASes carrying the document
 * text plus gold-standard {@link NamedEntity} annotations.
 *
 * <p>For every document the reader takes the first {@code PMID},
 * {@code ArticleTitle} and {@code AbstractText} elements, sets the CAS text to
 * {@code title + "\n\n" + abstract}, and creates one {@link NamedEntity} for each
 * direct {@code METABOLITE} or {@code ENZYME} child element of the title and the
 * abstract.
 */
public class NaCTeMCollectionReader extends XMLCollectionReader {
    /** Separator inserted between the title and the abstract in the document text. */
    private static final String TITLE_ABSTRACT_SEPARATOR = "\n\n";

    /**
     * Lists the regular files directly inside {@code inputDir} whose name ends
     * with {@code .xml}.
     *
     * @param inputDir path of the directory to scan (not recursive)
     * @return the matching files; empty when the path is not a readable directory
     */
    @Override
    protected List<File> getfiles(String inputDir) {
        List<File> xmlFiles = new ArrayList<File>();
        // listFiles() yields null (not an empty array) when the path is not a
        // directory or cannot be read; treat that as "no input files".
        File[] entries = new File(inputDir).listFiles();
        if (entries == null) {
            return xmlFiles;
        }
        for (File entry : entries) {
            if (entry.isFile() && entry.getName().endsWith(".xml")) {
                xmlFiles.add(entry);
            }
        }
        return xmlFiles;
    }

    /**
     * Fills {@code aCAS} with the next document: text, source information, a
     * {@link PubmedDocument} annotation spanning the whole text, and the
     * gold-standard entity annotations.
     *
     * @param aCAS the CAS to populate
     * @throws IOException if reading the next document fails
     * @throws CollectionException if the JCas view cannot be obtained or the
     *         document has no {@code PMID} element
     */
    @Override
    public void getNext(CAS aCAS) throws IOException, CollectionException {
        Document document = this.getNextDocument();
        JCas jcas;
        try {
            jcas = aCAS.getJCas();
        }
        catch (CASException e) {
            throw new CollectionException(e);
        }

        Node pmidNode = document.getElementsByTagName("PMID").item(0);
        if (pmidNode == null) {
            // Fail with context instead of a bare NullPointerException.
            throw new CollectionException(new IllegalStateException(
                    "No PMID element found in document " + document.getDocumentURI()));
        }
        String pmid = pmidNode.getTextContent();

        Node titleNode = document.getElementsByTagName("ArticleTitle").item(0);
        String title = titleNode != null ? titleNode.getTextContent() : "";
        Node abstractNode = document.getElementsByTagName("AbstractText").item(0);
        String abstr = abstractNode != null ? abstractNode.getTextContent() : "";
        String text = title + TITLE_ABSTRACT_SEPARATOR + abstr;
        jcas.setDocumentText(text);

        SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
        srcDocInfo.setUri(document.getDocumentURI());
        srcDocInfo.setOffsetInSource(0);
        srcDocInfo.setDocumentSize(text.length());
        // NOTE(review): hasNext() is evaluated after the current document was
        // fetched, so this flag is true while MORE documents remain. That looks
        // inverted for a "last segment" marker; kept as-is, confirm against consumers.
        srcDocInfo.setLastSegment(this.hasNext());
        srcDocInfo.addToIndexes();

        PubmedDocument abstractAnnotation = new PubmedDocument(jcas);
        abstractAnnotation.setBegin(0);
        abstractAnnotation.setEnd(text.length());
        abstractAnnotation.setPmid(pmid);
        abstractAnnotation.addToIndexes(jcas);

        // Title mentions are searched from the start of the text; abstract
        // mentions from where the abstract begins, so they can never be anchored
        // inside the title by accident.
        NaCTeMCollectionReader.annotateGoldEntities(jcas, text, titleNode, 0);
        NaCTeMCollectionReader.annotateGoldEntities(jcas, text, abstractNode,
                title.length() + TITLE_ABSTRACT_SEPARATOR.length());
    }

    /**
     * Creates a {@link NamedEntity} for every gold-standard entity element that
     * is a direct child of {@code parent}.
     *
     * <p>Each mention is located by a literal forward search in {@code text},
     * starting right after the previous match. All offsets are absolute
     * positions in {@code text}. Mentions that are blank or cannot be found are
     * skipped without moving the search position.
     *
     * @param jcas the JCas receiving the annotations
     * @param text the full document text
     * @param parent element whose children are inspected; may be {@code null}
     * @param searchFrom absolute offset in {@code text} at which to start searching
     */
    private static void annotateGoldEntities(JCas jcas, String text, Node parent, int searchFrom) {
        if (parent == null) {
            return;
        }
        NodeList children = parent.getChildNodes();
        int cursor = searchFrom;
        for (int i = 0; i < children.getLength(); ++i) {
            Node child = children.item(i);
            if (!NaCTeMCollectionReader.isGoldEntityElement(child)) {
                continue;
            }
            String mention = child.getTextContent();
            if (mention.trim().isEmpty()) {
                continue;
            }
            int begin = text.indexOf(mention, cursor);
            if (begin < 0) {
                continue;
            }
            int end = begin + mention.length();
            NamedEntity namedEntity = new NamedEntity(jcas);
            namedEntity.setBegin(begin);
            namedEntity.setEnd(end);
            namedEntity.setConfidence(1.0);
            namedEntity.setSource("goldstandard");
            namedEntity.addToIndexes();
            // Continue directly after this mention so that an immediately
            // adjacent entity is still found.
            cursor = end;
        }
    }

    /** Returns whether {@code node} is named {@code METABOLITE} or {@code ENZYME}. */
    private static boolean isGoldEntityElement(Node node) {
        String name = node.getNodeName();
        return "METABOLITE".equals(name) || "ENZYME".equals(name);
    }
}

