/*
 * Decompiled with CFR 0.152.
 */
package de.berlin.hu.uima.cr.txt.gz;

import de.berlin.hu.types.PubmedDocument;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Vector;
import java.util.zip.GZIPInputStream;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.examples.SourceDocumentInformation;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.DocumentAnnotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

/**
 * UIMA collection reader that turns every {@code *.txt.gz} file found in a
 * configured input directory into one CAS.
 *
 * <p>Each non-blank line of a decompressed file is expected to have the form
 * {@code <pmid>\t<text>}. The texts of all lines are concatenated (each followed
 * by a newline) into the CAS document text, and one {@link PubmedDocument}
 * annotation carrying the pmid is indexed over each line's text.
 *
 * <p>Configuration parameters:
 * <ul>
 *   <li>{@link #PARAM_INPUTDIR} - directory to scan (required)</li>
 *   <li>{@link #PARAM_LANGUAGE} - language set on the document annotation (optional)</li>
 *   <li>{@link #PARAM_SUBDIR} - whether sub-directories are scanned too (optional,
 *       treated as {@code false} when absent)</li>
 * </ul>
 */
public class ZipFileCollectionReader
extends CollectionReader_ImplBase {
    public static final String PARAM_INPUTDIR = "InputDirectory";
    public static final String PARAM_LANGUAGE = "Language";
    public static final String PARAM_SUBDIR = "BrowseSubdirectories";
    /** File-name suffix a file must have to be picked up by the scan. */
    private static final String FILE_SUFFIX = ".txt.gz";
    /** Charset used to decode the decompressed files (fixed so offsets do not depend on the platform default). */
    private static final String INPUT_CHARSET = "UTF-8";
    private ArrayList<File> mFiles;
    private String mLanguage;
    private Boolean mRecursive;
    private int mCurrentIndex;

    /**
     * Reads the configuration parameters and collects the input files.
     *
     * @throws ResourceInitializationException if the input directory parameter is
     *         not set, or the configured path does not exist or is not a directory
     */
    @Override
    public void initialize() throws ResourceInitializationException {
        String inputDir = (String)this.getConfigParameterValue(PARAM_INPUTDIR);
        if (inputDir == null) {
            // Fail with a UIMA configuration error instead of a NullPointerException.
            throw new ResourceInitializationException(ResourceInitializationException.CONFIG_SETTING_ABSENT, new Object[]{PARAM_INPUTDIR});
        }
        File directory = new File(inputDir.trim());
        this.mLanguage = (String)this.getConfigParameterValue(PARAM_LANGUAGE);
        this.mRecursive = (Boolean)this.getConfigParameterValue(PARAM_SUBDIR);
        if (this.mRecursive == null) {
            this.mRecursive = Boolean.FALSE;
        }
        this.mCurrentIndex = 0;
        if (!directory.exists() || !directory.isDirectory()) {
            throw new ResourceInitializationException("directory_not_found", new Object[]{PARAM_INPUTDIR, this.getMetaData().getName(), directory.getPath()});
        }
        this.mFiles = new ArrayList<File>();
        this.addFilesFromDir(directory);
    }

    /**
     * Adds every {@code *.txt.gz} file in {@code dir2} to the file list and, when
     * recursive browsing is enabled, descends into its sub-directories.
     *
     * @param dir2 directory to scan
     */
    private void addFilesFromDir(File dir2) {
        File[] files = dir2.listFiles();
        if (files == null) {
            // listFiles() returns null on an I/O error or an unreadable directory.
            return;
        }
        for (int i = 0; i < files.length; ++i) {
            File entry = files[i];
            if (entry.isDirectory()) {
                if (this.mRecursive.booleanValue()) {
                    this.addFilesFromDir(entry);
                }
            } else if (entry.getName().endsWith(FILE_SUFFIX)) {
                this.mFiles.add(entry);
            }
        }
    }

    /**
     * @return {@code true} while there are collected files that have not been read yet
     */
    @Override
    public boolean hasNext() {
        return this.mCurrentIndex < this.mFiles.size();
    }

    /**
     * Reads the next file into the given CAS: sets the document text, indexes one
     * {@link PubmedDocument} per input line, optionally sets the document language
     * and adds a {@link SourceDocumentInformation} annotation describing the file.
     *
     * @param aCAS the CAS to populate
     * @throws IOException if the file cannot be read or contains a non-blank line
     *         without a tab separator
     * @throws CollectionException if no JCas can be obtained from {@code aCAS}
     */
    @Override
    public void getNext(CAS aCAS) throws IOException, CollectionException {
        JCas jcas;
        try {
            jcas = aCAS.getJCas();
        }
        catch (CASException e) {
            throw new CollectionException(e);
        }
        File file = this.mFiles.get(this.mCurrentIndex++);
        System.out.println("Reading file: " + file.getAbsolutePath());
        String text2 = this.readRecords(file, jcas);
        jcas.setDocumentText(text2);
        if (this.mLanguage != null) {
            ((DocumentAnnotation)jcas.getDocumentAnnotationFs()).setLanguage(this.mLanguage);
        }
        SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
        srcDocInfo.setUri(file.getAbsoluteFile().toURI().toString());
        srcDocInfo.setOffsetInSource(0);
        srcDocInfo.setDocumentSize((int)file.length());
        srcDocInfo.setLastSegment(this.mCurrentIndex == this.mFiles.size());
        srcDocInfo.setBegin(0);
        // End of the last record, i.e. the text without its trailing newline;
        // clamped so an empty file does not produce a negative offset.
        srcDocInfo.setEnd(Math.max(0, text2.length() - 1));
        srcDocInfo.addToIndexes();
    }

    /**
     * Decompresses {@code file}, indexes one {@link PubmedDocument} per record line
     * and returns the concatenated record texts (each followed by {@code "\n"}).
     * Blank lines are skipped. The underlying streams are always closed.
     *
     * @param file gzip-compressed input file
     * @param jcas CAS view the annotations are created in
     * @return the document text built from all record lines
     * @throws IOException on read errors or when a non-blank line has no tab
     */
    private String readRecords(File file, JCas jcas) throws IOException {
        StringBuilder textBuffer = new StringBuilder();
        FileInputStream fileIn = new FileInputStream(file);
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(fileIn), INPUT_CHARSET));
            int lineNumber = 0;
            String line;
            // readLine() returning null is the only reliable end-of-stream signal;
            // ready() merely reports whether a read would block.
            while ((line = reader.readLine()) != null) {
                ++lineNumber;
                if (line.length() == 0) {
                    continue;
                }
                int tab = line.indexOf('\t');
                if (tab < 0) {
                    throw new IOException("Malformed line " + lineNumber + " in " + file.getAbsolutePath() + ": expected \"<pmid>\\t<text>\"");
                }
                String annot = line.substring(tab + 1);
                PubmedDocument pmdoc = new PubmedDocument(jcas);
                pmdoc.setPmid(line.substring(0, tab));
                pmdoc.setBegin(textBuffer.length());
                textBuffer.append(annot);
                pmdoc.setEnd(textBuffer.length());
                textBuffer.append('\n');
                pmdoc.addToIndexes();
            }
        }
        finally {
            if (reader != null) {
                reader.close();
            } else {
                // Wrapping failed (e.g. not a gzip file): release the raw stream.
                fileIn.close();
            }
        }
        return textBuffer.toString();
    }

    /**
     * Splits {@code source} at the first occurrence of {@code splitter}.
     *
     * <p>The part before the splitter is included whenever the splitter is found
     * (even if empty); the part after it is included only when it is non-empty.
     * The result therefore has between zero and two elements, e.g.
     * {@code "a\tb" -> ["a", "b"]}, {@code "a\t" -> ["a"]}, {@code "ab" -> ["ab"]},
     * {@code "" -> []}.
     *
     * @param source string to split; must not be {@code null}
     * @param splitter separator to look for; must not be {@code null}
     * @return the pieces around the first separator, as described above
     */
    public static String[] splitFirst(String source, String splitter) {
        ArrayList<String> parts = new ArrayList<String>(2);
        int restStart = 0;
        int hit = source.indexOf(splitter);
        if (hit != -1) {
            parts.add(source.substring(0, hit));
            restStart = hit + splitter.length();
        }
        if (restStart < source.length()) {
            parts.add(source.substring(restStart));
        }
        return parts.toArray(new String[parts.size()]);
    }

    /**
     * No-op: every file is opened and closed within a single {@link #getNext(CAS)} call.
     */
    @Override
    public void close() throws IOException {
    }

    /**
     * @return a single progress entry counting the files consumed so far against
     *         the total number of collected files
     */
    @Override
    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.mCurrentIndex, this.mFiles.size(), "entities")};
    }

    /**
     * @return the number of input files collected during {@link #initialize()}
     */
    public int getNumberOfDocuments() {
        return this.mFiles.size();
    }
}

