/*
 * Decompiled with CFR 0.152.
 */
package dragon.nlp.tool.xtract;

import dragon.matrix.IntSuperSparseMatrix;
import dragon.nlp.Document;
import dragon.nlp.DocumentParser;
import dragon.nlp.Paragraph;
import dragon.nlp.Sentence;
import dragon.nlp.SimpleElementList;
import dragon.nlp.SimplePairList;
import dragon.nlp.Word;
import dragon.nlp.extract.EngDocumentParser;
import dragon.nlp.tool.Lemmatiser;
import dragon.nlp.tool.Tagger;
import dragon.nlp.tool.xtract.EngWordPairGenerator;
import dragon.nlp.tool.xtract.SentenceBase;
import dragon.nlp.tool.xtract.WordPairGenerator;
import dragon.nlp.tool.xtract.WordPairIndexer;
import dragon.nlp.tool.xtract.WordPairStat;
import dragon.nlp.tool.xtract.WordPairStatList;
import dragon.onlinedb.Article;
import dragon.onlinedb.CollectionReader;
import java.io.File;
import java.util.Date;
import java.util.Locale;

/**
 * Indexes word pairs found in the sentences of a document collection.
 *
 * <p>For every sentence that yields at least one word pair, the sentence is stored in a
 * {@link SentenceBase} and, for each span {@code 1..maxSpan}, the pair/sentence frequency is
 * recorded in a "right" matrix (positive span) and a "left" matrix (negative span). Document
 * keys, word lemmas, pair keys and pair statistics are kept in list files inside the index
 * folder passed to the constructor.
 */
public class EngWordPairIndexer implements WordPairIndexer {
    protected int maxSpan;
    protected Tagger tagger;
    protected Lemmatiser lemmatiser;
    protected WordPairGenerator pairGenerator;
    protected SentenceBase sentenceBase;
    protected IntSuperSparseMatrix[] arrPairSentLeftMatrix;
    protected IntSuperSparseMatrix[] arrPairSentRightMatrix;
    protected SimpleElementList docKeyList;
    protected SimpleElementList wordKeyList;
    protected SimplePairList pairKeyList;
    protected WordPairStatList wordpairStatList;
    protected DocumentParser parser;
    protected int flushInterval;
    protected int indexedNum;

    /**
     * Creates an indexer that uses an {@link EngWordPairGenerator} built with the same
     * {@code maxSpan}.
     *
     * @param folder     directory that receives all index files; created if missing
     * @param maxSpan    largest span for which pair frequencies are recorded
     * @param tagger     tagger applied to each sentence, or {@code null} to skip tagging
     * @param lemmatiser lemmatiser used during preprocessing, or {@code null} to fall back to
     *                   lower-casing
     */
    public EngWordPairIndexer(String folder, int maxSpan, Tagger tagger, Lemmatiser lemmatiser) {
        this(folder, maxSpan, tagger, lemmatiser, new EngWordPairGenerator(maxSpan));
    }

    /**
     * Creates an indexer with an explicit word pair generator.
     *
     * @param folder        directory that receives all index files; created if missing
     * @param maxSpan       largest span for which pair frequencies are recorded
     * @param tagger        tagger applied to each sentence, or {@code null} to skip tagging
     * @param lemmatiser    lemmatiser used during preprocessing, or {@code null} to fall back
     *                      to lower-casing
     * @param pairGenerator generator that extracts word pairs from a preprocessed sentence
     */
    public EngWordPairIndexer(String folder, int maxSpan, Tagger tagger, Lemmatiser lemmatiser, WordPairGenerator pairGenerator) {
        this.maxSpan = maxSpan;
        this.tagger = tagger;
        this.lemmatiser = lemmatiser;
        this.pairGenerator = pairGenerator;
        this.flushInterval = 10000;
        new File(folder).mkdirs();
        this.parser = new EngDocumentParser();
        this.sentenceBase = new SentenceBase(folder + "/sentencebase.index", folder + "/sentencebase.matrix");
        this.docKeyList = new SimpleElementList(folder + "/dockey.list", true);
        this.wordKeyList = new SimpleElementList(folder + "/wordkey.list", true);
        this.pairKeyList = new SimplePairList(folder + "/pairkey.list", true);
        this.wordpairStatList = new WordPairStatList(folder + "/pairstat.list", maxSpan, true);
        // Right matrices are opened before left ones, as in the original creation order.
        this.arrPairSentRightMatrix = openPairSentMatrices(folder, "pairsentr", maxSpan);
        this.arrPairSentLeftMatrix = openPairSentMatrices(folder, "pairsentl", maxSpan);
    }

    /** Opens one pair/sentence matrix per span, named {@code <prefix><span>.index/.matrix}. */
    private static IntSuperSparseMatrix[] openPairSentMatrices(String folder, String prefix, int maxSpan) {
        IntSuperSparseMatrix[] matrices = new IntSuperSparseMatrix[maxSpan];
        for (int span = 1; span <= maxSpan; span++) {
            String base = folder + "/" + prefix + span;
            IntSuperSparseMatrix matrix = new IntSuperSparseMatrix(base + ".index", base + ".matrix", false, false);
            // NOTE(review): presumably this disables the matrix's own periodic flushing so that
            // flushing is driven by index() via flushInterval - confirm against IntSuperSparseMatrix.
            matrix.setFlushInterval(Integer.MAX_VALUE);
            matrices[span - 1] = matrix;
        }
        return matrices;
    }

    /** Returns the parser used to split article text into paragraphs. */
    public DocumentParser getDocumentParser() {
        return this.parser;
    }

    /** Replaces the parser used to split article text into paragraphs. */
    public void setDocumentParser(DocumentParser parser) {
        this.parser = parser;
    }

    /**
     * Closes the sentence base and all key/statistics lists, then finalizes and closes every
     * pair/sentence matrix (right matrices first, then left).
     */
    public void close() {
        this.sentenceBase.close();
        this.docKeyList.close();
        this.wordKeyList.close();
        this.wordpairStatList.close();
        this.pairKeyList.close();
        finalizeAndClose(this.arrPairSentRightMatrix);
        finalizeAndClose(this.arrPairSentLeftMatrix);
    }

    /** Finalizes and closes each matrix in array order. */
    private static void finalizeAndClose(IntSuperSparseMatrix[] matrices) {
        for (IntSuperSparseMatrix matrix : matrices) {
            matrix.finalizeData();
            matrix.close();
        }
    }

    /** Flushes every pair/sentence matrix (right matrices first, then left). */
    public void flush() {
        flushAll(this.arrPairSentRightMatrix);
        flushAll(this.arrPairSentLeftMatrix);
    }

    /** Flushes each matrix in array order. */
    private static void flushAll(IntSuperSparseMatrix[] matrices) {
        for (IntSuperSparseMatrix matrix : matrices) {
            matrix.flush();
        }
    }

    /**
     * Indexes every article supplied by the reader until it returns {@code null}.
     *
     * <p>The matrices are flushed each time {@code flushInterval} articles have been processed.
     * Any exception is printed to standard error and ends the run; it is not rethrown.
     *
     * @param collectionReader source of articles
     */
    public void index(CollectionReader collectionReader) {
        try {
            this.indexedNum = 0;
            for (Article article = collectionReader.getNextArticle(); article != null;
                    article = collectionReader.getNextArticle()) {
                // The interval is checked for positivity so that a zero value cannot raise an
                // ArithmeticException that would silently abort the whole run.
                if (this.flushInterval > 0 && this.indexedNum > 0
                        && this.indexedNum % this.flushInterval == 0) {
                    this.flush();
                }
                this.indexArticle(article);
                ++this.indexedNum;
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Indexes the title, abstract and body of one article.
     *
     * <p>An article whose key is already in the document key list is skipped. Failures of
     * individual sentences are not reflected in the return value.
     *
     * @param curArticle article to index
     * @return {@code true} if the article was processed or was already indexed; {@code false}
     *         if an exception occurred (it is printed to standard error)
     */
    public boolean indexArticle(Article curArticle) {
        try {
            String key = curArticle.getKey();
            if (this.docKeyList.contains(key)) {
                return true;
            }
            // Progress trace: timestamp followed by the article key.
            System.out.println(new Date().toString() + " " + key);
            this.docKeyList.add(key);

            Document doc = new Document();
            doc.addParagraph(this.parser.parseParagraph(curArticle.getTitle()));
            doc.addParagraph(this.parser.parseParagraph(curArticle.getAbstract()));
            doc.addParagraph(this.parser.parseParagraph(curArticle.getBody()));

            for (Paragraph paragraph = doc.getFirstParagraph(); paragraph != null; paragraph = paragraph.next) {
                for (Sentence sentence = paragraph.getFirstSentence(); sentence != null; sentence = sentence.next) {
                    this.indexSentence(sentence);
                }
            }
            return true;
        }
        catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Preprocesses one sentence, generates its word pairs and records them.
     *
     * <p>Sentences with fewer than two words, or for which the generator yields no pairs, are
     * skipped and not added to the sentence base.
     *
     * @return {@code false} only if an exception occurred (it is printed to standard error)
     */
    private boolean indexSentence(Sentence sent) {
        try {
            if (sent.getWordNum() < 2) {
                return true;
            }
            this.preprocessSentence(sent);
            int pairCount = this.pairGenerator.generate(sent);
            if (pairCount <= 0) {
                return true;
            }
            int sentIndex = this.sentenceBase.addSentence(sent);
            for (int p = 0; p < pairCount; p++) {
                WordPairStat pair = this.pairGenerator.getWordPairs(p);
                pair.setIndex(this.pairKeyList.add(pair.getFirstWord(), pair.getSecondWord()));
                this.wordpairStatList.add(pair);
                // Positive spans go to the right matrices, negative spans to the left ones.
                this.recordFrequencies(pair, sentIndex, this.arrPairSentRightMatrix, 1);
                this.recordFrequencies(pair, sentIndex, this.arrPairSentLeftMatrix, -1);
            }
            return true;
        }
        catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Adds the pair's non-zero frequencies for spans {@code sign*1 .. sign*maxSpan} to the
     * matrix of the matching span.
     */
    private void recordFrequencies(WordPairStat pair, int sentIndex, IntSuperSparseMatrix[] matrices, int sign) {
        for (int span = 1; span <= this.maxSpan; span++) {
            int frequency = pair.getFrequency(sign * span);
            if (frequency > 0) {
                matrices[span - 1].add(pair.getIndex(), sentIndex, frequency);
            }
        }
    }

    /**
     * Tags the sentence (when a tagger is set), assigns a lemma to every word and stores the
     * lemma's index from the word key list on the word.
     *
     * <p>Words whose POS index is 1 are lemmatised when a lemmatiser is available; all other
     * words are lower-cased. Lower-casing uses {@link Locale#ROOT} so that the keys written to
     * the word list do not depend on the JVM's default locale.
     *
     * @param sent sentence to preprocess in place
     */
    protected void preprocessSentence(Sentence sent) {
        if (this.tagger != null) {
            this.tagger.tag(sent);
        }
        for (Word word = sent.getFirstWord(); word != null; word = word.next) {
            String content = word.getContent();
            // NOTE(review): POS index 1 is presumably the noun tag - confirm against the
            // Tagger constants.
            if (word.getPOSIndex() == 1 && this.lemmatiser != null) {
                word.setLemma(this.lemmatiser.lemmatize(content, 1));
            } else {
                word.setLemma(content.toLowerCase(Locale.ROOT));
            }
            word.setIndex(this.wordKeyList.add(word.getLemma()));
        }
    }
}

