/*
 * Decompiled with CFR 0.152.
 */
package dragon.ir.kngbase;

import dragon.ir.index.IRRelation;
import dragon.ir.kngbase.KnowledgeBase;
import dragon.matrix.DoubleFlatSparseMatrix;
import dragon.matrix.DoubleGiantSparseMatrix;
import dragon.matrix.DoubleSparseMatrix;
import dragon.matrix.IntFlatSparseMatrix;
import dragon.matrix.IntGiantSparseMatrix;
import dragon.matrix.IntSparseMatrix;
import dragon.nlp.SimpleElementList;
import dragon.nlp.Token;
import dragon.nlp.extract.TokenExtractor;
import dragon.onlinedb.Article;
import dragon.onlinedb.CollectionReader;
import dragon.util.SortedArray;
import java.io.File;
import java.util.ArrayList;
import java.util.Date;

public class HALSpace
implements KnowledgeBase {
    private SimpleElementList termList;
    private TokenExtractor te;
    private int windowSize;
    private IntSparseMatrix cooccurMatrix;
    private DoubleSparseMatrix halMatrix;
    private boolean fileBasedMatrix;
    private SortedArray relationCache;
    private boolean showProgress;
    private boolean useExternalTokenIndex;

    public HALSpace(TokenExtractor te, int windowSize) {
        this(new SimpleElementList(), te, windowSize);
    }

    public HALSpace(SimpleElementList termList, TokenExtractor te, int windowSize) {
        this.termList = termList;
        this.useExternalTokenIndex = termList.size() > 0;
        this.te = te;
        te.setFilteringOption(false);
        this.windowSize = windowSize;
        this.halMatrix = new DoubleFlatSparseMatrix();
        this.fileBasedMatrix = false;
        this.cooccurMatrix = new IntFlatSparseMatrix(true, true);
        this.relationCache = new SortedArray();
        this.showProgress = false;
    }

    public HALSpace(SimpleElementList termList, TokenExtractor te, int windowSize, String indexFilename, String matrixFilename) {
        this.termList = termList;
        this.useExternalTokenIndex = termList.size() > 0;
        this.te = te;
        te.setFilteringOption(false);
        this.windowSize = windowSize;
        this.halMatrix = new DoubleGiantSparseMatrix(indexFilename, matrixFilename, false, false);
        ((DoubleGiantSparseMatrix)this.halMatrix).setFlushInterval(Integer.MAX_VALUE);
        this.fileBasedMatrix = true;
        this.cooccurMatrix = new IntGiantSparseMatrix(indexFilename + ".tmp", matrixFilename + ".tmp", true, true);
        this.relationCache = new SortedArray();
        this.showProgress = false;
    }

    public DoubleSparseMatrix getKnowledgeMatrix() {
        return this.halMatrix;
    }

    public SimpleElementList getRowKeyList() {
        return this.termList;
    }

    public SimpleElementList getColumnKeyList() {
        return this.termList;
    }

    public void setShowProgress(boolean option) {
        this.showProgress = option;
    }

    public void add(ArrayList articleList) {
        int i = 0;
        while (i < articleList.size()) {
            this.addArticle((Article)articleList.get(i));
            ++i;
        }
    }

    public void add(CollectionReader collectionReader) {
        int count2 = 0;
        Article article = collectionReader.getNextArticle();
        while (article != null) {
            this.addArticle(article);
            if (this.showProgress && ++count2 % 10 == 0) {
                System.out.println(new Date().toString() + " Processed Articles: " + count2);
            }
            article = collectionReader.getNextArticle();
        }
    }

    public void finalizeData() {
        this.cooccurMatrix.finalizeData();
        int row = this.cooccurMatrix.rows();
        int i = 0;
        while (i < row) {
            int[] arrCol = this.cooccurMatrix.getNonZeroColumnsInRow(i);
            int[] arrFreq = this.cooccurMatrix.getNonZeroIntScoresInRow(i);
            int len = arrFreq.length;
            double sum2 = this.cooccurMatrix.getRowSum(i);
            double mean = sum2 / (double)len;
            sum2 = 0.0;
            int j = 0;
            while (j < len) {
                if ((double)arrFreq[j] >= mean) {
                    sum2 += (double)arrFreq[j];
                }
                ++j;
            }
            j = 0;
            while (j < len) {
                if ((double)arrFreq[j] >= mean) {
                    this.halMatrix.add(i, arrCol[j], (double)arrFreq[j] / sum2);
                }
                ++j;
            }
            if (this.showProgress && i % 1000 == 0) {
                System.out.println("Processed Rows: " + i);
            }
            if (this.fileBasedMatrix && i % 5000 == 0) {
                ((DoubleGiantSparseMatrix)this.halMatrix).flush();
            }
            ++i;
        }
        this.halMatrix.finalizeData();
    }

    public DoubleSparseMatrix getHALMatrix() {
        return this.halMatrix;
    }

    public void close() {
        this.halMatrix.close();
        this.cooccurMatrix.close();
        if (this.fileBasedMatrix) {
            new File(((IntGiantSparseMatrix)this.cooccurMatrix).getIndexFilename()).delete();
            new File(((IntGiantSparseMatrix)this.cooccurMatrix).getMatrixFilename()).delete();
        }
    }

    private void addArticle(Article article) {
        StringBuffer sb = new StringBuffer();
        if (article.getTitle() != null) {
            sb.append(article.getTitle());
            sb.append(' ');
        }
        if (article.getAbstract() != null) {
            sb.append(article.getAbstract());
            sb.append(' ');
        }
        if (article.getBody() != null) {
            sb.append(article.getBody());
            sb.append(' ');
        }
        if (sb.length() <= 20) {
            return;
        }
        ArrayList tokenList = this.te.extractFromDoc(sb.toString().trim());
        if (tokenList == null || tokenList.size() < this.windowSize) {
            return;
        }
        Token[] arrToken = new Token[tokenList.size()];
        SortedArray cache = new SortedArray();
        int i = 0;
        while (i < tokenList.size()) {
            arrToken[i] = (Token)tokenList.get(i);
            int pos = cache.binarySearch(arrToken[i]);
            if (pos >= 0) {
                arrToken[i].setIndex(((Token)cache.get(pos)).getIndex());
            } else {
                arrToken[i].setIndex(this.tokenSearch(arrToken[i].getValue()));
                if (arrToken[i].getIndex() >= 0) {
                    cache.add(pos * -1 - 1, arrToken[i]);
                }
            }
            ++i;
        }
        cache.clear();
        tokenList.clear();
        i = 0;
        while (i <= arrToken.length - this.windowSize) {
            int first = arrToken[i].getIndex();
            if (first != -1) {
                int j = 1;
                while (j < this.windowSize) {
                    int second = arrToken[i + j].getIndex();
                    if (second != -1) {
                        this.addRelation(first, second, this.windowSize - j);
                        this.addRelation(second, first, this.windowSize - j);
                    }
                    ++j;
                }
            }
            ++i;
        }
        i = 0;
        while (i < this.relationCache.size()) {
            IRRelation relation = (IRRelation)this.relationCache.get(i);
            this.cooccurMatrix.add(relation.getFirstTerm(), relation.getSecondTerm(), relation.getFrequency());
            this.cooccurMatrix.add(relation.getSecondTerm(), relation.getFirstTerm(), relation.getFrequency());
            ++i;
        }
        this.relationCache.clear();
    }

    private int tokenSearch(String token2) {
        if (this.useExternalTokenIndex) {
            return this.termList.search(token2);
        }
        return this.termList.add(token2);
    }

    private boolean addRelation(int first, int second, int score) {
        IRRelation cur = new IRRelation(first, second, score);
        if (!this.relationCache.add(cur)) {
            cur = (IRRelation)this.relationCache.get(this.relationCache.insertedPos());
            cur.addFrequency(score);
        }
        return true;
    }
}

