/*
 * Decompiled with CFR 0.152.
 */
package dragon.ir.kngbase;

import dragon.ir.index.IRSignatureIndexList;
import dragon.matrix.DoubleSuperSparseMatrix;
import dragon.matrix.IntSparseMatrix;
import dragon.nlp.Counter;
import dragon.nlp.Token;
import dragon.util.MathUtil;
import java.io.File;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Random;

/**
 * Estimates, for each source signature, a weighted list of destination
 * signatures (a "translation") and can write the resulting weights to a pair
 * of sparse matrix files (the matrix and its transpose).
 *
 * <p>Two data layouts are supported, selected by the constructor used:
 * <ul>
 *   <li>a source-signature-by-document matrix together with a
 *       document-by-destination-signature matrix, or</li>
 *   <li>a single source-by-destination co-occurrence matrix.</li>
 * </ul>
 *
 * <p>Optionally the initial distribution is refined by an EM procedure that
 * mixes it with collection statistics read from the destination index list.
 * EM dereferences the destination index list, so enabling it through
 * {@link #setUseEM(boolean)} on an instance built without one fails with a
 * {@code NullPointerException} as soon as a non-empty distribution is refined.
 */
public class TopicSignatureModel {
    /** A co-occurrence row needs at least this many non-zero cells to be translated. */
    private static final int MIN_COOCCUR_ROW_CELLS = 5;
    /** Number of pending cells after which both output matrices are flushed. */
    private static final int FLUSH_CELL_THRESHOLD = 5000000;
    /** Upper bound on the number of rows sampled when estimating the average row size. */
    private static final int AVG_TERM_SAMPLE_SIZE = 50;
    /** A progress line is printed every this many rows. */
    private static final int PROGRESS_INTERVAL = 1000;
    /** Trim cut-off used when mean trimming is disabled. */
    private static final double NO_TRIM_THRESHOLD = 0.5;

    private IRSignatureIndexList srcIndexList;
    private IRSignatureIndexList destIndexList;
    private IntSparseMatrix srcSignatureDocMatrix;
    private IntSparseMatrix destDocSignatureMatrix;
    private IntSparseMatrix cooccurMatrix;
    private boolean useDocFrequency;
    private boolean useMeanTrim = true;
    private boolean useEM;
    private double probThreshold = 0.001;
    private double bkgCoeffi = 0.5;
    /** Scratch accumulator with one slot per destination signature. */
    private int[] buf;
    private int iterationNum = 15;
    private int totalDestSignatureNum;
    /**
     * Document-count cut-off above which the dense array accumulator is used
     * instead of the hash map. Only computed by {@link #genTransMatrix}; it
     * keeps the field default of 0 until then.
     */
    private int docThreshold;

    /**
     * Creates a model over document-level matrices with EM disabled.
     *
     * @param srcIndexList           index list of the source signatures
     * @param srcSignatureDocMatrix  source-signature-by-document matrix
     * @param destDocSignatureMatrix document-by-destination-signature matrix
     */
    public TopicSignatureModel(IRSignatureIndexList srcIndexList, IntSparseMatrix srcSignatureDocMatrix, IntSparseMatrix destDocSignatureMatrix) {
        this.srcIndexList = srcIndexList;
        this.srcSignatureDocMatrix = srcSignatureDocMatrix;
        this.destDocSignatureMatrix = destDocSignatureMatrix;
        this.useDocFrequency = true;
        this.useEM = false;
        this.totalDestSignatureNum = destDocSignatureMatrix.columns();
    }

    /**
     * Creates a model over a co-occurrence matrix with EM disabled.
     *
     * @param srcIndexList  index list of the source signatures
     * @param cooccurMatrix source-by-destination co-occurrence matrix
     */
    public TopicSignatureModel(IRSignatureIndexList srcIndexList, IntSparseMatrix cooccurMatrix) {
        this.srcIndexList = srcIndexList;
        this.cooccurMatrix = cooccurMatrix;
        this.useEM = false;
        this.totalDestSignatureNum = cooccurMatrix.columns();
    }

    /**
     * Creates a model over a co-occurrence matrix with EM enabled.
     *
     * @param srcIndexList  index list of the source signatures
     * @param destIndexList index list of the destination signatures, read by the EM step
     * @param cooccurMatrix source-by-destination co-occurrence matrix
     */
    public TopicSignatureModel(IRSignatureIndexList srcIndexList, IRSignatureIndexList destIndexList, IntSparseMatrix cooccurMatrix) {
        this.srcIndexList = srcIndexList;
        this.destIndexList = destIndexList;
        this.cooccurMatrix = cooccurMatrix;
        this.useEM = true;
        this.totalDestSignatureNum = cooccurMatrix.columns();
    }

    /**
     * Creates a model over document-level matrices with EM enabled.
     *
     * @param srcIndexList           index list of the source signatures
     * @param srcSignatureDocMatrix  source-signature-by-document matrix
     * @param destIndexList          index list of the destination signatures, read by the EM step
     * @param destDocSignatureMatrix document-by-destination-signature matrix
     */
    public TopicSignatureModel(IRSignatureIndexList srcIndexList, IntSparseMatrix srcSignatureDocMatrix, IRSignatureIndexList destIndexList, IntSparseMatrix destDocSignatureMatrix) {
        this.srcIndexList = srcIndexList;
        this.srcSignatureDocMatrix = srcSignatureDocMatrix;
        this.destIndexList = destIndexList;
        this.destDocSignatureMatrix = destDocSignatureMatrix;
        this.useDocFrequency = true;
        this.useEM = true;
        this.totalDestSignatureNum = destDocSignatureMatrix.columns();
    }

    /** Enables or disables the EM refinement step; EM reads the destination index list. */
    public void setUseEM(boolean option) {
        this.useEM = option;
    }

    /** @return whether the EM refinement step is applied */
    public boolean getUseEM() {
        return this.useEM;
    }

    /** Sets the weight given to the collection (background) distribution inside EM. */
    public void setEMBackgroundCoefficient(double coeffi) {
        this.bkgCoeffi = coeffi;
    }

    /** @return the weight given to the collection (background) distribution inside EM */
    public double getEMBackgroundCoefficient() {
        return this.bkgCoeffi;
    }

    /** Sets the number of EM iterations. */
    public void setEMIterationNum(int iterationNum) {
        this.iterationNum = iterationNum;
    }

    /** @return the number of EM iterations */
    public int getEMIterationNum() {
        return this.iterationNum;
    }

    /**
     * Chooses between counting each destination signature once per document
     * ({@code true}) and summing its stored in-document score ({@code false}).
     */
    public void setUseDocFrequency(boolean option) {
        this.useDocFrequency = option;
    }

    /** @return whether document frequency is used instead of stored scores */
    public boolean getUseDocFrequency() {
        return this.useDocFrequency;
    }

    /** Enables or disables trimming of destination signatures whose count is below the mean. */
    public void setUseMeanTrim(boolean option) {
        this.useMeanTrim = option;
    }

    /** @return whether below-mean destination signatures are trimmed */
    public boolean getUseMeanTrim() {
        return this.useMeanTrim;
    }

    /** Sets the minimum share of a row's total count a destination signature must reach to be kept. */
    public void setProbThreshold(double threshold) {
        this.probThreshold = threshold;
    }

    /** @return the minimum share of a row's total count required to keep a destination signature */
    public double getProbThreshold() {
        return this.probThreshold;
    }

    /**
     * Computes the translation of every eligible source signature and writes
     * the weights to {@code <matrixPath>/<matrixKey>.index|.matrix} and, transposed,
     * to {@code <matrixPath>/<matrixKey>t.index|.matrix}. Existing files with
     * those names are deleted first.
     *
     * <p>A source signature is eligible when its document frequency is at
     * least {@code minDocFrequency} and, if a co-occurrence matrix is in use,
     * its row has at least {@value #MIN_COOCCUR_ROW_CELLS} non-zero cells.
     *
     * @param minDocFrequency minimum document frequency of a source signature
     * @param matrixPath      directory that receives the output files
     * @param matrixKey       base name of the output files
     * @return always {@code true}
     */
    public boolean genTransMatrix(int minDocFrequency, String matrixPath, String matrixKey) {
        String transIndexFile = matrixPath + "/" + matrixKey + ".index";
        String transMatrixFile = matrixPath + "/" + matrixKey + ".matrix";
        String transTIndexFile = matrixPath + "/" + matrixKey + "t.index";
        String transTMatrixFile = matrixPath + "/" + matrixKey + "t.matrix";
        deleteIfExists(transMatrixFile);
        deleteIfExists(transIndexFile);
        deleteIfExists(transTMatrixFile);
        deleteIfExists(transTIndexFile);

        // Automatic flushing is effectively turned off; flushing is driven by
        // the pending-cell counter below.
        DoubleSuperSparseMatrix outputTransMatrix = new DoubleSuperSparseMatrix(transIndexFile, transMatrixFile, false, false);
        outputTransMatrix.setFlushInterval(Integer.MAX_VALUE);
        DoubleSuperSparseMatrix outputTransTMatrix = new DoubleSuperSparseMatrix(transTIndexFile, transTMatrixFile, false, false);
        outputTransTMatrix.setFlushInterval(Integer.MAX_VALUE);

        int pendingCells = 0;
        int rowNum = this.srcIndexList.size();
        this.buf = new int[this.totalDestSignatureNum];
        if (this.destDocSignatureMatrix != null) {
            this.docThreshold = this.computeDocThreshold(this.destDocSignatureMatrix);
        }

        for (int row = 0; row < rowNum; row++) {
            if (row % PROGRESS_INTERVAL == 0) {
                System.out.println(new Date().toString() + " Processing Row#" + row);
            }
            if (!this.isEligibleRow(row, minDocFrequency)) {
                continue;
            }
            ArrayList tokenList = this.genSignatureTranslation(row);
            for (int j = 0; j < tokenList.size(); j++) {
                Token curToken = (Token) tokenList.get(j);
                outputTransMatrix.add(row, curToken.getIndex(), curToken.getWeight());
                outputTransTMatrix.add(curToken.getIndex(), row, curToken.getWeight());
            }
            // Count the cells BEFORE clearing the list; counting afterwards
            // always adds zero and the periodic flush would never trigger.
            pendingCells += tokenList.size();
            tokenList.clear();
            if (pendingCells >= FLUSH_CELL_THRESHOLD) {
                outputTransTMatrix.flush();
                outputTransMatrix.flush();
                pendingCells = 0;
            }
        }

        outputTransTMatrix.finalizeData();
        outputTransTMatrix.close();
        outputTransMatrix.finalizeData();
        outputTransMatrix.close();
        return true;
    }

    /**
     * Computes the weighted destination-signature list for one source signature.
     *
     * <p>With document-level matrices, the documents containing the source
     * signature are aggregated either through the dense array accumulator
     * (more documents than the document threshold) or through a hash map.
     * Otherwise the row of the co-occurrence matrix is used. If EM is enabled
     * the weights are refined afterwards.
     *
     * @param srcSignatureIndex row index of the source signature
     * @return list of {@code Token}s whose index is the destination signature
     *         and whose weight is its estimated probability
     */
    public ArrayList genSignatureTranslation(int srcSignatureIndex) {
        ArrayList<Token> tokenList;
        if (this.srcSignatureDocMatrix == null) {
            tokenList = this.computeDistributionByCooccurMatrix(srcSignatureIndex);
        } else {
            int[] arrDoc = this.srcSignatureDocMatrix.getNonZeroColumnsInRow(srcSignatureIndex);
            tokenList = arrDoc.length > this.docThreshold
                    ? this.computeDistributionByArray(arrDoc)
                    : this.computeDistributionByHash(arrDoc);
        }
        if (this.useEM) {
            tokenList = this.emTopicSignatureModel(tokenList);
        }
        return tokenList;
    }

    /** Deletes the file at {@code path} if it exists; the outcome of the deletion is not checked. */
    private static void deleteIfExists(String path) {
        File file = new File(path);
        if (file.exists()) {
            file.delete();
        }
    }

    /** Tells whether the source signature in {@code row} passes the frequency and density filters. */
    private boolean isEligibleRow(int row, int minDocFrequency) {
        if (this.srcIndexList.getIRSignature(row).getDocFrequency() < minDocFrequency) {
            return false;
        }
        return this.cooccurMatrix == null || this.cooccurMatrix.getNonZeroNumInRow(row) >= MIN_COOCCUR_ROW_CELLS;
    }

    /** Derives the array-vs-hash cut-off as columns / average non-zero cells per row / 8. */
    private int computeDocThreshold(IntSparseMatrix doctermMatrix) {
        return (int) ((double) doctermMatrix.columns() / this.computeAvgTermNum(doctermMatrix) / 8.0);
    }

    /**
     * Estimates the average number of non-zero cells per row from up to
     * {@value #AVG_TERM_SAMPLE_SIZE} randomly drawn rows (drawn with replacement).
     */
    private double computeAvgTermNum(IntSparseMatrix doctermMatrix) {
        Random random = new Random();
        int sampleSize = Math.min(AVG_TERM_SAMPLE_SIZE, doctermMatrix.rows());
        double sum = 0.0;
        for (int i = 0; i < sampleSize; i++) {
            int index = random.nextInt(doctermMatrix.rows());
            sum += (double) doctermMatrix.getNonZeroNumInRow(index);
        }
        return sum / (double) sampleSize;
    }

    /**
     * Returns the count a destination signature must reach to survive trimming:
     * the mean count per non-zero entry (or 0.5 when mean trimming is off),
     * raised to {@code rowTotal * minimum probability} if that is larger.
     */
    private double computeTrimThreshold(double rowTotal, int nonZeroNum) {
        double threshold = this.useMeanTrim ? rowTotal / (double) nonZeroNum : NO_TRIM_THRESHOLD;
        double floor = rowTotal * this.getMinInitProb();
        if (threshold < floor) {
            threshold = floor;
        }
        return threshold;
    }

    /** Sets each token's weight to its frequency divided by {@code total}. */
    private static void normalizeWeights(ArrayList<Token> tokens, double total) {
        for (int i = 0; i < tokens.size(); i++) {
            Token curToken = tokens.get(i);
            curToken.setWeight((double) curToken.getFrequency() / total);
        }
    }

    /** Builds the trimmed, normalised distribution from one row of the co-occurrence matrix. */
    private ArrayList<Token> computeDistributionByCooccurMatrix(int signatureIndex) {
        int[] arrIndex = this.cooccurMatrix.getNonZeroColumnsInRow(signatureIndex);
        int[] arrFreq = this.cooccurMatrix.getNonZeroIntScoresInRow(signatureIndex);
        double rowTotal = 0.0;
        for (int i = 0; i < arrFreq.length; i++) {
            rowTotal += (double) arrFreq[i];
        }
        double threshold = this.computeTrimThreshold(rowTotal, arrFreq.length);

        // Keep the entries at or above the threshold and re-total the survivors.
        double keptTotal = 0.0;
        ArrayList<Token> kept = new ArrayList<Token>();
        for (int i = 0; i < arrFreq.length; i++) {
            if ((double) arrFreq[i] >= threshold) {
                kept.add(new Token(arrIndex[i], arrFreq[i]));
                keptTotal += (double) arrFreq[i];
            }
        }
        normalizeWeights(kept, keptTotal);
        return kept;
    }

    /**
     * Builds the trimmed, normalised distribution by accumulating the given
     * documents' destination signatures in the dense scratch array.
     */
    private ArrayList<Token> computeDistributionByArray(int[] arrDoc) {
        if (this.buf == null) {
            this.buf = new int[this.totalDestSignatureNum];
        }
        MathUtil.initArray(this.buf, 0);
        for (int j = 0; j < arrDoc.length; j++) {
            int[] arrIndex = this.destDocSignatureMatrix.getNonZeroColumnsInRow(arrDoc[j]);
            int[] arrFreq = this.useDocFrequency ? null : this.destDocSignatureMatrix.getNonZeroIntScoresInRow(arrDoc[j]);
            for (int k = 0; k < arrIndex.length; k++) {
                this.buf[arrIndex[k]] += this.useDocFrequency ? 1 : arrFreq[k];
            }
        }

        double rowTotal = 0.0;
        int nonZeroNum = 0;
        for (int i = 0; i < this.buf.length; i++) {
            if (this.buf[i] > 0) {
                ++nonZeroNum;
                rowTotal += (double) this.buf[i];
            }
        }
        double threshold = this.computeTrimThreshold(rowTotal, nonZeroNum);

        double keptTotal = 0.0;
        ArrayList<Token> kept = new ArrayList<Token>();
        for (int i = 0; i < this.buf.length; i++) {
            if ((double) this.buf[i] >= threshold) {
                kept.add(new Token(i, this.buf[i]));
                keptTotal += (double) this.buf[i];
            }
        }
        normalizeWeights(kept, keptTotal);
        return kept;
    }

    /**
     * Builds the trimmed, normalised distribution by accumulating the given
     * documents' destination signatures in a hash map.
     */
    private ArrayList<Token> computeDistributionByHash(int[] arrDoc) {
        ArrayList<Token> tokenList = this.countTokensByHashMap(arrDoc);
        double rowTotal = 0.0;
        for (int i = 0; i < tokenList.size(); i++) {
            rowTotal += (double) tokenList.get(i).getFrequency();
        }

        // Without mean trimming, and with a probability floor that cannot
        // exceed a count of one, nothing would be removed: skip the filter.
        boolean needsTrim = this.useMeanTrim || rowTotal * this.getMinInitProb() > 1.0;
        if (!needsTrim) {
            normalizeWeights(tokenList, rowTotal);
            return tokenList;
        }

        double threshold = this.computeTrimThreshold(rowTotal, tokenList.size());
        ArrayList<Token> kept = new ArrayList<Token>();
        double keptTotal = 0.0;
        for (int i = 0; i < tokenList.size(); i++) {
            Token curToken = tokenList.get(i);
            if ((double) curToken.getFrequency() >= threshold) {
                kept.add(curToken);
                keptTotal += (double) curToken.getFrequency();
            }
        }
        tokenList.clear();
        normalizeWeights(kept, keptTotal);
        return kept;
    }

    /**
     * Sums, per destination signature, either the number of given documents
     * containing it or its stored scores in them, and returns one token per
     * signature carrying that sum as its frequency.
     */
    private ArrayList<Token> countTokensByHashMap(int[] arrDoc) {
        HashMap<Token, Counter> hash = new HashMap<Token, Counter>();
        for (int j = 0; j < arrDoc.length; j++) {
            int doc = arrDoc[j];
            int termNum = this.destDocSignatureMatrix.getNonZeroNumInRow(doc);
            if (termNum == 0) {
                continue;
            }
            int[] arrTerm = this.destDocSignatureMatrix.getNonZeroColumnsInRow(doc);
            int[] arrFreq = this.useDocFrequency ? null : this.destDocSignatureMatrix.getNonZeroIntScoresInRow(doc);
            for (int i = 0; i < termNum; i++) {
                Token curToken = new Token(arrTerm[i], this.useDocFrequency ? 1 : arrFreq[i]);
                Counter counter = hash.get(curToken);
                if (counter == null) {
                    hash.put(curToken, new Counter(curToken.getFrequency()));
                } else {
                    counter.addCount(curToken.getFrequency());
                }
            }
        }

        ArrayList<Token> result = new ArrayList<Token>(hash.size());
        for (Token curToken : hash.keySet()) {
            // Look the counter up before overwriting the key token's frequency.
            Counter counter = hash.get(curToken);
            curToken.setFrequency(counter.getCount());
            result.add(curToken);
        }
        hash.clear();
        return result;
    }

    /** @return the minimum initial probability, i.e. the configured probability threshold */
    private double getMinInitProb() {
        return this.probThreshold;
    }

    /**
     * Refines the token weights in place with a fixed number of EM iterations
     * against a background distribution built from the destination index list
     * (document frequency or frequency, depending on the document-frequency
     * option), normalised over the tokens in the list.
     *
     * @param list2 tokens with initial weights and frequencies
     * @return the same list with updated weights
     */
    private ArrayList<Token> emTopicSignatureModel(ArrayList<Token> list2) {
        int termNum = list2.size();
        double[] arrProb = new double[termNum];
        double[] arrCollectionProb = new double[termNum];

        double weightSum = 0.0;
        for (int i = 0; i < termNum; i++) {
            Token curToken = list2.get(i);
            arrCollectionProb[i] = this.useDocFrequency
                    ? (double) this.destIndexList.getIRSignature(curToken.getIndex()).getDocFrequency()
                    : (double) this.destIndexList.getIRSignature(curToken.getIndex()).getFrequency();
            weightSum += arrCollectionProb[i];
        }
        for (int i = 0; i < termNum; i++) {
            arrCollectionProb[i] = arrCollectionProb[i] / weightSum;
        }

        for (int iter = 0; iter < this.iterationNum; iter++) {
            // E-step: share of each token's frequency attributed to the topic model.
            weightSum = 0.0;
            for (int j = 0; j < termNum; j++) {
                Token curToken = list2.get(j);
                double topicPart = (1.0 - this.bkgCoeffi) * curToken.getWeight();
                arrProb[j] = topicPart / (topicPart + this.bkgCoeffi * arrCollectionProb[j]) * (double) curToken.getFrequency();
                weightSum += arrProb[j];
            }
            // M-step: renormalise the attributed counts into the new weights.
            for (int j = 0; j < termNum; j++) {
                list2.get(j).setWeight(arrProb[j] / weightSum);
            }
        }
        return list2;
    }
}

