/*
 * Decompiled with CFR 0.152.
 */
package dragon.nlp.tool.xtract;

import dragon.nlp.tool.xtract.WordPairStat;
import dragon.nlp.tool.xtract.WordPairStatList;
import dragon.util.FileUtil;
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.Date;

/**
 * Filters the word pairs stored in {@code <workDir>/pairstat.list}.
 *
 * <p>A pair is kept when (1) its total frequency is at least {@code minStrength}
 * standard deviations above the mean pair frequency of its first or second word,
 * (2) the variance of its per-position frequencies is at least {@code minSpread},
 * and (3) at least one relative position has a z-score of at least
 * {@code minZScore}. For kept pairs, the frequencies of positions that do not
 * reach {@code minZScore} are reset to zero via {@code addFrequency}.
 */
public class WordPairFilter {
    private final String workDir;
    private final int maxSpan;
    private final double minStrength;
    private final double minSpread;
    private final double minZScore;

    /**
     * @param workDir     directory containing {@code pairstat.list} and {@code wordkey.list}
     * @param maxSpan     largest relative distance (in either direction) inspected per pair
     * @param minStrength minimum strength (z-score of the pair's total frequency) to keep a pair
     * @param minSpread   minimum variance of the per-position frequencies to keep a pair
     * @param minZScore   minimum z-score a single position needs to be retained
     */
    public WordPairFilter(String workDir, int maxSpan, double minStrength, double minSpread, double minZScore) {
        this.minStrength = minStrength;
        this.minSpread = minSpread;
        this.minZScore = minZScore;
        this.workDir = workDir;
        this.maxSpan = maxSpan;
    }

    /**
     * Loads the pair statistics, computes per-word statistics and applies the filters.
     *
     * @return the pairs that passed every filter, in list order (never {@code null})
     */
    public WordPairStat[] execute() {
        WordPairStatList pairList = new WordPairStatList(this.workDir + "/pairstat.list", this.maxSpan, false);
        int wordNum = this.readWordNum();
        double[][] arrWordStat = this.computeWordStat(wordNum, pairList);
        return this.filterWordPair(arrWordStat, pairList);
    }

    /**
     * Computes, for every word index, statistics over the pairs the word takes part in.
     *
     * @return a {@code wordNum x 3} table: column 0 is the mean pair frequency,
     *         column 1 the standard deviation of the pair frequency, column 2 the
     *         number of pairs containing the word. Rows of words without pairs stay zero.
     */
    private double[][] computeWordStat(int wordNum, WordPairStatList pairList) {
        System.out.println(new Date().toString() + " Computing Word Stat...");
        // Java zero-initialises numeric arrays, so no explicit reset is needed.
        double[][] arrWordStat = new double[wordNum][3];

        for (int i = 0; i < pairList.size(); i++) {
            WordPairStat curPair = pairList.get(i);
            // Widen to double BEFORE squaring; squaring in integer arithmetic overflows for large counts.
            double total = curPair.getTotalFrequency();
            double totalSquared = total * total;
            accumulate(arrWordStat[curPair.getFirstWord()], total, totalSquared);
            accumulate(arrWordStat[curPair.getSecondWord()], total, totalSquared);
        }

        for (int i = 0; i < wordNum; i++) {
            double[] stat = arrWordStat[i];
            if (stat[2] > 0.0) {
                stat[0] = stat[0] / stat[2];
                // E[x^2] - E[x]^2 can come out marginally negative through rounding;
                // clamp so the standard deviation is 0 instead of NaN.
                double variance = stat[1] / stat[2] - stat[0] * stat[0];
                stat[1] = Math.sqrt(Math.max(0.0, variance));
            }
        }
        return arrWordStat;
    }

    /** Adds one pair observation (sum, sum of squares, count) to a word's running totals. */
    private static void accumulate(double[] stat, double total, double totalSquared) {
        stat[0] += total;
        stat[1] += totalSquared;
        stat[2] += 1.0;
    }

    /**
     * Applies the strength filter to every pair and then the spread / z-score filter
     * to those that pass.
     */
    private WordPairStat[] filterWordPair(double[][] arrWordStat, WordPairStatList pairList) {
        ArrayList<WordPairStat> selectedList = new ArrayList<WordPairStat>();
        for (int i = 0; i < pairList.size(); i++) {
            if (i % 10000 == 0) {
                System.out.println(new Date().toString() + " processed: " + i);
            }
            WordPairStat curPair = pairList.get(i);
            double total = curPair.getTotalFrequency();

            // Strength relative to the first word; fall back to the second word
            // only when the first one does not reach the threshold.
            double strength = computeStrength(arrWordStat[curPair.getFirstWord()], total);
            if (strength < this.minStrength) {
                strength = computeStrength(arrWordStat[curPair.getSecondWord()], total);
            }
            if (strength >= this.minStrength) {
                WordPairStat filteredPair = this.filterWordPair(curPair);
                if (filteredPair != null) {
                    selectedList.add(filteredPair);
                }
            }
        }
        return selectedList.toArray(new WordPairStat[selectedList.size()]);
    }

    /**
     * Returns how many standard deviations {@code total} lies above the word's mean
     * pair frequency, or 0 when the word's standard deviation is zero.
     */
    private static double computeStrength(double[] wordStat, double total) {
        if (wordStat[1] == 0.0) {
            return 0.0;
        }
        return (total - wordStat[0]) / wordStat[1];
    }

    /**
     * Applies the spread and per-position z-score filters to a single pair.
     *
     * <p>Positions {@code -maxSpan..-1} and {@code 1..maxSpan} are examined. When the
     * pair survives the spread test, every position whose z-score is below
     * {@code minZScore} has its frequency reset to zero on {@code pair}.
     *
     * @return {@code pair} if at least one position reaches {@code minZScore};
     *         {@code null} otherwise
     */
    private WordPairStat filterWordPair(WordPairStat pair) {
        double sum = 0.0;
        double squareSum = 0.0;
        for (int i = 1; i <= this.maxSpan; i++) {
            int forward = pair.getFrequency(i);
            int backward = pair.getFrequency(-i);
            sum += (double) forward + (double) backward;
            // Square in double precision to avoid int overflow on large frequencies.
            squareSum += (double) forward * forward + (double) backward * backward;
        }

        double positions = 2.0 * (double) this.maxSpan;
        double mean = sum / positions;
        double variance = squareSum / positions - mean * mean;
        if (variance < this.minSpread) {
            return null;
        }
        // With a non-positive (or NaN) variance every z-score would be NaN or
        // infinite-by-zero and no position could qualify; reject without dividing.
        if (!(variance > 0.0)) {
            return null;
        }

        double stdDev = Math.sqrt(variance);
        boolean found = false;
        for (int i = 1; i <= this.maxSpan; i++) {
            found |= keepOrClear(pair, i, mean, stdDev);
            found |= keepOrClear(pair, -i, mean, stdDev);
        }
        return found ? pair : null;
    }

    /**
     * Keeps the frequency at {@code position} when its z-score reaches
     * {@code minZScore}; otherwise resets it to zero.
     *
     * @return {@code true} when the position was kept
     */
    private boolean keepOrClear(WordPairStat pair, int position, double mean, double stdDev) {
        int freq = pair.getFrequency(position);
        if (((double) freq - mean) / stdDev >= this.minZScore) {
            return true;
        }
        pair.addFrequency(position, -freq);
        return false;
    }

    /**
     * Reads the word count from the first line of {@code <workDir>/wordkey.list}.
     *
     * @return the parsed count, or 0 when the file cannot be read or parsed
     *         (the failure is printed to standard error)
     */
    private int readWordNum() {
        BufferedReader br = null;
        try {
            br = FileUtil.getTextReader(this.workDir + "/wordkey.list");
            return Integer.parseInt(br.readLine());
        }
        catch (Exception e) {
            e.printStackTrace();
            return 0;
        }
        finally {
            // Close on every path; previously a failed read/parse leaked the reader.
            if (br != null) {
                try {
                    br.close();
                }
                catch (Exception ignored) {
                    // Nothing useful can be done if close itself fails.
                }
            }
        }
    }
}

