/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.types;

import cc.mallet.types.Alphabet;
import cc.mallet.types.AugmentableFeatureVector;
import cc.mallet.types.FeatureConjunction;
import cc.mallet.types.FeatureSelection;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.FeatureVectorSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.RankedFeatureVector;
import cc.mallet.util.MalletLogger;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.logging.Logger;

/**
 * Chooses a list of {@link FeatureConjunction}s from a ranking of the features of an
 * {@link InstanceList} and can afterwards add those conjunctions to the feature vectors
 * of other instance lists.
 *
 * <p>The chosen conjunctions are held in {@link #fcl}. Custom serialization stores only
 * {@code beam1}, {@code beam2} and {@code fcl}; {@code ranker} and {@code perLabelRanker}
 * are not written to the stream (and are not assigned anywhere within this class).
 */
public class FeatureInducer
implements Serializable {
    private static final Logger logger = MalletLogger.getLogger(FeatureInducer.class.getName());
    /** Upper bound on the number of ranked entries echoed to the log per ranking. */
    private static final int MAX_RANKS_TO_LOG = 200;
    // Not read anywhere in this class; kept because it is package-visible.
    static boolean addMaskedFeatures = false;
    /** Instance lists smaller than this are not used for induction (see the constructor). */
    static int minTrainingListSize = 20;
    RankedFeatureVector.Factory ranker;
    RankedFeatureVector.PerLabelFactory perLabelRanker;
    int beam1 = 300;
    int beam2 = 1000;
    /** Conjunctions selected by the constructor; empty when induction was skipped. */
    FeatureConjunction.List fcl = new FeatureConjunction.List();
    private static final long serialVersionUID = 1L;
    /** Format version written first by {@link #writeObject} and checked by {@link #readObject}. */
    private static final int CURRENT_SERIAL_VERSION = 0;

    /**
     * Ranks the features of {@code ilist}, builds candidate conjunctions from the
     * top-ranked ones, re-ranks, and records up to {@code numNewFeatures} winners in
     * {@link #fcl}.
     *
     * <p>Steps, as implemented below:
     * <ol>
     *   <li>If {@code ilist} holds fewer than {@code minTrainingListSize} instances, log
     *       and return, leaving {@code fcl} empty.</li>
     *   <li>Rank the features of {@code ilist}; the top {@code max(beam1, beam2)} entries
     *       with a non-zero value go into one selection, the top {@code min(beam1, beam2)}
     *       into another.</li>
     *   <li>Rebuild every instance's {@link FeatureVector} against a clone of the data
     *       alphabet using those two selections, then rank the rebuilt list.</li>
     *   <li>Walk the second ranking in rank order and add entries to {@code fcl} until a
     *       value below zero is met or {@code numNewFeatures} entries were added.</li>
     * </ol>
     *
     * @param ranker         factory producing the feature rankings
     * @param ilist          instances to induce from; each instance's data is cast to
     *                       {@link FeatureVector}
     * @param numNewFeatures maximum number of entries to add to {@code fcl}
     * @param beam1          one beam width (the smaller of the two bounds the first selection)
     * @param beam2          the other beam width (the larger of the two bounds the second selection)
     */
    public FeatureInducer(RankedFeatureVector.Factory ranker, InstanceList ilist, int numNewFeatures, int beam1, int beam2) {
        this.beam1 = beam1;
        this.beam2 = beam2;
        if (ilist.size() < minTrainingListSize) {
            // The guard compares the number of instances, so the message says so.
            logger.info("FeatureInducer not inducing from fewer than " + minTrainingListSize + " instances.");
            return;
        }
        // Work on a clone so that candidate conjunctions do not grow the caller's alphabet.
        Alphabet tmpDV = (Alphabet)ilist.getDataAlphabet().clone();
        FeatureSelection featuresSelected = ilist.getFeatureSelection();
        InstanceList tmpilist = new InstanceList(tmpDV, ilist.getTargetAlphabet());
        RankedFeatureVector initialRanking = ranker.newRankedFeatureVector(ilist);
        logger.info("Rank values before this round of conjunction-building");
        logTopRanks("Rank=", initialRanking);

        FeatureSelection fsMin = new FeatureSelection(tmpDV);
        FeatureSelection fsMax = new FeatureSelection(tmpDV);
        int minBeam = Math.min(beam1, beam2);
        int maxBeam = Math.max(beam1, beam2);
        logger.info("Using minBeam=" + minBeam + " maxBeam=" + maxBeam);
        int maxRank = Math.min(maxBeam, initialRanking.numLocations());
        // Stop at the first zero-valued rank: everything from there on is left out of both selections.
        for (int rank = 0; rank < maxRank && initialRanking.getValueAtRank(rank) != 0.0; ++rank) {
            int featureIndex = initialRanking.getIndexAtRank(rank);
            fsMax.add(featureIndex);
            if (rank < minBeam) {
                fsMin.add(featureIndex);
            }
        }
        double minGain = 0.0;
        // Drop the reference so the first ranking can be collected before the second is built.
        initialRanking = null;

        for (int i = 0; i < ilist.size(); ++i) {
            Instance inst = (Instance)ilist.get(i);
            FeatureVector fv = (FeatureVector)inst.getData();
            // NOTE(review): this FeatureVector constructor presumably adds fsMin x fsMax
            // conjunctions to tmpDV -- confirm against FeatureVector.
            tmpilist.add(new Instance(new FeatureVector(fv, tmpDV, fsMin, fsMax), inst.getTarget(), inst.getName(), inst.getSource()), ilist.getInstanceWeight(i));
        }
        logger.info("Calculating gradient gain of conjunctions, vocab size = " + tmpDV.size());
        RankedFeatureVector gg2 = ranker.newRankedFeatureVector(tmpilist);
        logTopRanks("Conjunction Rank=", gg2);

        int numFeaturesAdded = 0;
        Alphabet origV = ilist.getDataAlphabet();
        int origVSize = origV.size();
        candidates: for (int i = 0; i < gg2.numLocations(); ++i) {
            double gain = gg2.getValueAtRank(i);
            if (gain < minGain) {
                logger.info("Stopping feature induction: gain[" + i + "]=" + gain + ", minGain=" + minGain);
                break;
            }
            // An entry is skipped when its value equals the value at the previous rank.
            boolean differsFromPrevious = i == 0 || gg2.getValueAtRank(i - 1) != gain;
            if (gg2.getIndexAtRank(i) >= origVSize) {
                // Index beyond the original alphabet: an entry that exists only in the clone.
                String s = (String)gg2.getObjectAtRank(i);
                int[] featureIndices = FeatureConjunction.getFeatureIndices(origV, s);
                if (FeatureConjunction.isValidConjunction(featureIndices) && differsFromPrevious) {
                    double newFeatureValue = gain;
                    for (int j = 0; j < featureIndices.length; ++j) {
                        // Skip a conjunction that does not strictly beat each of its constituents;
                        // this also bypasses the numNewFeatures check for this iteration.
                        if (gg2.value(featureIndices[j]) >= newFeatureValue) continue candidates;
                    }
                    this.fcl.add(new FeatureConjunction(origV, featureIndices));
                    logger.info("Added feature c " + numFeaturesAdded + " " + newFeatureValue + ' ' + s);
                    ++numFeaturesAdded;
                }
            } else if (featuresSelected != null) {
                // Entry from the original alphabet: only considered when ilist has a
                // feature selection that does not already contain it.
                int index = gg2.getIndexAtRank(i);
                if (!featuresSelected.contains(index) && differsFromPrevious) {
                    this.fcl.add(new FeatureConjunction(origV, new int[]{index}));
                    logger.info("Added feature a " + numFeaturesAdded + " " + gg2.getValueAtRank(i) + ' ' + gg2.getObjectAtRank(i));
                    ++numFeaturesAdded;
                }
            }
            if (numFeaturesAdded >= numNewFeatures) {
                logger.info("Stopping feature induction: numFeaturesAdded=" + numFeaturesAdded);
                break;
            }
        }
        logger.info("Finished adding features");
    }

    /**
     * Same as the five-argument constructor with both beam widths set to
     * {@code numNewFeatures}.
     *
     * @param ranker         factory producing the feature rankings
     * @param ilist          instances to induce from
     * @param numNewFeatures maximum number of entries to add, also used as both beams
     */
    public FeatureInducer(RankedFeatureVector.Factory ranker, InstanceList ilist, int numNewFeatures) {
        this(ranker, ilist, numNewFeatures, numNewFeatures, numNewFeatures);
    }

    /**
     * Applies the stored conjunction list to every instance of {@code ilist} by calling
     * {@code fcl.addTo(vector, 1.0, featureSelection)} on each
     * {@link AugmentableFeatureVector}, or on each element of a
     * {@link FeatureVectorSequence}. Does nothing when no conjunctions were selected.
     *
     * @param ilist                instances to augment; must have no per-label feature
     *                             selection (checked with {@code assert})
     * @param withFeatureShrinkage must be {@code false} (checked with {@code assert})
     * @param addPerClassFeatures  must be {@code false} (checked with {@code assert})
     * @throws IllegalArgumentException if an instance's data is {@code null} or of any
     *                                  other type
     * @throws ClassCastException       if an element of a {@link FeatureVectorSequence}
     *                                  is not an {@link AugmentableFeatureVector}
     */
    public void induceFeaturesFor(InstanceList ilist, boolean withFeatureShrinkage, boolean addPerClassFeatures) {
        assert (!addPerClassFeatures);
        assert (!withFeatureShrinkage);
        FeatureSelection fs = ilist.getFeatureSelection();
        assert (ilist.getPerLabelFeatureSelection() == null);
        if (this.fcl.size() == 0) {
            return;
        }
        for (int i = 0; i < ilist.size(); ++i) {
            Instance inst = (Instance)ilist.get(i);
            Object data = inst.getData();
            if (data instanceof AugmentableFeatureVector) {
                this.fcl.addTo((AugmentableFeatureVector)data, 1.0, fs);
            } else if (data instanceof FeatureVectorSequence) {
                FeatureVectorSequence fvs = (FeatureVectorSequence)data;
                for (int j = 0; j < fvs.size(); ++j) {
                    this.fcl.addTo((AugmentableFeatureVector)fvs.get(j), 1.0, fs);
                }
            } else {
                // Report null data with the same exception type instead of failing on getClass().
                String typeName = data == null ? "null" : data.getClass().getName();
                throw new IllegalArgumentException("Unsupported instance data type " + typeName);
            }
        }
    }

    /**
     * Logs up to {@link #MAX_RANKS_TO_LOG} leading entries of {@code ranked}, one log
     * record per rank, each formatted as {@code prefix + rank + ' ' + value + ' ' + object}.
     */
    private static void logTopRanks(String prefix, RankedFeatureVector ranked) {
        int limit = Math.min(MAX_RANKS_TO_LOG, ranked.numLocations());
        for (int rank = 0; rank < limit; ++rank) {
            logger.info(prefix + rank + ' ' + Double.toString(ranked.getValueAtRank(rank)) + ' ' + ranked.getObjectAtRank(rank).toString());
        }
    }

    /**
     * Writes the format version followed by {@code beam1}, {@code beam2} and {@code fcl}.
     * No other field is written.
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.writeInt(CURRENT_SERIAL_VERSION);
        out.writeInt(this.beam1);
        out.writeInt(this.beam2);
        out.writeObject(this.fcl);
    }

    /**
     * Reads the fields in the order {@link #writeObject} wrote them.
     *
     * @throws IOException if the stream carries a format version other than
     *                     {@link #CURRENT_SERIAL_VERSION}
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        int version = in.readInt();
        if (version != CURRENT_SERIAL_VERSION) {
            // Only one layout exists; any other tag means the remaining bytes cannot be trusted.
            throw new IOException("Unsupported FeatureInducer serial version " + version + " (expected " + CURRENT_SERIAL_VERSION + ")");
        }
        this.beam1 = in.readInt();
        this.beam2 = in.readInt();
        this.fcl = (FeatureConjunction.List)in.readObject();
    }
}

