package scj.input.datagenerator;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.TreeSet;

import org.apache.commons.math3.distribution.AbstractIntegerDistribution;
import org.apache.commons.math3.distribution.PoissonDistribution;
import org.apache.commons.math3.distribution.UniformIntegerDistribution;
import org.apache.commons.math3.distribution.ZipfDistribution;
import org.apache.commons.math3.random.JDKRandomGenerator;
import org.apache.commons.math3.random.RandomGenerator;

import scj.input.DataTuple;

/**
 * Generates synthetic relations of set-valued tuples.
 *
 * <p>Both the cardinality of every set and the values it contains are drawn from a configurable
 * {@link Distribution}: uniform, Zipf, Poisson or (for cardinalities only) constant.
 *
 * @author akunkel
 */
public class DataGenerator {

    /** Distributions that can drive set cardinalities and set element values. */
    public enum Distribution {
        Uniform, Zipf, Poisson, Constant
    }

    /** Controls whether progress and summary output is printed while generating. */
    public enum LogLevel {
        LOG, SILENT
    }

    protected LogLevel logLevel = LogLevel.LOG;

    /** ID given to the first generated tuple; following tuples count up from here. */
    private static final int TUPLE_ID_START = 0;

    /** Random source shared by all distributions created by this generator. */
    private final RandomGenerator r;

    public DataGenerator() {
        r = new JDKRandomGenerator();
    }

    /**
     * Command line entry point: generates a relation silently and hands it to {@link DataOutput}.
     *
     * @param args {@code <tupleNumber> <setSizeMax> <setSizeDistribution> <setElemMax>
     *             <setElemDistribution>}
     */
    public static void main(String[] args) {
        if (args.length < 5) {
            System.err.println("Usage: DataGenerator <tupleNumber> <setSizeMax> <setSizeDistribution>"
                    + " <setElemMax> <setElemDistribution>");
            System.err.println("Distributions: uniform, zipf, poisson, constant");
            System.exit(1);
            return;
        }

        DataGenerator gen = new DataGenerator();
        gen.setLogLevel(LogLevel.SILENT);
        DataOutput output = new DataOutput();

        int tupleNumber = Integer.parseInt(args[0]);
        int setSizeMax = Integer.parseInt(args[1]);
        Distribution setSizeDistribution = distributionFromString(args[2]);
        int setElemMax = Integer.parseInt(args[3]);
        Distribution setElemDistribution = distributionFromString(args[4]);

        Set<DataTuple> sets = gen.getDataTuples(
                tupleNumber, setSizeDistribution, setSizeMax, setElemDistribution, setElemMax);
        output.output(sets);
    }

    /**
     * Maps a command line name to a {@link Distribution}, ignoring case.
     *
     * @param string one of {@code zipf}, {@code poisson}, {@code uniform}, {@code constant}
     * @return the matching distribution, never {@code null}
     * @throws IllegalArgumentException if the name is {@code null} or not recognised
     */
    private static Distribution distributionFromString(String string) {
        if ("zipf".equalsIgnoreCase(string)) {
            return Distribution.Zipf;
        }
        if ("poisson".equalsIgnoreCase(string)) {
            return Distribution.Poisson;
        }
        if ("uniform".equalsIgnoreCase(string)) {
            return Distribution.Uniform;
        }
        if ("constant".equalsIgnoreCase(string)) {
            return Distribution.Constant;
        }
        // Fail here with a clear message instead of returning null and hitting an NPE later.
        throw new IllegalArgumentException("Unknown distribution: " + string
                + " (expected uniform, zipf, poisson or constant)");
    }

    private void setLogLevel(LogLevel logLevel) {
        this.logLevel = logLevel;
    }

    /**
     * Generates a shuffled relation of set-valued tuples.
     *
     * @param tupleNumber         number of tuples to generate
     * @param setSizeDistribution distribution of the set cardinalities
     * @param setSizeMax          upper bound (Uniform, Zipf), mean (Poisson) or exact value
     *                            (Constant) of the set cardinality
     * @param setElemDistribution distribution of the set element values; {@code Constant} is
     *                            treated like {@code Uniform}
     * @param setElemMax          upper bound (Uniform, Zipf) or mean (Poisson) of the element values
     * @return the generated tuples in shuffled order
     * @see #fill(int, Distribution, int, Distribution, int)
     */
    public Set<DataTuple> getDataTuples(
            int tupleNumber,
            Distribution setSizeDistribution,
            int setSizeMax,
            Distribution setElemDistribution,
            int setElemMax) {
        return fill(tupleNumber, setSizeDistribution, setSizeMax, setElemDistribution, setElemMax);
    }

    /**
     * Builds {@code tupleNumber} tuples, each holding a sorted array of distinct element values.
     *
     * <p>For Poisson cardinalities one is added to every sample so that no set is empty. When the
     * element distribution has a bounded domain (everything except Poisson draws from
     * {@code 1..setElemMax}) the cardinality is capped at {@code setElemMax}, because a larger set
     * of distinct values cannot exist.
     *
     * @param tupleNumber         number of tuples to generate
     * @param setSizeDistribution distribution of the set cardinalities
     * @param setSizeMax          upper bound (Uniform, Zipf), mean (Poisson) or exact value
     *                            (Constant) of the set cardinality
     * @param setElemDistribution distribution of the set element values; {@code Constant} is
     *                            treated like {@code Uniform}
     * @param setElemMax          upper bound (Uniform, Zipf) or mean (Poisson) of the element values
     * @return the generated tuples in shuffled order
     * @throws NullPointerException     if a distribution is {@code null}
     * @throws IllegalArgumentException if {@code tupleNumber} or {@code setSizeMax} is negative, or
     *                                  a distribution rejects its parameter
     * @throws IllegalStateException    if generating a tuple fails unexpectedly
     */
    protected Set<DataTuple> fill(
            int tupleNumber,
            Distribution setSizeDistribution,
            int setSizeMax,
            Distribution setElemDistribution,
            int setElemMax) {

        if (setSizeDistribution == null || setElemDistribution == null) {
            throw new NullPointerException("distributions must not be null");
        }
        if (tupleNumber < 0) {
            throw new IllegalArgumentException("tupleNumber must not be negative: " + tupleNumber);
        }
        if (setSizeMax < 0) {
            throw new IllegalArgumentException("setSizeMax must not be negative: " + setSizeMax);
        }

        final boolean constantSize = setSizeDistribution == Distribution.Constant;
        final AbstractIntegerDistribution setcardGen =
                constantSize ? null : createDistribution(setSizeDistribution, setSizeMax);
        final AbstractIntegerDistribution setelemGen =
                createDistribution(setElemDistribution, setElemMax);
        // Only Poisson has an unbounded domain; all other choices draw from 1..setElemMax.
        final boolean boundedElements = setElemDistribution != Distribution.Poisson;

        final ArrayList<DataTuple> result = new ArrayList<DataTuple>(tupleNumber);
        final TreeSet<Integer> generated = new TreeSet<Integer>();
        int lastPercent = -1;

        try {
            for (int produced = 0; produced < tupleNumber; produced++) {

                if (logLevel != LogLevel.SILENT) {
                    // Derive the percentage directly; a precomputed step of tupleNumber / 100
                    // is zero for fewer than 100 tuples and would divide by zero.
                    int percent = (int) (produced * 100L / tupleNumber);
                    if (percent != lastPercent) {
                        System.err.print("\t" + percent + "%");
                        lastPercent = percent;
                    }
                }

                int setSize = constantSize ? setSizeMax : setcardGen.sample();
                if (setSizeDistribution == Distribution.Poisson) {
                    setSize++; // make sure there is no zero set size
                }
                if (boundedElements) {
                    // More distinct values than the domain holds would loop forever below.
                    setSize = Math.min(setSize, setElemMax);
                }

                // Draw until enough distinct values are collected; the TreeSet keeps them sorted.
                generated.clear();
                while (generated.size() < setSize) {
                    generated.add(setelemGen.sample());
                }

                int[] setValues = new int[generated.size()];
                int index = 0;
                for (Integer value : generated) {
                    setValues[index] = value;
                    index++;
                }

                result.add(new DataTuple(TUPLE_ID_START + produced, setValues));
            }
        } catch (Exception e) {
            // Surface the failure instead of silently returning a truncated relation.
            throw new IllegalStateException("Data generation failed after " + result.size()
                    + " of " + tupleNumber + " tuples", e);
        }
        Collections.shuffle(result);

        if (logLevel != LogLevel.SILENT) {
            System.out.println("## Relation size:" + tupleNumber + ", set card. distribution:" + setSizeDistribution +
                ", set card. max:" + setSizeMax + ", set elem. distribution:" + setElemDistribution +
                ", set elem. max:" + setElemMax);
        }
        return new LinkedHashSet<DataTuple>(result);
    }

    /**
     * Creates the sampler for the given distribution kind, backed by this generator's random source.
     *
     * @param distribution the kind of distribution; anything other than Zipf or Poisson yields a
     *                     uniform distribution over {@code 1..max}
     * @param max          upper bound (Uniform, Zipf) or mean (Poisson)
     * @return a new sampler
     */
    private AbstractIntegerDistribution createDistribution(Distribution distribution, int max) {
        switch (distribution) {
            case Zipf:
                return new ZipfDistribution(r, max, 1); // zipf counts from 1
            case Poisson:
                return new PoissonDistribution(r, max,
                        PoissonDistribution.DEFAULT_EPSILON, PoissonDistribution.DEFAULT_MAX_ITERATIONS);
            case Uniform:
            default:
                return new UniformIntegerDistribution(r, 1, max); // uniform counts from 1
        }
    }
}
