package scj.analyze.statistics.source.counter;

import it.unimi.dsi.fastutil.ints.Int2IntMap;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;

import java.text.DecimalFormat;
import java.util.Iterator;
import java.util.Map.Entry;

import scj.input.DataTuple;


/**
 * Counter that describes how the set elements are distributed over the
 * individual sets.
 *
 * <p>The indicator reported is: "the share of all set elements (occurrences)
 * that belong to values contained in at most x% of the sets". How a percentage
 * x is turned into an absolute occurrence threshold is left to subclasses via
 * {@link #getThreshold(double)}.
 *
 * <p>Usage: call {@link #count(DataTuple)} for every tuple, then
 * {@link #aggregate()} once, then {@link #output()}.
 */
public abstract class InfrequentItemCounter implements Counter {

	// Property.
	/**
	 * Default percentage steps. This class never reads the field itself; it
	 * always asks {@link #getPercentages()}.
	 */
	protected double[] percentages = new double[]{.1, .2, .3, .4, .5, .6, .7, .8, .9};
	
	// Simple stats.
	/** Sum of the set lengths of all counted tuples. */
	protected int itemCount;
	/** Number of tuples passed to {@link #count(DataTuple)}. */
	protected int setCount;
	/** Maps a set value to the number of times it has been seen. */
	protected Int2IntMap countMap;
	
	// Aggregated stats.
	/** {@code itemCount / setCount}; 0 when no tuple has been counted. */
	protected double avgSetSize;
	/** Histogram: maps an occurrence count to the number of values having that count. */
	protected Int2IntMap frequencyMap;
	/** Occurrences at or below the threshold of the most recently evaluated percentage. */
	protected int infrequentItemCount;
	/** One ratio per entry of {@link #getPercentages()}, filled by {@link #aggregate()}. */
	protected double[] infrequentItemPercentage;
	
	/** Creates a counter with all simple statistics at zero and an empty count map. */
	public InfrequentItemCounter() {
		itemCount = 0;
		setCount = 0;
		countMap = new Int2IntOpenHashMap();
	}
	
	/**
	 * Registers one tuple: bumps the set counter, adds the set's length to the
	 * item counter and increments the occurrence count of every value in the set.
	 *
	 * @param tuple the tuple whose set is to be counted
	 */
	@Override
	public void count(DataTuple tuple) {
		setCount += 1;
		itemCount += tuple.getSet().length;
		
		for (int val : tuple.getSet()) {
			addToCountingMap(countMap, val);
		}
	}

	/**
	 * Increments the counter stored under {@code val}, starting at 1 for a key
	 * that is not yet present.
	 *
	 * @param countMap the map to update
	 * @param val the key whose counter is incremented
	 */
	protected void addToCountingMap(Int2IntMap countMap, int val) {
		// Explicit containsKey check so the result does not depend on the
		// map's configured default return value.
		if (countMap.containsKey(val)) {
			countMap.put(val, 1 + countMap.get(val));
		} else {
			countMap.put(val, 1);
		}
	}

	/**
	 * Derives the aggregated statistics from the counts collected so far:
	 * the average set size, the occurrence histogram and, for every percentage
	 * returned by {@link #getPercentages()}, the ratio of occurrences that
	 * belong to values seen at most {@link #getThreshold(double)} times.
	 *
	 * <p>If nothing has been counted, the average and all ratios are 0 instead
	 * of NaN.
	 *
	 * @see scj.analyze.statistics.source.Counter#aggregate()
	 */
	@Override
	public void aggregate() {
		
		// Build averages; guard against 0/0 on empty input.
		avgSetSize = setCount == 0 ? 0.0 : ((double) itemCount) / setCount;
		
		// Build frequency map ("histogram"): occurrence count -> number of values.
		initFrequencyMap();
		for (Entry<Integer, Integer> entry : countMap.entrySet()) {
			addToCountingMap(frequencyMap, entry.getValue());
		}

		double[] levels = getPercentages();
		infrequentItemPercentage = new double[levels.length];
		for (int i = 0; i < levels.length; i++) {
			double threshold = getThreshold(levels[i]);
			infrequentItemCount = countOccurrencesUpTo(threshold);
			infrequentItemPercentage[i] = itemCount == 0
					? 0.0
					: ((double) infrequentItemCount) / itemCount;
		}
	}

	/**
	 * Sums the occurrences of all values whose occurrence count does not
	 * exceed {@code threshold}, using the current frequency map.
	 */
	private int countOccurrencesUpTo(double threshold) {
		int total = 0;
		for (Entry<Integer, Integer> entry : frequencyMap.entrySet()) {
			if (entry.getKey() <= threshold) {
				// key = occurrences per value, value = number of such values
				total += entry.getValue() * entry.getKey();
			}
		}
		return total;
	}

	/**
	 * Translates a percentage into the largest occurrence count that is still
	 * treated as infrequent (the comparison in {@link #aggregate()} is
	 * inclusive).
	 *
	 * @param percentage one of the values returned by {@link #getPercentages()}
	 * @return the inclusive upper bound on the occurrence count
	 */
	abstract protected double getThreshold(double percentage);

	/**
	 * @return the percentages for which ratios are computed and printed, in
	 *         output order
	 */
	protected abstract double[] getPercentages();
	
	/** Replaces the frequency map with a fresh, empty one. */
	protected void initFrequencyMap() {
		frequencyMap = new Int2IntOpenHashMap();
	}

	/**
	 * Renders two tab-separated lines: the percentages, and below them the
	 * computed ratios multiplied by 100. Numbers are formatted with the
	 * pattern {@code #.##}.
	 *
	 * @return the formatted table
	 * @throws IllegalStateException if {@link #aggregate()} has not been called yet
	 * @see scj.analyze.statistics.source.Counter#output()
	 */
	@Override
	public String output() {
		if (infrequentItemPercentage == null) {
			throw new IllegalStateException("aggregate() must be called before output()");
		}

		StringBuilder tmp = new StringBuilder();
		DecimalFormat format = new DecimalFormat("#.##");

		// Header row: the percentage levels.
		for (double percentage : getPercentages()) {
			tmp.append("\t ").append(format.format(percentage)).append(" ");
		}
		tmp.append("\n");
		
		// Value row: ratios expressed in percent.
		for (double ratio : infrequentItemPercentage) {
			tmp.append("\t ").append(format.format(ratio * 100)).append("");
		}
		
		return tmp.toString();
	}


}
