/* CREATE TABLE dist_nodes ( */
/* 	go_id varchar(10) PRIMARY KEY, */
/* 	nodes varchar(10)[] */
/* ); */

/* CREATE TABLE assignment ( */
/* 	go_id varchar(10) PRIMARY KEY, */
/* 	genes integer NOT NULL */
/* ); */

/* TRUNCATE TABLE assignment; */
/* TRUNCATE TABLE dist_nodes; */

/*
 * get_nodes(go_term)
 *
 * Returns go_term together with every term reachable from it by repeatedly
 * following go_is_a rows whose is_a column equals the current term.
 *
 * dist_nodes is consulted first: when it already holds a row for go_term,
 * the stored array is expanded and returned as-is. Only when nothing is
 * stored is the set computed by recursing through go_is_a.
 *
 * Parameters:
 *   go_term - identifier of the term to expand.
 * Returns:
 *   one row per node (column "node"); duplicates are removed on the
 *   computed path by UNION.
 */
CREATE OR REPLACE FUNCTION get_nodes(go_term varchar(10)) RETURNS TABLE (node varchar(10)) AS $$
BEGIN
	/* Stored answer, if any. RETURN QUERY sets FOUND when it emits rows. */
	RETURN QUERY
		SELECT unnest(cached.nodes)
		FROM dist_nodes AS cached
		WHERE cached.go_id = go_term;

	/* Something was stored for this term: the result set is complete. */
	IF FOUND THEN
		RETURN;
	END IF;

	/*
	 * Nothing stored: the term itself plus the expansion of every row that
	 * names it in is_a. UNION (not UNION ALL) because the same node may be
	 * reached along more than one path.
	 */
	RETURN QUERY
		SELECT go_term
		UNION
		SELECT get_nodes(child.go_id)
		FROM go_is_a AS child
		WHERE child.is_a = go_term;
END
$$ LANGUAGE plpgsql;


/*
 * fill_nodes()
 *
 * For every id in table go, computes the array consisting of that id plus
 * the get_nodes() expansion of each go_is_a row whose is_a equals the id,
 * and stores it as one row of dist_nodes (go_id, nodes).
 *
 * Rows are inserted one term at a time on purpose: get_nodes() reads
 * dist_nodes, so terms stored by earlier iterations are reused by later
 * ones instead of being recomputed.
 *
 * The function only inserts. It does not clear dist_nodes first, so a row
 * that already exists for a term makes the INSERT fail on the primary key
 * (assuming go_id is the primary key, as in the commented-out DDL at the
 * top of the file).
 *
 * Returns: always 1.
 */
CREATE OR REPLACE FUNCTION fill_nodes() RETURNS int AS $$
DECLARE
	parent varchar(10);
	/* Named differently from the dist_nodes.nodes column to avoid any
	   variable/column confusion in the INSERT below. */
	node_ids varchar(10)[];
BEGIN
	FOR parent IN SELECT g.id FROM go AS g LOOP
		/* UNION removes nodes that are reachable along several paths. */
		node_ids := array(
			SELECT parent
			UNION
			SELECT get_nodes(rel.go_id)
			FROM go_is_a AS rel
			WHERE rel.is_a = parent
		);

		/* Static statement with an explicit column list: no dynamic SQL is
		   needed here, and the insert no longer depends on column order. */
		INSERT INTO dist_nodes (go_id, nodes)
		VALUES (parent, node_ids);
	END LOOP;
	RETURN 1;
END
$$ LANGUAGE plpgsql;


/*
 * fill_assignment()
 *
 * For every row of dist_nodes, counts the distinct gene_id values in
 * genes_go whose go_id is one of the row's nodes, and stores the result in
 * assignment (go_id, genes).
 *
 * The aggregate has no GROUP BY, so exactly one row is inserted per term,
 * with a count of 0 when no gene matches.
 *
 * The function only inserts; it does not clear assignment first.
 * A NOTICE is raised after every 1000 processed terms.
 *
 * Returns: always 1.
 */
CREATE OR REPLACE FUNCTION fill_assignment() RETURNS integer AS $$
DECLARE
	i int := 0;
	term varchar(10);
	nodes varchar(10)[];
BEGIN
	/*
	 * Columns are listed explicitly so the positional assignment to
	 * (term, nodes) cannot be broken by a change in dist_nodes' column
	 * order. The d. qualifier keeps d.nodes distinct from the local
	 * variable of the same name.
	 */
	FOR term, nodes IN
		SELECT d.go_id, d.nodes
		FROM dist_nodes AS d
	LOOP
		INSERT INTO assignment (go_id, genes)
		SELECT term, count(DISTINCT g.gene_id)
		FROM genes_go AS g
		WHERE g.go_id = ANY (nodes);

		/* Progress report for long runs. */
		i := i + 1;
		IF mod(i, 1000) = 0 THEN
			RAISE NOTICE 'processed % terms', i;
		END IF;
	END LOOP;
	RETURN 1;
END
$$ LANGUAGE plpgsql;


/*
 * fill_nodes_assignment()
 *
 * Convenience entry point: runs fill_nodes() and then fill_assignment(),
 * in that order (fill_assignment() reads the dist_nodes rows that
 * fill_nodes() writes).
 *
 * Returns: always 1; the results of the two steps are discarded.
 */
CREATE OR REPLACE FUNCTION fill_nodes_assignment() RETURNS integer AS $$
DECLARE
	step_result integer;  /* throw-away holder for each step's return value */
BEGIN
	step_result := fill_nodes();
	step_result := fill_assignment();
	RETURN 1;
END
$$ LANGUAGE plpgsql;
