Humboldt-Universität zu Berlin - Mathematisch-Naturwissenschaftliche Fakultät - Wissensmanagement in der Bioinformatik

Huffman.java

text/x-java Huffman.java — 7.1 KB

Dateiinhalt

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;


public class Huffman {

	// Root of the binary tree used for encoding
	public Node root;

	// HashMap: contains the Huffman code of every character.
	// Key: Character (a character of the input string).
	// Value: String (the Huffman code of the key).
	protected Map<Character, String> code;

	public Huffman() {
		this.root = null;
	}

	// Node of the binary tree
	public class Node {
		// Left and right children of the node
		public Node left, right;

		// If the node is a leaf, it should contain a character,
		// otherwise, this is null
		public Character c;

		// Height of the subtree having this node as root.
		public int h;

		// Default constructor
		public Node() {
			this(null);
		}

		// Creates a leaf node containing the character c
		public Node(Character c) {
			this(c, null, null, 0);
		}

		// Creates a leaf node containing the character c with left and right
		// children
		public Node(Character c, Node left, Node right, int height) {
			this.left = left;
			this.right = right;
			this.c = c;
			this.h = height;
		}

		// If the node is a leaf, it should contain a character,
		// otherwise, this is null
		public boolean isLeaf() {
			return (this.c != null);
		}
	}

	// Represents an entry of the priority queue.
	// Consists of a key (integer) used to sort the entries,
	// and a value (a Node).
	protected static class Entry implements Comparable<Entry> {
		public Integer key;
		public Node node;

		public Entry(Integer k, Node v) {
			this.key = k;
			this.node = v;
		}

		// Used to sort the entries by their key (ascending order)
		@Override
		public int compareTo(Entry other) {
			// in case of a tie, the node with
			// the smaller subtree is used
			if (this.key == other.key) {
				return Integer.compare(this.node.h, other.node.h);
			}
			return Integer.compare(this.key, other.key);
		}
	}

	private void createTree(PriorityQueue<Entry> pq) {
		// Creates the binary tree used for the encoding.
		// 1: extract two nodes (e1, e2) from the priority queue,
		// 2: create a new internal node e3 with a null character; its left and
		// children are e1 and e2; the priority of the new internal node is
		// the sum of the priority e1 and e2.
		// 3: push e3 into the priority queue
		// 4: repeat until the priority queue contains a single node
		// (i.e., the root).

		/*
		 * TODO FILL IN YOUR CODE HERE
		 */
	}

	private void createCode(Node node, String prefix) {
		// DFS traversal of the binary tree from the root;
		// every time a leaf is encountered, store the character
		// and the corresponding code in the 'code' map.

		/*
		 * TODO FILL IN YOUR CODE HERE
		 */
	}

	public String decode(String input) {
		// Decode the input string using the codes previously computed.
		// The input string should be a sequence of '0' and '1'.
		// Parse the input string character by character, and explore the
		// tree starting from the root until a leaf is found.
		// Continue until all the input has been decoded.

		/*
		 * TODO FILL IN YOUR CODE HERE
		 */
		return null;
	}

	public void prepare(String input) {
		// Prepares the Huffman coding using the given input string.
		// 1: count the frequency of every character of the input string
		//    (already done for you)
		// 2: create a priority queue with the least frequent character on top
		//    (already done for you)
		// 3: use the priority queue to create the binary tree using the createTree method
		//    (to be implemented by you)
		// 4: use the binary tree to encode the characters based
		//    (to be implemented by you)
		Map<Character, Integer> map = new HashMap<Character, Integer>();
		this.code = new HashMap<Character, String>();

		for (int i = 0; i < input.length(); ++i) {
			Character c = input.charAt(i);
			Integer freq = map.get(c);
			if (freq != null) {
				++freq;
			} else {
				freq = 1;
			}
			map.put(c, freq);
		}

		PriorityQueue<Entry> pq = new PriorityQueue<>();
		for (Map.Entry<Character, Integer> c : map.entrySet()) {
			pq.add(new Entry(c.getValue(), new Node(c.getKey())));
		}

		createTree(pq);
		createCode(this.root, "");
	}

	public String encode(String input) {
		// Encodes the input string into a sequence of '0' and '1'
		// using the Huffman coding previously computed
		if (this.code == null) {
			throw new Error("Huffman encoder is uninitialized");
		}

		StringBuffer output = new StringBuffer();
		for (int i = 0; i < input.length(); ++i) {
			char currentChar = input.charAt(i);
			if (!this.code.containsKey(currentChar)) {
				throw new Error("Input char " + currentChar + " wasn't part of the pre-processing");
			}

			output.append(this.code.get(currentChar));
		}

		return output.toString();
	}

	// Print the content of the code map
	public void printMap() {
		for (Map.Entry<Character, String> entry : this.code.entrySet()) {
			System.out.println(entry.getKey() + ": " + entry.getValue());
		}
	}

	public static boolean checkEncoded(String s, int l) {
		System.out.println("The encoded string has " + s.length() + " bits");

		if (s.length() != l) {
			System.err
					.println("Error: the encoded string should have length " + l + " but it has length " + s.length());
			return false;
		}

		for (int i = 0; i < s.length(); ++i) {
			if (s.charAt(i) != '0' && s.charAt(i) != '1') {
				System.err.println("Error: output string must only contain '0' or '1', " + "character " + s.charAt(i)
						+ " found at position " + Integer.toString(i));
				return false;
			}
		}

		return true;
	}

	public static boolean checkDecoded(String input, String decoded) {
		System.out.println(
				"The decoded string has length " + decoded.length() + " equals " + (decoded.length() * 8) + " Bits");

		if (decoded.length() != input.length()) {
			System.err.println("Error: decoded string has length " + decoded.length()
					+ " but the input string has size " + input.length());
			return false;
		}

		int i = 0;
		while (i < input.length()) {
			if (input.charAt(i) != decoded.charAt(i)) {
				System.err.print("Error at character " + i + ": it has been decoded to " + decoded.charAt(i)
						+ " instead of " + input.charAt(i));
				return false;
			}
			++i;
		}
		return i == input.length();
	}

	public static void main(String[] args) {
		HuffmanSolution encoder = new HuffmanSolution();
		
		String[] paths = { "lorem1.txt", "lorem2.txt", "lorem3.txt" };
		int[] encLengths = { 1260, 10632, 8894 };

		for (int i = 0; i < paths.length; ++i) {
			String input = new String();
			String path = paths[i];
			try {
				input = (Files.readAllLines(Paths.get(path), StandardCharsets.UTF_8)).get(0);
			} catch (IOException e) {
				System.err.println("Error in parsing file: " + path);
				continue;
			}

			System.out.println("Testing file " + path);

			// Prepare encoder
			encoder.prepare(input);

			// Testing encoding
			String encoded = encoder.encode(input);
			if (!checkEncoded(encoded, encLengths[i])) {
				continue;
			}

			String decoded = encoder.decode(encoded);

			// Testing decoding
			if (checkDecoded(input, decoded)) {
				System.out.println("File " + path + " decoded successfully!");
			}

		}
	}
}