Humboldt-Universität zu Berlin - Mathematisch-Naturwissenschaftliche Fakultät - Wissensmanagement in der Bioinformatik

Huffman.java

Huffman.java

Huffman.java — 7.1 KB

Dateiinhalt

```import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;

public class Huffman {

// Root of the binary tree used for encoding
public Node root;

// HashMap: contains the Huffman code of every character.
// Key: Character (a character of the input string).
// Value: String (the Huffman code of the key).
protected Map<Character, String> code;

public Huffman() {
this.root = null;
}

// Node of the binary tree
public class Node {
// Left and right children of the node
public Node left, right;

// If the node is a leaf, it should contain a character,
// otherwise, this is null
public Character c;

// Height of the subtree having this node as root.
public int h;

// Default constructor
public Node() {
this(null);
}

// Creates a leaf node containing the character c
public Node(Character c) {
this(c, null, null, 0);
}

// Creates a leaf node containing the character c with left and right
// children
public Node(Character c, Node left, Node right, int height) {
this.left = left;
this.right = right;
this.c = c;
this.h = height;
}

// If the node is a leaf, it should contain a character,
// otherwise, this is null
public boolean isLeaf() {
return (this.c != null);
}
}

// Represents an entry of the priority queue.
// Consists of a key (integer) used to sort the entries,
// and a value (a Node).
protected static class Entry implements Comparable<Entry> {
public Integer key;
public Node node;

public Entry(Integer k, Node v) {
this.key = k;
this.node = v;
}

// Used to sort the entries by their key (ascending order)
@Override
public int compareTo(Entry other) {
// in case of a tie, the node with
// the smaller subtree is used
if (this.key == other.key) {
return Integer.compare(this.node.h, other.node.h);
}
return Integer.compare(this.key, other.key);
}
}

private void createTree(PriorityQueue<Entry> pq) {
// Creates the binary tree used for the encoding.
// 1: extract two nodes (e1, e2) from the priority queue,
// 2: create a new internal node e3 with a null character; its left and
// children are e1 and e2; the priority of the new internal node is
// the sum of the priority e1 and e2.
// 3: push e3 into the priority queue
// 4: repeat until the priority queue contains a single node
// (i.e., the root).

/*
* TODO FILL IN YOUR CODE HERE
*/
}

private void createCode(Node node, String prefix) {
// DFS traversal of the binary tree from the root;
// every time a leaf is encountered, store the character
// and the corresponding code in the 'code' map.

/*
* TODO FILL IN YOUR CODE HERE
*/
}

public String decode(String input) {
// Decode the input string using the codes previously computed.
// The input string should be a sequence of '0' and '1'.
// Parse the input string character by character, and explore the
// tree starting from the root until a leaf is found.
// Continue until all the input has been decoded.

/*
* TODO FILL IN YOUR CODE HERE
*/
return null;
}

public void prepare(String input) {
// Prepares the Huffman coding using the given input string.
// 1: count the frequency of every character of the input string
// 2: create a priority queue with the least frequent character on top
// 3: use the priority queue to create the binary tree using the createTree method
//    (to be implemented by you)
// 4: use the binary tree to encode the characters based
//    (to be implemented by you)
Map<Character, Integer> map = new HashMap<Character, Integer>();
this.code = new HashMap<Character, String>();

for (int i = 0; i < input.length(); ++i) {
Character c = input.charAt(i);
Integer freq = map.get(c);
if (freq != null) {
++freq;
} else {
freq = 1;
}
map.put(c, freq);
}

PriorityQueue<Entry> pq = new PriorityQueue<>();
for (Map.Entry<Character, Integer> c : map.entrySet()) {
}

createTree(pq);
createCode(this.root, "");
}

public String encode(String input) {
// Encodes the input string into a sequence of '0' and '1'
// using the Huffman coding previously computed
if (this.code == null) {
throw new Error("Huffman encoder is uninitialized");
}

StringBuffer output = new StringBuffer();
for (int i = 0; i < input.length(); ++i) {
char currentChar = input.charAt(i);
if (!this.code.containsKey(currentChar)) {
throw new Error("Input char " + currentChar + " wasn't part of the pre-processing");
}

output.append(this.code.get(currentChar));
}

return output.toString();
}

// Print the content of the code map
public void printMap() {
for (Map.Entry<Character, String> entry : this.code.entrySet()) {
System.out.println(entry.getKey() + ": " + entry.getValue());
}
}

public static boolean checkEncoded(String s, int l) {
System.out.println("The encoded string has " + s.length() + " bits");

if (s.length() != l) {
System.err
.println("Error: the encoded string should have length " + l + " but it has length " + s.length());
return false;
}

for (int i = 0; i < s.length(); ++i) {
if (s.charAt(i) != '0' && s.charAt(i) != '1') {
System.err.println("Error: output string must only contain '0' or '1', " + "character " + s.charAt(i)
+ " found at position " + Integer.toString(i));
return false;
}
}

return true;
}

public static boolean checkDecoded(String input, String decoded) {
System.out.println(
"The decoded string has length " + decoded.length() + " equals " + (decoded.length() * 8) + " Bits");

if (decoded.length() != input.length()) {
System.err.println("Error: decoded string has length " + decoded.length()
+ " but the input string has size " + input.length());
return false;
}

int i = 0;
while (i < input.length()) {
if (input.charAt(i) != decoded.charAt(i)) {
System.err.print("Error at character " + i + ": it has been decoded to " + decoded.charAt(i)
+ " instead of " + input.charAt(i));
return false;
}
++i;
}
return i == input.length();
}

public static void main(String[] args) {
HuffmanSolution encoder = new HuffmanSolution();

String[] paths = { "lorem1.txt", "lorem2.txt", "lorem3.txt" };
int[] encLengths = { 1260, 10632, 8894 };

for (int i = 0; i < paths.length; ++i) {
String input = new String();
String path = paths[i];
try {
} catch (IOException e) {
System.err.println("Error in parsing file: " + path);
continue;
}

System.out.println("Testing file " + path);

// Prepare encoder
encoder.prepare(input);

// Testing encoding
String encoded = encoder.encode(input);
if (!checkEncoded(encoded, encLengths[i])) {
continue;
}

String decoded = encoder.decode(encoded);

// Testing decoding
if (checkDecoded(input, decoded)) {
System.out.println("File " + path + " decoded successfully!");
}

}
}
}
```