Humboldt-Universität zu Berlin - Mathematisch-Naturwissenschaftliche Fakultät - Wissensmanagement in der Bioinformatik

Huffman.java

text/x-java Huffman.java — 6.9 KB

Dateiinhalt

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;

public class Huffman {

  // Root of the binary tree used for encoding
  public Node root;

  // HashMap: contains the Huffman code of every character.
  // Key: Character (a character of the input string).
  // Value: String (the Huffman code of the key).
  protected Map<Character, String> code;

  public Huffman() {
    this.root = null;
  }

  // Node of the binary tree
  public class Node {
    // Left and right children of the node
    public Node left, right;

    // If the node is a leaf, it should contain a character,
    // otherwise, this is null
    public Character c;

    // Height of the subtree having this node as root.
    public int h;

    // Default constructor
    public Node() {
      this(null);
    }

    // Creates a leaf node containing the character c
    public Node(Character c) {
      this(c, null, null, 0);
    }

    // Creates a leaf node containing the character c with left and right
    // children
    public Node(Character c, Node left, Node right, int height) {
      this.left = left;
      this.right = right;
      this.c = c;
      this.h = height;
    }

    // If the node is a leaf, it should contain a character,
    // otherwise, this is null
    public boolean isLeaf() {
      return (this.c != null);
    }
  }

  // Represents an entry of the priority queue.
  // Consists of a key (integer) used to sort the entries,
  // and a value (a Node).
  protected static class Entry implements Comparable<Entry> {
    public Integer key;
    public Node node;

    public Entry(Integer k, Node v) {
      this.key = k;
      this.node = v;
    }

    // Used to sort the entries by their key (ascending order)
    @Override
    public int compareTo(Entry other) {
      // in case of a tie, the node with 
      // the smaller subtree is used
      if (this.key == other.key) { 
        return Integer.compare(this.node.h, other.node.h);
      }
      return Integer.compare(this.key, other.key);
    }
  }

  private void createTree(PriorityQueue<Entry> pq) {
    // Create the binary tree used for the encoding.
    // 1: extract two nodes (e1, e2) from the priority queue,
    // 2: create a new internal node e3 with a null character; its left and
    // children are e1 and e2; the priority of the new internal node is
    // the sum of the priority e1 and e2.
    // 3: push e3 into the priority queue
    // 4: repeat until the priority queue contains a single node
    // (i.e., the root).

    /*
     * TODO FILL IN YOUR CODE HERE
     */
  }

  private void createCode(Node node, String prefix) {
    // DFS traversal of the binary tree from the root;
    // every time a leaf is encountered, store the character
    // and the corresponding code in the 'code' map.

    /*
     * TODO FILL IN YOUR CODE HERE
     */
  }

  public String decode(String input) {
    // Decode the input string using the codes previously computed.
    // The input string should be a sequence of '0' and '1'.
    // Parse the input string character by character, and explore the
    // tree starting from the root until a leaf is found.
    // Continue until all the input has been decoded.

    /*
     * TODO FILL IN YOUR CODE HERE
     */
    return null;
  }

  public String encode(String input) {
    // Encodes the input string into a sequence of '0' and '1'
    // using the Huffman coding.
    // 1: count the frequency of every character of the input string
    // (already done for you)
    // 2: create a priority queue with the least frequent character on top
    // (already done for you)
    // 3: use the priority queue to create the binary tree used to encode
    // the input string (to be done for you)
    // 4: use the binary tree to encode the input string (to be implemented by
    // createTree and createCode)
    Map<Character, Integer> map = new HashMap<Character, Integer>();
    this.code = new HashMap<Character, String>();

    for (int i = 0; i < input.length(); ++i) {
      Character c = input.charAt(i);
      Integer freq = map.get(c);
      if (freq != null) {
        ++freq;
      } else {
        freq = 1;
      }
      map.put(c, freq);
    }

    PriorityQueue<Entry> pq = new PriorityQueue<>();
    for (Map.Entry<Character, Integer> c : map.entrySet()) {
      pq.add(new Entry(c.getValue(), new Node(c.getKey())));
    }

    createTree(pq);
    createCode(this.root, "");

    StringBuffer output = new StringBuffer();
    for (int i = 0; i < input.length(); ++i) {
      output.append(this.code.get(input.charAt(i)));
    }

    return output.toString();
  }

  private static boolean checkEncoded(String s, int l) {
    System.out.println("The encoded string has " + s.length() + " bits");

    if (s.length() != l) {
      System.err.println(
          "Error: the encoded string should have length " + l + " but it has length " + s.length());
      return false;
    }

    for (int i = 0; i < s.length(); ++i) {
      if (s.charAt(i) != '0' && s.charAt(i) != '1') {
        System.err.println("Error: output string must only contain '0' or '1', " + "character "
            + s.charAt(i) + " found at position " + Integer.toString(i));
        return false;
      }
    }

    return true;
  }

  private static boolean checkDecoded(String input, String decoded) {
    System.out.println("The decoded string has length " + decoded.length() + " equals "
        + (decoded.length() * 8) + " Bits");

    if (decoded.length() != input.length()) {
      System.err.println("Error: decoded string has length " + decoded.length()
          + " but the input string has size " + input.length());
      return false;
    }

    int i = 0;
    while (i < input.length()) {
      if (input.charAt(i) != decoded.charAt(i)) {
        System.err.print("Error at character " + i + ": it has been decoded to " + decoded.charAt(i)
            + " instead of " + input.charAt(i));
        return false;
      }
      ++i;
    }
    return i == input.length();
  }

  public static void main(String[] args) {
    Huffman enc = new Huffman();
    String[] paths = { "lorem1.txt", "lorem2.txt", "lorem3.txt" };
    int[] encLengths = { 1260, 10632, 8894 };

    for (int i = 0; i < paths.length; ++i) {
      String input = new String();
      String path = paths[i];
      try {
        input = (Files.readAllLines(Paths.get(path), StandardCharsets.UTF_8)).get(0);
      } catch (IOException e) {
        System.err.println("Error in parsing file: " + path);
        continue;
      }

      System.out.println("Testing file " + path);

      // Testing encoding
      String encoded = enc.encode(input);
      if (!checkEncoded(encoded, encLengths[i])) {
        continue;
      }

      String decoded = enc.decode(encoded);

      // Testing decoding
      if (checkDecoded(input, decoded)) {
        System.out.println("File " + path + " decoded successfully!");
      }
    }
  }
}