(ns e2-67-to-72 (:use clojure.test)) | |

(defn third [x] (second (rest x))) | |

(defn fourth [x] (second (rest (rest x)))) | |

(defn make-leaf [s weight] ['leaf s weight]) | |

(defn leaf? [o] (= (first o) 'leaf)) | |

(defn symbol-leaf [x] (second x)) | |

(defn weight-leaf [x] (third x)) | |

(defn left-branch [t] (first t)) | |

(defn right-branch [t] (second t)) | |

(defn symbols [t] (if (leaf? t) [(symbol-leaf t)] (third t))) | |

(defn weight [t] (if (leaf? t) (weight-leaf t) (fourth t))) | |

(defn make-code-tree [l r] [l r (concat (symbols l) (symbols r)) (+ (weight l) (weight r))]) | |

(defn choose-branch [bit b] (cond (= bit 0) (left-branch b) (= bit 1) (right-branch b) :else nil)) | |

(defn decode [bits t] (letfn [(decode-1 [bits current-branch] (if (empty? bits) [] (let [next-branch (choose-branch (first bits) current-branch)] (if (leaf? next-branch) (cons (symbol-leaf next-branch) (decode-1 (rest bits) t)) (decode-1 (rest bits) next-branch)))))] (decode-1 bits t))) | |

(def *sample-tree* (make-code-tree (make-leaf 'A 4) (make-code-tree (make-leaf 'B 2) (make-code-tree (make-leaf 'D 1) (make-leaf 'C 1))))) | |

(def *sample-message* [0 1 1 0 0 1 0 1 0 1 1 1 0]) | |

(def *sample-decoded* ['A 'D 'A 'B 'B 'C 'A]) | |

(deftest test-assumption (is (= *sample-decoded* (decode *sample-message* *sample-tree*)))) | |

Returns true if tree 't' contains symbol 's' | (defn contains-symbol [s t] ; Clojure idiom for contains? in lists/vectors? (not (nil? (some #{s} (symbols t))))) |

(defn encode-symbol [s t] (if (contains-symbol s t) (if (not (leaf? t)) (if (contains-symbol s (left-branch t)) (cons 0 (encode-symbol s (left-branch t))) (cons 1 (encode-symbol s (right-branch t)))) []) (assert false))) | |

(defn encode [msg t] (if (empty? msg) [] (concat (encode-symbol (first msg) t) (encode (rest msg) t)))) | |

(deftest test-encode-decode (is (= (encode *sample-decoded* *sample-tree*) *sample-message*))) | |

Inserts an element 'x' into the set 'xs' and keeps the set ordered by weight | (defn adjoin-set [x xs] (cond (empty? xs) [x] (< (weight x) (weight (first xs))) (cons x xs) :else (cons (first xs) (adjoin-set x (rest xs))))) |

Creates a set of leaves out of (Symbol, Weight) pairs | (defn make-leaf-set [pairs] (if (empty? pairs) [] (let [pair (first pairs)] (adjoin-set (make-leaf (first pair) (second pair)) (make-leaf-set (rest pairs)))))) |

Creates a Huffman tree out of a set of leaves (containing a symbol to be encoded and its weight) | (defn successive-merge [xs] (cond (empty? xs) [] (= (count xs) 1) (first xs) :else (successive-merge (adjoin-set (make-code-tree (first xs) (second xs)) (rest (rest xs)))))) |

(defn generate-huffman-tree [pairs] (successive-merge (make-leaf-set pairs))) | |

(deftest test-huffman-tree ; My function generates a tree symmetric to the one depicted in the book ; (fig. 2.18) as it reverses the set of leaves. (is (= (make-code-tree (make-leaf 'A 8) (make-code-tree (make-code-tree (make-code-tree (make-leaf 'H 1) (make-leaf 'G 1)) (make-code-tree (make-leaf 'F 1) (make-leaf 'E 1))) (make-code-tree (make-code-tree (make-leaf 'D 1) (make-leaf 'C 1)) (make-leaf 'B 3)))) (generate-huffman-tree [['A 8] ['B 3] ['C 1] ['D 1] ['E 1] ['F 1] ['G 1] ['H 1]])))) | |

(def *rock-70-tree* (generate-huffman-tree [['A 2] ['BOOM 1] ['GET 2] ['JOB 2] ['NA 16] ['SHA 3] ['YIP 9] ['WAH 1]])) | |

(def *rock-70-message* "Get a job Sha na na na na na na na na Get a job Sha na na na na na na na na Wah yip yip yip yip yip yip yip yip yip Sha boom") | |

- uppercase it (as our symbols in the huffman tree are uppercased)
- turn it into code (split on spaces and quote)
| |

(defn as-symbol [s] (symbol (.toUpperCase (.replace s "\n" )))) | |

(defn message-to-code [msg] (reverse (reduce #(cons (as-symbol %2) %) [] (filter #(seq %) (seq (.split msg " ")))))) | |

(deftest test-70-rock (is (= (count (encode (message-to-code *rock-70-message*) *rock-70-tree*)) 84))) | |

(defn length [msg] (count (message-to-code msg))) | |

(deftest test-num-of-bits-fixedsize (is (= (* 3 (length *rock-70-message*)) 108))) | |

{A, B, C, D, E} / \ {A, B, C, D} {E} / \ {A, B, C} {D} / \ {A, B} {C} / \ {A} {B} Even from this one sample we can predict that all of the trees generated for the given alphabet and weights will be of the same structure - one branch of each subtree will be at most of length 1 and the other branch will go down until it reaches the elements with weights 1 and 2. To encode the most common symbol we will always need just one bit as it will always be the first symbol on the left/right (dependening on the implementation of the huffman tree generator) To encode the most uncommon symbol we'll need 'n-1' bits as the most uncommon symbol will be at most n-1 steps away from the root of the tree (it will be the n-th symbol in the alphabet, so we would need 'n' bits if we stored the [symbol, weight] pairs in a list instead of a tree. As we're storing those pairs in a binary tree, the second most uncommon symbol will also require 'n-1' bits as it will be on the same level as the first most uncommon symbol). | |