| |
| | (ns e2-67-to-72
(:use clojure.test))
| | (defn third [x]
(second (rest x)))
| | (defn fourth [x]
(second (rest (rest x))))
| | (defn make-leaf [s weight]
['leaf s weight])
| | (defn leaf? [o]
(= (first o) 'leaf))
| | (defn symbol-leaf [x]
(second x))
| | (defn weight-leaf [x]
(third x))
| | (defn left-branch [t]
(first t))
| | (defn right-branch [t]
(second t))
| | (defn symbols [t]
(if (leaf? t)
[(symbol-leaf t)]
(third t)))
| | (defn weight [t]
(if (leaf? t)
(weight-leaf t)
(fourth t)))
| | (defn make-code-tree [l r]
[l r
(concat (symbols l) (symbols r))
(+ (weight l) (weight r))])
| | (defn choose-branch [bit b]
(cond (= bit 0) (left-branch b)
(= bit 1) (right-branch b)
:else nil))
| | (defn decode [bits t]
(letfn [(decode-1 [bits current-branch]
(if (empty? bits) []
(let [next-branch (choose-branch (first bits) current-branch)]
(if (leaf? next-branch)
(cons (symbol-leaf next-branch) (decode-1 (rest bits) t))
(decode-1 (rest bits) next-branch)))))]
(decode-1 bits t)))
| | (def *sample-tree*
(make-code-tree (make-leaf 'A 4)
(make-code-tree
(make-leaf 'B 2)
(make-code-tree (make-leaf 'D 1)
(make-leaf 'C 1)))))
| | (def *sample-message* [0 1 1 0 0 1 0 1 0 1 1 1 0])
| | (def *sample-decoded* ['A 'D 'A 'B 'B 'C 'A])
| | (deftest test-assumption
(is (= *sample-decoded* (decode *sample-message* *sample-tree*))))
Returns true if tree 't' contains symbol 's'
| | (defn contains-symbol
[s t]
; Clojure idiom for contains? in lists/vectors?
(not (nil? (some #{s} (symbols t)))))
| | (defn encode-symbol [s t]
(if (contains-symbol s t)
(if (not (leaf? t))
(if (contains-symbol s (left-branch t))
(cons 0 (encode-symbol s (left-branch t)))
(cons 1 (encode-symbol s (right-branch t))))
[])
(assert false)))
| | (defn encode [msg t]
(if (empty? msg) []
(concat (encode-symbol (first msg) t)
(encode (rest msg) t))))
| | (deftest test-encode-decode
(is (= (encode *sample-decoded* *sample-tree*) *sample-message*)))
Inserts an element 'x' into the set 'xs' and keeps the set ordered by weight
| | (defn adjoin-set
[x xs]
(cond (empty? xs) [x]
(< (weight x) (weight (first xs))) (cons x xs)
:else (cons (first xs)
(adjoin-set x (rest xs)))))
Creates a set of leaves out of (Symbol, Weight) pairs
| | (defn make-leaf-set
[pairs]
(if (empty? pairs) []
(let [pair (first pairs)]
(adjoin-set (make-leaf (first pair) (second pair))
(make-leaf-set (rest pairs))))))
Creates a Huffman tree out of a set of leaves (containing a symbol to be
encoded and its weight)
| | (defn successive-merge
[xs]
(cond (empty? xs) []
(= (count xs) 1) (first xs)
:else (successive-merge (adjoin-set (make-code-tree (first xs) (second xs))
(rest (rest xs))))))
| | (defn generate-huffman-tree [pairs]
(successive-merge (make-leaf-set pairs)))
| | (deftest test-huffman-tree
; My function generates a tree symmetric to the one depicted in the book
; (fig. 2.18) as it reverses the set of leaves.
(is (= (make-code-tree (make-leaf 'A 8)
(make-code-tree
(make-code-tree (make-code-tree (make-leaf 'H 1) (make-leaf 'G 1))
(make-code-tree (make-leaf 'F 1) (make-leaf 'E 1)))
(make-code-tree (make-code-tree (make-leaf 'D 1) (make-leaf 'C 1))
(make-leaf 'B 3))))
(generate-huffman-tree [['A 8] ['B 3] ['C 1] ['D 1] ['E 1] ['F 1] ['G 1] ['H 1]]))))
| | (def *rock-70-tree* (generate-huffman-tree [['A 2] ['BOOM 1] ['GET 2] ['JOB 2] ['NA 16] ['SHA 3] ['YIP 9] ['WAH 1]]))
| | (def *rock-70-message*
"Get a job
Sha na na na na na na na na
Get a job
Sha na na na na na na na na
Wah yip yip yip yip yip yip yip yip yip
Sha boom")
- uppercase it (as our symbols in the huffman tree are uppercased)
- turn it into code (split on spaces and quote)
| |
| | (defn as-symbol [s]
(symbol (.toUpperCase (.replace s "\n" ))))
| | (defn message-to-code [msg]
(reverse (reduce #(cons (as-symbol %2) %) []
(filter #(seq %) (seq (.split msg " "))))))
| | (deftest test-70-rock
(is (= (count (encode (message-to-code *rock-70-message*) *rock-70-tree*)) 84)))
| | (defn length [msg]
(count (message-to-code msg)))
| | (deftest test-num-of-bits-fixedsize
(is (= (* 3 (length *rock-70-message*)) 108)))
{A, B, C, D, E}
/ \
{A, B, C, D} {E}
/ \
{A, B, C} {D}
/ \
{A, B} {C}
/ \
{A} {B}
Even from this one sample we can predict that all of the trees generated for
the given alphabet and weights will be of the same structure - one branch of
each subtree will be at most of length 1 and the other branch will go down
until it reaches the elements with weights 1 and 2.
To encode the most common symbol we will always need just one bit as it will
always be the first symbol on the left/right (dependening on the
implementation of the huffman tree generator)
To encode the most uncommon symbol we'll need 'n-1' bits as the most uncommon
symbol will be at most n-1 steps away from the root of the tree (it will be
the n-th symbol in the alphabet, so we would need 'n' bits if we stored the
[symbol, weight] pairs in a list instead of a tree. As we're storing those
pairs in a binary tree, the second most uncommon symbol will also require
'n-1' bits as it will be on the same level as the first most uncommon
symbol).
| |
| |