1 | ;; -*- mode: common-lisp; package: util.zip -*- |
---|
2 | ;; |
---|
3 | ;; inflate.cl |
---|
4 | ;; |
---|
5 | ;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA |
---|
6 | ;; |
---|
7 | ;; This code is free software; you can redistribute it and/or |
---|
8 | ;; modify it under the terms of the version 2.1 of |
---|
9 | ;; the GNU Lesser General Public License as published by |
---|
10 | ;; the Free Software Foundation, as clarified by the AllegroServe |
---|
11 | ;; prequel found in license-allegroserve.txt. |
---|
12 | ;; |
---|
13 | ;; This code is distributed in the hope that it will be useful, |
---|
14 | ;; but without any warranty; without even the implied warranty of |
---|
15 | ;; merchantability or fitness for a particular purpose. See the GNU |
---|
16 | ;; Lesser General Public License for more details. |
---|
17 | ;; |
---|
18 | ;; Version 2.1 of the GNU Lesser General Public License is in the file |
---|
19 | ;; license-lgpl.txt that was distributed with this file. |
---|
20 | ;; If it is not present, you can access it from |
---|
21 | ;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer |
---|
22 | ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, |
---|
23 | ;; Suite 330, Boston, MA 02111-1307 USA |
---|
24 | ;; |
---|
25 | ;; |
---|
26 | ;; $Id: inflate.cl,v 1.1.4.2 2002/06/19 02:50:55 layer Exp $ |
---|
27 | |
---|
28 | ;; Description: |
---|
29 | ;; inflate a stream of bytes which was compressed with the Deflate |
---|
30 | ;; algorithm |
---|
31 | ;; |
---|
32 | ;; john foderaro, August 2001 |
---|
33 | ;; |
---|
34 | ;;- This code in this file obeys the Lisp Coding Standard found in |
---|
35 | ;;- http://www.franz.com/~jkf/coding_standards.html |
---|
36 | ;;- |
---|
37 | |
---|
38 | |
---|
39 | |
---|
40 | #| |
---|
41 | Programming interface: |
---|
42 | |
---|
43 | (inflate input-stream output-stream) |
---|
44 | - the compressed information from the input-stream is read and |
---|
45 | the uncompressed information is written to the output-stream |
---|
46 | - both streams must support (unsigned-byte 8) element reading and writing |
---|
47 | |
---|
48 | |
---|
49 | (skip-gzip-header input-stream) |
---|
50 | - if the input stream is positioned on the header of a gzip'ed file |
---|
51 | then skip that header. |
---|
52 | - if the input stream is not positioned on a gzip header then nothing |
---|
53 | is done. |
---|
54 | |
---|
55 | |# |
---|
56 | |
---|
57 | |
---|
58 | |
---|
59 | #| |
---|
60 | The Deflate Compression Algorithm |
---|
61 | |
---|
62 | reference: http://www.gzip.org/zlib/rfc-deflate.html |
---|
63 | |
---|
64 | Basic idea: |
---|
65 | Deflation is a means of compressing an octet sequence that |
---|
66 | combines the LZ77 algorithm for marking common substrings and |
---|
67 | Huffman coding to take advantage of different frequency of occurance |
---|
68 | for each possible values in the file. |
---|
69 | This algorithm may not be as easy to understand or as efficient |
---|
70 | as the LZW compression algorithm but Deflate does have the big |
---|
71 | advantage in that it is not patented. Thus Deflate is a very |
---|
72 | widely used. Nowdays it's the most common compression method |
---|
73 | used in Windows Zip programs (e.g. Winzip) and in the Unix gzip program. |
---|
74 | Java jar files, being just zip files, also use this compression method. |
---|
75 | |
---|
76 | |
---|
77 | Lempel-Ziv 1977 (LZ77): |
---|
78 | An octet sequence often contains repeated subsequences. The LZ algorithm |
---|
79 | compresses a file by replacing repeated substrings with (Length,Distance) |
---|
80 | markers which mean during decompression: Go back Distance octets |
---|
81 | in output stream and copy Length bytes to the output stream. |
---|
82 | |
---|
83 | Huffman Coding: |
---|
84 | A Huffman code for a set of values V assigns a unique bitsequence |
---|
85 | to each value in V. A bitsequence is a sequence of 0's and 1'. |
---|
86 | An important property of Huffman codes is that if X is a bitsequence |
---|
87 | for a value in V then no other value in V has a bitsequence |
---|
88 | with X as a prefix of that sequence. This means that if you see |
---|
89 | the bitsequence X in the stream you know that this denotes the value |
---|
90 | v and you don't have to read any more bits. |
---|
91 | |
---|
92 | |
---|
93 | Blocks: |
---|
94 | A deflated file is a sequence of blocks. There are three types of |
---|
95 | blocks: |
---|
96 | 1. uncompressed - The block simply contains the same sequence of |
---|
97 | octets as were found in the input stream. This type of block |
---|
98 | is useful when the input stream has already been compressed (e.g. |
---|
99 | it's a jpg or gif file) as compressing a compressed file often |
---|
100 | results in the file getting larger. |
---|
101 | |
---|
102 | 2. compressed with fixed Huffman code - The block contains a |
---|
103 | huffman-coded LZ77 compressed bitsequence. The huffman code |
---|
104 | used is specified by the deflate algorithm. This type of block |
---|
105 | is useful when the octet sequence is short since in that case |
---|
106 | the overhead of creating a custom huffman code is more than is gained |
---|
107 | by that custom code. |
---|
108 | |
---|
109 | 3. compressed with a custom Huffman code - The block contains |
---|
110 | a description of a Huffman code to be used in this block only |
---|
111 | and then a Huffman-code LZ77 compressed bitsequence. The values |
---|
112 | that describe the custome huffman tree are themselves huffman coded. |
---|
113 | |
---|
114 | |
---|
115 | |
---|
116 | |# |
---|
117 | |
---|
118 | (defpackage :util.zip (:use :common-lisp :excl) |
---|
119 | (:export #:inflate |
---|
120 | #:skip-gzip-header)) |
---|
121 | |
---|
122 | |
---|
123 | (in-package :util.zip) |
---|
124 | |
---|
125 | (provide :inflate) |
---|
126 | |
---|
127 | (defun inflate (p op) |
---|
128 | ;; user callable |
---|
129 | ;; inflate the stream p into the stream op |
---|
130 | ;; both streams should be unsigned-byte 8 |
---|
131 | ;; |
---|
132 | (let ((br (new-bit-reader p)) |
---|
133 | (buffer (make-array (* 32 1024) :element-type '(unsigned-byte 8))) |
---|
134 | (end 0)) |
---|
135 | (loop |
---|
136 | (if* (null (setq end (process-deflate-block br op buffer end))) |
---|
137 | then ; last block, we're all done |
---|
138 | (return))))) |
---|
139 | |
---|
140 | |
---|
141 | |
---|
142 | |
---|
143 | ;;; ------------ gzip support |
---|
144 | ; |
---|
145 | ; gzip preceeds files with a header and the only support we need |
---|
146 | ; give to handle gzip files is the ability to skip the header |
---|
147 | ; and get to the meat of the file |
---|
148 | |
---|
149 | |
---|
150 | ; gzip constants |
---|
151 | |
---|
152 | ; compression strategies (only one supported) |
---|
153 | (defconstant z_deflated 8) |
---|
154 | |
---|
155 | ; flag bits |
---|
156 | (defconstant gz_ascii_flags #x01) ; file probably ascii |
---|
157 | (defconstant gz_head_crc #x02) ; header crc present |
---|
158 | (defconstant gz_extra_field #x04) ; extra field present |
---|
159 | (defconstant gz_orig_name #x08) ; original file name present |
---|
160 | (defconstant gz_comment #x10) ; file comment present |
---|
161 | (defconstant gz_reserved #xe0) ; no bits allowed on here |
---|
162 | |
---|
163 | (defun skip-gzip-header (p) |
---|
164 | ;; If the next thing in the stream p is gzip header then skip |
---|
165 | ;; past it and return t. |
---|
166 | ;; If it's not a gzip header than return nil |
---|
167 | ;; If it's starts to look like a gzip header but turns out to |
---|
168 | ;; not be valid signal an error. Note that the first byte of |
---|
169 | ;; a gzip header is an illegal byte to begin a deflated stream so |
---|
170 | ;; that if the first byte matches a gzip header but the rest do not |
---|
171 | ;; then the stream was positioned at neither a gzip header nor a |
---|
172 | ;; deflated stream |
---|
173 | ; |
---|
174 | ;; see check_header in gzio.c in rpm zlib-1.1.3 (or variant) |
---|
175 | ;; for details on what's in the header. |
---|
176 | |
---|
177 | (let (method flags) |
---|
178 | |
---|
179 | ; look for magic number |
---|
180 | (if* (not (eql #x1f (read-byte p))) |
---|
181 | then ; not a gzip header, may be a deflate block |
---|
182 | (unread-char (code-char #x1f) p) |
---|
183 | (return-from skip-gzip-header nil)) |
---|
184 | |
---|
185 | |
---|
186 | ; now check the second magic number |
---|
187 | (if* (not (eql #x8b (read-byte p))) |
---|
188 | then (error "non gzip magic number")) |
---|
189 | |
---|
190 | (setq method (read-byte p) |
---|
191 | flags (read-byte p)) |
---|
192 | |
---|
193 | (if* (or (not (eql method z_deflated)) |
---|
194 | (not (zerop (logand flags gz_reserved)))) |
---|
195 | then (error "bad method/flags in header")) |
---|
196 | |
---|
197 | ; discard time, xflags and os code */ |
---|
198 | (dotimes (i 6) (read-byte p)) |
---|
199 | |
---|
200 | ; discard extra field if present |
---|
201 | (if* (logtest flags gz_extra_field) |
---|
202 | then (let ((length (+ (read-byte p) |
---|
203 | (ash (read-byte p) 8)))) |
---|
204 | (dotimes (i length) (read-byte p)))) |
---|
205 | |
---|
206 | (if* (logtest flags gz_orig_name) |
---|
207 | then ; discard name of file, null terminated |
---|
208 | (do ((val (read-byte p) (read-byte p))) |
---|
209 | ((zerop val)))) |
---|
210 | |
---|
211 | (if* (logtest flags gz_comment) |
---|
212 | then ; discard comment, null terminated |
---|
213 | (do ((val (read-byte p) (read-byte p))) |
---|
214 | ((zerop val)))) |
---|
215 | |
---|
216 | (if* (logtest flags gz_head_crc) |
---|
217 | then ; discard header crc |
---|
218 | (dotimes (i 2) (read-byte p))) |
---|
219 | |
---|
220 | ; success! |
---|
221 | t |
---|
222 | )) |
---|
223 | |
---|
224 | ;;;----------- end gzip support |
---|
225 | |
---|
226 | |
---|
227 | |
---|
228 | ;;;----------- support for reading bitfields from a stream |
---|
229 | |
---|
230 | |
---|
231 | (defstruct bit-reader |
---|
232 | stream |
---|
233 | last-byte ; last byte read, possibly two combined bytes too |
---|
234 | bits ; bits left of last byte to use |
---|
235 | ) |
---|
236 | |
---|
237 | (defparameter *maskarray* |
---|
238 | ;; for a bit length, mask off junk bits |
---|
239 | (make-array 17 |
---|
240 | :initial-contents |
---|
241 | '(#x0 |
---|
242 | #x1 #x3 #x7 #xf |
---|
243 | #x1f #x3f #x7f #xff |
---|
244 | #x1ff #x3ff #x7ff #xfff |
---|
245 | #x1fff #x3fff #x7fff #xffff))) |
---|
246 | |
---|
247 | ;; bit reader |
---|
248 | (defun new-bit-reader (stream) |
---|
249 | ; create and initialize bit reader |
---|
250 | (make-bit-reader :stream stream :last-byte 0 :bits 0)) |
---|
251 | |
---|
252 | (defun reset-bit-reader (br) |
---|
253 | ; clear out unused bit of the current byte |
---|
254 | (setf (bit-reader-bits br) 0)) |
---|
255 | |
---|
256 | (defun read-bits (br count) |
---|
257 | ;; return a value from the current bit reader. |
---|
258 | ;; the count can be from 1 to 16 |
---|
259 | ;; |
---|
260 | |
---|
261 | (if* (eql count 0) |
---|
262 | then (return-from read-bits 0)) |
---|
263 | |
---|
264 | |
---|
265 | (let ((last-byte (bit-reader-last-byte br)) |
---|
266 | (bits (bit-reader-bits br))) |
---|
267 | (loop |
---|
268 | (if* (>= bits count) |
---|
269 | then ;we have enough now |
---|
270 | (if* (> bits count) |
---|
271 | then ; we have some left over |
---|
272 | (setf (bit-reader-last-byte br) |
---|
273 | (ash last-byte (- count))) |
---|
274 | (setf (bit-reader-bits br) (- bits count)) |
---|
275 | (return (logand last-byte (svref *maskarray* count))) |
---|
276 | else ; no bits left |
---|
277 | (setf (bit-reader-bits br) 0) |
---|
278 | (setf (bit-reader-last-byte br) 0) |
---|
279 | (return last-byte) |
---|
280 | ) |
---|
281 | else ; need a new byte |
---|
282 | (let ((new-byte (read-byte (bit-reader-stream br)))) |
---|
283 | (setq last-byte (+ last-byte |
---|
284 | (ash new-byte bits))) |
---|
285 | (incf bits 8)))))) |
---|
286 | |
---|
287 | |
---|
288 | |
---|
289 | ;;;----------- end bitfield reading |
---|
290 | |
---|
291 | |
---|
292 | |
---|
293 | |
---|
294 | ;;;----------- build constant tables needed by the algorithm |
---|
295 | |
---|
296 | ;; The tables needed to decode length and distance values |
---|
297 | ;; A compressed file contains a sequence of literal character values |
---|
298 | ;; or (length,distance) pairs. The length is computed by taking |
---|
299 | ;; the length-value in the file and using these tables to bind |
---|
300 | ;; a base length value and the number of extra bits to read from the file |
---|
301 | ;; and then to add to the length value. |
---|
302 | ;; The same is done for distance. |
---|
303 | |
---|
304 | (defvar *base-length*) ; array mapping code to length value |
---|
305 | (defvar *length-extra-bits*) ; array saying how many more bitsworth to read |
---|
306 | |
---|
307 | (defvar *base-distance*) |
---|
308 | (defvar *distance-extra-bits*) |
---|
309 | |
---|
310 | |
---|
311 | ; build those arrays at load time: |
---|
312 | |
---|
313 | (progn |
---|
314 | (setq *base-length* (make-array (1+ (- 285 257))) |
---|
315 | *length-extra-bits* (make-array (1+ (- 285 257)))) |
---|
316 | |
---|
317 | (let ((len 3) |
---|
318 | (ind 0)) |
---|
319 | (dolist (ent '((8 0) ; count and number of extra bits |
---|
320 | (4 1) (4 2) (4 3) (4 4) (4 5) (1 0))) |
---|
321 | (dotimes (i (car ent)) |
---|
322 | (setf (svref *base-length* ind) len) |
---|
323 | (setf (svref *length-extra-bits* ind) (cadr ent)) |
---|
324 | (incf ind 1) |
---|
325 | (incf len (ash 1 (cadr ent))) |
---|
326 | ) |
---|
327 | ; special case, code 285 is length 258. |
---|
328 | (setf (svref *base-length* (- 285 257)) 258) |
---|
329 | )) |
---|
330 | |
---|
331 | (setq *base-distance* (make-array (1+ (- 29 0))) |
---|
332 | *distance-extra-bits* (make-array (1+ (- 29 0)))) |
---|
333 | |
---|
334 | (let ((dist 1) |
---|
335 | (ind 0)) |
---|
336 | (dolist (ent '((4 0) ; count and number of extra bits |
---|
337 | (2 1) (2 2) (2 3) (2 4) (2 5) (2 6) (2 7) (2 8) |
---|
338 | (2 9) (2 10) (2 11) (2 12) (2 13))) |
---|
339 | (dotimes (i (car ent)) |
---|
340 | (setf (svref *base-distance* ind) dist) |
---|
341 | (setf (svref *distance-extra-bits* ind) (cadr ent)) |
---|
342 | (incf ind 1) |
---|
343 | (incf dist (ash 1 (cadr ent))))))) |
---|
344 | |
---|
345 | |
---|
346 | |
---|
347 | |
---|
348 | ;;;----------- end table building |
---|
349 | |
---|
350 | |
---|
351 | |
---|
352 | ;;;----------- Huffman tree support |
---|
353 | |
---|
354 | (defstruct (bitinfo (:type list)) |
---|
355 | ;; when we describe a range of values and the code width we |
---|
356 | ;; use a list of three elements. this structure describes it |
---|
357 | minval |
---|
358 | maxval |
---|
359 | bitwidth) |
---|
360 | |
---|
361 | |
---|
362 | ;test case |
---|
363 | ; (generate-huffman-tree '((0 4 3) (5 5 2) (6 7 4))) |
---|
364 | ; will generate sample table from the Deutsch paper |
---|
365 | ; |
---|
366 | |
---|
367 | (defun generate-huffman-tree (bitinfo) |
---|
368 | ;; bitinfo is a list of bitinfo items (minval maxval bitwidth) |
---|
369 | ;; which means that values from minval through maxval are |
---|
370 | ;; to be represented by codes of width bitwidth. |
---|
371 | ;; |
---|
372 | ;; we return two valuse: the huffman tree and the mininum bit width |
---|
373 | ;; |
---|
374 | (let ((maxval 0) |
---|
375 | (minval most-positive-fixnum) |
---|
376 | (maxbitwidth 0) |
---|
377 | (minbitwidth most-positive-fixnum) |
---|
378 | bitwidthcounts |
---|
379 | valuecode |
---|
380 | valuewidth |
---|
381 | nextcode |
---|
382 | ) |
---|
383 | ; find out the range of values (well the max) and the max bit width |
---|
384 | (dolist (bi bitinfo) |
---|
385 | (setq maxval (max maxval (bitinfo-maxval bi))) |
---|
386 | (setq minval (min minval (bitinfo-minval bi))) |
---|
387 | (setq maxbitwidth (max maxbitwidth (bitinfo-bitwidth bi))) |
---|
388 | (setq minbitwidth (min minbitwidth (bitinfo-bitwidth bi))) |
---|
389 | ) |
---|
390 | |
---|
391 | ; per bitwidth arrays |
---|
392 | (setq bitwidthcounts (make-array (1+ maxbitwidth) |
---|
393 | :initial-element 0)) |
---|
394 | (setq nextcode (make-array (1+ maxbitwidth) |
---|
395 | :initial-element 0)) |
---|
396 | |
---|
397 | ; per value arrays |
---|
398 | (setq valuecode (make-array (1+ (- maxval minval)))) ; huffman code chose |
---|
399 | (setq valuewidth (make-array (1+ (- maxval minval)) |
---|
400 | :initial-element 0)) ; bit width |
---|
401 | |
---|
402 | (dolist (bi bitinfo) |
---|
403 | ; set valuewidth array from the given data |
---|
404 | (do ((v (bitinfo-minval bi) (1+ v))) |
---|
405 | ((> v (bitinfo-maxval bi))) |
---|
406 | (setf (svref valuewidth (- v minval)) (bitinfo-bitwidth bi))) |
---|
407 | |
---|
408 | ; keep track of how many huffman codes will have a certain bit width |
---|
409 | (incf (svref bitwidthcounts (bitinfo-bitwidth bi)) |
---|
410 | (1+ (- (bitinfo-maxval bi) (bitinfo-minval bi)))) |
---|
411 | ) |
---|
412 | |
---|
413 | |
---|
414 | |
---|
415 | ; compute the starting code for each bit width |
---|
416 | (let ((code 0)) |
---|
417 | (dotimes (widthm1 maxbitwidth) |
---|
418 | (setq code |
---|
419 | (ash (+ code (svref bitwidthcounts widthm1)) 1)) |
---|
420 | (setf (svref nextcode (1+ widthm1)) code))) |
---|
421 | |
---|
422 | ; compute the huffman code for each value |
---|
423 | (do ((v minval (1+ v))) |
---|
424 | ((> v maxval)) |
---|
425 | (let ((width (svref valuewidth (- v minval)))) |
---|
426 | (if* (not (zerop width)) |
---|
427 | then ; must assign a code |
---|
428 | (setf (svref valuecode (- v minval)) |
---|
429 | (svref nextcode width)) |
---|
430 | (incf (svref nextcode width))))) |
---|
431 | |
---|
432 | ;; now we know the code for each value in the valuecode array |
---|
433 | ;; |
---|
434 | ;; now compute the tree |
---|
435 | (values (build-huffman-tree |
---|
436 | minval |
---|
437 | (mapcar #'(lambda (bi) (cons (car bi) (cadr bi))) bitinfo) |
---|
438 | valuecode valuewidth 1) |
---|
439 | ; second value useful for decoding: |
---|
440 | minbitwidth))) |
---|
441 | |
---|
442 | |
---|
443 | (defun build-huffman-tree (minval minmaxes valuecode valuewidth pos) |
---|
444 | ;; compute a huffman cons tree |
---|
445 | ;; minmaxes is a list of conses. each cons |
---|
446 | ;; representing a (min . max) range of values. |
---|
447 | ;; |
---|
448 | |
---|
449 | (multiple-value-bind (zero one) (split-on-position minval minmaxes |
---|
450 | valuecode |
---|
451 | valuewidth |
---|
452 | pos) |
---|
453 | (cons (if* (consp zero) |
---|
454 | then (build-huffman-tree minval |
---|
455 | zero valuecode valuewidth (1+ pos)) |
---|
456 | else zero) |
---|
457 | (if* (consp one) |
---|
458 | then (build-huffman-tree minval one valuecode valuewidth (1+ pos)) |
---|
459 | else one)))) |
---|
460 | |
---|
461 | (defun split-on-position (minval minmaxes valuecode valuewidth pos) |
---|
462 | ;; compute those values that have a zero in the pos (1 based) position |
---|
463 | ;; of their code and those that have one in that position. |
---|
464 | ;; return two values, the zero set and the one set. |
---|
465 | ;; The position is from the msbit of the huffman code. |
---|
466 | ;; |
---|
467 | ;; If the value of the specified pos selects a specific value |
---|
468 | ;; and no further bits need be read to identify that value then |
---|
469 | ;; we return that value rather than a list of conses. |
---|
470 | |
---|
471 | (let (zero one) |
---|
472 | (dolist (mm minmaxes) |
---|
473 | (do ((v (car mm) (1+ v))) |
---|
474 | ((> v (cdr mm))) |
---|
475 | (let ((width (svref valuewidth (- v minval))) |
---|
476 | (code (svref valuecode (- v minval)))) |
---|
477 | (if* (logbitp (- width pos) code) |
---|
478 | then ; one bit set |
---|
479 | (if* (eql width pos) |
---|
480 | then ; last bit |
---|
481 | (setq one v) |
---|
482 | else ; more bits to check |
---|
483 | (let ((firstone (car one))) |
---|
484 | (if* (and firstone |
---|
485 | (eq (cdr firstone) (1- v))) |
---|
486 | then ; increase renge |
---|
487 | (setf (cdr firstone) v) |
---|
488 | else (push (cons v v) one)))) |
---|
489 | else ; zero bit set |
---|
490 | (if* (eql width pos) |
---|
491 | then ; last bit |
---|
492 | (setq zero v) |
---|
493 | else ; more bits to check |
---|
494 | (let ((firstzero (car zero))) |
---|
495 | (if* (and firstzero |
---|
496 | (eq (cdr firstzero) (1- v))) |
---|
497 | then ; increase renge |
---|
498 | (setf (cdr firstzero) v) |
---|
499 | else (push (cons v v) zero)))))))) |
---|
500 | (values |
---|
501 | (if* (consp zero) then (nreverse zero) else zero) ; order numerically |
---|
502 | (if* (consp one) then (nreverse one) else one)))) |
---|
503 | |
---|
504 | |
---|
505 | (defun generate-huffman-tree-from-vector (vector start end) |
---|
506 | ;; generate huffman tree from items in the vector from start to end-1 |
---|
507 | ;; assume start corresponds to value 0 in the tree |
---|
508 | (do ((i start (1+ i)) |
---|
509 | (val 0 (1+ val)) |
---|
510 | (res)) |
---|
511 | ((>= i end) |
---|
512 | (generate-huffman-tree (nreverse res))) |
---|
513 | (let ((len (svref vector i))) |
---|
514 | (if* (> len 0) |
---|
515 | then (push (list val val len) res))))) |
---|
516 | |
---|
517 | |
---|
518 | |
---|
519 | |
---|
520 | |
---|
521 | ;; the huffman tree to use for type 1 blocks |
---|
522 | ;; |
---|
523 | (defparameter *fixed-huffman-tree* |
---|
524 | (generate-huffman-tree '((0 143 8) (144 255 9) (256 279 7) (280 287 8)))) |
---|
525 | |
---|
526 | ;; distance are represented by a trivial huffman code |
---|
527 | (defparameter *fixed-huffman-distance-tree* |
---|
528 | (generate-huffman-tree '((0 31 5)))) |
---|
529 | |
---|
530 | |
---|
531 | ;;;----------- end Huffman support |
---|
532 | |
---|
533 | |
---|
534 | |
---|
535 | |
---|
536 | (defun process-deflate-block (br op buffer end) |
---|
537 | ;; br is a bit stream, op is the output stream |
---|
538 | ;; process the next block in the stream |
---|
539 | ;; return false if this is the last block of data else |
---|
540 | ;; return the next index into the buffer |
---|
541 | (let ((bfinal (read-bits br 1)) |
---|
542 | (btype (read-bits br 2))) |
---|
543 | |
---|
544 | (setq end |
---|
545 | (case btype |
---|
546 | (0 (process-non-compressed-block br op buffer end)) |
---|
547 | (1 (process-fixed-huffman-block br op buffer end)) |
---|
548 | (2 (process-dynamic-huffman-block br op buffer end)) |
---|
549 | (3 (error "illegal deflate block value")))) |
---|
550 | (if* (eql bfinal 1) |
---|
551 | then (flush-buffer op buffer end) |
---|
552 | nil |
---|
553 | else end) |
---|
554 | )) |
---|
555 | |
---|
556 | |
---|
557 | |
---|
558 | (defun process-non-compressed-block (br op buffer end) |
---|
559 | ;; process a block of uncompressed data |
---|
560 | (reset-bit-reader br) |
---|
561 | (let ((p (bit-reader-stream br))) |
---|
562 | (let ((len (read-uword p)) |
---|
563 | (onecomplen (read-uword p))) |
---|
564 | (if* (not (eql len (logxor #xffff onecomplen))) |
---|
565 | then (error "bad length value in non compressed block")) |
---|
566 | (dotimes (i len) |
---|
567 | (setq end (put-byte-in-buffer op (read-byte p) buffer end)))) |
---|
568 | end)) |
---|
569 | |
---|
570 | (defun read-uword (stream) |
---|
571 | ;; read a little endian value |
---|
572 | (+ (read-byte stream) (ash (read-byte stream) 8))) |
---|
573 | |
---|
574 | (defun put-byte-in-buffer (op byte buffer end) |
---|
575 | ;; store the next output byte in the buffer |
---|
576 | (if* (>= end (length buffer)) |
---|
577 | then (flush-buffer op buffer end) |
---|
578 | (setq end 0)) |
---|
579 | (setf (aref buffer end) byte) |
---|
580 | (1+ end)) |
---|
581 | |
---|
582 | (defun flush-buffer (op buffer end) |
---|
583 | ;; send bytes to the output stream. If op isn't a stream |
---|
584 | ;; then it must be a function to funcall to take the bytes. |
---|
585 | (if* (> end 0) |
---|
586 | then (if* (streamp op) |
---|
587 | then (write-sequence buffer op :end end) |
---|
588 | else (funcall op buffer end)))) |
---|
589 | |
---|
590 | |
---|
591 | |
---|
592 | |
---|
593 | |
---|
594 | (defun process-fixed-huffman-block (br op buffer end) |
---|
595 | ;; process a huffman block with the standard huffman tree |
---|
596 | ;; |
---|
597 | (process-huffman-block br op *fixed-huffman-tree* 7 *fixed-huffman-distance-tree* 5 |
---|
598 | buffer end)) |
---|
599 | |
---|
600 | (defun process-huffman-block (br op |
---|
601 | lengthlit-tree minwidth |
---|
602 | distance-tree mindistwidth |
---|
603 | buffer end) |
---|
604 | ;; the common code for blocks of type 1 and 2 that does |
---|
605 | ;; the decompression given a length/literal huffman tree |
---|
606 | ;; and a distance huffman tree. |
---|
607 | ;; If the distance tree is nil then we use the trivial huffman |
---|
608 | ;; code from the algorithm. |
---|
609 | ;; |
---|
610 | (let* ((bufflen (length buffer)) |
---|
611 | length |
---|
612 | distance |
---|
613 | ) |
---|
614 | |
---|
615 | |
---|
616 | (loop |
---|
617 | (let ((value (decode-huffman-tree br lengthlit-tree minwidth))) |
---|
618 | (if* (< value 256) |
---|
619 | then ; output and add to buffer |
---|
620 | (setq end (put-byte-in-buffer op value buffer end)) |
---|
621 | |
---|
622 | elseif (eql value 256) |
---|
623 | then (return) ; end of block |
---|
624 | else ; we have a length byte |
---|
625 | ; compute length, distance |
---|
626 | |
---|
627 | (let ((adj-code (- value 257))) |
---|
628 | (setq length (+ (svref *base-length* adj-code) |
---|
629 | (read-bits br (svref *length-extra-bits* |
---|
630 | adj-code))))) |
---|
631 | |
---|
632 | (let ((dist-code (if* distance-tree |
---|
633 | then (decode-huffman-tree br |
---|
634 | distance-tree |
---|
635 | mindistwidth) |
---|
636 | else (read-bits br 5)))) |
---|
637 | (setq distance |
---|
638 | (+ (svref *base-distance* dist-code) |
---|
639 | (read-bits br (svref *distance-extra-bits* |
---|
640 | dist-code))))) |
---|
641 | |
---|
642 | ; copy in bytes |
---|
643 | (do ((i (mod (- end distance) bufflen) (1+ i)) |
---|
644 | (count length (1- count))) |
---|
645 | ((<= count 0)) |
---|
646 | (if* (>= i bufflen) then (setf i 0)) |
---|
647 | (setq end (put-byte-in-buffer op |
---|
648 | (aref buffer i) |
---|
649 | buffer |
---|
650 | end)))))) |
---|
651 | ; return where we left off |
---|
652 | end)) |
---|
653 | |
---|
654 | |
---|
655 | |
---|
656 | (defparameter *code-index* |
---|
657 | ;; order of elements in the code index values |
---|
658 | ;; pretty crazy, eh? |
---|
659 | (make-array 19 |
---|
660 | :initial-contents |
---|
661 | '(16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15))) |
---|
662 | |
---|
663 | |
---|
664 | (defun process-dynamic-huffman-block (br op buffer end) |
---|
665 | ;; process a block that includes a personalized huffman tree |
---|
666 | ;; just for this block |
---|
667 | (let ((hlit (read-bits br 5)) |
---|
668 | (hdist (read-bits br 5)) |
---|
669 | (hclen (read-bits br 4)) |
---|
670 | |
---|
671 | code-length-huffman-tree |
---|
672 | (minlen 9999) |
---|
673 | ) |
---|
674 | |
---|
675 | ; read in the huffman code width of each of the numbers |
---|
676 | ; from 0 18... this will be then used to create a huffman tree |
---|
677 | ; |
---|
678 | (let ((codevec (make-array 19 :initial-element 0)) |
---|
679 | (len)) |
---|
680 | |
---|
681 | (dotimes (i (+ hclen 4)) |
---|
682 | (setf (svref codevec |
---|
683 | (svref *code-index* i)) |
---|
684 | (setq len (read-bits br 3))) |
---|
685 | (if* (> len 0) then (setq minlen (min len minlen)))) |
---|
686 | |
---|
687 | |
---|
688 | |
---|
689 | (setq code-length-huffman-tree |
---|
690 | (generate-huffman-tree-from-vector codevec 0 (length codevec)))) |
---|
691 | |
---|
692 | ; now we're in position to read the code lengths for the |
---|
693 | ; huffman table that will allow us to read the data. |
---|
694 | ; (Is this a nutty algorithm or what??) |
---|
695 | ; |
---|
696 | (let ((bigvec (make-array (+ hlit 257 hdist 1) |
---|
697 | :initial-element 0)) |
---|
698 | (index 0)) |
---|
699 | (loop |
---|
700 | (if* (>= index (length bigvec)) then (return)) |
---|
701 | (let ((val (decode-huffman-tree br code-length-huffman-tree minlen))) |
---|
702 | (if* (<= val 15) |
---|
703 | then ; literal value |
---|
704 | (setf (svref bigvec index) val) |
---|
705 | (incf index) |
---|
706 | elseif (eql val 16) |
---|
707 | then ; repeat prev |
---|
708 | (let ((prev-val (svref bigvec (1- index)))) |
---|
709 | (dotimes (i (+ 3 (read-bits br 2))) |
---|
710 | (setf (svref bigvec index) prev-val) |
---|
711 | (incf index))) |
---|
712 | elseif (eq val 17) |
---|
713 | then ; repeat zero |
---|
714 | (dotimes (i (+ 3 (read-bits br 3))) |
---|
715 | (setf (svref bigvec index) 0) |
---|
716 | (incf index)) |
---|
717 | elseif (eq val 18) |
---|
718 | then ; repeat zero a lot |
---|
719 | (dotimes (i (+ 11 (read-bits br 7))) |
---|
720 | (setf (svref bigvec index) 0) |
---|
721 | (incf index))))) |
---|
722 | |
---|
723 | (let (literal-length-huffman litlen-width |
---|
724 | distance-huffman distance-width) |
---|
725 | (multiple-value-setq (literal-length-huffman litlen-width) |
---|
726 | (generate-huffman-tree-from-vector bigvec 0 (+ hlit 257))) |
---|
727 | |
---|
728 | (multiple-value-setq (distance-huffman distance-width) |
---|
729 | (generate-huffman-tree-from-vector bigvec (+ hlit 257) |
---|
730 | (length bigvec))) |
---|
731 | |
---|
732 | (process-huffman-block br op literal-length-huffman litlen-width |
---|
733 | distance-huffman distance-width |
---|
734 | buffer end) |
---|
735 | )))) |
---|
736 | |
---|
737 | |
---|
738 | |
---|
739 | (defun decode-huffman-tree (br tree minbits) |
---|
740 | ;; find the next huffman encoded value. |
---|
741 | ; the minimum length of a huffman code is minbits so |
---|
742 | ; grab that many bits right away to speed processing and the |
---|
743 | ; go bit by bit until the answer is found |
---|
744 | (let ((startval (read-bits br minbits))) |
---|
745 | (dotimes (i minbits) |
---|
746 | (if* (logtest 1 startval) |
---|
747 | then (setq tree (cdr tree)) |
---|
748 | else (setq tree (car tree))) |
---|
749 | (setq startval (ash startval -1))) |
---|
750 | (loop |
---|
751 | (if* (atom tree) |
---|
752 | then (return tree) |
---|
753 | else (if* (eql 1 (read-bits br 1)) |
---|
754 | then (setq tree (cdr tree)) |
---|
755 | else (setq tree (car tree))))))) |
---|
756 | |
---|
757 | |
---|
758 | |
---|
759 | |
---|
760 | |
---|
761 | |
---|
762 | ;;; test case... |
---|
763 | ;; Read file created with gzip and write the uncompressed version |
---|
764 | ;; to another file. |
---|
765 | ;; |
---|
766 | ;; Porting note: the open below works on ACL since it creates |
---|
767 | ;; a bivalent simple-stream. If you run this on other lispsj |
---|
768 | ;; you'll want to specify an :element-type of '(unsigned-byte 8) |
---|
769 | ;; |
---|
770 | #+ignore |
---|
771 | (defun testit (&optional (filename "foo.n.gz") (output-filename "out")) |
---|
772 | (with-open-file (p filename :direction :input) |
---|
773 | (skip-gzip-header p) |
---|
774 | (with-open-file (op output-filename :direction :output |
---|
775 | :if-exists :supersede) |
---|
776 | (inflate p op)))) |
---|