Adaptive Huffman Coding

LZH implementation in Pascal

Contributor: DOUGLAS WEBB

Unit LZH;

 {$A+,B-,D-,E-,F-,I+,L-,N-,O-,R-,S-,V-}

(*
 * LZHUF.C English version 1.0
 * Based on Japanese version 29-NOV-1988
 * LZSS coded by Haruhiko OKUMURA
 * Adaptive Huffman Coding coded by Haruyasu YOSHIZAKI
 * Edited and translated to English by Kenji RIKITAKE
 * Translated from C to Turbo Pascal by Douglas Webb 2/18/91
 * Update and bug correction of TP version 4/29/91 (Sorry!!)
 *)

{
     This Unit allows the user to commpress data using a combination of
   LZSS Compression and adaptive Huffman coding, or conversely to deCompress
   data that was previously Compressed by this Unit.

     There are a number of options as to where the data being Compressed/
   deCompressed is coming from/going to.

    In fact it requires that you pass the "LZHPack" Procedure 2 procedural
  parameter of Type 'GetProcType' and 'PutProcType' (declared below) which
  will accept 3 parameters and act in every way like a 'BlockRead'/'BlockWrite'
  Procedure call. Your 'GetProcType' Procedure should return the data
  to be Compressed, and Your 'PutProcType' Procedure should do something with
  the Compressed data (ie., put it in a File). In Case you need to know (and
  you do if you want to deCompress this data again) the number of Bytes in the
  Compressed data (original, not Compressed size) is returned in 'Bytes_Written'.

  GetBytesProc = Procedure(Var DTA; NBytes:Word; Var Bytes_Got : Word);

  DTA is the start of a memory location where the inFormation returned should
  be. NBytes is the number of Bytes requested. The actual number of Bytes
  returned must be passed in Bytes_Got (if there is no more data then 0
  should be returned).

  PutBytesProc = Procedure(Var DTA; NBytes:Word; Var Bytes_Got : Word);

  As above except instead of asking For data the Procedure is dumping out
  Compressed data, do somthing With it.

    "LZHUnPack" is basically the same thing in reverse. It requires
  procedural parameters of Type 'PutProcType'/'GetProcType' which
  will act as above. 'GetProcType' must retrieve data Compressed using
  "LZHPack" (above) and feed it to the unpacking routine as requested.
  'PutProcType' must accept the deCompressed data and do something
  withit. You must also pass in the original size of the deCompressed data,
  failure to do so will have adverse results.

     Don't Forget that as procedural parameters the 'GetProcType'/'PutProcType'
  Procedures must be Compiled in the 'F+' state to avoid a catastrophe.

}

{ note: All the large data structures For these routines are allocated when
  needed from the heap, and deallocated when finished. So when not in use
  memory requirements are minimal. However, this Unit Uses about 34K of
  heap space, and 400 Bytes of stack when in use. }

Interface

Type

  PutBytesProc = Procedure(Var DTA; NBytes : Word; Var Bytes_Put : Word);
  GetBytesProc = Procedure(Var DTA; NBytes : Word; Var Bytes_Got : Word);

Procedure LZHPack(Var Bytes_Written : LongInt;
                      GetBytes : GetBytesProc;
                      PutBytes : PutBytesProc);

Procedure LZHUnpack(TextSize : LongInt;
                    GetBytes : GetBytesProc;
                    PutBytes : PutBytesProc);

Implementation

Const
  Exit_OK = 0;
  Exit_FAILED = 1;

  { LZSS Parameters }
  N = 4096; { Size of String buffer }
  F = 60; { Size of look-ahead buffer }
  THRESHOLD = 2;
  NUL = N; { end of tree's node }

  { Huffman coding parameters }
  N_Char = (256 - THRESHOLD + F);

  { Character code (:= 0..N_Char-1) }
  T = (N_Char * 2 - 1); { Size of table }
  R = (T - 1); { root position }

  { update when cumulative frequency }
  { reaches to this value }
  MAX_FREQ = $8000;

{
 * Tables For encoding/decoding upper 6 bits of
 * sliding dictionary Pointer
 }

  { encoder table }
  p_len : Array[0..63] of Byte =
  ($03, $04, $04, $04, $05, $05, $05, $05,
   $05, $05, $05, $05, $06, $06, $06, $06,
   $06, $06, $06, $06, $06, $06, $06, $06,
   $07, $07, $07, $07, $07, $07, $07, $07,
   $07, $07, $07, $07, $07, $07, $07, $07,
   $07, $07, $07, $07, $07, $07, $07, $07,
   $08, $08, $08, $08, $08, $08, $08, $08,
   $08, $08, $08, $08, $08, $08, $08, $08);

  p_code : Array[0..63] of Byte =
  ($00, $20, $30, $40, $50, $58, $60, $68,
   $70, $78, $80, $88, $90, $94, $98, $9C,
   $A0, $A4, $A8, $AC, $B0, $B4, $B8, $BC,
   $C0, $C2, $C4, $C6, $C8, $CA, $CC, $CE,
   $D0, $D2, $D4, $D6, $D8, $DA, $DC, $DE,
   $E0, $E2, $E4, $E6, $E8, $EA, $EC, $EE,
   $F0, $F1, $F2, $F3, $F4, $F5, $F6, $F7,
   $F8, $F9, $FA, $FB, $FC, $FD, $FE, $FF);

  { decoder table }
  d_code : Array[0..255] of Byte =
  ($00, $00, $00, $00, $00, $00, $00, $00,
   $00, $00, $00, $00, $00, $00, $00, $00,
   $00, $00, $00, $00, $00, $00, $00, $00,
   $00, $00, $00, $00, $00, $00, $00, $00,
   $01, $01, $01, $01, $01, $01, $01, $01,
   $01, $01, $01, $01, $01, $01, $01, $01,
   $02, $02, $02, $02, $02, $02, $02, $02,
   $02, $02, $02, $02, $02, $02, $02, $02,
   $03, $03, $03, $03, $03, $03, $03, $03,
   $03, $03, $03, $03, $03, $03, $03, $03,
   $04, $04, $04, $04, $04, $04, $04, $04,
   $05, $05, $05, $05, $05, $05, $05, $05,
   $06, $06, $06, $06, $06, $06, $06, $06,
   $07, $07, $07, $07, $07, $07, $07, $07,
   $08, $08, $08, $08, $08, $08, $08, $08,
   $09, $09, $09, $09, $09, $09, $09, $09,
   $0A, $0A, $0A, $0A, $0A, $0A, $0A, $0A,
   $0B, $0B, $0B, $0B, $0B, $0B, $0B, $0B,
   $0C, $0C, $0C, $0C, $0D, $0D, $0D, $0D,
   $0E, $0E, $0E, $0E, $0F, $0F, $0F, $0F,
   $10, $10, $10, $10, $11, $11, $11, $11,
   $12, $12, $12, $12, $13, $13, $13, $13,
   $14, $14, $14, $14, $15, $15, $15, $15,
   $16, $16, $16, $16, $17, $17, $17, $17,
   $18, $18, $19, $19, $1A, $1A, $1B, $1B,
   $1C, $1C, $1D, $1D, $1E, $1E, $1F, $1F,
   $20, $20, $21, $21, $22, $22, $23, $23,
   $24, $24, $25, $25, $26, $26, $27, $27,
   $28, $28, $29, $29, $2A, $2A, $2B, $2B,
   $2C, $2C, $2D, $2D, $2E, $2E, $2F, $2F,
   $30, $31, $32, $33, $34, $35, $36, $37,
   $38, $39, $3A, $3B, $3C, $3D, $3E, $3F);

  d_len : Array[0..255] of Byte =
  ($03, $03, $03, $03, $03, $03, $03, $03,
   $03, $03, $03, $03, $03, $03, $03, $03,
   $03, $03, $03, $03, $03, $03, $03, $03,
   $03, $03, $03, $03, $03, $03, $03, $03,
   $04, $04, $04, $04, $04, $04, $04, $04,
   $04, $04, $04, $04, $04, $04, $04, $04,
   $04, $04, $04, $04, $04, $04, $04, $04,
   $04, $04, $04, $04, $04, $04, $04, $04,
   $04, $04, $04, $04, $04, $04, $04, $04,
   $04, $04, $04, $04, $04, $04, $04, $04,
   $05, $05, $05, $05, $05, $05, $05, $05,
   $05, $05, $05, $05, $05, $05, $05, $05,
   $05, $05, $05, $05, $05, $05, $05, $05,
   $05, $05, $05, $05, $05, $05, $05, $05,
   $05, $05, $05, $05, $05, $05, $05, $05,
   $05, $05, $05, $05, $05, $05, $05, $05,
   $05, $05, $05, $05, $05, $05, $05, $05,
   $05, $05, $05, $05, $05, $05, $05, $05,
   $06, $06, $06, $06, $06, $06, $06, $06,
   $06, $06, $06, $06, $06, $06, $06, $06,
   $06, $06, $06, $06, $06, $06, $06, $06,
   $06, $06, $06, $06, $06, $06, $06, $06,
   $06, $06, $06, $06, $06, $06, $06, $06,
   $06, $06, $06, $06, $06, $06, $06, $06,
   $07, $07, $07, $07, $07, $07, $07, $07,
   $07, $07, $07, $07, $07, $07, $07, $07,
   $07, $07, $07, $07, $07, $07, $07, $07,
   $07, $07, $07, $07, $07, $07, $07, $07,
   $07, $07, $07, $07, $07, $07, $07, $07,
   $07, $07, $07, $07, $07, $07, $07, $07,
   $08, $08, $08, $08, $08, $08, $08, $08,
   $08, $08, $08, $08, $08, $08, $08, $08);

  getbuf : Word = 0;
  getlen : Byte = 0;
  putlen : Byte = 0;
  putbuf : Word = 0;
  TextSize : LongInt = 0;
  codesize : LongInt = 0;
  printcount : LongInt = 0;
  match_position : Integer = 0;
  match_length : Integer = 0;

Type
  FreqType = Array[0..T] of Word;
  FreqPtr = ^FreqType;
  PntrType = Array[0..pred(T + N_Char)] of Integer;
  pntrPtr = ^PntrType;
  SonType = Array[0..pred(T)] of Integer;
  SonPtr = ^SonType;
  TextBufType = Array[0..N + F - 2] of Byte;
  TBufPtr = ^TextBufType;
  WordRay = Array[0..N] of Integer;
  WordRayPtr = ^WordRay;
  BWordRay = Array[0..N + 256] of Integer;
  BWordRayPtr = ^BWordRay;

Var
  Text_buf : TBufPtr;
  lson, dad : WordRayPtr;
  rson : BWordRayPtr;
  freq : FreqPtr; { cumulative freq table }

{
 * pointing parent nodes.
 * area [T..(T + N_Char - 1)] are Pointers For leaves
 }
  prnt : pntrPtr;

  { pointing children nodes (son[], son[] + 1)}
  son : SonPtr;

  Procedure InitTree; { Initializing tree }
  Var
    i : Integer;
  begin
    For i := N + 1 to N + 256 do
      rson^[i] := NUL; { root }
    For i := 0 to N do
      dad^[i] := NUL; { node }
  end;

  Procedure InsertNode(R : Integer); { Inserting node to the tree }
  Var
    tmp, i, p, cmp : Integer;
    key : TBufPtr;
    c : Word;
  begin
    cmp := 1;
    key := @Text_buf^[R];
    p := succ(N) + key^[0];
    rson^[R] := NUL;
    lson^[R] := NUL;
    match_length := 0;
    While match_length < F do
      begin
        if (cmp >= 0) then
          begin
            if (rson^[p] <> NUL) then
              p := rson^[p]
            else
              begin
                rson^[p] := R;
                dad^[R] := p;
                Exit;
              end;
          end
        else
          begin
            if (lson^[p] <> NUL) then
              p := lson^[p]
            else
              begin
                lson^[p] := R;
                dad^[R] := p;
                Exit;
              end;
          end;
        i := 0;
        cmp := 0;
        While (i < F) and (cmp = 0) do
          begin
            inc(i);
            cmp := key^[i] - Text_buf^[p + i];
          end;
        if (i > THRESHOLD) then
          begin
            tmp := pred((R - p) and pred(N));
            if (i > match_length) then
              begin
                match_position := tmp;
                match_length := i;
              end;
            if (match_length < F) and (i = match_length) then
              begin
                c := tmp;
                if (c < match_position) then
                  match_position := c;
              end;
          end;
      end; { While True do }
    dad^[R] := dad^[p];
    lson^[R] := lson^[p];
    rson^[R] := rson^[p];
    dad^[lson^[p]] := R;
    dad^[rson^[p]] := R;
    if (rson^[dad^[p]] = p) then
      rson^[dad^[p]] := R
    else
      lson^[dad^[p]] := R;
    dad^[p] := NUL; { remove p }
  end;

  Procedure DeleteNode(p : Integer); { Deleting node from the tree }
  Var
    q : Integer;
  begin
    if (dad^[p] = NUL) then
      Exit; { unregistered }
    if (rson^[p] = NUL) then
      q := lson^[p]
    else if (lson^[p] = NUL) then
      q := rson^[p]
    else
      begin
        q := lson^[p];
        if (rson^[q] <> NUL) then
          begin
            Repeat
              q := rson^[q];
            Until (rson^[q] = NUL);
            rson^[dad^[q]] := lson^[q];
            dad^[lson^[q]] := dad^[q];
            lson^[q] := lson^[p];
            dad^[lson^[p]] := q;
          end;
        rson^[q] := rson^[p];
        dad^[rson^[p]] := q;
      end;
    dad^[q] := dad^[p];


    if (rson^[dad^[p]] = p) then
      rson^[dad^[p]] := q
    else
      lson^[dad^[p]] := q;
    dad^[p] := NUL;
  end;

  { Huffman coding parameters }

  Function GetBit(GetBytes : GetBytesProc) : Integer; { get one bit }
  Var
    i : Byte;
    i2 : Integer;
    result : Word;
  begin
    While (getlen <= 8) do
      begin
        GetBytes(i, 1, result);
        if result = 1 then
          i2 := i
        else i2 := 0;
        getbuf := getbuf or (i2 shl (8 - getlen));
        inc(getlen, 8);
      end;
    i2 := getbuf;
    getbuf := getbuf shl 1;
    dec(getlen);
    GetBit := Integer((i2 < 0));
  end;

  Function GetByte(GetBytes : GetBytesProc) : Integer; { get a Byte }
  Var
    j : Byte;
    i, result : Word;
  begin
    While (getlen <= 8) do
      begin
        GetBytes(j, 1, result);
        if result = 1 then
          i := j
        else
          i := 0;
        getbuf := getbuf or (i shl (8 - getlen));
        inc(getlen, 8);
      end;
    i := getbuf;
    getbuf := getbuf shl 8;
    dec(getlen, 8);
    GetByte := Integer(i shr 8);
  end;

  Procedure Putcode(l : Integer; c : Word;
                    PutBytes : PutBytesProc); { output c bits }
  Var
    Temp : Byte;
    Got : Word;
  begin
    putbuf := putbuf or (c shr putlen);
    inc(putlen, l);
    if (putlen >= 8) then
      begin
        Temp := putbuf shr 8;
        PutBytes(Temp, 1, Got);
        dec(putlen, 8);
        if (putlen >= 8) then
          begin
            Temp := lo(putbuf);
            PutBytes(Temp, 1, Got);
            inc(codesize, 2);
            dec(putlen, 8);
            putbuf := c shl (l - putlen);
          end
        else
          begin
            putbuf := putbuf shl 8;
            inc(codesize);
          end;
      end;
  end;

  { initialize freq tree }

  Procedure StartHuff;
  Var
    i, j : Integer;
  begin
    For i := 0 to pred(N_Char) do
      begin
        freq^[i] := 1;
        son^[i] := i + T;
        prnt^[i + T] := i;
      end;
    i := 0;
    j := N_Char;
    While (j <= R) do
      begin
        freq^[j] := freq^[i] + freq^[i + 1];
        son^[j] := i;
        prnt^[i] := j;
        prnt^[i + 1] := j;
        inc(i, 2);
        inc(j);
      end;
    freq^[T] := $ffff;
    prnt^[R] := 0;
  end;

  { reConstruct freq tree }

  Procedure reConst;
  Var
    i, j, k, tmp : Integer;
    F, l : Word;
  begin
    { halven cumulative freq For leaf nodes }
    j := 0;
    For i := 0 to pred(T) do
      begin
        if (son^[i] >= T) then
          begin
            freq^[j] := succ(freq^[i]) div 2; {@@ Bug Fix MOD -> div @@}
            son^[j] := son^[i];
            inc(j);
          end;
      end;
    { make a tree : first, connect children nodes }
    i := 0;
    j := N_Char;
    While (j < T) do
      begin
        k := succ(i);
        F := freq^[i] + freq^[k];
        freq^[j] := F;
        k := pred(j);
        While F < freq^[k] do
          dec(k);
        inc(k);
        l := (j - k) shl 1;
        tmp := succ(k);
        move(freq^[k], freq^[tmp], l);
        freq^[k] := F;
        move(son^[k], son^[tmp], l);
        son^[k] := i;
        inc(i, 2);
        inc(j);
      end;
    { connect parent nodes }
    For i := 0 to pred(T) do
      begin
        k := son^[i];
        if (k >= T) then
          begin
            prnt^[k] := i;
          end
        else
          begin
            prnt^[k] := i;
            prnt^[succ(k)] := i;
          end;
      end;
  end;

  { update freq tree }

  Procedure update(c : Integer);
  Var
    i, j, k, l : Integer;
  begin
    if (freq^[R] = MAX_FREQ) then
      begin
        reConst;
      end;
    c := prnt^[c + T];
    Repeat
      inc(freq^[c]);
      k := freq^[c];
      { swap nodes to keep the tree freq-ordered }
      l := succ(c);
      if (k > freq^[l]) then
        begin
          While (k > freq^[l]) do
            inc(l);
          dec(l);
          freq^[c] := freq^[l];
          freq^[l] := k;
          i := son^[c];
          prnt^[i] := l;
          if (i < T) then prnt^[succ(i)] := l;
          j := son^[l];
          son^[l] := i;
          prnt^[j] := c;
          if (j < T) then prnt^[succ(j)] := c;
          son^[c] := j;
          c := l;
        end;
      c := prnt^[c];
    Until (c = 0); { Repeat it Until reaching the root }
  end;

Var
  code, len : Word;

  Procedure EncodeChar(c : Word; PutBytes : PutBytesProc);
  Var
    i : Word;
    j, k : Integer;
  begin
    i := 0;
    j := 0;
    k := prnt^[c + T];
    { search connections from leaf node to the root }
    Repeat
      i := i shr 1;
 {
        if node's address is odd, output 1
        else output 0
        }
      if Boolean(k and 1) then inc(i, $8000);
      inc(j);
      k := prnt^[k];
    Until (k = R);
    Putcode(j, i, PutBytes);
    code := i;
    len := j;
    update(c);
  end;

  Procedure EncodePosition(c : Word; PutBytes : PutBytesProc);
  Var
    i, j : Word;
  begin
    { output upper 6 bits With encoding }
    i := c shr 6;
    j := p_code[i];
    Putcode(p_len[i], j shl 8, PutBytes);
    { output lower 6 bits directly }
    Putcode(6, (c and $3f) shl 10, PutBytes);
  end;

  Procedure Encodeend(PutBytes : PutBytesProc);
  Var
    Temp : Byte;
    Got : Word;
  begin
    if Boolean(putlen) then
      begin
        Temp := lo(putbuf shr 8);
        PutBytes(Temp, 1, Got);
        inc(codesize);
      end;
  end;

  Function DecodeChar(GetBytes : GetBytesProc) : Integer;
  Var
    c : Word;
  begin
    c := son^[R];
    {
     * start searching tree from the root to leaves.
     * choose node #(son[]) if input bit = 0
     * else choose #(son[]+1) (input bit = 1)
    }
    While (c < T) do
      begin
        c := c + GetBit(GetBytes);
        c := son^[c];
      end;
    c := c - T;
    update(c);
    DecodeChar := Integer(c);
  end;

  Function DecodePosition(GetBytes : GetBytesProc) : Word;
  Var
    i, j, c : Word;
  begin
    { decode upper 6 bits from given table }
    i := GetByte(GetBytes);
    c := Word(d_code[i] shl 6);
    j := d_len[i];
    { input lower 6 bits directly }
    dec(j, 2);
    While j <> 0 do
      begin
        i := (i shl 1) + GetBit(GetBytes);
        dec(j);
      end;
    DecodePosition := c or i and $3f;
  end;

  { Compression }

  Procedure InitLZH;
  begin
    getbuf := 0;
    getlen := 0;
    putlen := 0;
    putbuf := 0;
    TextSize := 0;
    codesize := 0;
    printcount := 0;
    match_position := 0;
    match_length := 0;
    new(lson);
    new(dad);
    new(rson);
    new(Text_buf);
    new(freq);
    new(prnt);
    new(son);
  end;

  Procedure endLZH;
  begin
    dispose(son);
    dispose(prnt);
    dispose(freq);
    dispose(Text_buf);
    dispose(rson);
    dispose(dad);
    dispose(lson);
  end;

  Procedure LZHPack(Var Bytes_Written : LongInt;
                        GetBytes : GetBytesProc;
                        PutBytes : PutBytesProc);
  Var
    ct : Byte;
    i, len, R, s, last_match_length : Integer;
    Got : Word;
  begin
    InitLZH;
    TextSize := 0; { rewind and rescan }
    StartHuff;
    InitTree;
    s := 0;
    R := N - F;
    fillChar(Text_buf^[0], R, ' ');
    len := 0;
    Got := 1;
    While (len < F) and (Got <> 0) do
      begin
        GetBytes(ct, 1, Got);
        if Got <> 0 then
          begin
            Text_buf^[R + len] := ct;
            inc(len);
          end;
      end;
    TextSize := len;
    For i := 1 to F do
      InsertNode(R - i);
    InsertNode(R);
    Repeat
      if (match_length > len) then
        match_length := len;
      if (match_length <= THRESHOLD) then
        begin
          match_length := 1;
          EncodeChar(Text_buf^[R], PutBytes);
        end
      else
        begin
          EncodeChar(255 - THRESHOLD + match_length, PutBytes);
          EncodePosition(match_position, PutBytes);
        end;
      last_match_length := match_length;
      i := 0;
      Got := 1;
      While (i < last_match_length) and (Got <> 0) do
        begin
          GetBytes(ct, 1, Got);
          if Got <> 0 then
            begin
              DeleteNode(s);
              Text_buf^[s] := ct;
              if (s < pred(F)) then
                Text_buf^[s + N] := ct;
              s := succ(s) and pred(N);
              R := succ(R) and pred(N);
              InsertNode(R);
              inc(i);
            end;
        end;
      inc(TextSize, i);
      While (i < last_match_length) do
        begin
          inc(i);
          DeleteNode(s);
          s := succ(s) and pred(N);
          R := succ(R) and pred(N);
          dec(len);
          if Boolean(len) then InsertNode(R);
        end;
    Until (len <= 0);
    Encodeend(PutBytes);
    endLZH;
    Bytes_Written := TextSize;
  end;

  Procedure LZHUnpack(TextSize : LongInt;
                      GetBytes : GetBytesProc;
                      PutBytes : PutBytesProc);
  Var
    c, i, j, k, R : Integer;
    c2, a : Byte;
    count : LongInt;
    Put : Word;
  begin
    InitLZH;
    StartHuff;
    R := N - F;
    fillChar(Text_buf^[0], R, ' ');
    count := 0;
    While count < TextSize do
      begin
        c := DecodeChar(GetBytes);
        if (c < 256) then
          begin
            c2 := lo(c);
            PutBytes(c2, 1, Put);
            Text_buf^[R] := c;
            inc(R);
            R := R and pred(N);
            inc(count);
          end
        else
          begin
            i := (R - succ(DecodePosition(GetBytes))) and pred(N);
            j := c - 255 + THRESHOLD;
            For k := 0 to pred(j) do
              begin
                c := Text_buf^[(i + k) and pred(N)];
                c2 := lo(c);
                PutBytes(c2, 1, Put);
                Text_buf^[R] := c;
                inc(R);
                R := R and pred(N);
                inc(count);
              end;
          end;
      end;
    endLZH;
  end;
end.

 

Share this article!

Follow us!

Find more helpful articles: