; Huffman compression macro: bitRAKE, 2008 Jan 05, Initial release
;

macro numdisplay num,base,digs,disp{
  local ..a,..b
  virtual at 0
    db '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz@%'
    dq num
  repeat digs
    load ..a qword from $-8
    dq ..a/base
  end repeat
  repeat digs
    load ..a qword from $-8*(1+%)
    ..b=..a mod base
    load ..a byte from ..b
    display ..a
  end repeat
  end virtual
  display disp
}


virtual at 8

  PRESS_ME

  len = ($-$$)
  numdisplay len,10,6,<9,": bytes to compress",13,10>

  ; symbol data structure size
  HUFF=16
  ; rd frequency count / bits to encode
  ; rq precomputed code bits
  ; rw ?
  ; rb tree node of leaf
  ; rb value of symbol
  huff dq 258 dup (0,0) ; couple extra needed dummy
  tree dd 256 dup (-1)
  pack db len dup 0

  ; init structures
  i=0
  while i<256
    store byte i at huff+HUFF*i+15
    i=i+1
  end while

  ; count byte frequency
  i=0
  while len>i
    load a byte from $$+i
    load b dword from huff+HUFF*a
    b=b+1
    store dword b at huff+HUFF*a
    i=i+1
  end while

  ; sort symbols by frequency (slow bubble)
  i=0
  while i<255
    load a qword from huff+HUFF*i
    load b qword from huff+HUFF*i+8
    j=256
    while j>i
      j=j-1
      load c qword from huff+HUFF*j
      if a>c
	load d qword from huff+HUFF*j+8
	store qword a at huff+HUFF*j
	store qword b at huff+HUFF*j+8
	a=c
	b=d
      end if
    end while
    store qword a at huff+HUFF*i
    store qword b at huff+HUFF*i+8
    i=i+1
  end while

  ; ignore unused symbols
  i=0
  j=0
  while j=0
    load j dword from huff+HUFF*i
    i=i+1
  end while
  n=257-i
  A=huff+HUFF*(256-n)
  numdisplay n,10,6,<9,": symbols used",13,10>

  ; determine code bits, see: Alistair Moffat's http://www.cs.mu.oz.au/~alistair/inplace.c
  load a dword from A
  load b dword from A+HUFF
  store dword (a+b) at A
  root=0
  leaf=2
  next=1
  while next < n-1
    load A_root dword from A+HUFF*root
    load A_leaf dword from A+HUFF*leaf
    if (leaf>=n)|(A_root<A_leaf)
      store dword A_root at A+HUFF*next
      store dword next at A+HUFF*root
      root=root+1
    else
      store dword A_leaf at A+HUFF*next
      leaf=leaf+1
    end if

    load A_root dword from A+HUFF*root
    load A_leaf dword from A+HUFF*leaf
    load A_next dword from A+HUFF*next
    if (leaf>=n)|((root<next)&(A_root<A_leaf))
      store dword A_root+A_next at A+HUFF*next
      store dword next at A+HUFF*root
      root=root+1
    else
      store dword A_leaf+A_next at A+HUFF*next
      leaf=leaf+1
    end if

    next=next+1
  end while

  store dword 0 at A+HUFF*(n-2)
  next=n-2
  while next>0
    next=next-1
    load a dword from A+HUFF*next
    load a dword from A+HUFF*a
    store dword a+1 at A+HUFF*next
  end while

  avbl=1
  uzed=0
  dpth=0
  root=n-2
  next=n-1
  while avbl>0
    load A_root dword from A+HUFF*root
    while (root>=0)&(A_root=dpth)
      uzed=uzed+1
      root=root-1
      load A_root dword from A+HUFF*root
    end while

    while avbl>uzed
      store dword dpth at A+HUFF*next
      next=next-1
      avbl=avbl-1
    end while

    avbl=2*uzed
    dpth=dpth+1
    uzed=0
  end while

  ; pre-compute code bits (speeds compression)
  load a dword from A+HUFF*(n-1)
  b=1 shl a
  i=n
  while i>0
    i=i-1
    load c dword from A+HUFF*i
    while c>a
      b=b shl 1
      a=a+1
    end while
    b=b-1
    store qword b at A+HUFF*i+4
  end while

  ; sort: code-lenth, byte value
  ; (larger value of code pairs gets code with lsb=1)
  i=0
  while i<n
    load a dword from A+HUFF*i
    b=a
    j=i
    while a=b
      j=j+1
      load b dword from A+HUFF*j
    end while
    j=j-i
    ; sort j items by output byte (slow bubble)
    k=i
    while k<i+j-1
      load a byte from A+HUFF*k+15
      h=i+j
      while h>k
	h=h-1
	load b byte from A+HUFF*h+15
	if b>a
	  store byte a at A+HUFF*h+15
	  a=b
	end if
      end while
      store byte a at A+HUFF*k+15
      k=k+1
    end while
    ; swap pairs?
    load a byte from A+HUFF*i+4
    if 0=(a and 1)
      k=i
      while k<i+j-1
	load a byte from A+HUFF*k+15
	load b byte from A+HUFF*(k+1)+15
	store byte b at A+HUFF*k+15
	store byte a at A+HUFF*(k+1)+15
	k=k+2
      end while
    end if
    i=i+j
  end while


  ; display codes
  i=n
  while i>0
    i=i-1
    load a byte from A+HUFF*i+15
    load c dword from A+HUFF*i
    load b qword from A+HUFF*i+4
    numdisplay a,16,2,<":">
    numdisplay b,2,c,<13,10>
  end while

  ; generate tree (need to flip encode bits for rotations?)
  ; start with highest value byte pairs
  ;   place at highest free slot in tree
  ; migrate values to minimize tree size (to center 80 value)
  ; update branches (filling holes) to reach leaves
  i=256
  while i>0
    i=i-1
    j=0
    while j<n
      load a byte from A+HUFF*j+15
      if a=i
	load b byte from A+HUFF*j+14
	if b<>0
	  break
	end if
	; is sibling pair a branch of leaf?

; a = pos-val-CF
; if val>pos CF is set and node is a branch

	; lowest position in tree=a with val 00-CF
	; pos-val-CF=a
	; val<=pos-CF
	; 256-a
	load b byte from A+HUFF*j+4
	load c dword from TREE+4*a



	break
      end if
      j=j+1
    end while
  end while



  ; finally, compress
  ; load bytes from low
  ; store bits high
  q=len
  p=0
  r=0
  i=0
  while i<len
    load a byte from $$+i
    j=n+1
    while a<>b
      j=j-1
      load b byte from A+HUFF*j+15
    end while
    load d dword from A+HUFF*j
    load c qword from A+HUFF*j+4
    r=(r shl d) or c
    p=p+d
    while p>8
      q=q-1
      p=p-8
      store byte $FF and (r shr p) at pack+q
    end while
    i=i+1
  end while
  if p>0
      q=q-1
      p=8-p
      store byte $FF and (r shl p) at pack+q
  end if
  p=8*q-p

  ; shift code bits down
  ; try to shift into table (rare event due to byte granularity)

  numdisplay p,10,6,<9,": code bits",13,10>
  numdisplay (len-q),10,6,<9,": compressed bytes",13,10>
  numdisplay (tre-start),10,6,<9,": decompressor",13,10>

  rept 200 i:0 { load a#i byte from tree+i }
  rept 200 i:0 { load b#i byte from pack+i }

  macro TreeBlock { rept 100 i:0 \{ db a\#i \} }
  macro DataBlock { rept 200 i:0 \{ db b\#i \} }
end virtual


PACKBITS=$7FF0
NODE0=tre-start ;-UNUSED


