title Lempel-Ziv Compressor ; $Source: /usr/home/dhesi/zoo/RCS/lzc.asm,v $ ; $Id: lzc.asm,v 1.4 91/07/07 09:36:18 dhesi Exp $ ;Derived from Tom Pfau's public domain assembly code. ;The contents of this file are hereby released to the public domain. ; -- Rahul Dhesi 1988/08/24 UNBUF_IO equ 1 ;use unbuffered I/O public _lzc include asmconst.ai include macros.ai check_gap equ 4000 ;Check ratio every so many bits scale_limit equ 32000 ;scale down if bitsout > scale_limit rat_thresh equ 0ffffh ;don't reset if rat > rat_thresh ;Hash table entry hash_rec struc first dw ? ; First entry with this value next dw ? ; Next entry along chain char db ? ; Suffix char hash_rec ends extrn docrc:near ;Procedure for block CRC in lzd.asm ifdef UNBUF_IO extrn _read:near extrn _blockwrite:near else extrn _zooread:near extrn _zoowrite:near endif ;Declare Segments _text segment byte public 'code' _text ends dgroup group _data assume ds:dgroup,es:dgroup _data segment word public 'data' extrn _out_buf_adr:word ;address of C output buffer extrn _in_buf_adr:word ;address of C input buffer extrn memflag:byte ;got memory? flag save_sp dw ? bytesin dw ? ;count of bytes read bitsout dw ? ;count of bits written ratio dw ? ;recent ratio ratflag db ? ;flag to remind us to check ratio bit_interval dw ? ;interval at which to test ratio input_handle dw ? output_handle dw ? hash_seg dw ? prefix_code dw ? free_code dw ? max_code dw ? nbits dw ? k db ? bit_offset dw ? input_offset dw 0 input_size dw 0 _data ends memory segment para public 'memory' hash label hash_rec memory ends add_code macro local ac1,ac2,ac3 mov bx,free_code ;Get code to use push ds ;point to hash table mov ds,hash_seg or di,di ;First use of this prefix? jz ac1 ;zero means yes mov [si].next,bx ;point last use to new entry jmp short ac2 ac1: mov [si].first,bx ;Point first use to new entry ac2: cmp bx,maxmax ;Have we reached code limit? je ac3 ;equal means yes, just return ;call index ;get address of new entry call_index ;macro for speed mov [si].first,-1 ;initialize pointers mov [si].next,-1 mov [si].char,al ;save suffix char inc es:free_code ;adjust next code ac3: pop ds ;restore seg reg endm read_char macro ;Macro for speed local m$1,m$2,m$3,m$4 inc [bytesin] ;Maintain input byte count for ratio mov di,input_offset ;Anything left in buffer? cmp di,input_size jb m$1 ;less means yes mov cx,inbufsiz call doread ;read block cmp ax,-1 jnz m$3 jmp IO_err ;input error m$3: mov dx,_in_buf_adr ;for docrc call docrc or ax,ax ;Anything left? jz m$2 ;zero means no, finished mov input_size,ax ;Save bytes read mov input_offset,0 ;Point to beginning of buffer mov di,0 m$1: mov si,_in_buf_adr add si,di lodsb ;Read it in inc input_offset ;Adjust pointer clc ;Success jmp short m$4 m$2: stc ;Nothing left m$4: endm ;Start writing code _text segment assume cs:_text,ds:dgroup,es:dgroup,ss:nothing _lzc proc near push bp ;Standard C entry code mov bp,sp push di push si push ds ;Save ds to be sure mov bx,ds mov es,bx ;Get two parameters, both integers, that are input file handle and ;output file handle mov ax,[bp+4] mov [input_handle],ax mov ax,[bp+6] mov [output_handle],ax call compress ;Compress file ;Status received back in AX pop ds pop si ;Standard C return code pop di mov sp,bp pop bp ret _lzc endp compress proc near mov [save_sp],sp ;Save SP in case of error return ;Initialize variables for serial re-entrancy mov [bit_offset],0 mov [input_offset],0 mov [input_size],0 test memflag,0ffH ;Memory allocated? jnz gotmem ;If allocated, continue malloc <((maxmax * 5) / 16 + 20)> ;allocate it jnc here1 jmp MALLOC_err1 here1: mov hash_seg,ax ;Save segment address of mem block mov memflag,0ffh ;Set flag to remind us later gotmem: l1: call init_table ;Initialize the table and some vars mov ax,clear ;Write a clear code call write_code ;call read_char ;Read first char read_char ;macro for speed l4: ;new code to check compression ratio test [ratflag],0FFH ;Time to check ratio? jz rd$1 ;Skip checking if zero call check_ratio rd$1: xor ah,ah ;Turn char into code l4a: mov prefix_code,ax ;Set prefix code ;call read_char ;Read next char read_char ;macro for speed jc l17 ;Carry means eof mov k,al ;Save char in k mov bx,prefix_code ;Get prefix code call lookup_code ;See if this pair in table jnc l4a ;nc means yes, new code in ax ;call add_code ;Add pair to table add_code ;Macro for speed push bx ;Save new code mov ax,prefix_code ;Write old prefix code call write_code pop bx mov al,k ;Get last char cmp bx,max_code ;Exceed code size? jnb l4$ jmp l4 l4$: cmp nbits,maxbits ;Currently less than maxbits? jb l14 ;yes mov ax,clear ;Write a clear code call write_code call init_table ;Reinit table mov al,k ;get last char jmp l4 ;Start over l14: inc nbits ;Increase number of bits shl max_code,1 ;Double max code size jmp l4 ;Get next char l17: mov ax,prefix_code ;Write last code call write_code mov ax,eof ;Write eof code call write_code mov ax,bit_offset ;Make sure buffer is flushed to file cmp ax,0 je OK_ret mov dx,ax ;dx <- ax shr ax,1 shr ax,1 shr ax,1 ;ax <- ax div 8 and dx,07 ;dx <- ax mod 8 ;If extra bits, make sure they get je l17a ;written inc ax l17a: call flush OK_ret: xor ax,ax ;Normal return -- compressed ret IO_err: mov ax,2 ;I/O error return mov sp,[save_sp] ;Restore stack pointer ret MALLOC_err1: ;hash table alloc error mov ax,1 ;Malloc error return mov sp,[save_sp] ;Restore stack pointer ret compress endp init_table proc near mov [bytesin],0 ;Input byte count mov [bitsout],0 ;Output bit count mov [ratio],0 mov [ratflag],0 mov [bit_interval],check_gap mov nbits,9 ;Set code size to 9 mov max_code,512 ;Set max code to 512 push es ;Save seg reg mov es,hash_seg ;Address hash table mov ax,-1 ;Unused flag mov cx,640 ;Clear first 256 entries mov di,offset hash ;Point to first entry rep stosw ;Clear it out pop es ;Restore seg reg mov free_code,first_free ;Set next code to use ret ;done init_table endp write_code proc near push ax ;Save code mov cx,nbits add [bitsout],cx ;Maintain output bit count for ratio sub [bit_interval],cx jg rd$2 ;OK if not reached interval mov [ratflag],1 ;else set flag -- check ratio soon rd$2: mov ax,bit_offset ;Get bit offset mov cx,nbits ;Adjust bit offset by code size add bit_offset,cx mov dx,ax ;dx <- ax shr ax,1 shr ax,1 shr ax,1 ;ax <- ax div 8 and dx,07 ;dx <- ax mod 8 ;Now ax contains byte offset and ;dx contains bit offset within that byte (I think) cmp ax,outbufsiz-4 ;Approaching end of buffer? jb wc1 ;less means no call flush ;Write the buffer push dx ;dx contains offset within byte add dx,nbits ;adjust by code size mov bit_offset,dx ;new bit offset pop dx ;restore dx ;ax is an index into output buffer. Next, ax <- address of buffer[ax] add ax,[_out_buf_adr] mov si,ax ;put in si mov al,byte ptr [si] ;move byte to first position ;put value of al into first byte of output buffer push bx mov bx,[_out_buf_adr] mov [bx],al pop bx xor ax,ax ;Byte offset of zero wc1: add ax,[_out_buf_adr] ;Point into buffer mov di,ax ;Destination pop ax ;Restore code mov cx,dx ;offset within byte xor dx,dx ;dx will catch bits rotated out jcxz wc3 ;If offset in byte is zero, skip shift wc2: shl ax,1 ;Rotate code rcl dx,1 loop wc2 or al,byte ptr [di] ;Grab bits currently in buffer wc3: stosw ;Save data mov al,dl ;Grab extra bits stosb ;and save ret write_code endp flush proc near push ax ;Save all registers push bx ;AX contains number of bytes to write push cx push dx push si ;may not be necessary to save si & di push di push ax ;save byte count push ax ;byte count push _out_buf_adr ;buffer address push output_handle ;zoofile ifdef UNBUF_IO call _blockwrite else call _zoowrite endif add sp,6 pop cx ;recover byte count cmp ax,cx jz here2 jmp IO_err ;I/O error here2: pop di pop si pop dx pop cx pop bx pop ax ret flush endp lookup_code proc near push ds ;Save seg reg mov ds,hash_seg ;point to hash table ;call index ;convert code to address call_index ;macro for speed mov di,0 ;flag mov bx,[si].first cmp bx,-1 ;Has this code been used? je gc4 ;equal means no inc di ;set flag gc2: ;call index ;convert code to address call_index ;macro for speed cmp [si].char,al ;is char the same? jne gc3 ;ne means no clc ;success mov ax,bx ;put found code in ax pop ds ;restore seg reg ret ;done gc3: mov bx,[si].next cmp bx,-1 ;More left with this prefix? je gc4 ;equal means no jmp gc2 ;try again gc4: stc ;not found pop ds ;restore seg reg ret ;done lookup_code endp comment # index proc near mov si,bx ;si = bx * 5 (5 byte hash entries) shl si,1 ;si = bx * 2 * 2 + bx shl si,1 add si,bx ret index endp # end comment check_ratio proc near push ax ; mov dl,'*' ;'*' printed means checking ratio ; call sendout ;Getting ready to check ratios. If bitsout is over scale_limit, ;then we scale down both bitsout and bytesin by a factor ;of 4. This will avoid overflow. mov cx,[bitsout] cmp cx,scale_limit jb scale_ok mov cl,2 shr [bytesin],cl shr [bitsout],cl scale_ok: ;Note MASM bug: "mov ah,high [bytesin]" and ;"mov al,low [bytesin]" don't work. mov ah,byte ptr [bytesin] mov dl,byte ptr [bytesin+1] sub al,al sub dh,dh ;dx:ax = 8 * bitsin = 256 * bytesin mov cx,[bitsout] ;cx <- bitsout or cx,cx ;Division by zero? jnz candivide ;No -- go ahead divide mov ax,0FFFFH ;yes -- assume max poss value jmp short divided candivide: ;Calculate cx as (bytesin * 256) / bitsout = bitsin * 8 / bitsout div cx ;ax <- rat <- dx:ax / cx shl ax,1 shl ax,1 ;rat <- 4 * bytes_in / bytes_out divided: ;Enter here with ax = rat = bitsin / bitsout. ; call print_data ;print info for debugging ;If rat > rat_thresh then ratio is good; do not reset table ; cmp ax,rat_thresh ; ja ratdone ;Compare rat against ratio mov bx,ax ;save rat in bx cmp ax,[ratio] ;cmp rat,ratio jb ratless ;trouble if ratio is now smaller mov ax,[ratio] call mul7 ;ax <- 7 * ratio add ax,bx ;ax = 7 * ratio + rat shr ax,1 shr ax,1 shr ax,1 ;ax = (7 * ratio + rat) / 8 mov [ratio],ax ;ratio = (7 * ratio + rat) / 8 jmp short ratdone ratless: ;ratio is going down, so... mov [bytesin],0 mov [bitsout],0 ; mov dl,'#' ;'#' printed means table reset ; call sendout mov ax,clear ;Write a clear code call write_code call init_table ;Reinit table ratdone: mov [ratflag],0 mov [bit_interval],check_gap pop ax ret check_ratio endp comment # sendout proc near ;char in dl send to console push ax mov ah,02 int 21H pop ax ret sendout endp # end comment mul7 proc near ;multiply ax by 7 push dx mov dx,7 mul dx ;dx:ax <- 7 * ax pop dx ret mul7 endp comment # mul3 proc near ;multiply ax by 3 push dx mov dx,3 mul dx ;dx:ax <- 3 * ax pop dx ret mul3 endp # end comment comment # mul1_125 proc near ;multiply ax by 1.125 push bx mov bx,ax shr bx,1 shr bx,1 shr bx,1 ;bx = n / 8 add ax,bx ;ax <- n + n / 8 pop bx ret mul1_125 endp # end comment comment # print_data proc near ;Debugging -- print bytesin, bitsout, rat, and ratio push ax push bx push cx push dx push ax ;print rat call _prtint add sp,2 push [ratio] ;print ratio call _prtint add sp,2 push [bytesin] call _prtint add sp,2 push [bitsout] call _prtint add sp,2 pop dx pop cx pop bx pop ax ret print_data endp # end comment ;doread reads cx characters and stores them in input buffer ;return value from zooread is returned in ax doread proc near ;reads block push bx push cx push dx push si push di push cx ;byte count push _in_buf_adr ;buffer address push input_handle ;zoofile ifdef UNBUF_IO call _read else call _zooread endif add sp,6 pop di pop si pop dx pop cx pop bx ret doread endp _text ends end