RNC1 optimizations

This commit is contained in:
Mr-Wiseguy
2021-03-28 15:54:08 -04:00
parent 359ba19531
commit 0fdb2d0ace

View File

@@ -121,9 +121,8 @@ Propack_UnpackM1:
/* Set up entry parameters, the RNC code always uses a0, a1 for */
/* the I/O datastream and we must use R3K registers A0,A1 */
move xa0,a0 /* move input,a0 */
move xa1,a1 /* move output,a1 */
jal pp_Unpack /* bsr pp_Unpack */
nop
move xa1,a1 /* move output,a1 */
/*
Restore previously saved registers here
@@ -150,9 +149,8 @@ Propack_UnpackM1:
lw $23,76($sp)
lw $31,80($sp)
add $sp, $sp, 88
j ra /* rts */
nop
add $sp, $sp, 88
pp_Unpack:
sub const_ff,zero,1 /* Set up constant 0xff */
and const_ff,const_ff,0x00ff
@@ -161,13 +159,11 @@ pp_Unpack:
srl const_8000,const_80000000,16 /* Set up constant 0x8000 */
nor const_ffffff00,zero,const_ff /* Set up constant 0xffffff00 */
sub $sp,$sp,BUFSIZE /* sub.w #bufsize,sp */
move xa2,$sp /* move.w sp,a2 */
sub $sp,$sp,8 /* Preserve link address for return*/
sub $sp,$sp,8+BUFSIZE /* sub.w #bufsize,sp, store link address */
add xa2,$sp,8 /* move.w sp,a2 */
sw ra,($sp)
add xa0,xa0,4 /* addq #4,a0 (skip past comp'd length) */
jal read_long /* bsr read_long (get uncomp'd length) */
nop
lw d0, 4(xa0) /* get uncomp'd length */
add xa0,xa0,8 /* skip past comp'd and uncomp'd lengths */
add input,xa0,HEADER_LEN-8 /* lea HEADER_LEN-8(a0),input (skip past rest of header) */
move output,xa1 /* move.l a1,output */
lbu bit_buffer,1(input) /* move.b 1(input),bit_buffer */
@@ -177,82 +173,56 @@ pp_Unpack:
or bit_buffer,bit_buffer,tmp /* " " and or it in to the bit_buffer */
add output_hi,output,d0 /* lea (output,d0.l),output_hi */
add d0,zero,2 /* moveq.l #2,d0 */
add d1,zero,2 /* moveq.l #2,d1 */
jal input_bits /* bsr input_bits */
nop
lw ra,($sp) /* Restore from linked jumps */
add $sp,$sp,8
add d1,zero,2 /* moveq.l #2,d1 */
/*
 * unpack2: per-pass setup inside pp_Unpack.
 *   - calls make_huftable three times, with xa0 = xa2, xa2+POS_TABLE and
 *     xa2+LEN_TABLE;
 *   - calls input_bits with d0 = -1 and d1 = 16, then copies d0 to counts;
 *   - counts = (counts - 1) & 0xffff  (tmp = 0xffffffff >> 16 is the mask);
 *   - jumps to unpack5.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were lost.
 * Several instructions appear twice, once before a jal/j and once right
 * after it. Presumably one copy is the old placement and the other is the
 * same instruction moved into the branch delay slot (replacing a nop) --
 * confirm against the real file before assembling any of this.
 */
unpack2:
sub $sp,$sp,8 /* Preserve link address for return*/
sw ra,($sp)
/* */
/* Fill the hufftables with a set value so we can see if they work */
/* */
/* First table: xa0 = xa2 (start of the work buffer pointed to by xa2). */
move xa0,xa2 /* move.l a2,a0 */
jal make_huftable /* bsr make_huftable */
nop
add xa0,xa2,POS_TABLE /* lea POS_TABLE(a2),a0 */
move xa0,xa2 /* move.l a2,a0 */
/* Second table: xa0 = xa2 + POS_TABLE. */
jal make_huftable /* bsr make_huftable */
nop
add xa0,xa2,LEN_TABLE /* lea LEN_TABLE(a2),a0 */
add xa0,xa2,POS_TABLE /* lea POS_TABLE(a2),a0 */
/* Third table: xa0 = xa2 + LEN_TABLE. */
jal make_huftable /* bsr make_huftable */
nop
add xa0,xa2,LEN_TABLE /* lea LEN_TABLE(a2),a0 */
/* input_bits arguments: d0 = all-ones mask, d1 = 16 bits requested. */
add d0,zero,-1 /* moveq.l #-1,d0 */
add d1,zero,16 /* moveq.l #16,d1 */
jal input_bits /* bsr input_bits */
nop
lw ra,($sp) /* Restore from linked jumps */
add $sp,$sp,8
add d1,zero,16 /* moveq.l #16,d1 */
move counts,d0 /* move.w d0,counts */
/* tmp = 0x0000ffff: (0 - 1) shifted right logically by 16. */
sub tmp,zero,1
srl tmp,tmp,16
sub counts,counts,1 /* subq.w #1,counts */
and counts,tmp,counts
j unpack5 /* bra.s unpack5 */
nop
and counts,tmp,counts
/*
 * unpack3: decode one back-reference and copy its first byte.
 *   - input_value with xa0 = xa2+POS_TABLE, called with d0 cleared;
 *   - d0 is negated and xa1 = output + d0 - 1 (copy source pointer);
 *   - input_value with xa0 = xa2+LEN_TABLE;
 *   - one byte is copied from (xa1) to (output), both pointers advance;
 *   - tmp2 = xa1 + d0, which the bne at the end of unpack4 compares
 *     against xa1.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were lost.
 * Lines that appear twice around a jal (move d0,zero / add xa0,...LEN_TABLE)
 * and the ra save/restore pairs are presumably old and new variants of the
 * same code -- confirm against the real file.
 */
unpack3:
add xa0,xa2,POS_TABLE /* lea POS_TABLE(a2),a0 */
sub $sp,$sp,8 /* Preserve link address for return*/
sw ra,($sp)
move d0,zero /* moveq.l #0,d0 */
jal input_value /* bsr.s input_value */
nop
move d0,zero /* moveq.l #0,d0 */
/* d0 = -d0, then xa1 = output + d0 - 1. */
subu d0,zero,d0 /* neg.l d0 */
add xa1,output,d0 /* lea -1(output,d0.l),a1 */
add xa1,xa1,-1 /* " " */
add xa0,xa2,LEN_TABLE /* lea LEN_TABLE(a2),a0 */
jal input_value /* bsr.s input_value */
nop
lw ra,($sp) /* Restore from linked jumps */
add $sp,$sp,8
add xa0,xa2,LEN_TABLE /* lea LEN_TABLE(a2),a0 */
/* Copy one byte; output is bumped first, so the store uses offset -1. */
add output,output,1 /* move.b (a1)+,(output)+ */
lbu tmp,(xa1) /* " " */
add xa1,xa1,1 /* " " */
sb tmp,-1(output) /* " " */
/* tmp2 = xa1 + d0: end-pointer candidate for the copy loop that follows. */
addu tmp2,xa1,d0
/*
 * unpack4: byte-copy loop. Each pass loads a byte from (xa1) and stores it
 * at the old value of output (output is incremented first, store is at -1).
 *
 * NOTE(review): two different loop-closing pairs are present below:
 *   bne d0,zero,unpack4   / sub d0,d0,1    (count-down on d0)
 *   bne xa1,tmp2,unpack4  / add xa1,xa1,1  (compare xa1 with end pointer tmp2)
 * This listing looks like a diff with the +/- markers stripped, so these are
 * presumably the old and new forms of the same loop rather than two
 * consecutive branches. The standalone `add xa1,xa1,1` in the body would
 * then belong only to the count-down form. Confirm against the real file.
 */
.global unpack4
unpack4:
add output,output,1 /* move.b (a1)+,(output)+ */
lbu tmp,(xa1) /* " " */
add xa1,xa1,1 /* " " */
sb tmp,-1(output) /* " " */
bne d0,zero,unpack4 /* dbra d0,unpack4 */
sub d0,d0,1
bne xa1,tmp2,unpack4 /* dbra d0,unpack4 */
add xa1,xa1,1
unpack5:
sub $sp,$sp,8 /* Preserve link address for return*/
sw ra,($sp)
move xa0,xa2 /* move.l a2,a0 */
jal input_value /* bsr.s input_value */
nop
lw ra,($sp) /* Restore from linked jumps */
add $sp,$sp,8
move xa0,xa2 /* move.l a2,a0 */
sub d0,d0,1 /* subq.w #1,d0 */
bltz d0,unpack6 /* bmi.s unpack6 */
nop
sub $sp,$sp,8 /* Preserve link address for return*/
sw ra,($sp)
nop
jal getrawREP /* getrawREP */
nop /* */
lw ra,($sp) /* Restore from linked jumps */
add $sp,$sp,8
nop
lbu tmp,1(input) /* move.b 1(input),d0 */
lbu d0,0(input) /* move.b (input),d0 */
sll tmp,tmp,8 /* rol.w #8,d0 */
@@ -265,69 +235,46 @@ unpack5:
and bit_buffer,bit_buffer,d1 /* and.l d1,bit_buffer */
or bit_buffer,d0,bit_buffer /* or.l d0,bit_buffer */
/*
 * unpack6: loop tail and exit of pp_Unpack.
 *   - branch back to unpack3 while counts != 0; counts is decremented by the
 *     instruction following the branch (this file's comments elsewhere mark
 *     that position as the delay slot);
 *   - tmp = output - output_hi; if negative, go to unpack2 for another pass;
 *   - otherwise release the stack area, reload ra and return.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were lost.
 * The bne/sub pair appears twice (possibly a whitespace-only change), and
 * there are two stack-release forms: `add $sp,$sp,BUFSIZE` before the ra
 * reload, and `add $sp,$sp,8 + BUFSIZE` after `jr ra`. Presumably the first
 * is the old code and the second is the new one placed in the jr delay slot
 * -- confirm against the real file.
 */
unpack6:
bne counts,zero,unpack3 /* dbra counts,unpack3 */
sub counts,counts,1
bne counts,zero,unpack3 /* dbra counts,unpack3 */
sub counts,counts,1
/* Signed test of output - output_hi: negative means more data to produce. */
sub tmp,output,output_hi /* cmp.l output_hi,output */
bltz tmp,unpack2 /* bcs.s unpack2 */
nop /* " " */
add $sp, $sp, BUFSIZE /* lea BUFSIZE(sp),sp */
nop /* " " */
lw ra,($sp) /* Restore from linked jumps */
jr ra /* rts */
nop
add $sp, $sp, 8 + BUFSIZE /* lea BUFSIZE(sp),sp */
/*
 * input_value: look up the next code in the table at xa0 using bit_buffer.
 *
 * In:   xa0 = table base (callers in this listing pass xa2, xa2+POS_TABLE
 *       or xa2+LEN_TABLE); bit_buffer / bit_count = current bit state.
 * Out:  d0 = decoded value.
 * Also written: xa0, d1, d2, tmp, tmp2, bit_buffer, bit_count.
 *
 * Steps, as far as the visible instructions show:
 *   1. Walk 4-byte entries (two 16-bit fields). Loop until
 *      (field0 & bit_buffer) - field1 == 0.
 *   2. d1 = byte at 16*4-4(xa0); subtract it from bit_count and shift
 *      bit_buffer right by d1. If bit_count went negative, input_bits3 is
 *      called first.
 *   3. d0 = byte at 16*4-3(xa0). If d0 < 2, return it as is.
 *   4. Otherwise d0 -= 1, d1 = d2 = d0, d0 = halfword at 16*4-2(xa0) masked
 *      with bit_buffer, d1 more bits are consumed, and bit d2 is set in d0.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were lost.
 * Each 16-bit read appears both as an lbu/lbu/sll/or sequence (first byte is
 * the high half) and as a single lhu, and the bit_count checks appear in
 * several forms. Presumably these are old and new variants of the same step,
 * not code that runs back to back -- confirm against the real file.
 */
input_value:
lbu d0,0(xa0) /* move.w (a0)+,d0 */
lbu tmp,1(xa0) /* " " */
add xa0,xa0,2 /* " " */
sll d0,d0,8 /* " " */
or d0,tmp,d0 /* " " */
lhu d0,0(xa0) /* move.w (a0)+,d0 */
and d0,d0,bit_buffer /* and.w bit_buffer,d0 */
lbu tmp,0(xa0) /* sub.w (a0)+,d0 */
lbu tmp2,1(xa0) /* " " */
add xa0,xa0,2 /* " " */
sll tmp,tmp,8 /* " " */
or tmp,tmp,tmp2 /* " " */
lhu tmp,2(xa0) /* move.w (a0)+,d0 */
sub d0,d0,tmp /* " " */
bne d0,zero,input_value /* bne.s input_value */
nop
add xa0,xa0,4 /* " " */
/* d1 = number of bits to consume for the matched entry. */
lbu d1,16*4-4(xa0) /* move.b 16*4-4(a0),d1 */
nop
sub bit_count,bit_count,d1 /* sub.b d1,bit_count */
bgez bit_count,input_value2 /* bge.s input_value2 */
nop /*(Delay Slot) */
sub $sp,$sp,8 /* Preserve link address for return*/
sw ra,($sp)
/* NOTE(review): bgezl is a branch-likely form; presumably the srl after it */
/* executes only when the branch is taken -- confirm in the MIPS ISA manual. */
bgezl bit_count,input_value2 /* bge.s input_value2 */
srl bit_buffer,bit_buffer,d1 /* lsr.l d1,bit_buffer */
/* ra is stored at -8($sp) here; the `sub $sp,$sp,8` after the jal is */
/* presumably its delay slot and makes that slot 0($sp) for the reload. */
sw ra,-8($sp)
jal input_bits3 /* bsr.s input_bits3 */
nop /* * */
sub $sp,$sp,8 /* Preserve link address for return*/
lw ra,($sp) /* Restore from linked jumps */
add $sp,$sp,8
input_value2:
srl bit_buffer,bit_buffer,d1 /* lsr.l d1,bit_buffer */
lbu d0,16*4-3(xa0) /* move.b 16*4-3(a0),d0 */
nop
/* Values below 2 are returned directly via input_value4. */
sub tmp,d0,2 /* cmp.b #2,d0 */
bltz tmp,input_value4 /* blt.s input_value4 */
nop
nop
sub d0,d0,1 /* subq.b #1,d0 */
move d1,d0 /* move.b d0,d1 */
move d2,d0 /* move.b d0,d2 */
lbu tmp,16*4-2(xa0) /* move.w 16*4-2(a0),d0 */
lbu d0,16*4-2+1(xa0) /* " " */
sll tmp,tmp,8
or d0,tmp,d0
and d0,d0,bit_buffer /* and.w bit_buffer,d0 */
lhu d0,16*4-2(xa0) /* move.w 16*4-2(a0),d0 */
sub bit_count,bit_count,d1 /* sub.b d1,bit_count */
bgez bit_count,input_value3 /* bge.s input_value3 */
nop
sub $sp,$sp,8 /* Preserve link address for return*/
sw ra,($sp)
jal input_bits3 /* bsr.s input_bits3 */
nop /* */
lw ra,($sp) /* Restore from linked jumps */
add $sp,$sp,8
input_value3:
srl bit_buffer,bit_buffer,d1 /* lsr.l d1,bit_buffer */
and d0,d0,bit_buffer /* and.w bit_buffer,d0 */
/* tmp = 1 << d2, OR-ed into d0 to set bit d2. */
add tmp,zero,1
sll tmp,tmp,d2 /* bset d2,d0 */
or d0,tmp,d0 /* " " */
/* NOTE(review): the 68k comment on the next line (bge.s input_value3) does */
/* not match the instruction, which branches to input_bits3 when */
/* bit_count < 0 without linking, so input_bits3 would return straight to */
/* this routine's caller. */
bltz bit_count,input_bits3 /* bge.s input_value3 */
or d0,tmp,d0 /* " " */
srl bit_buffer,bit_buffer,d1 /* lsr.l d1,bit_buffer */
input_value4:
jr ra
nop /*(Delay Slot) */
@@ -341,29 +288,16 @@ input_value4:
//------------------------------------------------------------------------------
*/
/*
 * input_bits: take d1 bits from bit_buffer.
 *
 * In:   d0 = mask applied to bit_buffer, d1 = number of bits to consume.
 * Out:  d0 = bit_buffer & d0 (computed before the buffer is shifted).
 * Also written: bit_count (reduced by d1), bit_buffer (shifted right by d1).
 * When bit_count goes negative, the code continues in input_bits3, either
 * through a jal or by falling through, depending on the variant.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were lost.
 * Two entry sequences are visible: one branching to input_bits2 with a
 * jal input_bits3 and ra saved on the stack, and one branching to
 * input_bits_ret with the `and` after the branch and no stack use.
 * Presumably these are the old and new versions -- confirm against the
 * real file.
 */
input_bits:
and d0,bit_buffer,d0 /* and.w bit_buffer,d0 */
sub bit_count,bit_count,d1 /* sub.b d1,bit_count */
bgez bit_count,input_bits2 /* bge.s input_bits2 */
nop
sub $sp,$sp,8 /* Preserve link address for return*/
sw ra,($sp)
jal input_bits3 /* bsr.s input_bits3 */
nop
lw ra,($sp) /* Restore from linked jumps */
add $sp,$sp,8
input_bits2:
srl bit_buffer,bit_buffer,d1 /*(Delay Slot)lsr.l d1,bit_buffer */
jr ra /* return */
nop
/* Variant entry: enough bits left means go straight to input_bits_ret. */
bgez bit_count,input_bits_ret /* bge.s input_bits2 */
and d0,bit_buffer,d0 /* and.w bit_buffer,d0 */
input_bits3:
add bit_count,bit_count,d1 /* Make sure we have correct # of bits left in buffer */
srl bit_buffer,bit_buffer,bit_count /* lsr.l bit_count,bit_buffer */
add input,input,4 /* jump passed our data for some odd reason */
lbu tmp,-2(input) /* Get our new word to add into bit buffer */
lbu tmp2,-1(input) /* byte at a time..... */
sub input,input,2
add input,input,2 /* jump passed our data for some odd reason */
lbu tmp,0(input) /* Get our new word to add into bit buffer */
lbu tmp2,1(input) /* byte at a time..... */
sll tmp,tmp,16 /* shift this into the right place */
sll tmp2,tmp2,24
or bit_buffer,bit_buffer,tmp2 /* OR in the 2 new bytes read to the bit buffer */
@@ -372,57 +306,42 @@ input_bits3:
sub d1,d1,bit_count /* sub.b bit_count,d1 */
add bit_count,zero,16 /* moveq.l #16,bit_count */
sub bit_count,bit_count,d1 /* sub.b d1,bit_count */
input_bits_ret:
jr ra /* rts */
nop
/*
 * read_long: read bytes from (xa0) into d0, most significant byte first.
 *
 * In:   xa0 = source pointer.
 * Out:  d0 = (d0 << 8) | next byte, repeated once per loop pass;
 *       xa0 advanced by one per byte read.
 * Also written: d1 (loop counter, starts at 3), tmp.
 *
 * The bne tests d1 before the `sub d1,d1,1` that follows it, so if that sub
 * runs in the branch delay slot the loop makes four passes (d1 = 3,2,1,0).
 * After four passes the initial contents of d0 are shifted out entirely.
 * No alignment requirement on xa0, since only byte loads are used.
 */
read_long:
add d1,zero,3 /* moveq.l #3,d1 */
read_long2:
sll d0,d0,8 /* lsl.l #8,d0 */
lbu tmp,0(xa0) /* move.b (a0)+,d0 */
add xa0,xa0,1 /* " " */
or d0,tmp,d0 /* " " */
bne d1,zero,read_long2 /* dbra d1,read_long2 */
sub d1,d1,1
jr ra
nop /*(Delay Slot) */
srl bit_buffer,bit_buffer,d1 /*(Delay Slot)lsr.l d1,bit_buffer */
/*------------------------------------------------------------------------------
// Make_HufTable - Builds huffman decode table
//------------------------------------------------------------------------------
*/
make_huftable:
add d0,zero,0x1f /* moveq.l #$1f,d0 */
add d1,zero,0x5 /* moveq.l #5,d1 */
sub $sp,$sp,8 /* Preserve link address for return*/
sw ra,($sp)
sw ra,0($sp)
add d0,zero,0x1f /* moveq.l #$1f,d0 */
jal input_bits /* bsr.s input_bits */
nop /*(Delay Slot) */
add d1,zero,0x5 /* moveq.l #5,d1 */
lw ra,($sp) /* Restore from linked jumps */
add $sp,$sp,8
sub d0,d0,1 /* subq.w #1,d0 */
bltz d0,make_huftable8 /* bmi.s make_huftable8 */
nop
add $sp,$sp,8
move d2,d0 /* move.w d0,d2 */
move d3,d0 /* move.w d0,d3 */
sub $sp,$sp,16 /* lea -16(sp),sp */
move xa1,$sp /* move.l sp,a1 */
sub $sp,$sp,24 /* lea -16(sp),sp */
add xa1,$sp,8 /* move.l sp,a1 */
sw ra,0($sp) /* Preserve link address for return*/
make_huftable3:
add d0,zero,15 /* moveq.l #$f,d0 */
add d1,zero,4 /* moveq.l #4,d1 */
sub $sp,$sp,8 /* Preserve link address for return*/
sw ra,($sp)
jal input_bits /* bsr.s input_bits */
nop /*(Delay Slot) */
lw ra,($sp) /* Restore from linked jumps */
add $sp,$sp,8
add d1,zero,4 /* moveq.l #4,d1 */
sb d0,(xa1) /* move.b d0,(a1)+ */
add xa1,xa1,1 /* " " */
bne d2,zero,make_huftable3 /* dbra d2,make_huftable3 */
sub d2,d2,1
sub d2,d2,1
lw ra,($sp) /* Restore from linked jumps */
move d0,const_80000000 /* moveq.l #1,d0 (with shift == 0x8000000) */
add d1,zero,1 /* moveq.l #1,d1 */
move d2,zero /* moveq.l #0,d2 */
sub $sp,$sp,16 /* movem.l d5-d7,-(sp) */
sub $sp,$sp,8 /* movem.l d5-d7,-(sp) */
sw d5,0($sp) /* " " */
sw d6,4($sp) /* " " */
sw d7,8($sp) /* " " */
@@ -431,15 +350,12 @@ make_huftable4:
add xa1,$sp,16 /* lea 12(sp),a1 */
make_huftable5:
lbu tmp,0(xa1) /* cmp.b (a1)+,d1 */
add xa1,xa1,1 /* " " */
bne d1,tmp,make_huftable7 /* bne.s make_huftable7 */
nop /*(Delay Slot) */
add xa1,xa1,1 /* " " */
add d5,zero,1 /* moveq.l #1,d5 */
sll d5,d5,d1 /* lsl.w d1,d5 */
sub d5,d5,1 /* subq.w #1,d5 */
sb d5,1(xa0) /* move.w d5,(a0)+ */
srl tmp,d5,8 /* " " */
sb tmp,(xa0) /* " " */
sh d5,0(xa0) /* move.w d5,(a0)+ */
add xa0,xa0,2 /* " " */
move d5,d2 /* move.l d2,d5 */
sll tmp,d5,16 /* swap d5 */
@@ -453,18 +369,15 @@ make_huftable6:
and d6,tmp,d6
and tmp,d5,const_8000 /* roxl.w #1,d5 */
sll d5,d5,1 /* " " */
srl d6,d6,1 /* " " */
beq tmp,zero,make_ht6_1 /* roxr.w #1,d6 */
nop
srl d6,d6,1 /* " " */
or d6,d6,const_8000 /* " " */
make_ht6_1: bne d7,zero,make_huftable6 /* dbra d7,make_huftable6 */
sub d7,d7,1
add d5,zero,16 /* moveq.l #16,d5 */
sub d5,d5,d1 /* sub.b d1,d5 */
srl d6,d6,d5 /* lsr.w d5,d6 */
sb d6,1(xa0) /* move.w d6,(a0)+ */
srl tmp,d6,8 /* " " */
sb tmp,0(xa0) /* " " */
sh d6,0(xa0) /* move.w d6,(a0)+ */
add xa0,xa0,2 /* " " */
sb d1,16*4-4(xa0) /* move.b d1,16*4-4(a0) */
move d5,d3 /* move.b d3,d5 */
@@ -474,9 +387,7 @@ make_ht6_1: bne d7,zero,make_huftable6 /* dbra d7,make_huftable6 */
subu d5,d5,1 /* subq.b #1,d5 */
sll d6,d6,d5 /* lsl.w d5,d6 */
subu d6,d6,1 /* subq.w #1,d6 */
sb d6,16*4-2+1(xa0)
srl tmp,d6,8
sb tmp,16*4-2(xa0) /* move.w d6,16*4-2(a0) */
sh d6,16*4-2(xa0) /* move.w d6,16*4-2(a0) */
addu d2,d2,d0 /* add.l d0,d2 */
make_huftable7:
bne d4,zero,make_huftable5 /* dbra d4,make_huftable5 */
@@ -488,19 +399,16 @@ make_huftable7:
lw d5,0($sp) /* movem.l (sp)+,d5-d7 */
lw d6,4($sp) /* " " */
lw d7,8($sp) /* " " */
add $sp,$sp,16 /* " " */
add $sp,$sp,16 /* lea 16(sp),sp */
make_huftable8:
jr ra
nop /*(Delay Slot) */
add $sp,$sp,32 /* lea 16(sp),sp */
/*
 * getrawREP: copy raw bytes from input to output.
 *
 * In:   d0 = loop counter, input = source pointer, output = destination.
 * Out:  input and output each advanced by one per byte copied.
 * Also written: tmp, d0.
 *
 * The bne tests d0 before the `sub d0,d0,1` that follows it, so if that sub
 * runs in the branch delay slot the loop copies d0 + 1 bytes for d0 >= 0.
 * Execution then reaches the `jr ra` under the make_huftable8 label.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were lost.
 * The two identical `sub d0,d0,1` lines are presumably the old and new copy
 * of one instruction (possibly a whitespace-only change), and the
 * make_huftable8 label may have been moved here by the change -- confirm
 * against the real file.
 */
getrawREP:
lbu tmp,0(input)
nop
sb tmp,0(output)
add output,output,1
add input,input,1
bne d0,zero,getrawREP /* dbra d0,getrawREP */
sub d0,d0,1
sub d0,d0,1
make_huftable8:
jr ra
nop /*(Delay Slot) */