aaaaaa123456789 267352c4d8 Update lzcomp
2020-06-28 05:20:05 -03:00

113 lines
5.7 KiB
C

#include "proto.h"
/*
Multi-pass compressor: performs an initial pass generating a single command for each byte position in the data and
refines the command stream further in subsequent passes.
Methods defined: 16
Flags values: the flags are a bitfield; each bit triggers some alternate behavior if set:
1: always emit a literal command (0) for the first byte of the file
2: when reducing a two-byte repetition (2) command in the overlap elimination pass, don't force it to contain a
whole number of repetitions (i.e., an even count)
4: don't emit copy commands (4, 5, 6) with a count of 3
8: don't emit single-byte repetition (1) commands
*/
struct command * try_compress_multi_pass (const unsigned char * data, const unsigned char * flipped, unsigned short * size, unsigned flags) {
struct command * result = calloc(*size, sizeof(struct command));
unsigned char * reversed = malloc(*size);
short * sources = malloc(*size * sizeof(short));
unsigned short pos, next, current = 0;
for (pos = 0; pos < *size; pos ++) {
reversed[pos] = data[*size - 1 - pos];
sources[pos] = -1;
}
for (pos = (flags & 1); pos < *size; pos += (result[pos].count >= MULTIPASS_SKIP_THRESHOLD) ? result[pos].count : 1) {
result[pos] = pick_command_for_pass(data, flipped, reversed, sources, *size, pos, flags);
if ((result[pos].command >= 4) || (result[pos].count < MULTIPASS_SKIP_THRESHOLD)) sources[current ++] = pos;
}
free(reversed);
free(sources);
for (pos = 0; pos < *size; pos ++) {
for (current = 1; current < result[pos].count; current ++) if (result[pos + current].count > result[pos].count) {
result[pos].count = current;
if ((result[pos].command == 2) && (current & 1) && !(flags & 2)) result[pos].count --;
}
if (result[pos].count <= command_size(result[pos])) result[pos] = (struct command) {.command = 0, .count = 0};
}
for (pos = 0; pos < *size; pos ++)
if (!result[pos].command) {
for (current = 1; (current < MAX_COMMAND_COUNT) && ((pos + current) < *size); current ++) if (result[pos + current].command) break;
result[pos] = (struct command) {.command = 0, .count = current, .value = pos};
} else if (result[pos].count > MAX_COMMAND_COUNT) {
result[pos + MAX_COMMAND_COUNT] = result[pos];
result[pos + MAX_COMMAND_COUNT].count -= MAX_COMMAND_COUNT;
if ((result[pos + MAX_COMMAND_COUNT].command >= 4) && (result[pos + MAX_COMMAND_COUNT].value >= 0))
result[pos + MAX_COMMAND_COUNT].value += (result[pos].command == 6) ? -MAX_COMMAND_COUNT : MAX_COMMAND_COUNT;
result[pos].count = MAX_COMMAND_COUNT;
}
for (next = pos = 0; pos < *size; pos ++)
if (pos == next)
next += result[pos].count;
else
result[pos].command = 7;
repack(&result, size);
return result;
}
struct command pick_command_for_pass (const unsigned char * data, const unsigned char * flipped, const unsigned char * reversed, const short * sources,
unsigned short length, unsigned short position, unsigned flags) {
struct command result = pick_repetition_for_pass(data, length, position, flags);
if (result.count >= MULTIPASS_SKIP_THRESHOLD) return result;
unsigned char p;
for (p = 0; p < 3; p ++) {
struct command temp = pick_copy_for_pass(data, p[(const unsigned char * []) {data, flipped, reversed}], sources, p + 4, length, position, flags);
if (temp.command == 7) continue;
if (temp.count > result.count) result = temp;
}
if ((result.command >= 4) && (result.value >= (position - LOOKBACK_LIMIT))) result.value -= position;
return result;
}
struct command pick_repetition_for_pass (const unsigned char * data, unsigned short length, unsigned short position, unsigned flags) {
unsigned short p;
if (data[position]) {
if ((position + 1) >= length) return (struct command) {.command = 1, .count = 1, .value = data[position]};
struct command result;
if (!(flags & 8) && (data[position] == data[position + 1]))
result = (struct command) {.command = 1, .value = data[position]};
else
result = (struct command) {.command = 2, .value = data[position] | (data[position + 1] << 8)};
for (p = 1; ((position + p) < length) && (p < LOOKAHEAD_LIMIT); p ++) if (data[position + p] != data[position + (p & 1)]) break;
result.count = p;
return result;
} else {
for (p = position + 1; (p < length) && (p < (position + LOOKAHEAD_LIMIT)); p ++) if (data[p]) break;
return (struct command) {.command = 3, .count = p - position};
}
}
struct command pick_copy_for_pass (const unsigned char * data, const unsigned char * reference, const short * sources, unsigned char command_type,
unsigned short length, unsigned short position, unsigned flags) {
struct command result = {.command = 7, .count = (flags & 4) ? 4 : 3};
if (length < 3) return result;
unsigned refpos, count;
const unsigned char * current;
unsigned char buffer[6] = {0};
memcpy(buffer, reference + length - 3, 3);
while (*sources >= 0) {
refpos = *(sources ++);
if (command_type == 6) refpos = length - 1 - refpos;
if (refpos >= (length - 3))
current = buffer + refpos - (length - 3);
else
current = reference + refpos;
if (memcmp(data + position, current, ((position + 4) > length) ? length - position : 4)) continue;
for (count = 4; (count < (length - position)) && (count < (length - refpos)); count ++) if (data[position + count] != current[count]) break;
if (count > (length - refpos)) count = length - refpos;
if (count > (length - position)) count = length - position;
if (result.count > count) continue;
result = (struct command) {.command = command_type, .count = count, .value = sources[-1]};
}
return result;
}