Match LZ compressed files (#724)

Replace lzcomp with new version and match all LZ compressed files
This commit is contained in:
aaaaaa123456789
2020-05-23 14:30:04 -03:00
committed by GitHub
parent f1513ae8ad
commit 2b7237e299
776 changed files with 1147 additions and 520 deletions

View File

@@ -20,5 +20,9 @@ clean:
gfx md5: common.h
lzcomp: CFLAGS = -O3 -flto -std=c11 -Wall -Wextra -pedantic -Wno-strict-overflow -Wno-sign-compare
lzcomp: $(wildcard lz/*.c) $(wildcard lz/*.h)
$(CC) $(CFLAGS) -o $@ lz/*.c
%: %.c
$(CC) $(CFLAGS) -o $@ $<

35
tools/lz/global.c Normal file
View File

@@ -0,0 +1,35 @@
#include "proto.h"
// Table of all available compressors, terminated by an all-zero entry (NULL name, zero methods).
// Global method numbers are assigned consecutively in table order (see the range noted after each entry); the
// method counts add up to 96, the value of COMPRESSION_METHODS.
const struct compressor compressors[] = {
// NOTE: the "flags" argument passed to each compressor function will be set to the chosen/current method number
// minus the base number for that particular compressor. That means that each compressor will use a zero-based
// flags value.
{.methods = 72, .name = "singlepass", .function = &try_compress_single_pass}, // 0-71
{.methods = 2, .name = "null", .function = &store_uncompressed}, // 72-73
{.methods = 6, .name = "repetitions", .function = &try_compress_repetitions}, // 74-79
{.methods = 16, .name = "multipass", .function = &try_compress_multi_pass}, // 80-95
{0} // end of the list
};
// Lookup table with 256 entries, indexed by byte value: entry i is i with the order of its eight bits reversed.
// compress() uses it to build the bit-flipped copy of the input that is handed to every compressor.
const unsigned char bit_flipping_table[] = {
// For each byte, the table contains that same byte with its bits flipped around (for instance,
// 0x58 (01011000 binary) becomes 0x1a (00011010 binary)). This is faster than flipping bits
// manually at runtime.
0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
};
// Writable two-character option name ("-x"). get_argument_for_option overwrites the second character with the
// letter of the short option being parsed, so that messages can name the option without its attached argument.
char option_name_buffer[] = "-?"; // used to extract the name of a short option (separated from its argument)

51
tools/lz/main.c Normal file
View File

@@ -0,0 +1,51 @@
#include "proto.h"
int main (int argc, char ** argv) {
struct options options = get_options(argc, argv);
unsigned short size;
unsigned char * file_buffer = read_file_into_buffer(options.input, &size);
struct command * commands;
if (options.mode & 2) {
unsigned short original_size = size, remainder;
commands = get_commands_from_file(file_buffer, &size, &remainder);
if (!commands) error_exit(1, "invalid command stream");
if (options.mode == 2) {
unsigned char * uncompressed = get_uncompressed_data(commands, file_buffer, &size);
if (!uncompressed) error_exit(1, "output data is too large");
write_raw_data_to_file(options.output, uncompressed, size);
free(uncompressed);
} else
write_commands_and_padding_to_textfile(options.output, commands, size, file_buffer, original_size - remainder, remainder);
} else {
commands = compress(file_buffer, &size, options.method);
(options.mode ? write_commands_to_textfile : write_commands_to_file)(options.output, commands, size, file_buffer, options.alignment);
}
free(file_buffer);
free(commands);
return 0;
}
/*
   Compresses the data into a command sequence.
   data: input bytes; *size: input length on entry, number of commands in the returned sequence on exit.
   method: a global method number (below COMPRESSION_METHODS) to run just that method, or any larger value to run
           every method of every compressor and combine the results through select_command_sequence.
   Returns a heap-allocated command array owned by the caller.
*/
struct command * compress (const unsigned char * data, unsigned short * size, unsigned method) {
    unsigned char * bitflipped = malloc(*size);
    unsigned index;
    for (index = 0; index < *size; index ++) bitflipped[index] = bit_flipping_table[data[index]];
    const struct compressor * entry = compressors;
    struct command * result;
    if (method >= COMPRESSION_METHODS) {
        struct command * sequences[COMPRESSION_METHODS];
        unsigned short lengths[COMPRESSION_METHODS];
        // each compressor is run with its flags counting down from (methods - 1) to 0 before moving to the next one
        unsigned remaining = entry -> methods;
        for (index = 0; index < COMPRESSION_METHODS; index ++) {
            if (!remaining) {
                entry ++;
                remaining = entry -> methods;
            }
            remaining --;
            lengths[index] = *size;
            sequences[index] = entry -> function(data, bitflipped, lengths + index, remaining);
        }
        result = select_command_sequence(sequences, lengths, COMPRESSION_METHODS, size);
        for (index = 0; index < COMPRESSION_METHODS; index ++) free(sequences[index]);
    } else {
        // convert the global method number into a compressor and its zero-based flags value
        while (method >= entry -> methods) {
            method -= entry -> methods;
            entry ++;
        }
        result = entry -> function(data, bitflipped, size, method);
    }
    free(bitflipped);
    return result;
}

61
tools/lz/merging.c Normal file
View File

@@ -0,0 +1,61 @@
#include "proto.h"
/*
   Combines several candidate command sequences into a single one.
   The sequence with the smallest compressed length (the earliest one on ties) is copied as the starting point, and
   every other sequence is merged into it in turn, in cyclic order starting right after the chosen one.
   sequences/lengths: the candidates and their command counts; count: number of candidates.
   *final_length receives the command count of the returned, heap-allocated sequence.
*/
struct command * select_command_sequence (struct command ** sequences, const unsigned short * lengths, unsigned count, unsigned short * final_length) {
    unsigned short best = 0, best_size = compressed_length(sequences[0], lengths[0]);
    unsigned short seq;
    for (seq = 1; seq < count; seq ++) {
        unsigned short candidate_size = compressed_length(sequences[seq], lengths[seq]);
        if (candidate_size >= best_size) continue;
        best = seq;
        best_size = candidate_size;
    }
    *final_length = lengths[best];
    size_t bytes = *final_length * sizeof(struct command);
    struct command * merged = malloc(bytes);
    memcpy(merged, sequences[best], bytes);
    for (seq = 1; seq < count; seq ++) {
        unsigned other = (seq + best) % count;
        struct command * next = merge_command_sequences(merged, *final_length, sequences[other], lengths[other], final_length);
        free(merged);
        merged = next;
    }
    return merged;
}
/*
Merges two command sequences into a newly allocated one, choosing between them piece by piece.
Both sequences are walked in parallel. Whenever the next command of each has the same count, pick_best_command
chooses between the two. Otherwise, commands are consumed from whichever side is behind until both sides have covered
the same total count, and the run with the smaller compressed_length is copied (the current sequence wins ties).
current/current_length: the sequence merged so far; new/new_length: the sequence to merge into it.
*result_length receives the number of commands written; the caller owns the returned array.
NOTE(review): the loop is bounded by current_length alone; new_length is only used to size the allocation. This
assumes both sequences cover the same total count -- confirm against the compressors.
*/
struct command * merge_command_sequences (const struct command * current, unsigned short current_length, const struct command * new, unsigned short new_length,
unsigned short * result_length) {
// every output command is taken from one of the two inputs, so the sum of both lengths is a safe upper bound
struct command * result = malloc(sizeof(struct command) * (current_length + new_length));
struct command * current_command = result;
const struct command * saved_current;
const struct command * saved_new;
unsigned short current_pos, new_pos;
while (current_length) {
if (current -> count == new -> count) {
// both sides are aligned on a single command: keep the better of the two
*(current_command ++) = pick_best_command(2, *(current ++), *(new ++));
current_length --;
continue;
}
// remember where the diverging runs begin, then consume one command from each side
saved_current = current;
saved_new = new;
current_pos = (current ++) -> count;
new_pos = (new ++) -> count;
current_length --;
// advance whichever side has covered less until both runs span the same total count
while (current_pos != new_pos)
if (current_pos < new_pos) {
current_pos += (current ++) -> count;
current_length --;
} else
new_pos += (new ++) -> count;
// the position variables are reused here to hold the compressed length of each run
current_pos = compressed_length(saved_current, current - saved_current);
new_pos = compressed_length(saved_new, new - saved_new);
if (new_pos < current_pos) {
memcpy(current_command, saved_new, sizeof(struct command) * (new - saved_new));
current_command += new - saved_new;
} else {
memcpy(current_command, saved_current, sizeof(struct command) * (current - saved_current));
current_command += current - saved_current;
}
}
*result_length = current_command - result;
return result;
}

112
tools/lz/mpcomp.c Normal file
View File

@@ -0,0 +1,112 @@
#include "proto.h"
/*
Multi-pass compressor: performs an initial pass generating a single command for each byte position in the data and
refines the command stream further in subsequent passes.
Methods defined: 16
Flags values: the flags are a bitfield; each bit triggers some alternate behavior if set:
1: always emit a literal command (0) for the first byte of the file
2: when reducing a two-byte repetition (2) command in the overlap elimination pass, don't force it to contain a
whole number of repetitions (i.e., an even count)
4: don't emit copy commands (4, 5, 6) with a count of 3
8: don't emit single-byte repetition (1) commands
*/
/*
Entry point of the multi-pass compressor.
data: input bytes; flipped: the same bytes with their bits reversed; flags: bitfield described above.
*size holds the input length on entry and the number of commands in the returned array on exit.
Works on an array with one command slot per input position, in four passes: pick a command per position, trim
overlaps, fill in literals (and split overlong commands), and finally drop every slot that is not the start of a
command in the chain that begins at position 0.
*/
struct command * try_compress_multi_pass (const unsigned char * data, const unsigned char * flipped, unsigned short * size, unsigned flags) {
// calloc leaves every slot as command 0, count 0 (an empty literal placeholder)
struct command * result = calloc(*size, sizeof(struct command));
unsigned char * reversed = malloc(*size);
// list of positions usable as copy sources; unused entries stay at -1, which pick_copy_for_pass treats as the end
short * sources = malloc(*size * sizeof(short));
unsigned short pos, next, current = 0;
for (pos = 0; pos < *size; pos ++) {
reversed[pos] = data[*size - 1 - pos];
sources[pos] = -1;
}
// pass 1: pick a command for each position; long commands (count >= MULTIPASS_SKIP_THRESHOLD) skip over the
// positions they cover. With flag 1, position 0 is skipped and keeps its empty literal placeholder.
for (pos = (flags & 1); pos < *size; pos += (result[pos].count >= MULTIPASS_SKIP_THRESHOLD) ? result[pos].count : 1) {
result[pos] = pick_command_for_pass(data, flipped, reversed, sources, *size, pos, flags);
// here "current" counts the recorded sources: copy commands and short commands make their position a source
if ((result[pos].command >= 4) || (result[pos].count < MULTIPASS_SKIP_THRESHOLD)) sources[current ++] = pos;
}
free(reversed);
free(sources);
// pass 2 (overlap elimination): cut a command short where a longer command begins inside the range it covers
for (pos = 0; pos < *size; pos ++) {
for (current = 1; current < result[pos].count; current ++) if (result[pos + current].count > result[pos].count) {
result[pos].count = current;
// unless flag 2 is set, keep a two-byte repetition at an even count
if ((result[pos].command == 2) && (current & 1) && !(flags & 2)) result[pos].count --;
}
// a command that covers no more data than command_size reports for it is replaced by the literal placeholder
// (command_size is defined elsewhere; presumably the encoded size in bytes -- confirm)
if (result[pos].count <= command_size(result[pos])) result[pos] = (struct command) {.command = 0, .count = 0};
}
// pass 3: turn each placeholder into a literal reaching up to the next non-literal slot (at most
// MAX_COMMAND_COUNT bytes), and split commands that are longer than MAX_COMMAND_COUNT
for (pos = 0; pos < *size; pos ++)
if (!result[pos].command) {
for (current = 1; (current < MAX_COMMAND_COUNT) && ((pos + current) < *size); current ++) if (result[pos + current].command) break;
result[pos] = (struct command) {.command = 0, .count = current, .value = pos};
} else if (result[pos].count > MAX_COMMAND_COUNT) {
// the remainder is stored MAX_COMMAND_COUNT slots later and handled when the loop reaches it
result[pos + MAX_COMMAND_COUNT] = result[pos];
result[pos + MAX_COMMAND_COUNT].count -= MAX_COMMAND_COUNT;
// copies with a non-negative (absolute) offset must have it moved past the part already copied: backwards
// for command 6, forwards for commands 4 and 5
if ((result[pos + MAX_COMMAND_COUNT].command >= 4) && (result[pos + MAX_COMMAND_COUNT].value >= 0))
result[pos + MAX_COMMAND_COUNT].value += (result[pos].command == 6) ? -MAX_COMMAND_COUNT : MAX_COMMAND_COUNT;
result[pos].count = MAX_COMMAND_COUNT;
}
// pass 4: follow the chain of commands from position 0; every slot that is not the start of a command in that
// chain is marked with the dummy command 7
for (next = pos = 0; pos < *size; pos ++)
if (pos == next)
next += result[pos].count;
else
result[pos].command = 7;
// drop the dummy slots; *size becomes the number of remaining commands
repack(&result, size);
return result;
}
struct command pick_command_for_pass (const unsigned char * data, const unsigned char * flipped, const unsigned char * reversed, const short * sources,
unsigned short length, unsigned short position, unsigned flags) {
struct command result = pick_repetition_for_pass(data, length, position, flags);
if (result.count >= MULTIPASS_SKIP_THRESHOLD) return result;
unsigned char p;
for (p = 0; p < 3; p ++) {
struct command temp = pick_copy_for_pass(data, p[(const unsigned char * []) {data, flipped, reversed}], sources, p + 4, length, position, flags);
if (temp.command == 7) continue;
if (temp.count > result.count) result = temp;
}
if ((result.command >= 4) && (result.value >= (position - LOOKBACK_LIMIT))) result.value -= position;
return result;
}
/*
   Finds the repetition command that starts at the given position, looking ahead at most LOOKAHEAD_LIMIT bytes.
   - A zero byte yields a zero-fill command (3) covering the run of zeros.
   - A non-zero byte in the last position yields a single-byte repetition (1) of count 1.
   - Otherwise the result is a single-byte repetition (1) when the first two bytes are equal and flag 8 is clear,
     or a two-byte repetition (2) of the first two bytes; its count is the length of the alternating run.
*/
struct command pick_repetition_for_pass (const unsigned char * data, unsigned short length, unsigned short position, unsigned flags) {
    if (!data[position]) {
        unsigned short end = position + 1;
        while ((end < length) && (end < (position + LOOKAHEAD_LIMIT)) && !data[end]) end ++;
        return (struct command) {.command = 3, .count = end - position};
    }
    if ((position + 1) >= length) return (struct command) {.command = 1, .count = 1, .value = data[position]};
    // even offsets must match the first byte and odd offsets the second one
    unsigned short run = 1;
    while (((position + run) < length) && (run < LOOKAHEAD_LIMIT) && (data[position + run] == data[position + (run & 1)])) run ++;
    if (!(flags & 8) && (data[position] == data[position + 1]))
        return (struct command) {.command = 1, .count = run, .value = data[position]};
    return (struct command) {.command = 2, .count = run, .value = data[position] | (data[position + 1] << 8)};
}
/*
Searches the recorded source positions for the best copy command of one kind at the given position.
data: input bytes; reference: the buffer that copies of this kind are matched against (the caller passes the data
itself, the bit-flipped data or the reversed data); sources: list of candidate source positions, ended by a negative
entry; command_type: 4, 5 or 6.
Returns the match with the highest count (the latest source wins ties), storing the source position in value.
If no match reaches the minimum count (3, or 4 when flag 4 is set), the returned command is 7.
*/
struct command pick_copy_for_pass (const unsigned char * data, const unsigned char * reference, const short * sources, unsigned char command_type,
unsigned short length, unsigned short position, unsigned flags) {
// command 7 means "no match"; the initial count doubles as the minimum acceptable match length
struct command result = {.command = 7, .count = (flags & 4) ? 4 : 3};
if (length < 3) return result;
unsigned refpos, count;
const unsigned char * current;
// copy of the last three reference bytes followed by three zeros, so that the four-byte comparison below never
// reads beyond the end of the reference
unsigned char buffer[6] = {0};
memcpy(buffer, reference + length - 3, 3);
while (*sources >= 0) {
refpos = *(sources ++);
// for command 6 the reference is the reversed data, so the source position is mirrored
if (command_type == 6) refpos = length - 1 - refpos;
if (refpos >= (length - 3))
current = buffer + refpos - (length - 3);
else
current = reference + refpos;
// NOTE(review): this always reads four bytes at data + position, even when fewer than four bytes remain before
// length -- confirm that the data buffer has enough room after its end.
if (memcmp(data + position, current, 4)) continue;
for (count = 4; (count < (length - position)) && (count < (length - refpos)); count ++) if (data[position + count] != current[count]) break;
// the first four bytes were accepted unconditionally, so clamp the count to what is actually available
if (count > (length - refpos)) count = length - refpos;
if (count > (length - position)) count = length - position;
if (result.count > count) continue;
// sources[-1] is the entry just consumed, i.e. the source position before any mirroring
result = (struct command) {.command = command_type, .count = count, .value = sources[-1]};
}
return result;
}

20
tools/lz/nullcomp.c Normal file
View File

@@ -0,0 +1,20 @@
#include "proto.h"
/*
Null compressor: stores data uncompressed, using literal (0) commands only.
Methods defined: 2
Flags values: 0 = split a trailing 33-to-64-byte block at the end into two short blocks; 1 = don't
*/
/*
   Emits literal (0) commands that cover the whole input, each up to MAX_COMMAND_COUNT bytes long.
   When the low bit of flags is clear, a block of 33 to 64 bytes is cut down to SHORT_COMMAND_COUNT bytes, leaving the
   rest for the following block.
   *size holds the input length on entry and the number of commands on exit; NULL is returned for empty input.
*/
struct command * store_uncompressed (__attribute__((unused)) const unsigned char * data, __attribute__((unused)) const unsigned char * bitflipped, unsigned short * size, unsigned flags) {
    unsigned short remaining = *size, offset = 0, emitted = 0;
    struct command * result = NULL;
    int split_short = !(flags & 1);
    while (remaining) {
        unsigned short block = remaining;
        if (block > MAX_COMMAND_COUNT) block = MAX_COMMAND_COUNT;
        if (split_short && (block > SHORT_COMMAND_COUNT) && (block <= (2 * SHORT_COMMAND_COUNT))) block = SHORT_COMMAND_COUNT;
        result = realloc(result, sizeof(struct command) * (emitted + 1));
        result[emitted ++] = (struct command) {.command = 0, .count = block, .value = offset};
        offset += block;
        remaining -= block;
    }
    *size = emitted;
    return result;
}

141
tools/lz/options.c Normal file
View File

@@ -0,0 +1,141 @@
#include "proto.h"
/*
   Parses the command line into a struct options.
   Options are read until the first argument that does not begin with '-', a lone "-", or "--" (which is consumed).
   After them, up to two file names may follow (input, then output); "-" or a missing name keeps the corresponding
   field NULL. When a compressor was selected, the method number is taken as relative to it and converted into a
   global method number. Invalid input ends the program through error_exit or usage.
*/
struct options get_options (int argc, char ** argv) {
    struct options result = {.input = NULL, .output = NULL, .mode = 0, .alignment = 0, .method = COMPRESSION_METHODS};
    const char * program_name = *argv;
    int compressor = -1;
    if (argc == 1) usage(program_name);
    for (argv ++; *argv; argv ++) {
        const char * arg = *argv;
        if ((arg[0] != '-') || !arg[1]) break;
        if (!strcmp(arg, "--")) {
            argv ++;
            break;
        }
        if (!strcmp(arg, "--text") || !strcmp(arg, "-t"))
            result.mode = 1;
        else if (!strcmp(arg, "--binary") || !strcmp(arg, "-b"))
            result.mode = 0;
        else if (!strcmp(arg, "--uncompress") || !strcmp(arg, "-u"))
            result.mode = 2;
        else if (!strcmp(arg, "--dump") || !strcmp(arg, "-d"))
            result.mode = 3;
        // options taking a value: the short form carries it in the same argument, hence the prefix comparison
        else if (!strcmp(arg, "--align") || !strncmp(arg, "-a", 2))
            result.alignment = parse_numeric_option_argument(&argv, 12);
        else if (!strcmp(arg, "--method") || !strncmp(arg, "-m", 2))
            result.method = parse_numeric_option_argument(&argv, COMPRESSION_METHODS - 1);
        else if (!strcmp(arg, "--compressor") || !strncmp(arg, "-c", 2))
            compressor = parse_compressor_option_argument(&argv);
        else if (!strcmp(arg, "--optimize") || !strcmp(arg, "-o")) {
            result.method = COMPRESSION_METHODS;
            compressor = -1;
        } else if (!strcmp(arg, "--help") || !strcmp(arg, "-?"))
            usage(program_name);
        else if (!strcmp(arg, "--list") || !strcmp(arg, "-l"))
            list_compressors();
        else
            error_exit(3, "unknown option: %s", *argv);
    }
    if (compressor >= 0) {
        // no explicit method: use the compressor's first one
        if (result.method >= COMPRESSION_METHODS) result.method = 0;
        if (result.method >= compressors[compressor].methods)
            error_exit(3, "method for the %s compressor must be between 0 and %u", compressors[compressor].name, compressors[compressor].methods - 1);
        // add the method counts of all preceding compressors to obtain the global method number
        int preceding;
        for (preceding = 0; preceding < compressor; preceding ++) result.method += compressors[preceding].methods;
    }
    if (!*argv) return result;
    if (strcmp(*argv, "-")) result.input = *argv;
    argv ++;
    if (!*argv) return result;
    if (argv[1]) error_exit(3, "too many command-line arguments");
    if (strcmp(*argv, "-")) result.output = *argv;
    return result;
}
/*
   Reads the argument of the option at *alp as a decimal number no greater than limit.
   Exits with an error if the text is not entirely consumed by strtoul or if the value exceeds the limit.
*/
unsigned parse_numeric_option_argument (char *** alp, unsigned limit) {
    const char * option_name;
    const char * text = get_argument_for_option(alp, &option_name);
    char * end;
    unsigned long parsed = strtoul(text, &end, 10);
    if (*end != '\0') error_exit(3, "invalid argument to option %s", option_name);
    if (parsed > limit) error_exit(3, "argument to option %s must be between 0 and %u", option_name, limit);
    return (unsigned) parsed;
}
int parse_compressor_option_argument (char *** alp) {
const char * name = get_argument_for_option(alp, NULL);
if (!strcmp(name, "*")) return -1;
int result = -1;
unsigned length = strlen(name);
const struct compressor * compressor;
for (compressor = compressors; compressor -> name; compressor ++) {
if (strncmp(name, compressor -> name, length)) continue;
if (result >= 0) error_exit(3, "ambiguous compressor prefix: %s", name);
result = compressor - compressors;
}
if (result < 0) error_exit(3, "unknown compressor: %s", name);
return result;
}
/*
   Returns the argument that belongs to the option at **alp.
   alp is the address of the caller's argument list pointer. For a long option ("--name") the argument is the next
   element of the list and *alp is advanced onto it; for a short option ("-xvalue") the argument is the text after
   the second character and *alp is left untouched. Either way *alp ends on the last consumed element, since the
   caller is expected to step past it.
   If option_name is not NULL, it receives the option's name for use in messages.
   Exits with an error if the argument is missing or empty.
*/
const char * get_argument_for_option (char *** alp, const char ** option_name) {
    char ** arguments = *alp;
    const char * name;
    const char * value;
    if (arguments[0][1] == '-') {
        name = arguments[0];
        value = arguments[1];
        *alp = arguments + 1;
    } else {
        // build a standalone "-x" name so that messages don't include the attached argument
        option_name_buffer[1] = arguments[0][1];
        name = option_name_buffer;
        value = arguments[0] + 2;
    }
    if (!value || !*value) error_exit(3, "option %s requires an argument", name);
    if (option_name) *option_name = name;
    return value;
}
/*
   Prints the help text to standard error and exits with status 3.
   The text is kept in two tables because the line reporting the valid method range, which needs formatting, sits
   between them.
*/
noreturn usage (const char * program_name) {
    static const char * const first_part[] = {
        "Execution mode:\n",
        " -b, --binary Output the command stream as binary data (default).\n",
        " -t, --text Output the command stream as text.\n",
        " -u, --uncompress Process a compressed file and output the original data.\n",
        " -d, --dump Process a compressed file and dump the command stream as\n",
        " text (as if compressed with the --text option).\n",
        " -l, --list List compressors and their method numbers.\n",
        " -?, --help Print this help text and exit.\n",
        "Compression options:\n",
        " -o, --optimize Use the best combination of compression\n",
        " methods available (default).\n",
        " -m<number>, --method <number> Use only one specific compression method.\n"
    };
    static const char * const second_part[] = {
        " -c<name>, --compressor <name> Use the specified compressor: the method\n",
        " number will be relative to that compressor.\n",
        " Any prefix of the compressor name may be\n",
        " specified. Use * to indicate any compressor.\n",
        " -a<number>, --align <number> Pad the compressed output with zeros until\n",
        " the size has the specified number of low bits\n",
        " cleared (default: 0).\n",
        "The source and output filenames can be given as - (or omitted) to use standard\n",
        "input and output. Use -- to indicate that subsequent arguments are file names.\n"
    };
    size_t line;
    fprintf(stderr, "Usage: %s [<options>] [<source file> [<output>]]\n\n", program_name);
    for (line = 0; line < (sizeof first_part / sizeof *first_part); line ++) fputs(first_part[line], stderr);
    fprintf(stderr, " Valid method numbers are between 0 and %u.\n", COMPRESSION_METHODS - 1);
    for (line = 0; line < (sizeof second_part / sizeof *second_part); line ++) fputs(second_part[line], stderr);
    exit(3);
}
/*
   Prints a table of all compressors to standard error, with the lowest global method number (offset) and the
   number of methods of each one, and exits with status 3.
*/
noreturn list_compressors (void) {
    const struct compressor * compressor;
    // the name column is at least as wide as its heading ("Compressor", 10 characters)
    unsigned current, length = 10;
    for (compressor = compressors; compressor -> name; compressor ++) if ((current = strlen(compressor -> name)) > length) length = current;
    // a * field width consumes an int argument, so the unsigned width is converted explicitly
    fprintf(stderr, "%-*s Offset Methods\n", (int) length, "Compressor");
    for (current = 0; current < length; current ++) putc('-', stderr);
    fputs(" ------ -------\n", stderr);
    // from here on, current is the running total of methods, i.e. the offset of the next compressor
    current = 0;
    for (compressor = compressors; compressor -> name; compressor ++) {
        fprintf(stderr, "%-*s %6u %7u\n", (int) length, compressor -> name, current, compressor -> methods);
        current += compressor -> methods;
    }
    putc('\n', stderr);
    fputs("Note: the offset indicates the compressor's lowest method number when the\n", stderr);
    fputs("--compressor option is not given. When that option is used, every compressor's\n", stderr);
    fputs("methods are numbered from zero.\n", stderr);
    exit(3);
}

138
tools/lz/output.c Normal file
View File

@@ -0,0 +1,138 @@
#include "proto.h"
void write_commands_to_textfile (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream,
unsigned char alignment) {
FILE * fp = file ? fopen(file, "w") : stdout;
if (!fp) error_exit(1, "could not open file %s for writing", file);
unsigned length = 0;
while (count --) {
write_command_to_textfile(fp, *commands, input_stream);
length += command_size(*(commands ++));
}
if (fputs("\tlzend\n", fp) < 0) error_exit(1, "could not write terminator to compressed output");
length = ~length & ((1 << alignment) - 1);
if (length --) {
int rv = fputs("\tdb 0", fp);
while ((rv >= 0) && length --) rv = fputs(", 0", fp);
if (rv >= 0) rv = -(putc('\n', fp) == EOF);
if (rv < 0) error_exit(1, "could not write padding to compressed output");
}
if (file) fclose(fp);
}
void write_commands_and_padding_to_textfile (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream,
unsigned padding_offset, unsigned padding_size) {
FILE * fp = file ? fopen(file, "w") : stdout;
if (!fp) error_exit(1, "could not open file %s for writing", file);
while (count --) write_command_to_textfile(fp, *(commands ++), input_stream);
if (fputs("\tlzend\n", fp) < 0) error_exit(1, "could not write terminator to compressed output");
if (padding_size) {
input_stream += padding_offset;
int rv = fprintf(fp, "\tdb $%02hhx", *(input_stream ++));
while ((rv >= 0) && (-- padding_size)) rv = fprintf(fp, ", $%02hhx", *(input_stream ++));
if (rv >= 0) rv = -(putc('\n', fp) == EOF);
if (rv < 0) error_exit(1, "could not write padding to compressed output");
}
if (file) fclose(fp);
}
/*
   Writes a single command as one line of text.
   fp: output stream; input_stream: the original data, read for literal (0) commands at the offset held in value.
   Exits with status 2 if the command is invalid (bad count, command number or value), and with status 1 if writing
   to the stream fails.
*/
void write_command_to_textfile (FILE * fp, struct command command, const unsigned char * input_stream) {
    if ((!command.count) || (command.count > MAX_COMMAND_COUNT)) error_exit(2, "invalid command in output stream");
    int rv, pos;
    const char * kind = NULL;
    switch (command.command) {
        case 0:
            rv = fprintf(fp, "\tlzdata");
            for (pos = 0; (rv >= 0) && (pos < command.count); pos ++)
                rv = fprintf(fp, "%s$%02hhx", pos ? ", " : " ", input_stream[command.value + pos]);
            // only end the line if everything so far succeeded, so that an earlier failure is never overwritten by
            // the result of writing the newline
            if (rv >= 0) rv = putc('\n', fp);
            break;
        case 1:
            if ((command.value < 0) || (command.value > 255)) error_exit(2, "invalid command in output stream");
            rv = fprintf(fp, "\tlzrepeat %u, $%02hhx\n", command.count, (unsigned char) command.value);
            break;
        case 2:
            if (command.value < 0) error_exit(2, "invalid command in output stream");
            // the value holds both bytes: the first one in its low byte, the second one in the next byte
            rv = fprintf(fp, "\tlzrepeat %u, $%02hhx, $%02hhx\n", command.count, (unsigned char) command.value, (unsigned char) (command.value >> 8));
            break;
        case 3:
            rv = fprintf(fp, "\tlzzero %u\n", command.count);
            break;
        case 4:
            kind = "normal";
            goto copy;
        case 5:
            kind = "flipped";
            goto copy;
        case 6:
            kind = "reversed";
        copy:
            if ((command.value < -LOOKBACK_LIMIT) || (command.value >= MAX_FILE_SIZE)) error_exit(2, "invalid command in output stream");
            // negative offsets are printed in decimal; non-negative ones as four hexadecimal digits
            if (command.value < 0)
                rv = fprintf(fp, "\tlzcopy %s, %u, %d\n", kind, command.count, command.value);
            else
                rv = fprintf(fp, "\tlzcopy %s, %u, $%04hx\n", kind, command.count, (unsigned short) command.value);
            break;
        default:
            error_exit(2, "invalid command in output stream");
    }
    if (rv < 0) error_exit(1, "could not write command to compressed output");
}
void write_commands_to_file (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream, unsigned char alignment) {
FILE * fp = file ? fopen(file, "wb") : stdout;
if (!fp) error_exit(1, "could not open file %s for writing", file);
unsigned length = 0;
while (count --) {
write_command_to_file(fp, *commands, input_stream);
length += command_size(*(commands ++));
}
if (putc(-1, fp) == EOF) error_exit(1, "could not write terminator to compressed output");
length = ~length & ((1 << alignment) - 1);
while (length --) if (putc(0, fp) == EOF) error_exit(1, "could not write padding to compressed output");
if (file) fclose(fp);
}
/*
Writes a single command in binary form.
fp: output stream; input_stream: the original data, from which the bytes of a literal (0) command are taken at the
offset held in value.
Exits with status 2 if the command is invalid, and with status 1 if writing to the stream fails.
*/
void write_command_to_file (FILE * fp, struct command command, const unsigned char * input_stream) {
if ((!command.count) || (command.count > MAX_COMMAND_COUNT)) error_exit(2, "invalid command in output stream");
// header and operand bytes: at most two header bytes plus two operand bytes
unsigned char buf[4];
unsigned char * pos = buf;
int n;
// the count is stored minus one
command.count --;
if (command.count < SHORT_COMMAND_COUNT)
// short form: one byte with the command in the top three bits and the count in the low five
*(pos ++) = (command.command << 5) + command.count;
else {
// long form: 224 sets the top three bits, followed by the command and the high bits of the count; the second
// byte holds the low eight bits of the count
*(pos ++) = 224 + (command.command << 2) + (command.count >> 8);
*(pos ++) = command.count;
}
switch (command.command) {
case 1: case 2:
// the value must fit in as many bytes as the command number (one or two); it is written low byte first
if ((command.value < 0) || (command.value >= (1 << (command.command << 3)))) error_exit(2, "invalid command in output stream");
for (n = 0; n < command.command; n ++) *(pos ++) = command.value >> (n << 3);
// deliberate fallthrough: nothing else to write for these commands
case 0: case 3:
break;
default:
if ((command.value < -LOOKBACK_LIMIT) || (command.value >= MAX_FILE_SIZE)) error_exit(2, "invalid command in output stream");
if (command.value < 0)
// negative offset: a single byte with the top bit set (-1 becomes 0x80 and -128 becomes 0xff)
*(pos ++) = command.value ^ 127;
else {
// non-negative offset: two bytes, high byte first
*(pos ++) = command.value >> 8;
*(pos ++) = command.value;
}
}
if (fwrite(buf, 1, pos - buf, fp) != (pos - buf)) error_exit(1, "could not write command to compressed output");
// only literal commands are followed by data
if (command.command) return;
// restore the real count, which was decremented for encoding
command.count ++;
if (fwrite(input_stream + command.value, 1, command.count, fp) != command.count) error_exit(1, "could not write data to compressed output");
}
/*
   Writes length bytes of raw data to a file.
   file: output file name, or NULL for standard output.
   Exits with status 1 if the file cannot be opened or if the data cannot be written completely.
*/
void write_raw_data_to_file (const char * file, const void * data, unsigned length) {
    // the data is arbitrary binary content, so the file is opened in binary mode (like write_commands_to_file does)
    // to keep platforms that translate newlines in text mode from altering it
    FILE * fp = file ? fopen(file, "wb") : stdout;
    if (!fp) error_exit(1, "could not open file %s for writing", file);
    const char * cursor = data;
    while (length) {
        // fwrite may write less than requested; keep going until everything is written or no progress is made
        unsigned written = fwrite(cursor, 1, length, fp);
        if (!written) error_exit(1, "could not write raw data to output");
        cursor += written;
        length -= written;
    }
    // fclose flushes buffered data, so a failure here means that the output is incomplete
    if (file && fclose(fp)) error_exit(1, "could not write raw data to output");
}

58
tools/lz/packing.c Normal file
View File

@@ -0,0 +1,58 @@
#include "proto.h"
/*
Merges adjacent commands in place where possible. Commands that are absorbed into a previous one are not removed;
they are marked with the dummy command 7 instead (repack removes such commands).
commands/count: the sequence to process; existing dummy commands are skipped over.
Throughout the loop, "commands" points at the last command that was kept and "next" at the command being examined.
*/
void optimize (struct command * commands, unsigned short count) {
// skip leading dummy commands so that the first kept command is a real one
while (count && (commands -> command == 7)) commands ++, count --;
if (count < 2) return;
struct command * end = commands + count;
struct command * next;
for (next = commands + 1; next < end; next ++) {
if (next -> command == 7) continue;
// absorb "next" into a preceding literal when command_size reports as much for it as the data it covers, as long
// as the combined literal fits the count limit and doesn't turn a short literal (SHORT_COMMAND_COUNT or less)
// into a long one
// NOTE(review): this relies on the data covered by "next" directly following the literal's data in the source --
// confirm that callers only pass sequences where consecutive commands cover consecutive data
if (
!(commands -> command) &&
(command_size(*next) == next -> count) &&
((commands -> count + next -> count) <= MAX_COMMAND_COUNT) &&
((commands -> count > SHORT_COMMAND_COUNT) || ((commands -> count + next -> count) <= SHORT_COMMAND_COUNT))
) {
commands -> count += next -> count;
next -> command = 7;
continue;
}
if (next -> command == commands -> command) {
switch (commands -> command) {
case 0:
// two literals can only be joined if their source ranges are contiguous
if ((commands -> value + commands -> count) != next -> value) break;
commands -> count += next -> count;
next -> command = 7;
if (commands -> count <= MAX_COMMAND_COUNT) continue;
// too long for a single command: fill the first literal and leave the excess in "next"
next -> command = 0;
next -> value = commands -> value + MAX_COMMAND_COUNT;
next -> count = commands -> count - MAX_COMMAND_COUNT;
commands -> count = MAX_COMMAND_COUNT;
break;
case 1:
// single-byte repetitions can only be joined if they repeat the same byte
if (commands -> value != next -> value) break;
// fallthrough
case 3:
if ((commands -> count + next -> count) <= MAX_COMMAND_COUNT) {
commands -> count += next -> count;
next -> command = 7;
continue;
}
// too long for a single command: fill the first one and leave the excess in "next"
next -> count = (commands -> count + next -> count) - MAX_COMMAND_COUNT;
commands -> count = MAX_COMMAND_COUNT;
break;
}
}
// "next" was kept (possibly shortened), so it becomes the command that later ones are compared against
commands = next;
}
}
/*
   Replaces *commands with a newly allocated array that holds the same commands without the dummy ones (command 7),
   keeping their order. The old array is freed and *length is updated to the number of commands kept.
*/
void repack (struct command ** commands, unsigned short * length) {
    struct command * packed = malloc(sizeof(struct command) * *length);
    unsigned short kept = 0, index;
    for (index = 0; index < *length; index ++) {
        if ((*commands)[index].command == 7) continue;
        packed[kept ++] = (*commands)[index];
    }
    free(*commands);
    *commands = packed;
    *length = kept;
}

105
tools/lz/proto.h Normal file
View File

@@ -0,0 +1,105 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#define COMPRESSION_METHODS 96 /* sum of all values for the methods field in compressors */
#define MAX_FILE_SIZE 32768 /* offsets of copy commands must be lower than this value */
#define SHORT_COMMAND_COUNT 32 /* highest count that can be encoded with a single header byte */
#define MAX_COMMAND_COUNT 1024 /* highest count that a single command can have */
#define LOOKBACK_LIMIT 128 /* largest magnitude of a negative (relative) offset for a copy command */
#define LOOKAHEAD_LIMIT 3072 /* maximum lookahead distance for the first pass of multi-pass compression */
#define MULTIPASS_SKIP_THRESHOLD 64 /* multi-pass compression: commands at least this long skip the positions they cover */
#if __STDC_VERSION__ >= 201112L
// <stdnoreturn.h> forces "noreturn void", which is silly and redundant; this is simpler
#define noreturn _Noreturn void
#else
#define noreturn void /* fallback */
#endif
// A single compression command. The three bitfields add up to 32 bits.
struct command {
unsigned command: 3; // commands 0-6 as per compression spec; command 7 is used as a dummy placeholder
unsigned count: 12; // always equals the uncompressed data length
signed value: 17; // offset for commands 0 (into source) and 4-6 (into decompressed output); repeated bytes for commands 1-2
};
// Description of one compressor, as listed in the compressors table.
struct compressor {
unsigned methods; // number of methods (distinct flags values, starting at zero) that the compressor offers
const char * name; // name accepted by the --compressor option; NULL in the entry that ends the table
// compression function; compress() calls it with the data, the bit-flipped data, a pointer to the size (data length
// on entry, number of commands on exit) and the flags value
struct command * (* function) (const unsigned char *, const unsigned char *, unsigned short *, unsigned);
};
// Settings obtained from the command line by get_options.
struct options {
const char * input; // input file name, or NULL if none was given (or it was given as "-")
const char * output; // output file name, or NULL if none was given (or it was given as "-")
unsigned method; // method to use, or >= COMPRESSION_METHODS to try them all
unsigned char mode; // 0: compress, 1: compress to text, 2: uncompress, 3: dump commands as text
unsigned char alignment; // number of low bits that must be clear in the output size (i.e., align to 1 << value)
};
// Declarations of all globals and functions shared between the source files, grouped by the file that defines them.
// global.c
extern const struct compressor compressors[];
extern const unsigned char bit_flipping_table[];
extern char option_name_buffer[];
// main.c
int main(int, char **);
struct command * compress(const unsigned char *, unsigned short *, unsigned);
// merging.c
struct command * select_command_sequence(struct command **, const unsigned short *, unsigned, unsigned short *);
struct command * merge_command_sequences(const struct command *, unsigned short, const struct command *, unsigned short, unsigned short *);
// mpcomp.c
struct command * try_compress_multi_pass(const unsigned char *, const unsigned char *, unsigned short *, unsigned);
struct command pick_command_for_pass(const unsigned char *, const unsigned char *, const unsigned char *, const short *, unsigned short,
unsigned short, unsigned);
struct command pick_repetition_for_pass(const unsigned char *, unsigned short, unsigned short, unsigned);
struct command pick_copy_for_pass(const unsigned char *, const unsigned char *, const short *, unsigned char, unsigned short, unsigned short, unsigned);
// nullcomp.c
struct command * store_uncompressed(const unsigned char *, const unsigned char *, unsigned short *, unsigned);
// options.c
struct options get_options(int, char **);
unsigned parse_numeric_option_argument(char ***, unsigned);
int parse_compressor_option_argument(char ***);
const char * get_argument_for_option(char ***, const char **);
noreturn usage(const char *);
noreturn list_compressors(void);
// output.c
void write_commands_to_textfile(const char *, const struct command *, unsigned, const unsigned char *, unsigned char);
void write_commands_and_padding_to_textfile(const char *, const struct command *, unsigned, const unsigned char *, unsigned, unsigned);
void write_command_to_textfile(FILE *, struct command, const unsigned char *);
void write_commands_to_file(const char *, const struct command *, unsigned, const unsigned char *, unsigned char);
void write_command_to_file(FILE *, struct command, const unsigned char *);
void write_raw_data_to_file(const char *, const void *, unsigned);
// packing.c
void optimize(struct command *, unsigned short);
void repack(struct command **, unsigned short *);
// repcomp.c
struct command * try_compress_repetitions(const unsigned char *, const unsigned char *, unsigned short *, unsigned);
struct command find_repetition_at_position(const unsigned char *, unsigned short, unsigned short);
// spcomp.c
struct command * try_compress_single_pass(const unsigned char *, const unsigned char *, unsigned short *, unsigned);
struct command find_best_copy(const unsigned char *, unsigned short, unsigned short, const unsigned char *, unsigned);
unsigned short scan_forwards(const unsigned char *, unsigned short, const unsigned char *, unsigned short, short *);
unsigned short scan_backwards(const unsigned char *, unsigned short, unsigned short, short *);
struct command find_best_repetition(const unsigned char *, unsigned short, unsigned short);
// uncomp.c
struct command * get_commands_from_file(const unsigned char *, unsigned short * restrict, unsigned short * restrict);
unsigned char * get_uncompressed_data(const struct command *, const unsigned char *, unsigned short *);
// util.c
noreturn error_exit(int, const char *, ...);
unsigned char * read_file_into_buffer(const char *, unsigned short *);
struct command pick_best_command(unsigned, struct command, ...);
int is_better(struct command, struct command);
short command_size(struct command);
unsigned short compressed_length(const struct command *, unsigned short);

63
tools/lz/repcomp.c Normal file
View File

@@ -0,0 +1,63 @@
#include "proto.h"
/*
Repetitions compressor: compresses the data only using a subset of the available repetition commands.
Methods defined: 6
Flags values: the value plus one is taken as a bitfield indicating which kinds of repetition commands are used
(lowest bit to highest: repeat single byte (1), repeat two bytes (2), repeat zeros (3)).
*/
struct command * try_compress_repetitions (const unsigned char * data, __attribute__((unused)) const unsigned char * bitflipped, unsigned short * size, unsigned flags) {
unsigned short pos = 0, skipped = 0;
struct command * result = malloc(*size * sizeof(struct command));
struct command * current = result;
struct command candidate;
flags = (flags + 1) << 1;
while (pos < *size) {
candidate = find_repetition_at_position(data, pos, *size);
if ((candidate.command == 3) && !(flags & 8)) {
candidate.command = 1;
candidate.value = 0;
}
if ((candidate.command == 1) && !(flags & 2)) {
candidate.command = 2;
candidate.value |= candidate.value << 8;
}
if ((flags & (1 << candidate.command)) && (command_size(candidate) <= candidate.count)) {
if (skipped) *(current ++) = (struct command) {.command = 0, .count = skipped, .value = pos - skipped};
skipped = 0;
*(current ++) = candidate;
pos += candidate.count;
} else {
pos ++;
if ((++ skipped) == MAX_COMMAND_COUNT) {
*(current ++) = (struct command) {.command = 0, .count = MAX_COMMAND_COUNT, .value = pos - MAX_COMMAND_COUNT};
skipped = 0;
}
}
}
if (skipped) *(current ++) = (struct command) {.command = 0, .count = skipped, .value = pos - skipped};
*size = current - result;
result = realloc(result, *size * sizeof(struct command));
return result;
}
struct command find_repetition_at_position (const unsigned char * data, unsigned short position, unsigned short length) {
if ((position + 1) >= length) return data[position] ? ((struct command) {.command = 7}) : ((struct command) {.command = 3, .count = 1});
unsigned char value[2] = {data[position], data[position + 1]};
unsigned repcount, limit = length - position;
if (limit > MAX_COMMAND_COUNT) limit = MAX_COMMAND_COUNT;
for (repcount = 2; (repcount < limit) && (data[position + repcount] == value[repcount & 1]); repcount ++);
struct command result;
result.count = repcount;
if (*value != value[1]) {
if (!*value && (repcount < 3)) return (struct command) {.command = 3, .count = 1};
result.command = 2;
result.value = ((unsigned) (*value)) | (((unsigned) (value[1])) << 8);
} else if (*value) {
result.command = 1;
result.value = *value;
} else
result.command = 3;
return result;
}

141
tools/lz/spcomp.c Normal file
View File

@@ -0,0 +1,141 @@
#include "proto.h"
/*
Single-pass compressor: attempts to compress the data in a single pass, selecting the best command at each
position within some constraints.
Methods defined: 72
Flags values:
Bit fields (will trigger alternate behavior if set):
1: prefer repetition commands over copy commands of equal count
2: don't emit a copy or repetition with a count equal to its size when the previous command is a literal (0) that
is not at maximum size (32 or 1024)
4: don't emit long copy commands
Selector values (pick one from each group and add them to the bit fields):
- Scan delay: number of bytes that are forced into literal (0) commands after each non-literal command:
0: 0 bytes
8: 1 byte
16: 2 bytes
- Copy command preference (when the command counts are tied), in order from most to least:
0: normal (4), reversed (6), flipped (5)
24: reversed (6), flipped (5), normal (4)
48: flipped (5), reversed (6), normal (4)
*/
struct command * try_compress_single_pass (const unsigned char * data, const unsigned char * bitflipped, unsigned short * length, unsigned flags) {
struct command * commands = malloc(sizeof(struct command) * *length);
memset(commands, -1, sizeof(struct command) * *length);
struct command * current_command = commands;
unsigned short position = 0, previous_data = 0;
unsigned char scan_delay = 0, scan_delay_flag = (flags >> 3) % 3;
struct command copy, repetition;
while (position < *length) {
copy = find_best_copy(data, position, *length, bitflipped, flags);
repetition = find_best_repetition(data, position, *length);
if (flags & 1)
*current_command = pick_best_command(2, repetition, copy);
else
*current_command = pick_best_command(2, copy, repetition);
*current_command = pick_best_command(2, (struct command) {.command = 0, .count = 1, .value = position}, *current_command);
if ((flags & 2) && (command_size(*current_command) == current_command -> count))
if (previous_data && (previous_data != SHORT_COMMAND_COUNT) && (previous_data != MAX_COMMAND_COUNT))
*current_command = (struct command) {.command = 0, .count = 1, .value = position};
if (scan_delay_flag) {
if (scan_delay >= scan_delay_flag)
scan_delay = 0;
else if (current_command -> command) {
scan_delay ++;
*current_command = (struct command) {.command = 0, .count = 1, .value = position};
}
}
if (current_command -> command)
previous_data = 0;
else
previous_data += current_command -> count;
position += (current_command ++) -> count;
}
optimize(commands, current_command - commands);
repack(&commands, length);
return commands;
}
struct command find_best_copy (const unsigned char * data, unsigned short position, unsigned short length, const unsigned char * bitflipped, unsigned flags) {
struct command simple = {.command = 7};
struct command flipped = simple, backwards = simple;
short count, offset;
if ((count = scan_forwards(data + position, length - position, data, position, &offset)))
simple = (struct command) {.command = 4, .count = count, .value = offset};
if ((count = scan_forwards(data + position, length - position, bitflipped, position, &offset)))
flipped = (struct command) {.command = 5, .count = count, .value = offset};
if ((count = scan_backwards(data, length - position, position, &offset)))
backwards = (struct command) {.command = 6, .count = count, .value = offset};
struct command command;
switch (flags / 24) {
case 0: command = pick_best_command(3, simple, backwards, flipped); break;
case 1: command = pick_best_command(3, backwards, flipped, simple); break;
case 2: command = pick_best_command(3, flipped, backwards, simple);
}
if ((flags & 4) && (command.count > SHORT_COMMAND_COUNT)) command.count = SHORT_COMMAND_COUNT;
return command;
}
/*
   Searches source[0 .. real_position - 1] for the start of the longest forward match against target.
   target: the bytes to match. limit: maximum number of bytes to compare.
   source: buffer to search. real_position: current position (candidates must start before it).
   offset: receives the match location: relative to real_position (negative) when the match starts within
           LOOKBACK_LIMIT of it, otherwise the absolute start position.
   Returns the match length (capped at MAX_COMMAND_COUNT), or 0 when nothing matches (offset is left untouched).
   On equal lengths, the later candidate is kept.
*/
unsigned short scan_forwards (const unsigned char * target, unsigned short limit, const unsigned char * source, unsigned short real_position, short * offset) {
  unsigned short match_start = 0, match_length = 0;
  for (unsigned short candidate = 0; candidate < real_position; candidate ++) {
    if (source[candidate] != *target) continue;
    unsigned short run = 0;
    while ((run < limit) && (source[candidate + run] == target[run])) run ++;
    if (run > MAX_COMMAND_COUNT) run = MAX_COMMAND_COUNT;
    if (run >= match_length) {
      match_start = candidate;
      match_length = run;
    }
  }
  if (!match_length) return 0;
  *offset = ((match_start + LOOKBACK_LIMIT) >= real_position) ? (match_start - real_position) : match_start;
  return match_length;
}
/*
   Searches data[0 .. real_position - 1] for the longest reversed match: bytes read backwards from a candidate
   position are compared against the bytes read forwards from real_position.
   data: input buffer. limit: maximum number of bytes to compare (further capped to real_position).
   real_position: current position. offset: receives the match location, relative to real_position (negative)
   when the match starts within LOOKBACK_LIMIT of it, otherwise the absolute start position.
   Returns the match length (capped at MAX_COMMAND_COUNT), or 0 when nothing matches (offset is left untouched).
   On equal lengths, the later candidate is kept.
*/
unsigned short scan_backwards (const unsigned char * data, unsigned short limit, unsigned short real_position, short * offset) {
  if (limit > real_position) limit = real_position;
  unsigned short match_start = 0, match_length = 0;
  for (unsigned short candidate = 0; candidate < real_position; candidate ++) {
    if (data[candidate] != data[real_position]) continue;
    unsigned short run = 0;
    // stop at the beginning of the buffer, at the limit, or at the first mismatch
    while ((run <= candidate) && (run < limit) && (data[candidate - run] == data[real_position + run])) run ++;
    if (run > MAX_COMMAND_COUNT) run = MAX_COMMAND_COUNT;
    if (run >= match_length) {
      match_start = candidate;
      match_length = run;
    }
  }
  if (!match_length) return 0;
  *offset = ((match_start + LOOKBACK_LIMIT) >= real_position) ? (match_start - real_position) : match_start;
  return match_length;
}
struct command find_best_repetition (const unsigned char * data, unsigned short position, unsigned short length) {
if ((position + 1) >= length) return data[position] ? ((struct command) {.command = 7}) : ((struct command) {.command = 3, .count = 1});
unsigned char value[2] = {data[position], data[position + 1]};
unsigned repcount, limit = length - position;
if (limit > MAX_COMMAND_COUNT) limit = MAX_COMMAND_COUNT;
for (repcount = 2; (repcount < limit) && (data[position + repcount] == value[repcount & 1]); repcount ++);
struct command result;
result.count = repcount;
if (*value != value[1]) {
if (!*value && (repcount < 3)) return (struct command) {.command = 3, .count = 1};
result.command = 2;
result.value = ((unsigned) (*value)) | (((unsigned) (value[1])) << 8);
} else if (*value) {
result.command = 1;
result.value = *value;
} else
result.command = 3;
return result;
}

92
tools/lz/uncomp.c Normal file
View File

@@ -0,0 +1,92 @@
#include "proto.h"
/*
   Parses a compressed stream into an array of commands.
   data: the compressed bytes.
   size: on entry, the number of bytes in data; on success, the number of commands returned.
   slack: if not NULL, receives the number of bytes left in data after the 0xff terminator.
   Returns a heap-allocated array of commands (to be released with free), or NULL if the stream is malformed,
   ends before a terminator is found, or memory cannot be allocated. A stream consisting only of the terminator
   is valid and yields a non-NULL array with *size set to 0.
*/
struct command * get_commands_from_file (const unsigned char * data, unsigned short * restrict size, unsigned short * restrict slack) {
  // every command takes at least one byte, so the byte count is an upper bound for the command count
  struct command * result = malloc(*size * sizeof(struct command));
  if (!result) return NULL;
  unsigned short remaining = *size;
  struct command * current = result;
  const unsigned char * rp = data;
  while (1) {
    if (!(remaining --)) goto error;
    // short header: top three bits are the command, low five bits are the count minus one
    current -> command = *rp >> 5;
    current -> count = *(rp ++) & 31;
    if (current -> command == 7) {
      // long header: the five low bits hold the real command and the two upper bits of a ten-bit count
      current -> command = current -> count >> 2;
      current -> count = (current -> count & 3) << 8;
      if (current -> command == 7) {
        // long commands inside long commands are not allowed, but if the count is 0x300 here, it means that the original byte was 0xff
        if (current -> count == 0x300) break;
        goto error;
      }
      if (!(remaining --)) goto error;
      current -> count |= *(rp ++);
    }
    current -> count ++;
    switch (current -> command) {
      case 0:
        // literal data: value is the offset of the bytes within the compressed stream; at least one more byte
        // (the terminator) must follow the data
        if (remaining <= current -> count) goto error;
        current -> value = rp - data;
        rp += current -> count;
        remaining -= current -> count;
        // fallthrough
      case 3:
        break;
      case 1: case 2: {
        // one or two repeated bytes, stored in the value starting from the low byte
        unsigned char p;
        if (remaining <= current -> command) goto error;
        current -> value = 0;
        for (p = 0; p < current -> command; p ++) current -> value |= *(rp ++) << (p << 3);
        remaining -= current -> command;
      } break;
      default:
        // copy commands: a first byte with the high bit set is a negative offset (-1 - low seven bits); otherwise
        // it is the high byte of a two-byte offset
        if (!(remaining --)) goto error;
        if ((current -> value = *(rp ++)) & 128)
          current -> value = 127 - current -> value;
        else {
          if (!(remaining --)) goto error;
          current -> value = (current -> value << 8) | *(rp ++);
        }
    }
    current ++;
  }
  if (slack) *slack = *size - (rp - data);
  *size = current - result;
  // never call realloc with a size of zero (it may free the buffer and return NULL, which callers would take for
  // a parsing error), and keep the original buffer if shrinking it fails
  if (!*size) return result;
  struct command * shrunk = realloc(result, *size * sizeof(struct command));
  return shrunk ? shrunk : result;
  error:
  free(result);
  return NULL;
}
/*
   Expands a list of commands into the uncompressed data.
   commands: the commands to execute. compressed: the compressed stream (source of the bytes for literal commands).
   size: on entry, the number of commands; on success, the number of bytes produced.
   Returns a heap-allocated buffer with the output, or NULL if the output grows beyond MAX_FILE_SIZE.
   NOTE(review): the offsets of copy commands (4-6) are not validated against the data produced so far.
*/
unsigned char * get_uncompressed_data (const struct command * commands, const unsigned char * compressed, unsigned short * size) {
  // the size is only checked after each command, so leave room for one full command past the maximum
  unsigned char * output = malloc(MAX_FILE_SIZE + MAX_COMMAND_COUNT);
  unsigned char * write_ptr = output;
  const struct command * const end = commands + *size;
  while (commands < end) {
    const struct command * cmd = commands ++;
    unsigned short index;
    if (cmd -> command == 0) {
      // literal bytes taken from the compressed stream
      memcpy(write_ptr, compressed + cmd -> value, cmd -> count);
      write_ptr += cmd -> count;
    } else if ((cmd -> command == 1) || (cmd -> command == 2)) {
      // cycle through the one or two bytes stored in the value, low byte first
      for (index = 0; index < cmd -> count; index ++) *(write_ptr ++) = cmd -> value >> ((index % cmd -> command) << 3);
    } else if (cmd -> command == 3) {
      memset(write_ptr, 0, cmd -> count);
      write_ptr += cmd -> count;
    } else {
      // copies: negative offsets are relative to the write position, others are relative to the start of the output
      const unsigned char * ref = ((cmd -> value < 0) ? write_ptr : output) + cmd -> value;
      for (index = 0; index < cmd -> count; index ++) {
        unsigned char byte = ref[(cmd -> command == 6) ? -(int) index : index];
        if (cmd -> command == 5) byte = bit_flipping_table[byte];
        write_ptr[index] = byte;
      }
      write_ptr += cmd -> count;
    }
    if ((write_ptr - output) > MAX_FILE_SIZE) {
      free(output);
      return NULL;
    }
  }
  *size = write_ptr - output;
  return output;
}

54
tools/lz/util.c Normal file
View File

@@ -0,0 +1,54 @@
#include "proto.h"
/*
   Prints "error: ", the printf-style formatted message and a newline to standard error, then terminates the
   program with the given exit status. Never returns.
*/
noreturn error_exit (int error_code, const char * error, ...) {
  va_list arguments;
  fputs("error: ", stderr);
  va_start(arguments, error);
  vfprintf(stderr, error, arguments);
  va_end(arguments);
  putc('\n', stderr);
  exit(error_code);
}
/*
   Reads a whole input file into a newly allocated buffer.
   file: name of the file to read, or NULL to read from standard input.
   size: receives the number of bytes read.
   Returns the heap-allocated buffer (to be released with free). Exits through error_exit if the file cannot be
   opened or read, if memory cannot be allocated, or if the input is larger than MAX_FILE_SIZE.
*/
unsigned char * read_file_into_buffer (const char * file, unsigned short * size) {
  const char * display_name = file ? file : "<standard input>";
  FILE * fp = file ? fopen(file, "rb") : stdin;
  if (!fp) error_exit(1, "could not open file %s for reading", file);
  // one byte more than the maximum is requested so that oversized inputs can be detected
  unsigned char * buf = malloc(MAX_FILE_SIZE + 1);
  if (!buf) error_exit(1, "could not allocate memory to read file %s", display_name);
  // fread never returns a negative value: a short count is either end of file or an error, and only ferror
  // can tell them apart (it must be checked before the stream is closed)
  size_t rv = fread(buf, 1, MAX_FILE_SIZE + 1, fp);
  int read_failed = ferror(fp);
  if (file) fclose(fp);
  if (read_failed) error_exit(1, "could not read from file %s", display_name);
  if (rv > MAX_FILE_SIZE) error_exit(1, "file %s is too big", display_name);
  *size = (unsigned short) rv;
  return buf;
}
struct command pick_best_command (unsigned count, struct command command, ...) {
struct command result = command;
va_list ap;
va_start(ap, command);
while (-- count) {
command = va_arg(ap, struct command);
if (is_better(command, result)) result = command;
}
va_end(ap);
return result;
}
/*
   Tells whether a new command should replace an old one.
   The placeholder command (7) never wins, and anything else beats a placeholder; otherwise the command that saves
   more bytes (bytes covered minus encoded size) is better.
   Returns non-zero if new is strictly better than old.
*/
int is_better (struct command new, struct command old) {
  if ((new.command == 7) || (old.command == 7)) return new.command != 7;
  const short gain_new = new.count - command_size(new);
  const short gain_old = old.count - command_size(old);
  return gain_new > gain_old;
}
/*
   Computes the number of bytes a command takes in the compressed stream: a one-byte header (two bytes when the
   count is above SHORT_COMMAND_COUNT) plus the command's payload.
*/
short command_size (struct command command) {
  const short header = (command.count > SHORT_COMMAND_COUNT) ? 2 : 1;
  // commands with bit 2 set (copies) carry an offset: one byte when negative, two bytes otherwise
  if (command.command & 4) return header + 1 + (command.value >= 0);
  switch (command.command) {
    case 0:
      // literal: the data itself follows the header
      return header + (short) command.count;
    case 1:
      return header + 1;
    case 2:
      return header + 2;
    default:
      // command 3 (zero run) has no payload
      return header;
  }
}
unsigned short compressed_length (const struct command * commands, unsigned short count) {
unsigned short current, total = 0;
for (current = 0; current < count; current ++) if (commands[current].command != 7) total += command_size(commands[current]);
return total;
}

File diff suppressed because it is too large Load Diff