You've already forked pokecrystal-board
mirror of
https://gitlab.com/xCrystal/pokecrystal-board.git
synced 2025-04-09 05:44:44 -07:00
Match LZ compressed files (#724)
Replace lzcomp with new version and match all LZ compressed files
This commit is contained in:
@@ -20,5 +20,9 @@ clean:
|
||||
|
||||
gfx md5: common.h
|
||||
|
||||
lzcomp: CFLAGS = -O3 -flto -std=c11 -Wall -Wextra -pedantic -Wno-strict-overflow -Wno-sign-compare
|
||||
lzcomp: $(wildcard lz/*.c) $(wildcard lz/*.h)
|
||||
$(CC) $(CFLAGS) -o $@ lz/*.c
|
||||
|
||||
%: %.c
|
||||
$(CC) $(CFLAGS) -o $@ $<
|
||||
|
35
tools/lz/global.c
Normal file
35
tools/lz/global.c
Normal file
@@ -0,0 +1,35 @@
|
||||
#include "proto.h"
|
||||
|
||||
const struct compressor compressors[] = {
|
||||
// NOTE: the "flags" field for each compressor will be set to the chosen/current method number minus the base
|
||||
// number for that particular compressor. That means that each compressor will use a zero-based flags value.
|
||||
{.methods = 72, .name = "singlepass", .function = &try_compress_single_pass}, // 0-71
|
||||
{.methods = 2, .name = "null", .function = &store_uncompressed}, // 72-73
|
||||
{.methods = 6, .name = "repetitions", .function = &try_compress_repetitions}, // 74-79
|
||||
{.methods = 16, .name = "multipass", .function = &try_compress_multi_pass}, // 80-95
|
||||
{0} // end of the list
|
||||
};
|
||||
|
||||
const unsigned char bit_flipping_table[] = {
|
||||
// For each byte, the table contains that same byte with its bits flipped around (for instance,
|
||||
// 0x58 (01011000 binary) becomes 0x1a (00011010 binary)). This is faster than flipping bits
|
||||
// manually at runtime.
|
||||
0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
|
||||
0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
|
||||
0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
|
||||
0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
|
||||
0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
|
||||
0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
|
||||
0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
|
||||
0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
|
||||
0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
|
||||
0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
|
||||
0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
|
||||
0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
|
||||
0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
|
||||
0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
|
||||
0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
|
||||
0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
|
||||
};
|
||||
|
||||
char option_name_buffer[] = "-?"; // used to extract the name of a short option (separated from its argument)
|
51
tools/lz/main.c
Normal file
51
tools/lz/main.c
Normal file
@@ -0,0 +1,51 @@
|
||||
#include "proto.h"
|
||||
|
||||
int main (int argc, char ** argv) {
|
||||
struct options options = get_options(argc, argv);
|
||||
unsigned short size;
|
||||
unsigned char * file_buffer = read_file_into_buffer(options.input, &size);
|
||||
struct command * commands;
|
||||
if (options.mode & 2) {
|
||||
unsigned short original_size = size, remainder;
|
||||
commands = get_commands_from_file(file_buffer, &size, &remainder);
|
||||
if (!commands) error_exit(1, "invalid command stream");
|
||||
if (options.mode == 2) {
|
||||
unsigned char * uncompressed = get_uncompressed_data(commands, file_buffer, &size);
|
||||
if (!uncompressed) error_exit(1, "output data is too large");
|
||||
write_raw_data_to_file(options.output, uncompressed, size);
|
||||
free(uncompressed);
|
||||
} else
|
||||
write_commands_and_padding_to_textfile(options.output, commands, size, file_buffer, original_size - remainder, remainder);
|
||||
} else {
|
||||
commands = compress(file_buffer, &size, options.method);
|
||||
(options.mode ? write_commands_to_textfile : write_commands_to_file)(options.output, commands, size, file_buffer, options.alignment);
|
||||
}
|
||||
free(file_buffer);
|
||||
free(commands);
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct command * compress (const unsigned char * data, unsigned short * size, unsigned method) {
|
||||
unsigned char * bitflipped = malloc(*size);
|
||||
unsigned current;
|
||||
for (current = 0; current < *size; current ++) bitflipped[current] = bit_flipping_table[data[current]];
|
||||
const struct compressor * compressor = compressors;
|
||||
struct command * result;
|
||||
if (method < COMPRESSION_METHODS) {
|
||||
while (method >= compressor -> methods) method -= (compressor ++) -> methods;
|
||||
result = compressor -> function(data, bitflipped, size, method);
|
||||
} else {
|
||||
struct command * compressed_sequences[COMPRESSION_METHODS];
|
||||
unsigned short lengths[COMPRESSION_METHODS];
|
||||
unsigned flags = compressor -> methods;
|
||||
for (current = 0; current < COMPRESSION_METHODS; current ++) {
|
||||
lengths[current] = *size;
|
||||
if (!flags) flags = (++ compressor) -> methods;
|
||||
compressed_sequences[current] = compressor -> function(data, bitflipped, lengths + current, -- flags);
|
||||
}
|
||||
result = select_command_sequence(compressed_sequences, lengths, COMPRESSION_METHODS, size);
|
||||
for (current = 0; current < COMPRESSION_METHODS; current ++) free(compressed_sequences[current]);
|
||||
}
|
||||
free(bitflipped);
|
||||
return result;
|
||||
}
|
61
tools/lz/merging.c
Normal file
61
tools/lz/merging.c
Normal file
@@ -0,0 +1,61 @@
|
||||
#include "proto.h"
|
||||
|
||||
struct command * select_command_sequence (struct command ** sequences, const unsigned short * lengths, unsigned count, unsigned short * final_length) {
|
||||
unsigned short min_sequence = 0, min_length = compressed_length(*sequences, *lengths);
|
||||
unsigned short seq, len;
|
||||
for (seq = 1; seq < count; seq ++) {
|
||||
len = compressed_length(sequences[seq], lengths[seq]);
|
||||
if (len < min_length) {
|
||||
min_sequence = seq;
|
||||
min_length = len;
|
||||
}
|
||||
}
|
||||
*final_length = lengths[min_sequence];
|
||||
struct command * current = malloc(*final_length * sizeof(struct command));
|
||||
memcpy(current, sequences[min_sequence], *final_length * sizeof(struct command));
|
||||
struct command * new;
|
||||
for (seq = 1; seq < count; seq ++) {
|
||||
new = merge_command_sequences(current, *final_length, sequences[(seq + min_sequence) % count], lengths[(seq + min_sequence) % count], final_length);
|
||||
free(current);
|
||||
current = new;
|
||||
}
|
||||
return current;
|
||||
}
|
||||
|
||||
struct command * merge_command_sequences (const struct command * current, unsigned short current_length, const struct command * new, unsigned short new_length,
|
||||
unsigned short * result_length) {
|
||||
struct command * result = malloc(sizeof(struct command) * (current_length + new_length));
|
||||
struct command * current_command = result;
|
||||
const struct command * saved_current;
|
||||
const struct command * saved_new;
|
||||
unsigned short current_pos, new_pos;
|
||||
while (current_length) {
|
||||
if (current -> count == new -> count) {
|
||||
*(current_command ++) = pick_best_command(2, *(current ++), *(new ++));
|
||||
current_length --;
|
||||
continue;
|
||||
}
|
||||
saved_current = current;
|
||||
saved_new = new;
|
||||
current_pos = (current ++) -> count;
|
||||
new_pos = (new ++) -> count;
|
||||
current_length --;
|
||||
while (current_pos != new_pos)
|
||||
if (current_pos < new_pos) {
|
||||
current_pos += (current ++) -> count;
|
||||
current_length --;
|
||||
} else
|
||||
new_pos += (new ++) -> count;
|
||||
current_pos = compressed_length(saved_current, current - saved_current);
|
||||
new_pos = compressed_length(saved_new, new - saved_new);
|
||||
if (new_pos < current_pos) {
|
||||
memcpy(current_command, saved_new, sizeof(struct command) * (new - saved_new));
|
||||
current_command += new - saved_new;
|
||||
} else {
|
||||
memcpy(current_command, saved_current, sizeof(struct command) * (current - saved_current));
|
||||
current_command += current - saved_current;
|
||||
}
|
||||
}
|
||||
*result_length = current_command - result;
|
||||
return result;
|
||||
}
|
112
tools/lz/mpcomp.c
Normal file
112
tools/lz/mpcomp.c
Normal file
@@ -0,0 +1,112 @@
|
||||
#include "proto.h"
|
||||
|
||||
/*
|
||||
Multi-pass compressor: performs an initial pass generating a single command for each byte position in the data and
|
||||
refines the command stream further in subsequent passes.
|
||||
Methods defined: 16
|
||||
Flags values: the flags are a bitfield; each bit triggers some alternate behavior if set:
|
||||
1: always emit a literal command (0) for the first byte of the file
|
||||
2: when reducing a two-byte repetition (2) command in the overlap elimination pass, don't force it to contain a
|
||||
whole number of repetitions (i.e., an even count)
|
||||
4: don't emit copy commands (4, 5, 6) with a count of 3
|
||||
8: don't emit single-byte repetition (1) commands
|
||||
*/
|
||||
|
||||
struct command * try_compress_multi_pass (const unsigned char * data, const unsigned char * flipped, unsigned short * size, unsigned flags) {
|
||||
struct command * result = calloc(*size, sizeof(struct command));
|
||||
unsigned char * reversed = malloc(*size);
|
||||
short * sources = malloc(*size * sizeof(short));
|
||||
unsigned short pos, next, current = 0;
|
||||
for (pos = 0; pos < *size; pos ++) {
|
||||
reversed[pos] = data[*size - 1 - pos];
|
||||
sources[pos] = -1;
|
||||
}
|
||||
for (pos = (flags & 1); pos < *size; pos += (result[pos].count >= MULTIPASS_SKIP_THRESHOLD) ? result[pos].count : 1) {
|
||||
result[pos] = pick_command_for_pass(data, flipped, reversed, sources, *size, pos, flags);
|
||||
if ((result[pos].command >= 4) || (result[pos].count < MULTIPASS_SKIP_THRESHOLD)) sources[current ++] = pos;
|
||||
}
|
||||
free(reversed);
|
||||
free(sources);
|
||||
for (pos = 0; pos < *size; pos ++) {
|
||||
for (current = 1; current < result[pos].count; current ++) if (result[pos + current].count > result[pos].count) {
|
||||
result[pos].count = current;
|
||||
if ((result[pos].command == 2) && (current & 1) && !(flags & 2)) result[pos].count --;
|
||||
}
|
||||
if (result[pos].count <= command_size(result[pos])) result[pos] = (struct command) {.command = 0, .count = 0};
|
||||
}
|
||||
for (pos = 0; pos < *size; pos ++)
|
||||
if (!result[pos].command) {
|
||||
for (current = 1; (current < MAX_COMMAND_COUNT) && ((pos + current) < *size); current ++) if (result[pos + current].command) break;
|
||||
result[pos] = (struct command) {.command = 0, .count = current, .value = pos};
|
||||
} else if (result[pos].count > MAX_COMMAND_COUNT) {
|
||||
result[pos + MAX_COMMAND_COUNT] = result[pos];
|
||||
result[pos + MAX_COMMAND_COUNT].count -= MAX_COMMAND_COUNT;
|
||||
if ((result[pos + MAX_COMMAND_COUNT].command >= 4) && (result[pos + MAX_COMMAND_COUNT].value >= 0))
|
||||
result[pos + MAX_COMMAND_COUNT].value += (result[pos].command == 6) ? -MAX_COMMAND_COUNT : MAX_COMMAND_COUNT;
|
||||
result[pos].count = MAX_COMMAND_COUNT;
|
||||
}
|
||||
for (next = pos = 0; pos < *size; pos ++)
|
||||
if (pos == next)
|
||||
next += result[pos].count;
|
||||
else
|
||||
result[pos].command = 7;
|
||||
repack(&result, size);
|
||||
return result;
|
||||
}
|
||||
|
||||
struct command pick_command_for_pass (const unsigned char * data, const unsigned char * flipped, const unsigned char * reversed, const short * sources,
|
||||
unsigned short length, unsigned short position, unsigned flags) {
|
||||
struct command result = pick_repetition_for_pass(data, length, position, flags);
|
||||
if (result.count >= MULTIPASS_SKIP_THRESHOLD) return result;
|
||||
unsigned char p;
|
||||
for (p = 0; p < 3; p ++) {
|
||||
struct command temp = pick_copy_for_pass(data, p[(const unsigned char * []) {data, flipped, reversed}], sources, p + 4, length, position, flags);
|
||||
if (temp.command == 7) continue;
|
||||
if (temp.count > result.count) result = temp;
|
||||
}
|
||||
if ((result.command >= 4) && (result.value >= (position - LOOKBACK_LIMIT))) result.value -= position;
|
||||
return result;
|
||||
}
|
||||
|
||||
struct command pick_repetition_for_pass (const unsigned char * data, unsigned short length, unsigned short position, unsigned flags) {
|
||||
unsigned short p;
|
||||
if (data[position]) {
|
||||
if ((position + 1) >= length) return (struct command) {.command = 1, .count = 1, .value = data[position]};
|
||||
struct command result;
|
||||
if (!(flags & 8) && (data[position] == data[position + 1]))
|
||||
result = (struct command) {.command = 1, .value = data[position]};
|
||||
else
|
||||
result = (struct command) {.command = 2, .value = data[position] | (data[position + 1] << 8)};
|
||||
for (p = 1; ((position + p) < length) && (p < LOOKAHEAD_LIMIT); p ++) if (data[position + p] != data[position + (p & 1)]) break;
|
||||
result.count = p;
|
||||
return result;
|
||||
} else {
|
||||
for (p = position + 1; (p < length) && (p < (position + LOOKAHEAD_LIMIT)); p ++) if (data[p]) break;
|
||||
return (struct command) {.command = 3, .count = p - position};
|
||||
}
|
||||
}
|
||||
|
||||
struct command pick_copy_for_pass (const unsigned char * data, const unsigned char * reference, const short * sources, unsigned char command_type,
|
||||
unsigned short length, unsigned short position, unsigned flags) {
|
||||
struct command result = {.command = 7, .count = (flags & 4) ? 4 : 3};
|
||||
if (length < 3) return result;
|
||||
unsigned refpos, count;
|
||||
const unsigned char * current;
|
||||
unsigned char buffer[6] = {0};
|
||||
memcpy(buffer, reference + length - 3, 3);
|
||||
while (*sources >= 0) {
|
||||
refpos = *(sources ++);
|
||||
if (command_type == 6) refpos = length - 1 - refpos;
|
||||
if (refpos >= (length - 3))
|
||||
current = buffer + refpos - (length - 3);
|
||||
else
|
||||
current = reference + refpos;
|
||||
if (memcmp(data + position, current, 4)) continue;
|
||||
for (count = 4; (count < (length - position)) && (count < (length - refpos)); count ++) if (data[position + count] != current[count]) break;
|
||||
if (count > (length - refpos)) count = length - refpos;
|
||||
if (count > (length - position)) count = length - position;
|
||||
if (result.count > count) continue;
|
||||
result = (struct command) {.command = command_type, .count = count, .value = sources[-1]};
|
||||
}
|
||||
return result;
|
||||
}
|
20
tools/lz/nullcomp.c
Normal file
20
tools/lz/nullcomp.c
Normal file
@@ -0,0 +1,20 @@
|
||||
#include "proto.h"
|
||||
|
||||
/*
|
||||
Null compressor: stores data uncompressed, using literal (0) commands only.
|
||||
Methods defined: 2
|
||||
Flags values: 0 = split a trailing 33-to-64-byte block at the end into two short blocks; 1 = don't
|
||||
*/
|
||||
|
||||
struct command * store_uncompressed (__attribute__((unused)) const unsigned char * data, __attribute__((unused)) const unsigned char * bitflipped, unsigned short * size, unsigned flags) {
|
||||
unsigned short position, block, remainder = *size;
|
||||
struct command * result = NULL;
|
||||
*size = 0;
|
||||
for (position = 0; remainder; position += block, remainder -= block) {
|
||||
block = (remainder > MAX_COMMAND_COUNT) ? MAX_COMMAND_COUNT : remainder;
|
||||
if (!(flags & 1) && (block <= (2 * SHORT_COMMAND_COUNT)) && (block > SHORT_COMMAND_COUNT)) block = SHORT_COMMAND_COUNT;
|
||||
result = realloc(result, sizeof(struct command) * (1 + *size));
|
||||
result[(*size) ++] = (struct command) {.command = 0, .count = block, .value = position};
|
||||
}
|
||||
return result;
|
||||
}
|
141
tools/lz/options.c
Normal file
141
tools/lz/options.c
Normal file
@@ -0,0 +1,141 @@
|
||||
#include "proto.h"
|
||||
|
||||
struct options get_options (int argc, char ** argv) {
|
||||
struct options result = {.input = NULL, .output = NULL, .mode = 0, .alignment = 0, .method = COMPRESSION_METHODS};
|
||||
const char * program_name = *argv;
|
||||
int compressor = -1;
|
||||
if (argc == 1) usage(program_name);
|
||||
for (argv ++; *argv; argv ++) {
|
||||
if (**argv != '-') break;
|
||||
if (!1[*argv]) break;
|
||||
if (!strcmp(*argv, "--")) {
|
||||
argv ++;
|
||||
break;
|
||||
} else if (!(strcmp(*argv, "--text") && strcmp(*argv, "-t")))
|
||||
result.mode = 1;
|
||||
else if (!(strcmp(*argv, "--binary") && strcmp(*argv, "-b")))
|
||||
result.mode = 0;
|
||||
else if (!(strcmp(*argv, "--uncompress") && strcmp(*argv, "-u")))
|
||||
result.mode = 2;
|
||||
else if (!(strcmp(*argv, "--dump") && strcmp(*argv, "-d")))
|
||||
result.mode = 3;
|
||||
else if (!(strcmp(*argv, "--align") && strncmp(*argv, "-a", 2)))
|
||||
result.alignment = parse_numeric_option_argument(&argv, 12);
|
||||
else if (!(strcmp(*argv, "--method") && strncmp(*argv, "-m", 2)))
|
||||
result.method = parse_numeric_option_argument(&argv, COMPRESSION_METHODS - 1);
|
||||
else if (!(strcmp(*argv, "--compressor") && strncmp(*argv, "-c", 2)))
|
||||
compressor = parse_compressor_option_argument(&argv);
|
||||
else if (!(strcmp(*argv, "--optimize") && strcmp(*argv, "-o"))) {
|
||||
result.method = COMPRESSION_METHODS;
|
||||
compressor = -1;
|
||||
} else if (!(strcmp(*argv, "--help") && strcmp(*argv, "-?")))
|
||||
usage(program_name);
|
||||
else if (!(strcmp(*argv, "--list") && strcmp(*argv, "-l")))
|
||||
list_compressors();
|
||||
else
|
||||
error_exit(3, "unknown option: %s", *argv);
|
||||
}
|
||||
if (compressor >= 0) {
|
||||
if (result.method >= COMPRESSION_METHODS) result.method = 0;
|
||||
if (result.method >= compressors[compressor].methods)
|
||||
error_exit(3, "method for the %s compressor must be between 0 and %u", compressors[compressor].name, compressors[compressor].methods - 1);
|
||||
while (compressor > 0) result.method += compressors[-- compressor].methods;
|
||||
}
|
||||
if (*argv) {
|
||||
if (strcmp(*argv, "-")) result.input = *argv;
|
||||
if (*(++ argv)) {
|
||||
if (argv[1]) error_exit(3, "too many command-line arguments");
|
||||
if (strcmp(*argv, "-")) result.output = *argv;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
unsigned parse_numeric_option_argument (char *** alp, unsigned limit) {
|
||||
const char * option;
|
||||
const char * value = get_argument_for_option(alp, &option);
|
||||
char * error;
|
||||
unsigned long result = strtoul(value, &error, 10);
|
||||
if (*error) error_exit(3, "invalid argument to option %s", option);
|
||||
if (result > limit) error_exit(3, "argument to option %s must be between 0 and %u", option, limit);
|
||||
return result;
|
||||
}
|
||||
|
||||
int parse_compressor_option_argument (char *** alp) {
|
||||
const char * name = get_argument_for_option(alp, NULL);
|
||||
if (!strcmp(name, "*")) return -1;
|
||||
int result = -1;
|
||||
unsigned length = strlen(name);
|
||||
const struct compressor * compressor;
|
||||
for (compressor = compressors; compressor -> name; compressor ++) {
|
||||
if (strncmp(name, compressor -> name, length)) continue;
|
||||
if (result >= 0) error_exit(3, "ambiguous compressor prefix: %s", name);
|
||||
result = compressor - compressors;
|
||||
}
|
||||
if (result < 0) error_exit(3, "unknown compressor: %s", name);
|
||||
return result;
|
||||
}
|
||||
|
||||
const char * get_argument_for_option (char *** alp, const char ** option_name) {
|
||||
// alp: argument list pointer (i.e., address of the current value of argv after indexing)
|
||||
// will point at the last consumed argument on exit (since the caller will probably increment it once more)
|
||||
const char * option;
|
||||
const char * result;
|
||||
if (1[**alp] == '-') {
|
||||
option = *((*alp) ++);
|
||||
result = **alp;
|
||||
} else {
|
||||
option_name_buffer[1] = 1[**alp];
|
||||
option = option_name_buffer;
|
||||
result = **alp + 2;
|
||||
}
|
||||
if (!(result && *result)) error_exit(3, "option %s requires an argument", option);
|
||||
if (option_name) *option_name = option;
|
||||
return result;
|
||||
}
|
||||
|
||||
noreturn usage (const char * program_name) {
|
||||
fprintf(stderr, "Usage: %s [<options>] [<source file> [<output>]]\n\n", program_name);
|
||||
fputs("Execution mode:\n", stderr);
|
||||
fputs(" -b, --binary Output the command stream as binary data (default).\n", stderr);
|
||||
fputs(" -t, --text Output the command stream as text.\n", stderr);
|
||||
fputs(" -u, --uncompress Process a compressed file and output the original data.\n", stderr);
|
||||
fputs(" -d, --dump Process a compressed file and dump the command stream as\n", stderr);
|
||||
fputs(" text (as if compressed with the --text option).\n", stderr);
|
||||
fputs(" -l, --list List compressors and their method numbers.\n", stderr);
|
||||
fputs(" -?, --help Print this help text and exit.\n", stderr);
|
||||
fputs("Compression options:\n", stderr);
|
||||
fputs(" -o, --optimize Use the best combination of compression\n", stderr);
|
||||
fputs(" methods available (default).\n", stderr);
|
||||
fputs(" -m<number>, --method <number> Use only one specific compression method.\n", stderr);
|
||||
fprintf(stderr, " Valid method numbers are between 0 and %u.\n", COMPRESSION_METHODS - 1);
|
||||
fputs(" -c<name>, --compressor <name> Use the specified compressor: the method\n", stderr);
|
||||
fputs(" number will be relative to that compressor.\n", stderr);
|
||||
fputs(" Any prefix of the compressor name may be\n", stderr);
|
||||
fputs(" specified. Use * to indicate any compressor.\n", stderr);
|
||||
fputs(" -a<number>, --align <number> Pad the compressed output with zeros until\n", stderr);
|
||||
fputs(" the size has the specified number of low bits\n", stderr);
|
||||
fputs(" cleared (default: 0).\n", stderr);
|
||||
fputs("The source and output filenames can be given as - (or omitted) to use standard\n", stderr);
|
||||
fputs("input and output. Use -- to indicate that subsequent arguments are file names.\n", stderr);
|
||||
exit(3);
|
||||
}
|
||||
|
||||
noreturn list_compressors (void) {
|
||||
const struct compressor * compressor;
|
||||
unsigned current, length = 10;
|
||||
for (compressor = compressors; compressor -> name; compressor ++) if ((current = strlen(compressor -> name)) > length) length = current;
|
||||
fprintf(stderr, "%-*s Offset Methods\n", length, "Compressor");
|
||||
for (current = 0; current < length; current ++) putc('-', stderr);
|
||||
fputs(" ------ -------\n", stderr);
|
||||
current = 0;
|
||||
for (compressor = compressors; compressor -> name; compressor ++) {
|
||||
fprintf(stderr, "%-*s %6u %7u\n", length, compressor -> name, current, compressor -> methods);
|
||||
current += compressor -> methods;
|
||||
}
|
||||
putc('\n', stderr);
|
||||
fputs("Note: the offset indicates the compressor's lowest method number when the\n", stderr);
|
||||
fputs("--compressor option is not given. When that option is used, every compressor's\n", stderr);
|
||||
fputs("methods are numbered from zero.\n", stderr);
|
||||
exit(3);
|
||||
}
|
138
tools/lz/output.c
Normal file
138
tools/lz/output.c
Normal file
@@ -0,0 +1,138 @@
|
||||
#include "proto.h"
|
||||
|
||||
void write_commands_to_textfile (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream,
|
||||
unsigned char alignment) {
|
||||
FILE * fp = file ? fopen(file, "w") : stdout;
|
||||
if (!fp) error_exit(1, "could not open file %s for writing", file);
|
||||
unsigned length = 0;
|
||||
while (count --) {
|
||||
write_command_to_textfile(fp, *commands, input_stream);
|
||||
length += command_size(*(commands ++));
|
||||
}
|
||||
if (fputs("\tlzend\n", fp) < 0) error_exit(1, "could not write terminator to compressed output");
|
||||
length = ~length & ((1 << alignment) - 1);
|
||||
if (length --) {
|
||||
int rv = fputs("\tdb 0", fp);
|
||||
while ((rv >= 0) && length --) rv = fputs(", 0", fp);
|
||||
if (rv >= 0) rv = -(putc('\n', fp) == EOF);
|
||||
if (rv < 0) error_exit(1, "could not write padding to compressed output");
|
||||
}
|
||||
if (file) fclose(fp);
|
||||
}
|
||||
|
||||
void write_commands_and_padding_to_textfile (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream,
|
||||
unsigned padding_offset, unsigned padding_size) {
|
||||
FILE * fp = file ? fopen(file, "w") : stdout;
|
||||
if (!fp) error_exit(1, "could not open file %s for writing", file);
|
||||
while (count --) write_command_to_textfile(fp, *(commands ++), input_stream);
|
||||
if (fputs("\tlzend\n", fp) < 0) error_exit(1, "could not write terminator to compressed output");
|
||||
if (padding_size) {
|
||||
input_stream += padding_offset;
|
||||
int rv = fprintf(fp, "\tdb $%02hhx", *(input_stream ++));
|
||||
while ((rv >= 0) && (-- padding_size)) rv = fprintf(fp, ", $%02hhx", *(input_stream ++));
|
||||
if (rv >= 0) rv = -(putc('\n', fp) == EOF);
|
||||
if (rv < 0) error_exit(1, "could not write padding to compressed output");
|
||||
}
|
||||
if (file) fclose(fp);
|
||||
}
|
||||
|
||||
void write_command_to_textfile (FILE * fp, struct command command, const unsigned char * input_stream) {
|
||||
if ((!command.count) || (command.count > MAX_COMMAND_COUNT)) error_exit(2, "invalid command in output stream");
|
||||
int rv, pos;
|
||||
const char * kind;
|
||||
switch (command.command) {
|
||||
case 0:
|
||||
if ((rv = fprintf(fp, "\tlzdata")) < 0) break;
|
||||
for (pos = 0; pos < command.count; pos ++) if ((rv = fprintf(fp, "%s$%02hhx", pos ? ", " : " ", input_stream[command.value + pos])) < 0) break;
|
||||
rv = putc('\n', fp);
|
||||
break;
|
||||
case 1:
|
||||
if ((command.value < 0) || (command.value > 255)) error_exit(2, "invalid command in output stream");
|
||||
rv = fprintf(fp, "\tlzrepeat %u, $%02hhx\n", command.count, (unsigned char) command.value);
|
||||
break;
|
||||
case 2:
|
||||
if (command.value < 0) error_exit(2, "invalid command in output stream");
|
||||
rv = fprintf(fp, "\tlzrepeat %u, $%02hhx, $%02hhx\n", command.count, (unsigned char) command.value, (unsigned char) (command.value >> 8));
|
||||
break;
|
||||
case 3:
|
||||
rv = fprintf(fp, "\tlzzero %u\n", command.count);
|
||||
break;
|
||||
case 4:
|
||||
kind = "normal";
|
||||
goto copy;
|
||||
case 5:
|
||||
kind = "flipped";
|
||||
goto copy;
|
||||
case 6:
|
||||
kind = "reversed";
|
||||
copy:
|
||||
if ((command.value < -LOOKBACK_LIMIT) || (command.value >= MAX_FILE_SIZE)) error_exit(2, "invalid command in output stream");
|
||||
if (command.value < 0)
|
||||
rv = fprintf(fp, "\tlzcopy %s, %u, %d\n", kind, command.count, command.value);
|
||||
else
|
||||
rv = fprintf(fp, "\tlzcopy %s, %u, $%04hx\n", kind, command.count, (unsigned short) command.value);
|
||||
break;
|
||||
default:
|
||||
error_exit(2, "invalid command in output stream");
|
||||
}
|
||||
if (rv < 0) error_exit(1, "could not write command to compressed output");
|
||||
}
|
||||
|
||||
void write_commands_to_file (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream, unsigned char alignment) {
|
||||
FILE * fp = file ? fopen(file, "wb") : stdout;
|
||||
if (!fp) error_exit(1, "could not open file %s for writing", file);
|
||||
unsigned length = 0;
|
||||
while (count --) {
|
||||
write_command_to_file(fp, *commands, input_stream);
|
||||
length += command_size(*(commands ++));
|
||||
}
|
||||
if (putc(-1, fp) == EOF) error_exit(1, "could not write terminator to compressed output");
|
||||
length = ~length & ((1 << alignment) - 1);
|
||||
while (length --) if (putc(0, fp) == EOF) error_exit(1, "could not write padding to compressed output");
|
||||
if (file) fclose(fp);
|
||||
}
|
||||
|
||||
void write_command_to_file (FILE * fp, struct command command, const unsigned char * input_stream) {
|
||||
if ((!command.count) || (command.count > MAX_COMMAND_COUNT)) error_exit(2, "invalid command in output stream");
|
||||
unsigned char buf[4];
|
||||
unsigned char * pos = buf;
|
||||
int n;
|
||||
command.count --;
|
||||
if (command.count < SHORT_COMMAND_COUNT)
|
||||
*(pos ++) = (command.command << 5) + command.count;
|
||||
else {
|
||||
*(pos ++) = 224 + (command.command << 2) + (command.count >> 8);
|
||||
*(pos ++) = command.count;
|
||||
}
|
||||
switch (command.command) {
|
||||
case 1: case 2:
|
||||
if ((command.value < 0) || (command.value >= (1 << (command.command << 3)))) error_exit(2, "invalid command in output stream");
|
||||
for (n = 0; n < command.command; n ++) *(pos ++) = command.value >> (n << 3);
|
||||
case 0: case 3:
|
||||
break;
|
||||
default:
|
||||
if ((command.value < -LOOKBACK_LIMIT) || (command.value >= MAX_FILE_SIZE)) error_exit(2, "invalid command in output stream");
|
||||
if (command.value < 0)
|
||||
*(pos ++) = command.value ^ 127;
|
||||
else {
|
||||
*(pos ++) = command.value >> 8;
|
||||
*(pos ++) = command.value;
|
||||
}
|
||||
}
|
||||
if (fwrite(buf, 1, pos - buf, fp) != (pos - buf)) error_exit(1, "could not write command to compressed output");
|
||||
if (command.command) return;
|
||||
command.count ++;
|
||||
if (fwrite(input_stream + command.value, 1, command.count, fp) != command.count) error_exit(1, "could not write data to compressed output");
|
||||
}
|
||||
|
||||
void write_raw_data_to_file (const char * file, const void * data, unsigned length) {
|
||||
FILE * fp = file ? fopen(file, "w") : stdout;
|
||||
if (!fp) error_exit(1, "could not open file %s for writing", file);
|
||||
while (length) {
|
||||
unsigned rv = fwrite(data, 1, length, fp);
|
||||
if (!rv) error_exit(1, "could not write raw data to output");
|
||||
data = (const char *) data + rv;
|
||||
length -= rv;
|
||||
}
|
||||
if (file) fclose(fp);
|
||||
}
|
58
tools/lz/packing.c
Normal file
58
tools/lz/packing.c
Normal file
@@ -0,0 +1,58 @@
|
||||
#include "proto.h"
|
||||
|
||||
void optimize (struct command * commands, unsigned short count) {
|
||||
while (count && (commands -> command == 7)) commands ++, count --;
|
||||
if (count < 2) return;
|
||||
struct command * end = commands + count;
|
||||
struct command * next;
|
||||
for (next = commands + 1; next < end; next ++) {
|
||||
if (next -> command == 7) continue;
|
||||
if (
|
||||
!(commands -> command) &&
|
||||
(command_size(*next) == next -> count) &&
|
||||
((commands -> count + next -> count) <= MAX_COMMAND_COUNT) &&
|
||||
((commands -> count > SHORT_COMMAND_COUNT) || ((commands -> count + next -> count) <= SHORT_COMMAND_COUNT))
|
||||
) {
|
||||
commands -> count += next -> count;
|
||||
next -> command = 7;
|
||||
continue;
|
||||
}
|
||||
if (next -> command == commands -> command) {
|
||||
switch (commands -> command) {
|
||||
case 0:
|
||||
if ((commands -> value + commands -> count) != next -> value) break;
|
||||
commands -> count += next -> count;
|
||||
next -> command = 7;
|
||||
if (commands -> count <= MAX_COMMAND_COUNT) continue;
|
||||
next -> command = 0;
|
||||
next -> value = commands -> value + MAX_COMMAND_COUNT;
|
||||
next -> count = commands -> count - MAX_COMMAND_COUNT;
|
||||
commands -> count = MAX_COMMAND_COUNT;
|
||||
break;
|
||||
case 1:
|
||||
if (commands -> value != next -> value) break;
|
||||
// fallthrough
|
||||
case 3:
|
||||
if ((commands -> count + next -> count) <= MAX_COMMAND_COUNT) {
|
||||
commands -> count += next -> count;
|
||||
next -> command = 7;
|
||||
continue;
|
||||
}
|
||||
next -> count = (commands -> count + next -> count) - MAX_COMMAND_COUNT;
|
||||
commands -> count = MAX_COMMAND_COUNT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
commands = next;
|
||||
}
|
||||
}
|
||||
|
||||
void repack (struct command ** commands, unsigned short * length) {
|
||||
struct command * new_commands = malloc(sizeof(struct command) * *length);
|
||||
struct command * current = new_commands;
|
||||
unsigned short p;
|
||||
for (p = 0; p < *length; p ++) if (p[*commands].command != 7) *(current ++) = p[*commands];
|
||||
free(*commands);
|
||||
*commands = new_commands;
|
||||
*length = current - new_commands;
|
||||
}
|
105
tools/lz/proto.h
Normal file
105
tools/lz/proto.h
Normal file
@@ -0,0 +1,105 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#define COMPRESSION_METHODS 96 /* sum of all values for the methods field in compressors */
|
||||
#define MAX_FILE_SIZE 32768
|
||||
#define SHORT_COMMAND_COUNT 32
|
||||
#define MAX_COMMAND_COUNT 1024
|
||||
#define LOOKBACK_LIMIT 128 /* highest negative valid count for a copy command */
|
||||
#define LOOKAHEAD_LIMIT 3072 /* maximum lookahead distance for the first pass of multi-pass compression */
|
||||
#define MULTIPASS_SKIP_THRESHOLD 64
|
||||
|
||||
#if __STDC_VERSION__ >= 201112L
|
||||
// <noreturn.h> forces "noreturn void", which is silly and redundant; this is simpler
|
||||
#define noreturn _Noreturn void
|
||||
#else
|
||||
#define noreturn void /* fallback */
|
||||
#endif
|
||||
|
||||
struct command {
|
||||
unsigned command: 3; // commands 0-6 as per compression spec; command 7 is used as a dummy placeholder
|
||||
unsigned count: 12; // always equals the uncompressed data length
|
||||
signed value: 17; // offset for commands 0 (into source) and 4-6 (into decompressed output); repeated bytes for commands 1-2
|
||||
};
|
||||
|
||||
struct compressor {
|
||||
unsigned methods;
|
||||
const char * name;
|
||||
struct command * (* function) (const unsigned char *, const unsigned char *, unsigned short *, unsigned);
|
||||
};
|
||||
|
||||
struct options {
|
||||
const char * input;
|
||||
const char * output;
|
||||
unsigned method; // method to use, or >= COMPRESSION_METHODS to try them all
|
||||
unsigned char mode; // 0: compress, 1: compress to text, 2: uncompress, 3: dump commands as text
|
||||
unsigned char alignment; // 1 << value
|
||||
};
|
||||
|
||||
// global.c
|
||||
extern const struct compressor compressors[];
|
||||
extern const unsigned char bit_flipping_table[];
|
||||
extern char option_name_buffer[];
|
||||
|
||||
// main.c
|
||||
int main(int, char **);
|
||||
struct command * compress(const unsigned char *, unsigned short *, unsigned);
|
||||
|
||||
// merging.c
|
||||
struct command * select_command_sequence(struct command **, const unsigned short *, unsigned, unsigned short *);
|
||||
struct command * merge_command_sequences(const struct command *, unsigned short, const struct command *, unsigned short, unsigned short *);
|
||||
|
||||
// mpcomp.c
|
||||
struct command * try_compress_multi_pass(const unsigned char *, const unsigned char *, unsigned short *, unsigned);
|
||||
struct command pick_command_for_pass(const unsigned char *, const unsigned char *, const unsigned char *, const short *, unsigned short,
|
||||
unsigned short, unsigned);
|
||||
struct command pick_repetition_for_pass(const unsigned char *, unsigned short, unsigned short, unsigned);
|
||||
struct command pick_copy_for_pass(const unsigned char *, const unsigned char *, const short *, unsigned char, unsigned short, unsigned short, unsigned);
|
||||
|
||||
// nullcomp.c
|
||||
struct command * store_uncompressed(const unsigned char *, const unsigned char *, unsigned short *, unsigned);
|
||||
|
||||
// options.c
|
||||
struct options get_options(int, char **);
|
||||
unsigned parse_numeric_option_argument(char ***, unsigned);
|
||||
int parse_compressor_option_argument(char ***);
|
||||
const char * get_argument_for_option(char ***, const char **);
|
||||
noreturn usage(const char *);
|
||||
noreturn list_compressors(void);
|
||||
|
||||
// output.c
|
||||
void write_commands_to_textfile(const char *, const struct command *, unsigned, const unsigned char *, unsigned char);
|
||||
void write_commands_and_padding_to_textfile(const char *, const struct command *, unsigned, const unsigned char *, unsigned, unsigned);
|
||||
void write_command_to_textfile(FILE *, struct command, const unsigned char *);
|
||||
void write_commands_to_file(const char *, const struct command *, unsigned, const unsigned char *, unsigned char);
|
||||
void write_command_to_file(FILE *, struct command, const unsigned char *);
|
||||
void write_raw_data_to_file(const char *, const void *, unsigned);
|
||||
|
||||
// packing.c
|
||||
void optimize(struct command *, unsigned short);
|
||||
void repack(struct command **, unsigned short *);
|
||||
|
||||
// repcomp.c
|
||||
struct command * try_compress_repetitions(const unsigned char *, const unsigned char *, unsigned short *, unsigned);
|
||||
struct command find_repetition_at_position(const unsigned char *, unsigned short, unsigned short);
|
||||
|
||||
// spcomp.c
|
||||
struct command * try_compress_single_pass(const unsigned char *, const unsigned char *, unsigned short *, unsigned);
|
||||
struct command find_best_copy(const unsigned char *, unsigned short, unsigned short, const unsigned char *, unsigned);
|
||||
unsigned short scan_forwards(const unsigned char *, unsigned short, const unsigned char *, unsigned short, short *);
|
||||
unsigned short scan_backwards(const unsigned char *, unsigned short, unsigned short, short *);
|
||||
struct command find_best_repetition(const unsigned char *, unsigned short, unsigned short);
|
||||
|
||||
// uncomp.c
|
||||
struct command * get_commands_from_file(const unsigned char *, unsigned short * restrict, unsigned short * restrict);
|
||||
unsigned char * get_uncompressed_data(const struct command *, const unsigned char *, unsigned short *);
|
||||
|
||||
// util.c
|
||||
noreturn error_exit(int, const char *, ...);
|
||||
unsigned char * read_file_into_buffer(const char *, unsigned short *);
|
||||
struct command pick_best_command(unsigned, struct command, ...);
|
||||
int is_better(struct command, struct command);
|
||||
short command_size(struct command);
|
||||
unsigned short compressed_length(const struct command *, unsigned short);
|
63
tools/lz/repcomp.c
Normal file
63
tools/lz/repcomp.c
Normal file
@@ -0,0 +1,63 @@
|
||||
#include "proto.h"
|
||||
|
||||
/*
|
||||
Repetitions compressor: compresses the data only using a subset of the available repetition commands.
|
||||
Methods defined: 6
|
||||
Flags values: the value plus one is taken as a bitfield indicating which kinds of repetition commands are used
|
||||
(lowest bit to highest: repeat single byte (1), repeat two bytes (2), repeat zeros (3)).
|
||||
*/
|
||||
|
||||
struct command * try_compress_repetitions (const unsigned char * data, __attribute__((unused)) const unsigned char * bitflipped, unsigned short * size, unsigned flags) {
|
||||
unsigned short pos = 0, skipped = 0;
|
||||
struct command * result = malloc(*size * sizeof(struct command));
|
||||
struct command * current = result;
|
||||
struct command candidate;
|
||||
flags = (flags + 1) << 1;
|
||||
while (pos < *size) {
|
||||
candidate = find_repetition_at_position(data, pos, *size);
|
||||
if ((candidate.command == 3) && !(flags & 8)) {
|
||||
candidate.command = 1;
|
||||
candidate.value = 0;
|
||||
}
|
||||
if ((candidate.command == 1) && !(flags & 2)) {
|
||||
candidate.command = 2;
|
||||
candidate.value |= candidate.value << 8;
|
||||
}
|
||||
if ((flags & (1 << candidate.command)) && (command_size(candidate) <= candidate.count)) {
|
||||
if (skipped) *(current ++) = (struct command) {.command = 0, .count = skipped, .value = pos - skipped};
|
||||
skipped = 0;
|
||||
*(current ++) = candidate;
|
||||
pos += candidate.count;
|
||||
} else {
|
||||
pos ++;
|
||||
if ((++ skipped) == MAX_COMMAND_COUNT) {
|
||||
*(current ++) = (struct command) {.command = 0, .count = MAX_COMMAND_COUNT, .value = pos - MAX_COMMAND_COUNT};
|
||||
skipped = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (skipped) *(current ++) = (struct command) {.command = 0, .count = skipped, .value = pos - skipped};
|
||||
*size = current - result;
|
||||
result = realloc(result, *size * sizeof(struct command));
|
||||
return result;
|
||||
}
|
||||
|
||||
struct command find_repetition_at_position (const unsigned char * data, unsigned short position, unsigned short length) {
|
||||
if ((position + 1) >= length) return data[position] ? ((struct command) {.command = 7}) : ((struct command) {.command = 3, .count = 1});
|
||||
unsigned char value[2] = {data[position], data[position + 1]};
|
||||
unsigned repcount, limit = length - position;
|
||||
if (limit > MAX_COMMAND_COUNT) limit = MAX_COMMAND_COUNT;
|
||||
for (repcount = 2; (repcount < limit) && (data[position + repcount] == value[repcount & 1]); repcount ++);
|
||||
struct command result;
|
||||
result.count = repcount;
|
||||
if (*value != value[1]) {
|
||||
if (!*value && (repcount < 3)) return (struct command) {.command = 3, .count = 1};
|
||||
result.command = 2;
|
||||
result.value = ((unsigned) (*value)) | (((unsigned) (value[1])) << 8);
|
||||
} else if (*value) {
|
||||
result.command = 1;
|
||||
result.value = *value;
|
||||
} else
|
||||
result.command = 3;
|
||||
return result;
|
||||
}
|
141
tools/lz/spcomp.c
Normal file
141
tools/lz/spcomp.c
Normal file
@@ -0,0 +1,141 @@
|
||||
#include "proto.h"
|
||||
|
||||
/*
|
||||
Single-pass compressor: attempts to compress the data in a single pass, selecting the best command at each
|
||||
position within some constraints.
|
||||
Methods defined: 72
|
||||
Flags values:
|
||||
Bit fields (will trigger alternate behavior if set):
|
||||
1: prefer repetition commands over copy commands of equal count
|
||||
2: don't emit a copy or repetition with a count equal to its size when the previous command is a literal (0) that
|
||||
is not at maximum size (32 or 1024)
|
||||
4: don't emit long copy commands
|
||||
Selector values (pick one from each group and add them to the bit fields):
|
||||
- Scan delay: number of bytes that are forced into literal (0) commands after each non-literal command:
|
||||
0: 0 bytes
|
||||
8: 1 byte
|
||||
16: 2 bytes
|
||||
- Copy command preference (when the command counts are tied), in order from most to least:
|
||||
0: normal (4), reversed (6), flipped (5)
|
||||
24: reversed (6), flipped (5), normal (4)
|
||||
48: flipped (5), reversed (6), normal (4)
|
||||
*/
|
||||
|
||||
struct command * try_compress_single_pass (const unsigned char * data, const unsigned char * bitflipped, unsigned short * length, unsigned flags) {
|
||||
struct command * commands = malloc(sizeof(struct command) * *length);
|
||||
memset(commands, -1, sizeof(struct command) * *length);
|
||||
struct command * current_command = commands;
|
||||
unsigned short position = 0, previous_data = 0;
|
||||
unsigned char scan_delay = 0, scan_delay_flag = (flags >> 3) % 3;
|
||||
struct command copy, repetition;
|
||||
while (position < *length) {
|
||||
copy = find_best_copy(data, position, *length, bitflipped, flags);
|
||||
repetition = find_best_repetition(data, position, *length);
|
||||
if (flags & 1)
|
||||
*current_command = pick_best_command(2, repetition, copy);
|
||||
else
|
||||
*current_command = pick_best_command(2, copy, repetition);
|
||||
*current_command = pick_best_command(2, (struct command) {.command = 0, .count = 1, .value = position}, *current_command);
|
||||
if ((flags & 2) && (command_size(*current_command) == current_command -> count))
|
||||
if (previous_data && (previous_data != SHORT_COMMAND_COUNT) && (previous_data != MAX_COMMAND_COUNT))
|
||||
*current_command = (struct command) {.command = 0, .count = 1, .value = position};
|
||||
if (scan_delay_flag) {
|
||||
if (scan_delay >= scan_delay_flag)
|
||||
scan_delay = 0;
|
||||
else if (current_command -> command) {
|
||||
scan_delay ++;
|
||||
*current_command = (struct command) {.command = 0, .count = 1, .value = position};
|
||||
}
|
||||
}
|
||||
if (current_command -> command)
|
||||
previous_data = 0;
|
||||
else
|
||||
previous_data += current_command -> count;
|
||||
position += (current_command ++) -> count;
|
||||
}
|
||||
optimize(commands, current_command - commands);
|
||||
repack(&commands, length);
|
||||
return commands;
|
||||
}
|
||||
|
||||
struct command find_best_copy (const unsigned char * data, unsigned short position, unsigned short length, const unsigned char * bitflipped, unsigned flags) {
|
||||
struct command simple = {.command = 7};
|
||||
struct command flipped = simple, backwards = simple;
|
||||
short count, offset;
|
||||
if ((count = scan_forwards(data + position, length - position, data, position, &offset)))
|
||||
simple = (struct command) {.command = 4, .count = count, .value = offset};
|
||||
if ((count = scan_forwards(data + position, length - position, bitflipped, position, &offset)))
|
||||
flipped = (struct command) {.command = 5, .count = count, .value = offset};
|
||||
if ((count = scan_backwards(data, length - position, position, &offset)))
|
||||
backwards = (struct command) {.command = 6, .count = count, .value = offset};
|
||||
struct command command;
|
||||
switch (flags / 24) {
|
||||
case 0: command = pick_best_command(3, simple, backwards, flipped); break;
|
||||
case 1: command = pick_best_command(3, backwards, flipped, simple); break;
|
||||
case 2: command = pick_best_command(3, flipped, backwards, simple);
|
||||
}
|
||||
if ((flags & 4) && (command.count > SHORT_COMMAND_COUNT)) command.count = SHORT_COMMAND_COUNT;
|
||||
return command;
|
||||
}
|
||||
|
||||
unsigned short scan_forwards (const unsigned char * target, unsigned short limit, const unsigned char * source, unsigned short real_position, short * offset) {
|
||||
unsigned short best_match, best_length = 0;
|
||||
unsigned short current_length;
|
||||
unsigned short position;
|
||||
for (position = 0; position < real_position; position ++) {
|
||||
if (source[position] != *target) continue;
|
||||
for (current_length = 0; (current_length < limit) && (source[position + current_length] == target[current_length]); current_length ++);
|
||||
if (current_length > MAX_COMMAND_COUNT) current_length = MAX_COMMAND_COUNT;
|
||||
if (current_length < best_length) continue;
|
||||
best_match = position;
|
||||
best_length = current_length;
|
||||
}
|
||||
if (!best_length) return 0;
|
||||
if ((best_match + LOOKBACK_LIMIT) >= real_position)
|
||||
*offset = best_match - real_position;
|
||||
else
|
||||
*offset = best_match;
|
||||
return best_length;
|
||||
}
|
||||
|
||||
unsigned short scan_backwards (const unsigned char * data, unsigned short limit, unsigned short real_position, short * offset) {
|
||||
if (real_position < limit) limit = real_position;
|
||||
unsigned short best_match, best_length = 0;
|
||||
unsigned short current_length;
|
||||
unsigned short position;
|
||||
for (position = 0; position < real_position; position ++) {
|
||||
if (data[position] != data[real_position]) continue;
|
||||
for (current_length = 0; (current_length <= position) && (current_length < limit) &&
|
||||
(data[position - current_length] == data[real_position + current_length]); current_length ++);
|
||||
if (current_length > MAX_COMMAND_COUNT) current_length = MAX_COMMAND_COUNT;
|
||||
if (current_length < best_length) continue;
|
||||
best_match = position;
|
||||
best_length = current_length;
|
||||
}
|
||||
if (!best_length) return 0;
|
||||
if ((best_match + LOOKBACK_LIMIT) >= real_position)
|
||||
*offset = best_match - real_position;
|
||||
else
|
||||
*offset = best_match;
|
||||
return best_length;
|
||||
}
|
||||
|
||||
struct command find_best_repetition (const unsigned char * data, unsigned short position, unsigned short length) {
|
||||
if ((position + 1) >= length) return data[position] ? ((struct command) {.command = 7}) : ((struct command) {.command = 3, .count = 1});
|
||||
unsigned char value[2] = {data[position], data[position + 1]};
|
||||
unsigned repcount, limit = length - position;
|
||||
if (limit > MAX_COMMAND_COUNT) limit = MAX_COMMAND_COUNT;
|
||||
for (repcount = 2; (repcount < limit) && (data[position + repcount] == value[repcount & 1]); repcount ++);
|
||||
struct command result;
|
||||
result.count = repcount;
|
||||
if (*value != value[1]) {
|
||||
if (!*value && (repcount < 3)) return (struct command) {.command = 3, .count = 1};
|
||||
result.command = 2;
|
||||
result.value = ((unsigned) (*value)) | (((unsigned) (value[1])) << 8);
|
||||
} else if (*value) {
|
||||
result.command = 1;
|
||||
result.value = *value;
|
||||
} else
|
||||
result.command = 3;
|
||||
return result;
|
||||
}
|
92
tools/lz/uncomp.c
Normal file
92
tools/lz/uncomp.c
Normal file
@@ -0,0 +1,92 @@
|
||||
#include "proto.h"
|
||||
|
||||
struct command * get_commands_from_file (const unsigned char * data, unsigned short * restrict size, unsigned short * restrict slack) {
|
||||
struct command * result = malloc(*size * sizeof(struct command));
|
||||
unsigned short remaining = *size;
|
||||
struct command * current = result;
|
||||
const unsigned char * rp = data;
|
||||
while (1) {
|
||||
if (!(remaining --)) goto error;
|
||||
current -> command = *rp >> 5;
|
||||
current -> count = *(rp ++) & 31;
|
||||
if (current -> command == 7) {
|
||||
current -> command = current -> count >> 2;
|
||||
current -> count = (current -> count & 3) << 8;
|
||||
if (current -> command == 7) {
|
||||
// long commands inside long commands are not allowed, but if the count is 0x300 here, it means that the original byte was 0xff
|
||||
if (current -> count == 0x300) break;
|
||||
goto error;
|
||||
}
|
||||
if (!(remaining --)) goto error;
|
||||
current -> count |= *(rp ++);
|
||||
}
|
||||
current -> count ++;
|
||||
switch (current -> command) {
|
||||
case 0:
|
||||
if (remaining <= current -> count) goto error;
|
||||
current -> value = rp - data;
|
||||
rp += current -> count;
|
||||
remaining -= current -> count;
|
||||
case 3:
|
||||
break;
|
||||
case 1: case 2: {
|
||||
unsigned char p;
|
||||
if (remaining <= current -> command) goto error;
|
||||
current -> value = 0;
|
||||
for (p = 0; p < current -> command; p ++) current -> value |= *(rp ++) << (p << 3);
|
||||
remaining -= current -> command;
|
||||
} break;
|
||||
default:
|
||||
if (!(remaining --)) goto error;
|
||||
if ((current -> value = *(rp ++)) & 128)
|
||||
current -> value = 127 - current -> value;
|
||||
else {
|
||||
if (!(remaining --)) goto error;
|
||||
current -> value = (current -> value << 8) | *(rp ++);
|
||||
}
|
||||
}
|
||||
current ++;
|
||||
}
|
||||
if (slack) *slack = *size - (rp - data);
|
||||
*size = current - result;
|
||||
return realloc(result, *size * sizeof(struct command));
|
||||
error:
|
||||
free(result);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
unsigned char * get_uncompressed_data (const struct command * commands, const unsigned char * compressed, unsigned short * size) {
|
||||
const struct command * limit = commands + *size;
|
||||
unsigned char * result = malloc(MAX_FILE_SIZE + MAX_COMMAND_COUNT);
|
||||
unsigned char * current = result;
|
||||
unsigned short p;
|
||||
for (; commands < limit; commands ++) {
|
||||
switch (commands -> command) {
|
||||
case 0:
|
||||
memcpy(current, compressed + commands -> value, commands -> count);
|
||||
current += commands -> count;
|
||||
break;
|
||||
case 1: case 2:
|
||||
for (p = 0; p < commands -> count; p ++) *(current ++) = commands -> value >> ((p % commands -> command) << 3);
|
||||
break;
|
||||
case 3:
|
||||
memset(current, 0, commands -> count);
|
||||
current += commands -> count;
|
||||
break;
|
||||
default: {
|
||||
const unsigned char * ref = ((commands -> value < 0) ? current : result) + commands -> value;
|
||||
for (p = 0; p < commands -> count; p ++) {
|
||||
current[p] = ref[(commands -> command == 6) ? -(int) p : p];
|
||||
if (commands -> command == 5) current[p] = bit_flipping_table[current[p]];
|
||||
}
|
||||
current += commands -> count;
|
||||
}
|
||||
}
|
||||
if ((current - result) > MAX_FILE_SIZE) {
|
||||
free(result);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
*size = current - result;
|
||||
return result;
|
||||
}
|
54
tools/lz/util.c
Normal file
54
tools/lz/util.c
Normal file
@@ -0,0 +1,54 @@
|
||||
#include "proto.h"
|
||||
|
||||
noreturn error_exit (int error_code, const char * error, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, error);
|
||||
fputs("error: ", stderr);
|
||||
vfprintf(stderr, error, ap);
|
||||
va_end(ap);
|
||||
fputc('\n', stderr);
|
||||
exit(error_code);
|
||||
}
|
||||
|
||||
unsigned char * read_file_into_buffer (const char * file, unsigned short * size) {
|
||||
FILE * fp = file ? fopen(file, "rb") : stdin;
|
||||
if (!fp) error_exit(1, "could not open file %s for reading", file);
|
||||
unsigned char * buf = malloc(MAX_FILE_SIZE + 1);
|
||||
int rv = fread(buf, 1, MAX_FILE_SIZE + 1, fp);
|
||||
if (file) fclose(fp);
|
||||
if (rv < 0) error_exit(1, "could not read from file %s", file);
|
||||
if (rv > MAX_FILE_SIZE) error_exit(1, "file %s is too big", file ? file : "<standard input>");
|
||||
*size = rv;
|
||||
return buf;
|
||||
}
|
||||
|
||||
struct command pick_best_command (unsigned count, struct command command, ...) {
|
||||
struct command result = command;
|
||||
va_list ap;
|
||||
va_start(ap, command);
|
||||
while (-- count) {
|
||||
command = va_arg(ap, struct command);
|
||||
if (is_better(command, result)) result = command;
|
||||
}
|
||||
va_end(ap);
|
||||
return result;
|
||||
}
|
||||
|
||||
int is_better (struct command new, struct command old) {
|
||||
if (new.command == 7) return 0;
|
||||
if (old.command == 7) return 1;
|
||||
short new_savings = new.count - command_size(new), old_savings = old.count - command_size(old);
|
||||
return new_savings > old_savings;
|
||||
}
|
||||
|
||||
short command_size (struct command command) {
|
||||
short header_size = 1 + (command.count > SHORT_COMMAND_COUNT);
|
||||
if (command.command & 4) return header_size + 1 + (command.value >= 0);
|
||||
return header_size + command.command[(short []) {command.count, 1, 2, 0}];
|
||||
}
|
||||
|
||||
unsigned short compressed_length (const struct command * commands, unsigned short count) {
|
||||
unsigned short current, total = 0;
|
||||
for (current = 0; current < count; current ++) if (commands[current].command != 7) total += command_size(commands[current]);
|
||||
return total;
|
||||
}
|
512
tools/lzcomp.c
512
tools/lzcomp.c
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user