file purge + extract_assets revamp (#644)

* remove gen_asset_list (frozen)

* move format.sh

* remove rasm2armips (decompilation helper)

* remove patch_libmalloc (IDO exclusive)

* remove unnecessary sha1's and makefile definitions

* remove libultra math script (IDO exclusive)

* remove patch_elf_32bit

* actually remove bitwidth tool

* remove the need for all the sha1 files

* extract_assets can now find a baserom named anything

* added error handling and some more graceful error handling

* formatting the error message

* more relevant error info

* add a check for a roms folder

* fix gitignore to prevent patch_elf from being re-committed

* naming clarity; change to baseroms/; feedback addressed

* skip rom verification if assets have been extracted

* changes addressed

---------

Co-authored-by: someone2639 <someone2639@gmail.com>
This commit is contained in:
someone2639
2023-08-29 10:06:41 -04:00
committed by GitHub
parent 5e9f8c7ee0
commit 683bb80c72
14 changed files with 85 additions and 750 deletions

View File

@@ -7,7 +7,7 @@ CC := gcc
CXX := g++
CFLAGS := -I. -O2 -s
LDFLAGS := -lm
ALL_PROGRAMS := armips filesizer rncpack n64graphics n64graphics_ci mio0 slienc n64cksum textconv patch_elf_32bit aifc_decode aiff_extract_codebook vadpcm_enc tabledesign extract_data_for_mio skyconv flips
ALL_PROGRAMS := armips filesizer rncpack n64graphics n64graphics_ci mio0 slienc n64cksum textconv aifc_decode aiff_extract_codebook vadpcm_enc tabledesign extract_data_for_mio skyconv flips
LIBAUDIOFILE := audiofile/libaudiofile.a
# Only build armips from tools if it is not found on the system
@@ -39,8 +39,6 @@ n64cksum_CFLAGS := -DN64CKSUM_STANDALONE
textconv_SOURCES := textconv.c utf8.c hashtable.c
patch_elf_32bit_SOURCES := patch_elf_32bit.c
aifc_decode_SOURCES := aifc_decode.c
aiff_extract_codebook_SOURCES := aiff_extract_codebook.c

View File

@@ -1,11 +0,0 @@
/*
 * Compile-time probe. Every branch of the conditional below ends in #error,
 * so this translation unit never compiles successfully; the useful output is
 * the diagnostic text itself. The text between "msgbegin" and "msgend" spells
 * out a pair of command-line flags (--endian big|little, --bitwidth 64|32)
 * selected from IS_BIG_ENDIAN and IS_64_BIT.
 * NOTE(review): IS_BIG_ENDIAN / IS_64_BIT are presumably defined by
 * platform_info.h, and a build script presumably extracts the flags from the
 * compiler's error output -- confirm against the caller.
 */
#include "platform_info.h"
#if IS_BIG_ENDIAN && IS_64_BIT
#error msgbegin --endian big --bitwidth 64 msgend
#elif IS_BIG_ENDIAN && !IS_64_BIT
#error msgbegin --endian big --bitwidth 32 msgend
#elif !IS_BIG_ENDIAN && IS_64_BIT
#error msgbegin --endian little --bitwidth 64 msgend
#else
#error msgbegin --endian little --bitwidth 32 msgend
#endif

14
tools/format.sh Executable file
View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Runs clang-format in place (-i) with -style=file.
#
# Usage:
#   tools/format.sh [file...]
#
# With arguments, only the listed files are formatted. Without arguments, the
# default source sets listed at the bottom of this script are formatted.

# globstar lets the ** in the src glob below match nested directories.
shopt -s globstar

if (( $# > 0 )); then
    # The file list is passed as printf data rather than as the format string,
    # so a '%' or backslash inside a path is printed literally.
    printf 'formatting file(s) %s\n' "$*"
    clang-format -i -style=file "$@"
    echo done.
    exit
fi

echo formatting...
clang-format -i -style=file src/**/*.c # src
clang-format -i -style=file lib/src/*.c # libultra
clang-format -i -style=file enhancements/*.inc.c # enhancements
echo done.

View File

@@ -1,382 +0,0 @@
// WARNING: THIS SCRIPT IS CURRENTLY BROKEN.
// It doesn't handle skyboxes/cake images correctly.
// Usage:
// g++-8 -std=c++17 ./tools/gen_asset_list.cpp -lstdc++fs -O1 -Wall -o tools/gen_asset_list
// ./tools/gen_asset_list
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
using namespace std;
// Reverses the byte order of a 32-bit value.
#define BSWAP32(x) ((((x) >> 24) & 0xff) | (((x) >> 8) & 0xff00) | (((x) << 8) & 0xff0000) | (((x) << 24) & 0xff000000U))
// Reverses the byte order of a 16-bit value.
#define BSWAP16(x) ((((x) >> 8) & 0xff) | (((x) << 8) & 0xff00))
// Name of the JSON file written by main().
const char* OUTPUT_FILE = "assets.json";
// Length in bytes of the window that gets hashed (see hashString and the
// rollingHashes call in main()).
const size_t CHUNK_SIZE = 16;
// Version identifiers; used to build "build/<lang>/..." paths and the
// "baserom.<lang>.z64" file name.
const vector<string> LANGS = {"jp", "us", "eu", "sh"};
typedef uint8_t u8;
typedef uint64_t u64;
// Where an asset was found. `pos` is the byte offset inside the buffer that
// was searched. `mio0` is the ROM offset of the MIO0 block whose decompressed
// contents were searched, or 0 when the raw ROM itself was searched.
struct Pos {
    size_t pos;
    size_t mio0;
};
// Multiplier of the polynomial hash used by hashString and rollingHashes.
const u64 C = 12318461241ULL;
// Chooses where the CHUNK_SIZE-byte hash window of `s` starts.
//
// The window begins on the last byte of the leading run of bytes equal to
// s[0] (index 0 when every byte from index 1 on equals s[0]). If a window
// starting there would run past the end of `s`, the window is instead placed
// flush with the end (s.size() - CHUNK_SIZE; this unsigned subtraction wraps
// when `s` is shorter than CHUNK_SIZE).
size_t findCutPos(const string& s) {
    const size_t firstDifferent = s.find_first_not_of(s[0], 1);
    const size_t start = (firstDifferent != string::npos) ? firstDifferent - 1 : 0;
    const bool windowFits = start + CHUNK_SIZE <= s.size();
    return windowFits ? start : s.size() - CHUNK_SIZE;
}
pair<size_t, u64> hashString(const string& inp) {
size_t cutPos = findCutPos(inp);
string s = inp.substr(cutPos, CHUNK_SIZE);
u64 ret = 0;
for (u8 c : s) {
ret *= C;
ret += c;
}
return {cutPos, ret};
}
template<class F>
void rollingHashes(const string& str, size_t chunkSize, F&& f) {
if (str.size() < chunkSize) return;
u64 h = 0, pw = 1;
for (size_t i = 0; i < chunkSize; i++)
h = h * C + (u8)str[i], pw = pw * C;
f(0, h);
for (size_t i = chunkSize; i < str.size(); i++) {
h = h * C + (u8)str[i] - pw * (u8)str[i-chunkSize];
f(i - chunkSize + 1, h);
}
}
// Returns true when `target` occurs in `base` starting exactly at offset `pos`.
//
// The range check is written so that it cannot wrap: callers may pass a `pos`
// produced by an unsigned subtraction that underflowed, and such a position
// must be rejected rather than indexed. An empty target matches at any
// position that lies within `base` (including base.size()).
//
// Ten random probes run before the full comparison as a cheap early-out; they
// never change the result.
bool stringMatches(const string& base, size_t pos, const string& target) {
    const size_t len = target.size();
    if (len > base.size() || pos > base.size() - len) return false;
    if (len == 0) return true; // nothing to compare; also avoids rand() % 0
    for (int it = 0; it < 10; it++) {
        size_t i = rand() % len;
        if (base[pos + i] != target[i]) return false;
    }
    return base.compare(pos, len, target) == 0;
}
// Decompresses the MIO0 block that starts at `src` and returns the output.
//
// Header words are byte-swapped with BSWAP32 before use: word 1 is the
// decompressed size, word 2 the offset of the back-reference stream, word 3
// the offset of the literal-byte stream (both offsets relative to `src`).
// Control words follow from word 4 on; each supplies 32 flags, consumed from
// the most significant bit down. A set flag copies one literal byte. A clear
// flag reads a 16-bit back-reference: the top 4 bits + 3 give the length and
// the low 12 bits + 1 give the distance back from the current write position.
//
// Writes are confined to the declared output size, and a back-reference that
// points before the start of the output stops decoding (the remainder of the
// output stays zero-filled). Reads from `src` cannot be bounds-checked here
// because the input length is not passed in.
string mio0_decompress(uint32_t *src) {
    uint32_t size = BSWAP32(src[1]);
    string output(size, '\0');
    char *const destBegin = output.data();
    char *dest = destBegin;
    char *destEnd = (size + dest);
    uint16_t *cmpOffset = (uint16_t *)((char *)src + BSWAP32(src[2]));
    char *rawOffset = ((char *)src + BSWAP32(src[3]));
    int counter = 0;
    uint32_t controlBits = 0;
    src += 4;
    while (dest < destEnd) {
        if (counter == 0) {
            // Out of flags: fetch the next control word.
            controlBits = *src++;
            controlBits = BSWAP32(controlBits);
            counter = 32;
        }
        if (controlBits & 0x80000000) {
            *dest++ = *rawOffset++;
        }
        else {
            uint16_t dcmpParam = *cmpOffset++;
            dcmpParam = BSWAP16(dcmpParam);
            int dcmpCount = (dcmpParam >> 12) + 3;
            size_t distance = (size_t)(dcmpParam & 0x0FFF) + 1;
            if (distance > (size_t)(dest - destBegin)) {
                break; // malformed: refers to data before the output start
            }
            const char *dcmpPtr = dest - distance;
            // Byte-by-byte on purpose: source and destination may overlap.
            while (dcmpCount && dest < destEnd) {
                *dest++ = *dcmpPtr++;
                dcmpCount--;
            }
        }
        counter--;
        controlBits <<= 1;
    }
    return output;
}
// Reads the whole file at `p` in binary mode and returns its bytes.
//
// If the file cannot be opened: returns an empty string when `allowMissing`
// is true; otherwise prints "missing file <p>" to stderr and exits with
// status 1.
string readFile(const string& p, bool allowMissing = false) {
    ifstream input(p, ios::binary);
    if (input) {
        // Measure the file by seeking to its end, then rewind and slurp.
        input.seekg(0, input.end);
        auto length = input.tellg();
        input.seekg(0, input.beg);
        string contents(length, '\0');
        input.read(contents.data(), length);
        assert(input);
        return contents;
    }
    if (!allowMissing) {
        cerr << "missing file " << p << endl;
        exit(1);
    }
    return "";
}
// Returns {width, height} read from the start of the PNG file `fname`.
//
// Reads the first 16 bytes followed by two 32-bit values, which are
// byte-swapped with BSWAP32 before being returned. Asserts that the reads
// succeeded, that the file starts with "\x89PNG", and that bytes 12..15 are
// "IHDR".
pair<int, int> getPngSize(const string& fname) {
    ifstream png(fname, ios::binary);
    string header(16, '\0');
    uint32_t width, height;
    png.read(header.data(), 16);
    png.read((char*)&width, 4);
    png.read((char*)&height, 4);
    assert(png);
    assert(header.compare(0, 4, "\x89PNG") == 0);
    assert(header.compare(12, 4, "IHDR") == 0);
    width = BSWAP32(width);
    height = BSWAP32(height);
    return {width, height};
}
// Runs `cmd` through popen and returns everything it wrote to stdout.
//
// Asserts that the pipe could be opened and that reading finished without a
// stream error. The command's exit status is not inspected.
string exec(const string& cmd) {
    FILE* stream = popen(cmd.c_str(), "r");
    assert(stream);

    string collected;
    char chunk[128];
    for (;;) {
        const size_t got = fread(chunk, 1, sizeof(chunk), stream);
        if (got == 0) break;
        collected.append(chunk, got);
    }

    assert(!ferror(stream));
    pclose(stream);
    return collected;
}
// Produces the binary form of the asset file `fname`, or "" when the file is
// not a recognised asset.
//
// *.png: the first non-empty "build/<lang>/<name without .png>" file wins.
//   Failing that, the extension underneath ".png" is examined; for the image
//   formats rgba16/ia16/ia8/ia4/ia1 the output of tools/n64graphics is
//   returned. Otherwise that inner extension is carried into the checks below.
// *.m64: the file's own contents.
// *.bin: the file's own contents, but only when the path contains "assets".
string compileAsset(const string& fname) {
    const size_t dot = fname.rfind('.');
    if (dot == string::npos) return "";

    string ext = fname.substr(dot + 1);
    if (ext == "png") {
        const string stem = fname.substr(0, dot);
        for (const string& lang : LANGS) {
            string built = readFile("build/" + lang + "/" + stem, true);
            if (!built.empty()) return built;
        }

        const size_t innerDot = stem.rfind('.');
        if (innerDot == string::npos) return "";
        ext = stem.substr(innerDot + 1);

        const char* const imageFormats[] = {"rgba16", "ia16", "ia8", "ia4", "ia1"};
        for (const char* format : imageFormats) {
            if (ext == format) {
                return exec("./tools/n64graphics -i /dev/stdout -g " + fname + " -f " + ext);
            }
        }
    }

    const bool isSequence = (ext == "m64");
    const bool isAssetBin = (ext == "bin") && fname.find("assets") != string::npos;
    if (isSequence || isAssetBin) return readFile(fname);
    return "";
}
// Builds the sound data for version `lang` and returns
// {contents of sound_data.ctl, contents of sound_data.tbl, sample file list}.
//
// Runs make for build/<lang>/sound/sound_data.tbl, then runs
// tools/assemble_sound.py with --print-samples. Each line that prints has the
// "build/<lang>/" prefix removed and its last character replaced by 'f'
// before it is added to the sample list.
tuple<string, string, vector<string>> compileSoundData(const string& lang) {
    // Shift every character by 'A' - 'a' to get the -DVERSION_ suffix.
    string versionSuffix = lang;
    for (size_t i = 0; i < versionSuffix.size(); ++i) {
        versionSuffix[i] = (char)(versionSuffix[i] + 'A' - 'a');
    }

    const string buildRoot = "build/" + lang;
    const string soundDir = buildRoot + "/sound";
    const string ctlPath = soundDir + "/sound_data.ctl";
    const string tblPath = soundDir + "/sound_data.tbl";

    exec("make " + tblPath + " VERSION=" + lang + " NOEXTRACT=1");

    string command = "python3 tools/assemble_sound.py ";
    command += soundDir + "/samples/ ";
    command += "sound/sound_banks/ ";
    command += soundDir + "/sound_data.ctl ";
    command += soundDir + "/sound_data.tbl ";
    command += "-DVERSION_" + versionSuffix;
    command += " --print-samples";

    vector<string> sampleFiles;
    istringstream listing(exec(command));
    for (string line; getline(listing, line); ) {
        line = line.substr(buildRoot.size() + 1);
        line.back() = 'f';
        sampleFiles.push_back(line);
    }

    string ctlData = readFile(ctlPath);
    string tblData = readFile(tblPath);
    return {ctlData, tblData, sampleFiles};
}
// Entry point: compiles every asset, locates each one inside the base ROM of
// every version in LANGS, and writes the results to OUTPUT_FILE as JSON.
// Returns 1 when at least one compiled asset was found in no ROM, else 0.
int main() {
    // Deliberately does not compile; the header comment of this file marks the
    // tool as broken.
    intentional syntax error; // (see comment at top of file)
    // asset name -> compiled bytes
    map<string, string> assets;
    // sample file name -> list of (version, index within that version's sample list)
    map<string, vector<pair<string, int>>> soundAssets;
    cout << "compiling assets..." << endl;
    int totalAssets = 0;
    // Phase 1: compile every file below the asset directories. Files that
    // compileAsset does not recognise (empty result) are skipped, as are
    // results too short to hold one hash window.
    for (string base_dir : {"assets", "sound/sequences", "textures", "levels", "actors"}) {
        for (auto& ent: filesystem::recursive_directory_iterator(base_dir)) {
            string p = ent.path().string();
            string bin = compileAsset(p);
            if (bin.empty()) continue;
            if (bin.size() < CHUNK_SIZE) {
                cerr << "asset " << p << " is too small (" << bin.size() << " bytes), expected at least " << CHUNK_SIZE << " bytes" << endl;
                continue;
            }
            assets[p] = bin;
            totalAssets++;
        }
    }
    // Phase 2: per version, register the ctl/tbl sound data as two pseudo
    // assets and remember the position of every sample file.
    for (const string& lang : LANGS) {
        string ctl, tbl;
        vector<string> sampleFiles;
        tie(ctl, tbl, sampleFiles) = compileSoundData(lang);
        assets["@sound ctl " + lang] = ctl;
        assets["@sound tbl " + lang] = tbl;
        totalAssets += 2;
        for (size_t i = 0; i < sampleFiles.size(); i++) {
            soundAssets[sampleFiles[i]].emplace_back(lang, i);
        }
    }
    cout << "compiled " << totalAssets << " assets" << endl;
    // Phase 3: index the assets by the hash of their CHUNK_SIZE window.
    // hash -> list of (window offset inside the asset, (name, bytes))
    unordered_map<u64, vector<pair<size_t, pair<string, string>>>> hashes;
    for (const auto& asset : assets) {
        size_t cutPos;
        u64 hash;
        tie(cutPos, hash) = hashString(asset.second);
        hashes[hash].push_back(make_pair(cutPos, asset));
    }
    // (version, asset name) -> where the asset was found
    map<pair<string, string>, Pos> assetPositions;
    // Phase 4: search each version's ROM.
    for (const string& lang : LANGS) {
        cout << "searching " << lang << "..." << endl;
        // Per-version working copy; entries are removed once matched.
        auto remHashes = hashes;
        // Scans `str` for all still-unmatched assets. `mio0` is recorded as
        // Pos::mio0 for every match found in this buffer.
        auto search = [&](string& str, string lang, size_t mio0) {
            rollingHashes(str, CHUNK_SIZE, [&](size_t hashPos, u64 hash) {
                if (!remHashes.count(hash)) return;
                vector<pair<size_t, pair<string, string>>>& conts = remHashes.at(hash);
                // The predicate records a position as a side effect and marks
                // the matched candidate for removal.
                auto it = remove_if(conts.begin(), conts.end(),
                    [&](const pair<size_t, pair<string, string>>& pa) {
                        size_t cutPos = pa.first;
                        const string& name = pa.second.first;
                        const string& data = pa.second.second;
                        // The hashed window sits cutPos bytes into the asset,
                        // so the asset would start at hashPos - cutPos. This is
                        // an unsigned subtraction and wraps when
                        // hashPos < cutPos.
                        size_t assetPos = hashPos - cutPos;
                        if (stringMatches(str, assetPos, data)) {
                            assetPositions[make_pair(lang, name)] = {assetPos, mio0};
                            return true;
                        }
                        return false;
                    });
                conts.erase(it, conts.end());
                if (conts.empty()) remHashes.erase(hash);
            });
        };
        string rom = readFile("baserom." + lang + ".z64");
        // Every 4-byte-aligned "MIO0" signature is decompressed and searched,
        // with the signature's ROM offset recorded as the block id.
        for (size_t i = 0; i < rom.size(); i += 4) {
            if (rom[i] == 'M' && rom[i+1] == 'I' && rom[i+2] == 'O' && rom[i+3] == '0') {
                string data = mio0_decompress((uint32_t*)&rom[i]);
                search(data, lang, i);
            }
        }
        // Finally search the raw ROM; 0 means "not inside a MIO0 block".
        search(rom, lang, 0);
    }
    // Phase 5: write the JSON file.
    cout << "generating " << OUTPUT_FILE << "..." << endl;
    ofstream fout(OUTPUT_FILE);
    assert(fout);
    fout <<
        "{\n"
        "\"@comment\": \"This file was generated by tools/gen_asset_list.cpp. "
        "When renaming a file, either change its name in this file directly, "
        "or regenerate this file using that script.\"";
    // True until the first asset entry is written; that entry gets one extra
    // newline in front of it.
    bool first1 = true;
    vector<string> notFound;
    for (const auto& asset : assets) {
        const string& name = asset.first;
        const string& data = asset.second;
        // Versions in which this asset was located, in LANGS order.
        vector<pair<string, Pos>> positions;
        for (const string& lang : LANGS) {
            auto it = assetPositions.find(make_pair(lang, name));
            if (it != assetPositions.end()) {
                positions.push_back(make_pair(lang, it->second));
            }
        }
        if (positions.empty()) {
            notFound.push_back(name);
        }
        else {
            // Entry shape: "name": [w,h,size,{"lang":[mio0,pos],...}] where
            // w,h appear only for names ending in ".png" and mio0 only when
            // it is non-zero.
            fout << ",\n";
            if (first1) fout << "\n";
            first1 = false;
            fout << "\"" << name << "\": [";
            if (name.substr(name.size() - 4) == ".png") {
                int w, h;
                tie(w, h) = getPngSize(name);
                fout << w << "," << h << ",";
            }
            fout << data.size() << ",{";
            bool first2 = true;
            for (auto& pa : positions) {
                auto p = pa.second;
                if (!first2) fout << ",";
                first2 = false;
                fout << "\"" << pa.first << "\":[";
                if (p.mio0)
                    fout << p.mio0 << ",";
                fout << p.pos << ']';
            }
            fout << "}]";
        }
    }
    // Sample files are written with size 0 and, per version,
    // ["@sound", index] in place of a ROM position.
    for (const auto& asset : soundAssets) {
        const string& name = asset.first;
        const vector<pair<string, int>>& locs = asset.second;
        fout << ",\n";
        fout << "\"" << name << "\": [0,{";
        bool first2 = true;
        for (auto& pa : locs) {
            if (!first2) fout << ",";
            first2 = false;
            fout << "\"" << pa.first << "\":[\"@sound\"," << pa.second << ']';
        }
        fout << "}]";
    }
    fout << "\n}" << endl;
    assert(fout);
    fout.close();
    // Report assets that were found in no ROM; the names are listed only when
    // there are at most 10 of them.
    if (!notFound.empty()) {
        cout << endl;
        cout << "Missing " << notFound.size() << " assets." << endl;
        if (notFound.size() <= 10) {
            for (auto& s : notFound) {
                cout << s << endl;
            }
        }
        return 1;
    }
    cout << "done!" << endl;
    return 0;
}

View File

@@ -1,161 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#define ARMAG "!<arch>\n"
#define SARMAG 8
/* from elf.h */
/* Type for a 16-bit quantity. */
typedef uint16_t Elf32_Half;
/* Types for signed and unsigned 32-bit quantities. */
typedef uint32_t Elf32_Word;
/* Type of addresses. */
typedef uint32_t Elf32_Addr;
/* Type of file offsets. */
typedef uint32_t Elf32_Off;
/* The ELF file header. This appears at the start of every ELF file. */
#define EI_NIDENT (16)
/*
 * In-memory image of the 32-bit ELF file header. fix_mips_elf() reads and
 * writes this struct directly with fread/fwrite, so its layout has to match
 * the on-disk header byte for byte, and multi-byte fields hold whatever byte
 * order the file uses (no conversion is applied).
 */
typedef struct
{
    unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
    Elf32_Half e_type; /* Object file type */
    Elf32_Half e_machine; /* Architecture */
    Elf32_Word e_version; /* Object file version */
    Elf32_Addr e_entry; /* Entry point virtual address */
    Elf32_Off e_phoff; /* Program header table file offset */
    Elf32_Off e_shoff; /* Section header table file offset */
    Elf32_Word e_flags; /* Processor-specific flags */
    Elf32_Half e_ehsize; /* ELF header size in bytes */
    Elf32_Half e_phentsize; /* Program header table entry size */
    Elf32_Half e_phnum; /* Program header table entry count */
    Elf32_Half e_shentsize; /* Section header table entry size */
    Elf32_Half e_shnum; /* Section header table entry count */
    Elf32_Half e_shstrndx; /* Section header string table index */
} Elf32_Ehdr;
/* Conglomeration of the identification bytes, for easy testing as a word. */
#define ELFMAG "\177ELF"
#define SELFMAG 4
#define EI_CLASS 4 /* File class byte index */
#define ELFCLASS32 1 /* 32-bit objects */
#define EI_DATA 5 /* Data encoding byte index */
#define ELFDATA2MSB 2 /* 2's complement, big endian */
/* end from elf.h */
// This file will find all mips3 object files in an ar archive and set the ABI flags to O32
// this allows gcc to link them with the mips2 object files.
// Irix CC doesn't set the elf e_flags properly.
// the AR file is structured as follows
//"!<arch>" followed by 0x0A (linefeed) 8 characters
// then a file header that follows the following structure
// everything is represented using space padded characters
// the last two characters are always 0x60 0x0A
// then come the file contents
// you can find the location of the next header by adding file_size_in_bytes (after parsing)
// all file headers start at an even offset so if the file size in bytes is odd you have to add 1
// the first two "files" are special. One is a symbol table with a pointer to the header of the file
// containing the symbol the other is an extended list of filenames
/*
 * Per-member header of an ar archive, read straight from the file with
 * fread() in fix_mips_ar(). The fields add up to 60 bytes
 * (16 + 12 + 6 + 6 + 8 + 10 + 2). They are character fields with no NUL
 * terminator.
 */
struct ar_header {
    char identifier[16];
    char file_modification_timestamp[12];
    char owner_id[6];
    char group_id[6];
    char file_mode[8];
    char file_size_in_bytes[10]; /* decimal text; parsed with atoi() in fix_mips_ar() */
    char ending[2];              /* checked against 0x60 0x0A in fix_mips_ar() */
};
//These constants found by inspecting output of objdump
// They are compared against Elf32_Ehdr.e_flags exactly as fread() stored it,
// i.e. without any byte-order conversion.
// NOTE(review): the values look like the ELF MIPS flags EF_MIPS_ARCH_3
// (0x20000000) and EF_MIPS_ABI_O32 (0x00001000) as they appear when a
// big-endian e_flags word is loaded on a little-endian host -- confirm; if so
// this tool only works when run on a little-endian machine.
#define FLAGS_MIPS3 0x20
#define FLAGS_O32ABI 0x100000
/*
 * Reads one ELF header from the current position of `f` and, when it
 * describes a mips3 object whose O32 ABI flag is clear, sets that flag and
 * writes the header back in place.
 *
 * `filesize` is the number of bytes available for this object; it must be at
 * least sizeof(Elf32_Ehdr).
 *
 * Returns 0 on success, which includes data that is not an ELF file and
 * headers that need no change. Returns -1 when the header cannot be read, the
 * object is not 32-bit big-endian, or the write-back fails.
 *
 * On return the stream is positioned sizeof(Elf32_Ehdr) bytes past where it
 * started, unless the read failed.
 */
int fix_mips_elf(FILE *f, size_t filesize)
{
    Elf32_Ehdr hdr;
    int needs_patch;

    if (filesize < sizeof(hdr) || fread(&hdr, sizeof(hdr), 1, f) != 1) {
        printf("Failed to read ELF header\n");
        return -1;
    }

    /* Anything that is not an ELF file is left untouched. */
    if (strncmp((const char *) hdr.e_ident, ELFMAG, SELFMAG) != 0) {
        return 0;
    }

    if (hdr.e_ident[EI_CLASS] != ELFCLASS32 || hdr.e_ident[EI_DATA] != ELFDATA2MSB) {
        printf("Expected 32bit big endian object files\n");
        return -1;
    }

    needs_patch = (hdr.e_flags & 0xFF) == FLAGS_MIPS3 && !(hdr.e_flags & FLAGS_O32ABI);
    if (!needs_patch) {
        return 0;
    }

    hdr.e_flags |= FLAGS_O32ABI;
    /* Step back over the header that was just read so it can be overwritten. */
    fseek(f, -(long)sizeof(hdr), SEEK_CUR);
    if (fwrite(&hdr, sizeof(hdr), 1, f) != 1) {
        printf("Failed to write back ELF header after patching.\n");
        return -1;
    }
    return 0;
}
/*
 * Walks every member of the ar archive open on `f` and lets fix_mips_elf()
 * patch each one.
 *
 * Returns 0 once no further member header can be read, or -1 when a member
 * header does not end in 0x60 0x0A or fix_mips_elf() reports an error.
 */
int fix_mips_ar(FILE *f)
{
    struct ar_header current_header;
    /* NUL-terminated copy of the size field, which has no terminator itself. */
    char size_text[sizeof(current_header.file_size_in_bytes) + 1];

    fseek(f, 0x8, SEEK_SET); // skip header, this is safe enough given that we check to make sure the
                             // file header is valid
    while (1 == fread(&current_header, sizeof(current_header), 1, f)) {
        /* Both terminator bytes must be right; one wrong byte is enough to reject. */
        if (current_header.ending[0] != 0x60 || current_header.ending[1] != 0x0A) {
            printf("Expected file header\n");
            return -1;
        }
        memcpy(size_text, current_header.file_size_in_bytes,
               sizeof(current_header.file_size_in_bytes));
        size_text[sizeof(current_header.file_size_in_bytes)] = '\0';
        size_t filesize = atoi(size_text);
        if (fix_mips_elf(f, filesize)) {
            return -1;
        }
        /* Member headers start on even offsets, so odd sizes are padded by one byte. */
        if (filesize % 2 == 1)
            filesize++;
        /* fix_mips_elf() already consumed sizeof(Elf32_Ehdr) bytes of this member. */
        fseek(f, filesize - sizeof(Elf32_Ehdr), SEEK_CUR);
    }
    return 0;
}
/*
 * Usage: <program> <file>
 *
 * Opens <file> for in-place update and dispatches on its first bytes: an ar
 * archive goes to fix_mips_ar(), a bare ELF file to fix_mips_elf().
 * Returns the status of that call, or -1 when the argument is missing, the
 * file cannot be opened or read, or the magic is unknown.
 */
int main(int argc, char **argv) {
    FILE *f;
    uint8_t magic[8];
    int status = 0;
    if (argc < 2) {
        printf("Usage: %s <file>\n", argc > 0 ? argv[0] : "patch_elf_32bit");
        return -1;
    }
    f = fopen(argv[1], "r+b");
    if (f == NULL) {
        printf("Failed to open file! %s\n", argv[1]);
        return -1;
    }
    if (1 != fread(&magic, sizeof(magic), 1, f)) {
        printf("Failed to read file magic\n");
        fclose(f);
        return -1;
    }
    rewind(f);
    if (!memcmp(ARMAG, magic, SARMAG)) {
        status = fix_mips_ar(f);
    } else if (!memcmp(ELFMAG, magic, SELFMAG)) {
        /* A bare ELF file: its size is the size of the whole file. */
        fseek(f, 0, SEEK_END);
        size_t filesize = ftell(f);
        rewind(f);
        status = fix_mips_elf(f, filesize);
    } else {
        printf("Unknown file magic: %02x%02x%02X%02X\n", magic[0], magic[1], magic[2], magic[3]);
        status = -1;
    }
    fclose(f);
    return status;
}

View File

@@ -1,58 +0,0 @@
#!/usr/bin/env python
#
# Patches the malloc() function in libmalloc.so to allocate more than the
# specified number of bytes. This is needed to work around issues with the
# compiler occasionally crashing.
#
# This script replaces the "move a1, a0" (00 80 28 25) instruction with
# "addiu a1, a0, n" (24 85 nn nn), which causes the malloc function to add n to
# the size parameter that was passed in.
#
# Exit status is 0 on success and 1 on any failure (bad usage, n out of range,
# unexpected library version, or an I/O error).

import hashlib
import os.path
import sys

# file to patch
filename = 'tools/ido5.3_compiler/lib/libmalloc.so'

# Expected (unpatched) hash of file
filehash = 'adde672b5d79b52ca3cce9a47c7cb648'

# location in file to patch
address = 0xAB4

# Get parameter
if len(sys.argv) != 2:
    print('Usage: ' + sys.argv[0] + ' n\n where n is the number of extra bytes to allocate in malloc()')
    sys.exit(1)

n = int(sys.argv[1])

# The instruction only has room for 16 bits of n, and addiu sign-extends them.
# Reject values that would otherwise be silently truncated.
if not -0x8000 <= n <= 0x7FFF:
    print('Error: n must be between -32768 and 32767')
    sys.exit(1)

# Original instruction "move a1, a0"
oldinsn = bytearray([0x00, 0x80, 0x28, 0x25])

# New instruction "addiu a1, a0, n"
newinsn = bytearray([0x24, 0x85, (n >> 8) & 0xFF, (n & 0xFF)])

# Patch the file
try:
    with open(filename, 'rb+') as f:
        # Read file contents
        contents = bytearray(f.read())

        # Unpatch the file by restoring original instruction
        contents[address:address+4] = oldinsn

        # Verify the (unpatched) hash of the file
        md5 = hashlib.md5()
        md5.update(contents)
        if md5.hexdigest() != filehash:
            print('Error: ' + filename + ' does not appear to be the correct version.')
            sys.exit(1)

        # Patch the file (n == 0 leaves the original instruction restored)
        if n != 0:
            contents[address:address+4] = newinsn

        # Write file
        f.seek(0, os.SEEK_SET)
        f.write(contents)
except IOError as e:
    print('Error: Could not open library file for writing: ' + str(e))
    # Report the failure to the caller instead of exiting with status 0.
    sys.exit(1)

Binary file not shown.

View File

@@ -1,76 +0,0 @@
#!/usr/bin/env python
import argparse
import re
import sys
def read_file(filepath):
    """Read `filepath` and return one list of fields per line.

    Each line is stripped of surrounding whitespace, has every '$' removed,
    and is then split on runs of spaces and/or commas.
    """
    rows = []
    with open(filepath) as handle:
        for raw in handle:
            cleaned = raw.strip().replace('$', '')
            rows.append(re.split(r'[ ,]+', cleaned))
    return rows
# mnemonics whose last operand is a named target
jumps = 'jal j'.split()
branches = 'beq bgez bgtz blez bltz bne'.split()
jump_branches = jumps + branches

# mnemonics that are followed by a delay slot
has_delay_slot = jump_branches + ['jr']
def decode_references(instructions):
    """Collect the distinct targets of all jump and branch instructions.

    `instructions` is a list of field lists; field 3 is the mnemonic and the
    last field is the target, parsed with int(..., 0). Targets are returned in
    order of first appearance.
    """
    targets = []
    for fields in instructions:
        if fields[3] not in jump_branches:
            continue
        address = int(fields[-1], 0)
        if address not in targets:
            targets.append(address)
    return targets
def reassemble(args, instructions, refs):
    """Print the instructions to stdout as an armips source file.

    args: parsed command-line options; `base`, `name` and `indent` are used.
    instructions: list of field lists; field 0 is the address, field 3 the
        mnemonic, fields 4 and up the operands. The lists are modified in
        place while printing.
    refs: targets that need a label (see decode_references).
    """
    print('.rsp')
    print('\n.create DATA_FILE, 0x%04X' % 0x0000)
    print('\n.close // DATA_FILE\n')
    print('.create CODE_FILE, 0x%08X\n' % args.base)
    # True while the instruction being printed follows a jump or branch.
    delay_slot = False
    for ins in instructions:
        addr = int(ins[0], 0)
        # Emit a label when the low 16 bits of this address are a known target.
        if (addr & 0xFFFF) in refs:
            print('%s_%08x:' % (args.name, addr))
        sys.stdout.write(' ' * args.indent)
        # Delay-slot instructions get extra indentation.
        if delay_slot:
            sys.stdout.write(' ')
            delay_slot = False
        if ins[3] in jumps:
            # Replace the numeric target with its label; the upper 16 bits
            # come from the base address.
            target = int(ins[-1], 0) | (args.base & 0xFFFF0000)
            ins[-1] = '%s_%08x' % (args.name, target)
        elif ins[3] in branches:
            if ins[3][-1] =='z' and ins[5] == 'zero':
                del ins[5] # remove 'zero' operand from branch
            # Only the low 13 bits of a branch target are kept before the
            # upper 16 bits of the base address are added.
            target = (int(ins[-1], 0) & 0x1FFF) + (args.base & 0xFFFF0000)
            ins[-1] = '%s_%08x' % (args.name, target)
        elif ins[3] == 'vsar': # fixup last operand of vsar
            # Rewrite the accumulator name as an index on the register named
            # in the first operand.
            reg_map = {'ACC_H': 0, 'ACC_M': 1, 'ACC_L': 2}
            reg = ins[4].split(r'[')[0]
            num = reg_map[ins[-1]]
            ins[-1] = '%s[%d]' % (reg, num)
        if ins[3] in has_delay_slot:
            delay_slot = True
        if len(ins) > 4: # with args
            print('%-5s %s' % (ins[3], ', '.join(ins[4:])))
        else:
            print('%s' % ins[3])
    print('\n.close // CODE_FILE')
def main():
    """Parse the command line, read the input file and print the armips source."""
    cli = argparse.ArgumentParser()
    cli.add_argument(
        'input_file',
        help="input assembly file generated from `rasm2 -D -e -a rsp -B -o 0x04001000 -f`",
    )
    cli.add_argument('-b', dest='base', type=int, default=0x04001000,
                     help="base address of file")
    cli.add_argument('-i', dest='indent', type=int, default=4,
                     help="amount of indentation")
    cli.add_argument('-n', dest='name', default='f3d',
                     help="name to prefex labels with")
    options = cli.parse_args()

    instructions = read_file(options.input_file)
    references = decode_references(instructions)
    reassemble(options, instructions, references)
main()