import io
import json
import os
import re
import shutil
import struct
from dataclasses import dataclass
from itertools import tee
from pathlib import Path

import lxml.etree as etree
import pandas as pd
import pycdlib
from tqdm import tqdm

from pythonlib.formats.FileIO import FileIO
from pythonlib.formats.pak import Pak
from pythonlib.formats.scpk import Scpk
import pythonlib.utils.comptolib as comptolib
import pythonlib.formats.pak2 as pak2lib
from pythonlib.formats.theirsce import Theirsce
from pythonlib.formats.theirsce_instructions import (AluOperation, InstructionType,
                                                     TheirsceBaseInstruction)

from .ToolsTales import ToolsTales


@dataclass
class LineEntry:
    names: list[str]
    text: str
    offset: int


@dataclass
class NameEntry:
    index: int
    offsets: list[int]


VARIABLE_NAME = "[VARIABLE]"


class ToolsTOR(ToolsTales):

    POINTERS_BEGIN = 0xD76B0  # Offset to DAT.BIN pointer list start in SLPS_254.50 file
    POINTERS_END = 0xE60C8    # Offset to DAT.BIN pointer list end in SLPS_254.50 file
    HIGH_BITS = 0xFFFFFFC0
    LOW_BITS = 0x3F

    # Paths used
    # fmt: off
    dat_bin_original = '../Data/Tales-Of-Rebirth/Disc/Original/DAT.BIN'
    dat_bin_new      = '../Data/Tales-Of-Rebirth/Disc/New/DAT.BIN'
    elf_original     = '../Data/Tales-Of-Rebirth/Disc/Original/SLPS_254.50'
    elf_new          = '../Data/Tales-Of-Rebirth/Disc/New/SLPS_254.50'
    story_XML_new    = '../Tales-Of-Rebirth/Data/TOR/Story/'   # Story XML files are extracted here
    story_XML_patch  = '../Data/Tales-Of-Rebirth/Story/'       # Story XML files are extracted here
    skit_XML_patch   = '../Data/Tales-Of-Rebirth/Skits/'       # Skit XML files are extracted here
    menu_XML_patch   = '../Tales-Of-Rebirth/Data/TOR/Menu/'
    skit_XML_new     = '../Tales-Of-Rebirth/Data/TOR/Skits/'
    dat_archive_extract = '../Data/Tales-Of-Rebirth/DAT/'
    # fmt: on

    def __init__(self, project_file: Path) -> None:
        base_path = project_file.parent
        self.jsonTblTags = {}
        self.ijsonTblTags = {}

        with open(project_file, encoding="utf-8") as f:
            jsonRaw = json.load(f)

        self.paths: dict[str, Path] = {k: base_path / v for k, v in jsonRaw["paths"].items()}
        self.main_exe_name = jsonRaw["main_exe_name"]
        # super().__init__("TOR", str(self.paths["encoding_table"]), "Tales-Of-Rebirth")

        with open(self.paths["encoding_table"], encoding="utf-8") as f:
            jsonRaw = json.load(f)

        for k, v in jsonRaw.items():
            self.jsonTblTags[k] = {int(k2, 16): v2 for k2, v2 in v.items()}

        for k, v in self.jsonTblTags.items():
            self.ijsonTblTags[k] = {v2: k2 for k2, v2 in v.items()}

        self.id = 1

        # byteCode
        self.story_byte_code = b"\xF8"
        self.string_opcode = InstructionType.STRING

        self.list_status_insertion: list[str] = ['Done', 'Proofreading', 'Editing']

    # Extract the story files
    def extract_all_story(self, replace=False) -> None:
        print("Extracting Story files...")
        folder_path = self.paths["story_xml"]
        folder_path.mkdir(exist_ok=True)
        scpk_path = self.paths["extracted_files"] / "DAT" / "SCPK"

        for file in tqdm(list(scpk_path.glob("*.scpk"))):
            theirsce = Theirsce(Scpk.from_path(file).rsce)
            xml_text = self.get_xml_from_theirsce(theirsce, "Story")
            self.id = 1

            with open(folder_path / file.with_suffix(".xml").name, "wb") as xml:
                xml.write(xml_text)

    # Extract all the skit files
    def extract_all_skits(self, replace=False) -> None:
        print("Extracting Skit files...")
        folder_path = self.paths["skit_xml"]
        folder_path.mkdir(exist_ok=True)
        pak2_path = self.paths["extracted_files"] / "DAT" / "PAK2"

        for file in tqdm(list(pak2_path.glob("*.pak2"))):
            with open(file, "rb") as pak:
                theirsce = pak2lib.get_theirsce_from_pak2(pak.read())

            xml_text = self.get_xml_from_theirsce(Theirsce(theirsce), "Skits")
            xml_name = file.name.split(".")[0] + ".xml"

            with open(folder_path / xml_name, "wb") as xml:
                xml.write(xml_text)

    # Extract THEIRSCE to XML
    def get_xml_from_theirsce(self, rsce: Theirsce, section: str) -> bytes:
        # Create the XML file
        # root = etree.Element('SceneText')
        # etree.SubElement(root, "OriginalName").text = file_name
        # pointers_offset, texts_offset = self.extract_Story_Pointers(rsce)
        names, lines = self.extract_lines_with_speaker(rsce)

        # enumerate starts at -1: entries that end up with Id -1 are skipped on reinsertion
        for i, (k, v) in enumerate(names.items(), -1):
            names[k] = NameEntry(i, v)

        # Remove duplicates
        # list_informations = self.remove_duplicates(["Story"] * len(pointers_offset), pointers_offset, text_list)
        # list_lines = (['Story', line.offset, line.text] for line in lines)
        # list_names = (['Story', line.offset, line.text] for i, (k, v) in enumerate(found_names.items()))

        # Build the XML structure with the information
        root = etree.Element("SceneText")
        speakers_node = etree.SubElement(root, 'Speakers')
        etree.SubElement(speakers_node, 'Section').text = "Speaker"
        strings_node = etree.SubElement(root, 'Strings')
        etree.SubElement(strings_node, 'Section').text = section

        self.make_speakers_section(speakers_node, names)
        self.make_strings_section(strings_node, lines, names)

        # Return the XML as a string
        return etree.tostring(root, encoding="UTF-8", pretty_print=True)

    def make_strings_section(self, root, lines: list[LineEntry], names: dict[str, NameEntry]):
        for line in lines:
            entry_node = etree.SubElement(root, "Entry")
            etree.SubElement(entry_node, "PointerOffset").text = str(line.offset)

            text_split = list(filter(None, re.split(self.COMMON_TAG, line.text)))
            if len(text_split) > 1 and text_split[0].startswith("<voice:"):
                # A leading voice tag goes into its own node
                # (assumed reconstruction: the body of this branch and the nodes below
                # were lost; node names follow get_node_bytes/get_new_theirsce)
                etree.SubElement(entry_node, "VoiceId").text = text_split[0][7:-1]
                etree.SubElement(entry_node, "JapaneseText").text = "".join(text_split[1:])
            else:
                etree.SubElement(entry_node, "JapaneseText").text = line.text

            etree.SubElement(entry_node, "EnglishText")
            etree.SubElement(entry_node, "Status").text = "To Do"  # (assumed default)

            if line.names:
                # (assumed node name) link the entry back to its speaker(s)
                etree.SubElement(entry_node, "SpeakerId").text = ",".join(str(names[n].index) for n in line.names)

            etree.SubElement(entry_node, "Id").text = str(self.id)
            self.id += 1

    def make_speakers_section(self, root, names: dict[str, NameEntry]):
        # (assumed reconstruction) one entry per speaker, with every pointer offset
        # that references the name joined into PointerOffset, as get_new_theirsce expects
        for name, entry in names.items():
            entry_node = etree.SubElement(root, "Entry")
            etree.SubElement(entry_node, "PointerOffset").text = ",".join(str(o) for o in entry.offsets)
            etree.SubElement(entry_node, "JapaneseText").text = name
            etree.SubElement(entry_node, "EnglishText")
            etree.SubElement(entry_node, "Status").text = "To Do"  # (assumed default)
            etree.SubElement(entry_node, "Id").text = str(entry.index)

    # Walk the THEIRSCE bytecode and pair every textbox text with its speaker(s)
    def extract_lines_with_speaker(self, theirsce: Theirsce) -> tuple[dict[str, list[int]], list[LineEntry]]:
        # NOTE: the setup below is an assumed reconstruction; it does not reproduce
        # the original bookkeeping that fills `params` and resets `used`.
        names: dict[str, list[int]] = {VARIABLE_NAME: []}  # (assumed) placeholder speaker, gets index -1 later
        lines: list[LineEntry] = []
        params: list[TheirsceBaseInstruction] = []
        used = False

        # Look at the instructions three at a time (op1, op2, op3)
        op1_it, op2_it, op3_it = tee(theirsce.walk_code(), 3)
        next(op2_it, None)
        next(op3_it, None)
        next(op3_it, None)
        window = zip(op1_it, op2_it, op3_it)

        def skip(count: int = 2) -> None:
            # Drop the instructions already consumed by the matched pattern
            for _ in range(count):
                next(window, None)

        for op1, op2, op3 in window:
            # (assumed condition) textbox call whose name field is a variable while
            # the text is a plain string; the speakers were pushed earlier as params
            if (op1.type is InstructionType.REFERENCE
                    and op2.type is InstructionType.STRING
                    and op3.type is InstructionType.SYSCALL
                    and op3.function_index == 0x45):
                if len(params) >= 1:
                    name = [self.bytes_to_text(theirsce, p.offset + theirsce.strings_offset) for p in params]
                    for n, p in zip(name, params):
                        names.setdefault(n, []).append(p.position + 1)
                elif len(params) == 0:
                    name = []

                text = self.bytes_to_text(theirsce, op2.offset + theirsce.strings_offset)
                lines.append(LineEntry(name, text, op2.position + 1))
                # print(f"{params}: {text}")
                used = True
                skip()
                continue

            # This sequence represents the textbox call with
            # the text being a variable (Notice boxes do this)
            if (op1.type is InstructionType.STRING
                    and op2.type is InstructionType.REFERENCE
                    and op3.type is InstructionType.SYSCALL
                    and op3.function_index == 0x45):
                name = [self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)]
                names.setdefault(name[0], []).append(op1.position + 1)

                for param in params:
                    text = self.bytes_to_text(theirsce, param.offset + theirsce.strings_offset)
                    lines.append(LineEntry(name, text, param.position + 1))
                    # print(f"{text}: {name}")

                used = True
                params.clear()
                skip()
                continue

            # This sequence represents a regular textbox call where both fields
            # are a string (everything else, save for skits)
            if (op1.type is InstructionType.STRING
                    and op2.type is InstructionType.STRING
                    and op3.type is InstructionType.SYSCALL
                    and op3.function_index == 0x45):
                name = [self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)]
                names.setdefault(name[0], []).append(op1.position + 1)
                text = self.bytes_to_text(theirsce, op2.offset + theirsce.strings_offset)
                lines.append(LineEntry(name, text, op2.position + 1))
                # print(f"{name}: {text}")
                skip()
                continue

            # Any other string in assorted code calls
            if op1.type is InstructionType.STRING:
                # print(theirsce.read_string_at(op1.offset + theirsce.strings_offset))
                text = self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)
                lines.append(LineEntry([], text, op1.position + 1))
                continue

        return names, lines
    def extract_story_pointers_plain(self, theirsce: Theirsce):
        pointers_offset = []
        texts_offset = []

        for opcode in theirsce.walk_code():
            if opcode.type == self.string_opcode:
                pointers_offset.append(theirsce.tell() - 2)  # Maybe check this later
                texts_offset.append(opcode.offset + theirsce.strings_offset)

        return pointers_offset, texts_offset

    # Convert a bytes object to text using the TAGS and TBL tables from the json file
    def bytes_to_text(self, src: FileIO, offset: int = -1) -> str:
        finalText = ""
        tags = self.jsonTblTags['TAGS']
        chars = self.jsonTblTags['TBL']

        if offset > 0:
            src.seek(offset, 0)

        while True:
            b = src.read(1)
            if b == b"\x00":
                break

            b = ord(b)

            # Custom encoded text
            if (0x99 <= b <= 0x9F) or (0xE0 <= b <= 0xEB):
                c = (b << 8) | src.read_uint8()
                finalText += chars.get(c, "{%02X}{%02X}" % (c >> 8, c & 0xFF))
                continue

            # Line break
            if b == 0x1:
                finalText += "\n"
                continue

            # ASCII text
            if chr(b) in self.PRINTABLE_CHARS:
                finalText += chr(b)
                continue

            # cp932 text
            if 0xA0 < b < 0xE0:
                finalText += struct.pack("B", b).decode("cp932")
                continue

            # Full-width space or an unknown 0x81 sequence
            if b == 0x81:
                next_b = src.read(1)
                if next_b == b"\x40":
                    finalText += " "
                else:
                    finalText += "{%02X}" % b
                    finalText += "{%02X}" % ord(next_b)
                continue

            # Simple tags
            if 0x3 <= b <= 0xF:
                parameter = src.read_uint32()
                tag_name = tags.get(b, f"{b:02X}")
                tag_param = self.jsonTblTags.get(tag_name.upper(), {}).get(parameter, None)

                if tag_param is not None:
                    finalText += f"<{tag_param}>"
                else:
                    finalText += f"<{tag_name}:{parameter:X}>"
                continue

            # Variable tags (same as above but using rsce bytecode as parameter)
            if 0x13 <= b <= 0x1A:
                tag_name = f"unk{b:02X}"
                parameter = "".join([f"{c:02X}" for c in Theirsce.read_tag_bytes(src)])
                finalText += f"<{tag_name}:{parameter}>"
                continue

            # None of the above
            finalText += "{%02X}" % b

        return finalText

    def get_node_bytes(self, entry_node):
        # Grab the fields from the Entry in the XML
        status = entry_node.find("Status").text
        japanese_text = entry_node.find("JapaneseText").text
        english_text = entry_node.find("EnglishText").text

        # Use the English text when the status allows insertion and it is
        # non-empty; fall back to the Japanese text otherwise
        if status in self.list_status_insertion:
            final_text = english_text or japanese_text or ''
        else:
            final_text = japanese_text or ''

        voiceId_node = entry_node.find("VoiceId")
        if voiceId_node is not None:
            # Re-attach the voice tag in front of the line
            # (assumed tag name; the original format string was lost)
            final_text = '<voice:{}>'.format(voiceId_node.text) + final_text

        # Convert the text values to bytes using TBL, TAGS, COLORS, ...
        bytes_entry = self.text_to_bytes(final_text)

        return bytes_entry

    def get_new_theirsce(self, theirsce: Theirsce, xml: Path) -> Theirsce:
        # To store the new text offsets and the pointers to update
        new_text_offsets = dict()

        # Read the XML for the corresponding THEIRSCE
        tree = etree.parse(xml)
        root = tree.getroot()

        # Go to the start of the dialog,
        # then loop over every Entry and reinsert it
        theirsce.seek(theirsce.strings_offset + 1)
        nodes = [ele for ele in root.iter('Entry') if ele.find('Id').text != "-1"]
        nodes = [ele for ele in nodes if ele.find('PointerOffset').text != "-1"]

        for entry_node in nodes:
            # Add the PointerOffset and TextOffset
            new_text_offsets[entry_node.find("PointerOffset").text] = theirsce.tell()

            # Use the node to get the new bytes
            bytes_entry = self.get_node_bytes(entry_node)

            # Write to the file
            theirsce.write(bytes_entry + b'\x00')

        # Update the pointers based on the new text offset of the entries
        for pointer_offset, text_offset in new_text_offsets.items():
            pointers_list = pointer_offset.split(",")
            new_value = text_offset - theirsce.strings_offset

            for pointer in pointers_list:
                theirsce.seek(int(pointer))
                # String operands are relative to strings_offset; "<H" is an assumed
                # reconstruction (the pointers harvested above are 2-byte fields)
                theirsce.write(struct.pack("<H", new_value))

        return theirsce

    # Get the offset and size of every DAT.BIN sub-file from the pointer table in the SLPS
    def get_datbin_file_data(self) -> list[tuple[int, int]]:
        slps_path = self.paths["original_files"] / self.main_exe_name

        with open(slps_path, "rb") as elf:
            elf.seek(self.POINTERS_BEGIN, 0)
            blob = elf.read(self.POINTERS_END - self.POINTERS_BEGIN)
            pointers = struct.unpack(f"<{len(blob)//4}I", blob)

        file_data: list[tuple[int, int]] = []
        for c, n in zip(pointers, pointers[1:]):
            remainder = c & self.LOW_BITS
            start = c & self.HIGH_BITS
            end = (n & self.HIGH_BITS) - remainder
            file_data.append((start, end - start))

        return file_data

    # Extract the DAT.BIN file into the different directories
    def extract_main_archive(self) -> None:
        dat_bin_path = self.paths["extracted_files"] / "DAT"
        self.clean_folder(dat_bin_path)

        print("Extracting DAT.BIN files...")
        with open(self.dat_bin_original, "rb") as f:
            for i, (offset, size) in enumerate(tqdm(self.get_datbin_file_data(),
                                                    desc="Extracting files", unit="file")):
                # Ignore 0 byte files
                if size == 0:
                    continue

                f.seek(offset, 0)
                data = f.read(size)

                if comptolib.is_compressed(data):
                    c_type = struct.unpack("<b", data[:1])[0]
                    data = comptolib.decompress_data(data)
                    # (assumed) keep the compression type in the file name so that
                    # pack_main_archive() can recompress with the same version
                    comp_ext = f".{c_type}"
                else:
                    comp_ext = ""

                # (assumed reconstruction of the lost tail of this method) sort the
                # files into sub-folders named after their detected type, as the
                # SCPK/PAK2 extractors expect; get_extension() is assumed to come
                # from ToolsTales
                extension = self.get_extension(data)
                final_path = dat_bin_path / extension.upper()
                final_path.mkdir(parents=True, exist_ok=True)

                with open(final_path / f"{i:05d}{comp_ext}.{extension}", "wb") as output:
                    output.write(data)

    # Follow a "style" pattern to harvest pointers from a menu file section.
    # NOTE: the beginning of this method is an assumed reconstruction; only the
    # inner pointer check and the return are original.
    def get_style_pointers(self, text_start: int, text_max: int, base_offset: int,
                           pointers_start, style: str, file: FileIO):
        file.seek(int(pointers_start))
        pointers_offset: list[int] = []
        pointers_value: list[int] = []
        ok = True

        while ok:
            for step in style:
                if step == "P":
                    # A pointer slot: keep it only if it lands inside the text block
                    text_offset = file.read_uint32() + base_offset
                    if text_offset >= text_start and text_offset < text_max:
                        pointers_value.append(text_offset)
                        pointers_offset.append(file.tell() - 4)
                    else:
                        ok = False
                else:
                    file.read(int(step))

        return pointers_offset, pointers_value

    def extract_all_menu(self) -> None:
        print("Extracting Menu Files...")
        xml_path = self.paths["menu_xml"]
        xml_path.mkdir(exist_ok=True)

        # Read the json descriptor file
        with open(self.paths["menu_table"], encoding="utf-8") as f:
            menu_json = json.load(f)

        for entry in tqdm(menu_json):
            if entry["file_path"] == "${main_exe}":
                file_path = self.paths["original_files"] / self.main_exe_name
            else:
                file_path = self.paths["extracted_files"] / "DAT" / entry["file_path"]

            if entry["is_pak"]:
                pak = Pak.from_path(file_path, int(entry["pak_type"]))

                for p_file in entry["files"]:
                    f_index = int(p_file["file"])

                    with FileIO(pak[f_index].data, "rb") as f:
                        xml_data = self.extract_menu_file(p_file, f)

                    with open(xml_path / f"{file_path.stem}_{f_index:03d}.xml", "wb") as xmlFile:
                        xmlFile.write(xml_data)
            else:
                with FileIO(file_path, "rb") as f:
                    xml_data = self.extract_menu_file(entry, f)

                with open(xml_path / f"{file_path.stem}.xml", "wb") as xmlFile:
                    xmlFile.write(xml_data)

    def extract_menu_file(self, file_def, f: FileIO):
        section_list = []
        pointers_offset_list = []
        texts_list = []

        base_offset = int(file_def["base_offset"])
        xml_root = etree.Element("MenuText")
        # print("BaseOffset:{}".format(base_offset))

        for section in file_def['sections']:
            text_start = int(section['text_start'])
            text_end = int(section['text_end'])

            # Extract the pointers of the file
            # print("Extract Pointers")
            pointers_offset, pointers_value = self.get_style_pointers(
                text_start, text_end, base_offset,
                section['pointers_start'], section['style'], f)
            # print([hex(pv) for pv in pointers_value])

            # Extract the text from the pointers
            # print("Extract Text")
            texts = [self.bytes_to_text(f, ele) for ele in pointers_value]

            # Make a list
            section_list.extend([section['section']] * len(texts))
            pointers_offset_list.extend(pointers_offset)
            texts_list.extend(texts)

        # Remove duplicates
        list_informations = self.remove_duplicates(section_list, pointers_offset_list, texts_list)

        # Build the XML structure with the information
        self.create_Node_XML(xml_root, list_informations, section['section'])

        # Write to the XML file
        return etree.tostring(xml_root, encoding="UTF-8", pretty_print=True)

    def create_Node_XML(self, root, list_informations, section):
        strings_node = etree.SubElement(root, 'Strings')
        etree.SubElement(strings_node, 'Section').text = section

        for s, pointers_offset, text in list_informations:
            self.create_Entry(strings_node, pointers_offset, text)

        return root

    def pack_main_archive(self):
        sectors: list[int] = [0]
        remainders: list[int] = []
        buffer = 0

        # Copy the original SLPS to Disc/New
        shutil.copy(self.elf_original, self.elf_new)

        print("Packing DAT.BIN files...")
        output_dat_path = self.paths["final_files"] / "DAT.BIN"
        original_files = self.paths["extracted_files"] / "DAT"
        total_files = (self.POINTERS_END - self.POINTERS_BEGIN) // 4

        # Get all original DAT.BIN files
        file_list: dict[int, Path] = {}
        for file in original_files.glob("*/*"):
            file_index = int(file.name[:5])
            file_list[file_index] = file

        # Overlay whatever we have compiled
        for file in self.paths["patched_temp"].glob("*/*"):
            file_index = int(file.name[:5])
            file_list[file_index] = file

        with open(output_dat_path, "wb") as output_dat:
            for i in tqdm(range(total_files)):
                file = file_list.get(i)

                if not file:
                    remainders.append(0)
                    sectors.append(buffer)
                    continue

                with open(file, "rb") as f2:
                    data = f2.read()

                comp_type = re.search(self.VALID_FILE_NAME, file.name).group(2)
                if comp_type is not None:
                    data = comptolib.compress_data(data, version=int(comp_type))

                output_dat.write(data)
                size = len(data)

                # Pad each file to the next 0x40 boundary
                remainder = 0x40 - (size % 0x40)
                if remainder == 0x40:
                    remainder = 0
                output_dat.write(b"\x00" * remainder)

                remainders.append(remainder)
                buffer += size + remainder
                sectors.append(buffer)

        # Copy the SLPS and update the DAT.BIN pointer table inside it
        original_slps = self.paths["original_files"] / self.main_exe_name
        patched_slps = self.paths["final_files"] / self.main_exe_name

        with open(original_slps, "rb") as f:
            slps = f.read()

        with open(patched_slps, "wb") as f:
            f.write(slps)
            f.seek(self.POINTERS_BEGIN)

            for sector, remainder in zip(sectors, remainders):
                # Each 32-bit pointer packs the 0x40-aligned file start in the high
                # bits and the padding that follows the file in the low 6 bits
                # ("<I" and the sum below are an assumed reconstruction)
                f.write(struct.pack("<I", sector + remainder))

    # Extract the files from the ISO
    def extract_iso(self, umd_iso: Path) -> None:
        print("Extracting ISO files...")

        iso = pycdlib.PyCdlib()
        iso.open(str(umd_iso))

        extract_to = self.paths["original_files"]
        self.clean_folder(extract_to)

        files = []
        for dirname, _, filelist in iso.walk(iso_path="/"):
            files += [dirname + x for x in filelist]

        for file in files:
            out_path = extract_to / file[1:]
            out_path.parent.mkdir(parents=True, exist_ok=True)

            with iso.open_file_from_iso(iso_path=file) as f, \
                 open(str(out_path).split(";")[0], "wb+") as output:
                with tqdm(total=f.length(), desc=f"Extracting {file[1:].split(';')[0]}",
                          unit="B", unit_divisor=1024, unit_scale=True) as pbar:
                    while data := f.read(2048):
                        output.write(data)
                        pbar.update(len(data))

        iso.close()

    def clean_folder(self, path: Path) -> None:
        target_files = list(path.iterdir())
        if len(target_files) != 0:
            print("Cleaning folder...")

        for file in target_files:
            if file.is_dir():
                shutil.rmtree(file)
            elif file.name.lower() != ".gitignore":
                file.unlink(missing_ok=False)
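

# A minimal usage sketch, not part of the original module: it assumes a project
# json whose "paths" table defines the keys used above ("original_files",
# "extracted_files", "story_xml", "skit_xml", "menu_xml", ...); the file names
# below ("project.json", the .iso path) are illustrative only.
if __name__ == "__main__":
    tools = ToolsTOR(Path("../Data/Tales-Of-Rebirth/project.json"))

    # 1. Dump the disc, then split DAT.BIN into its sub-files
    tools.extract_iso(Path("../Data/Tales-Of-Rebirth/Tales_of_Rebirth.iso"))
    tools.extract_main_archive()

    # 2. Extract the translatable text to XML
    tools.extract_all_story()
    tools.extract_all_skits()
    tools.extract_all_menu()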