"""Tool class for the Tales of Rebirth (PS2) fan-translation pipeline.

Handles extraction/insertion of text for DAT.BIN sub-archives: story SCPK
files, skit PAK2 files and the THEIRSCE bytecode they contain, plus the
pointer table stored in the game ELF (SLPS_254.50).

NOTE(review): this file was recovered from a whitespace-mangled paste that
had also been run through an HTML-tag stripper (every ``<letter...>`` span
was deleted).  Regions that could not be recovered verbatim are explicitly
marked with ``NOTE(review)`` comments and MUST be checked against version
control before this module is trusted.
"""

import io
import json
import os
import re
import shutil
import struct
from dataclasses import dataclass
from itertools import tee
from pathlib import Path

import lxml.etree as etree
import pandas as pd
from tqdm import tqdm

from pythonlib.formats.scpk import Scpk
import pythonlib.utils.comptolib as comptolib
import pythonlib.formats.pak2 as pak2lib
from pythonlib.formats.theirsce import Theirsce
from pythonlib.formats.theirsce_instructions import (AluOperation,
                                                    InstructionType,
                                                    TheirsceBaseInstruction)

from .ToolsTales import ToolsTales


@dataclass
class LineEntry:
    # Speaker name(s) attached to the line (empty for narration/plain strings).
    names: list[str]
    # The decoded text of the line.
    text: str
    # Pointer position of the string inside the THEIRSCE blob.
    offset: int


@dataclass
class NameEntry:
    # Sequential speaker id used in the XML.
    index: int
    # Every pointer position that references this speaker string.
    offsets: list[int]


# Placeholder used when a speaker is a runtime variable rather than a literal.
VARIABLE_NAME = "[VARIABLE]"


class ToolsTOR(ToolsTales):
    """Extraction/insertion toolbox for Tales of Rebirth."""

    POINTERS_BEGIN = 0xD76B0  # Offset to DAT.BIN pointer list start in SLPS_254.50 file
    POINTERS_END = 0xE60C8    # Offset to DAT.BIN pointer list end in SLPS_254.50 file
    # DAT.BIN pointers pack a 0x40-aligned start offset in the high bits and
    # the padding (remainder) of the previous file in the low 6 bits.
    HIGH_BITS = 0xFFFFFFC0
    LOW_BITS = 0x3F

    # Paths used
    # fmt: off
    dat_bin_original = '../Data/Tales-Of-Rebirth/Disc/Original/DAT.BIN'
    dat_bin_new      = '../Data/Tales-Of-Rebirth/Disc/New/DAT.BIN'
    elf_original     = '../Data/Tales-Of-Rebirth/Disc/Original/SLPS_254.50'
    elf_new          = '../Data/Tales-Of-Rebirth/Disc/New/SLPS_254.50'
    story_XML_new    = '../Tales-Of-Rebirth/Data/TOR/Story/'   # Story XML files will be extracted here
    story_XML_patch  = '../Data/Tales-Of-Rebirth/Story/'       # Story XML files will be extracted here
    skit_XML_patch   = '../Data/Tales-Of-Rebirth/Skits/'       # Skits XML files will be extracted here
    skit_XML_new     = '../Tales-Of-Rebirth/Data/TOR/Skits/'
    dat_archive_extract = '../Data/Tales-Of-Rebirth/DAT/'
    # fmt: on

    def __init__(self, tbl):
        super().__init__("TOR", tbl, "Tales-Of-Rebirth")

        # Load the character table / tag definitions from the repo's JSON.
        with open("../{}/Data/{}/Misc/{}".format(self.repo_name, self.gameName, self.tblFile),
                  encoding="utf-8") as f:
            jsonRaw = json.load(f)

        # Keys in the JSON are hex strings; index them by their int value,
        # and build the inverse mapping (text -> code) for insertion.
        for k, v in jsonRaw.items():
            self.jsonTblTags[k] = {int(k2, 16): v2 for k2, v2 in v.items()}
        for k, v in self.jsonTblTags.items():
            self.ijsonTblTags[k] = {v2: k2 for k2, v2 in v.items()}

        # Running entry id used while building XML files.
        self.id = 1

        # byteCode
        self.story_byte_code = b"\xF8"
        self.string_opcode = InstructionType.STRING
        # Only entries whose Status is one of these get the English text inserted.
        self.list_status_insertion = ['Done', 'Proofreading', 'Editing']
        self.mkdir('../Data/{}/DAT'.format(self.repo_name))

    def rreplace(self, s, old, new, occurrence):
        """Replace the last *occurrence* occurrences of *old* in *s* with *new*."""
        li = s.rsplit(old, occurrence)
        return new.join(li)

    def add_line_break(self, text):
        """Greedy word-wrap *text* so that no line exceeds 32 characters."""
        temp = ""
        current_line_size = 0
        max_size = 32

        for word in text.split(" "):
            current_line_size += (len(word) + 1)
            if current_line_size <= max_size:
                temp = temp + word + ' '
            else:
                temp = temp + '\n' + word + ' '
                # BUGFIX: the original reset the counter to 0 here, ignoring
                # the word just placed at the start of the new line, which let
                # subsequent lines overflow the 32-character limit.
                current_line_size = len(word) + 1

        temp = temp.replace(" \n", "\n")
        temp = self.rreplace(temp, " ", "", 1)
        return temp

    def clean_text(self, text):
        """Strip line breaks, tags and spaces so Japanese text can be matched
        between the google sheet and the XML files."""
        text = re.sub(r"\n ", "\n", text)
        text = re.sub(r"\n", "", text)
        text = re.sub(r"(<\w+:?\w+>)", "", text)
        text = re.sub(r"\[\w+=*\w+\]", "", text)
        text = re.sub(r" ", "", text)
        text = re.sub(u'\u3000', '', text)
        text = re.sub(r" ", "", text)
        return text

    def extract_Lauren_Translation(self):
        """Load Lauren's story translation from the google sheet into a
        DataFrame with columns File / JapaneseText / EnglishText."""
        df = self.extract_Google_Sheets("1-XwzS7F0SaLlXwv1KS6RcTEYYORH2DDb1bMRy5VM5oo",
                                        "Story")

        # 1) Make some renaming and transformations
        df = df.rename(columns={"KEY": "File",
                                "Japanese": "JapaneseText",
                                "Lauren's Script": "EnglishText"})

        # 2) Filter only relevant rows and columns from the googlesheet
        df = df.loc[(df['EnglishText'] != "") & (df['JapaneseText'] != ""), :]
        df = df[['File', 'JapaneseText', 'EnglishText']]

        # 3) Make some transformations to the JapaneseText so we can better match with XML
        df['File'] = df['File'].apply(lambda x: x.split("_")[0] + ".xml")
        df['JapaneseText'] = df['JapaneseText'].apply(lambda x: self.clean_text(x))

        return df

    def transfer_Lauren_Translation(self):
        """Copy Lauren's translations into the matching story XML entries,
        matching on the cleaned Japanese text."""
        df_lauren = self.extract_Lauren_Translation()

        # Distinct list of XMLs file
        xml_files = list(set(df_lauren['File'].tolist()))
        for file in xml_files:
            cond = df_lauren['File'] == file
            lauren_translations = dict(df_lauren[cond][['JapaneseText', 'EnglishText']].values)

            file_path = self.story_XML_new + 'XML/' + file
            if not os.path.exists(file_path):
                print("File {} skipped because file is not found".format(file))
                continue

            tree = etree.parse(file_path)
            root = tree.getroot()
            need_save = False

            for key, item in lauren_translations.items():
                for entry_node in root.iter("Entry"):
                    xml_jap = entry_node.find("JapaneseText").text or ''
                    xml_eng = entry_node.find("EnglishText").text or ''
                    if key != self.clean_text(xml_jap):
                        continue

                    item = self.add_line_break(item)
                    if xml_eng != item:
                        entry_node.find("EnglishText").text = item
                        need_save = True
                        if entry_node.find("Status").text == "To Do":
                            entry_node.find("Status").text = "Editing"

            if need_save:
                txt = etree.tostring(root, encoding="UTF-8",
                                     pretty_print=True, xml_declaration=False)
                with open(file_path, 'wb') as xml_file:
                    xml_file.write(txt)

    def extract_all_story(self, replace=False) -> None:
        """Extract every story SCPK's THEIRSCE into an XML file."""
        print("Extracting Story files...")

        folder_path = Path(self.story_XML_patch) / "XML"
        scpk_path = Path(self.dat_archive_extract) / "SCPK"

        for file in tqdm(scpk_path.glob("*.scpk")):
            theirsce = Theirsce(Scpk(file).rsce)
            xml_text = self.get_xml_from_theirsce(theirsce, "Story")
            # Restart the entry id counter for each file.
            self.id = 1
            with open(folder_path / file.with_suffix(".xml").name, "wb") as xml:
                xml.write(xml_text)

    def extract_all_skits(self, replace=False) -> None:
        """Extract every skit PAK2's THEIRSCE into an XML file."""
        print("Extracting Skit files...")

        folder_path = Path(self.skit_XML_patch) / "XML"
        pak2_path = Path(self.dat_archive_extract) / "PAK2"

        for file in tqdm(pak2_path.glob("*.pak2")):
            with open(file, "rb") as pak:
                theirsce = pak2lib.get_theirsce_from_pak2(pak.read())
            xml_text = self.get_xml_from_theirsce(Theirsce(theirsce), "Skits")
            with open(folder_path / file.with_suffix(".xml").name, "wb") as xml:
                xml.write(xml_text)

    def get_xml_from_theirsce(self, rsce: Theirsce, section: str) -> bytes:
        """Build the SceneText XML (Speakers + Strings) for one THEIRSCE
        and return it as a UTF-8 byte string."""
        names, lines = self.extract_lines_with_speaker(rsce)

        # Assign sequential speaker ids (starts at -1, matching the original
        # enumerate(..., -1) — presumably reserving the first slot).
        for i, (k, v) in enumerate(names.items(), -1):
            names[k] = NameEntry(i, v)

        # Build the XML structure with the information
        root = etree.Element("SceneText")
        speakers_node = etree.SubElement(root, 'Speakers')
        etree.SubElement(speakers_node, 'Section').text = "Speaker"
        strings_node = etree.SubElement(root, 'Strings')
        etree.SubElement(strings_node, 'Section').text = section

        self.make_speakers_section(speakers_node, names)
        self.make_strings_section(strings_node, lines, names)

        # Return XML string
        return etree.tostring(root, encoding="UTF-8", pretty_print=True)

    def make_strings_section(self, root, lines: list[LineEntry],
                             names: dict[str, NameEntry]):
        """Append one <Entry> per extracted line under *root*.

        NOTE(review): the second half of this method was destroyed in the
        mangled source (the span after ``startswith("`` was stripped); the
        voice-tag split and the Entry field layout below are a best-effort
        reconstruction — verify against version control.
        """
        for line in lines:
            entry_node = etree.SubElement(root, "Entry")
            etree.SubElement(entry_node, "PointerOffset").text = str(line.offset)
            text_split = list(filter(None, re.split(self.COMMON_TAG, line.text)))

            if len(text_split) > 1 and text_split[0].startswith("<voice:"):
                # Leading voice tag goes into its own field.
                etree.SubElement(entry_node, "VoiceId").text = \
                    text_split[0][1:-1].split(":")[1]
                etree.SubElement(entry_node, "JapaneseText").text = \
                    "".join(text_split[1:])
            else:
                etree.SubElement(entry_node, "JapaneseText").text = line.text

            etree.SubElement(entry_node, "EnglishText")
            etree.SubElement(entry_node, "Notes")
            if line.names:
                etree.SubElement(entry_node, "SpeakerId").text = \
                    ",".join(str(names[n].index) for n in line.names)
            etree.SubElement(entry_node, "Id").text = str(self.id)
            self.id += 1
            etree.SubElement(entry_node, "Status").text = "To Do"

    def make_speakers_section(self, root, names: dict[str, NameEntry]):
        """Append one <Entry> per speaker under *root*.

        NOTE(review): this method's body was entirely destroyed in the
        mangled source; reconstructed by symmetry with
        make_strings_section — verify against version control.
        """
        for name, entry in names.items():
            entry_node = etree.SubElement(root, "Entry")
            entry_node_offsets = ",".join(str(off) for off in entry.offsets)
            etree.SubElement(entry_node, "PointerOffset").text = entry_node_offsets
            etree.SubElement(entry_node, "JapaneseText").text = name
            etree.SubElement(entry_node, "EnglishText")
            etree.SubElement(entry_node, "Notes")
            etree.SubElement(entry_node, "Id").text = str(entry.index)
            etree.SubElement(entry_node, "Status").text = "To Do"

    def extract_lines_with_speaker(self, theirsce: Theirsce):
        """Walk the THEIRSCE bytecode and pair each textbox string with its
        speaker name(s).

        Returns ``(names, lines)`` where *names* maps a speaker string to the
        pointer positions referencing it, and *lines* is a list of LineEntry.

        NOTE(review): the opening of this method (window construction,
        parameter bookkeeping and the condition of the first branch) was
        destroyed in the mangled source; that part is a best-effort
        reconstruction inferred from the surviving branches — verify against
        version control.
        """
        names: dict[str, list[int]] = {}
        lines: list[LineEntry] = []
        params: list = []      # pending REFERENCE instructions (potential speakers)
        used = False           # params were consumed by a textbox call

        opcodes = list(theirsce.walk_code())
        total = len(opcodes)
        idx = 0
        while idx < total:
            op1 = opcodes[idx]
            # Repeat the last instruction when the window runs off the end so
            # the pattern checks below stay valid (they can never match then).
            op2 = opcodes[idx + 1] if idx + 1 < total else op1
            op3 = opcodes[idx + 2] if idx + 2 < total else op1
            idx += 1

            def skip(_step=2):
                # Jump the window past the two instructions just consumed.
                nonlocal idx
                idx += _step

            # This sequence represents a textbox call whose speaker(s) are
            # the accumulated references and whose text is a string literal.
            # (Reconstructed condition — see docstring.)
            if (op1.type is InstructionType.REFERENCE
                and op2.type is InstructionType.STRING
                and op3.type is InstructionType.SYSCALL
                and op3.function_index == 0x45
            ):
                if len(params) >= 1:
                    name = [self.bytes_to_text(theirsce, p.offset + theirsce.strings_offset)
                            for p in params]
                    for n, p in zip(name, params):
                        names.setdefault(n, []).append(p.position + 1)
                elif len(params) == 0:
                    name = []
                text = self.bytes_to_text(theirsce, op2.offset + theirsce.strings_offset)
                lines.append(LineEntry(name, text, op2.position + 1))
                used = True
                skip()
                continue

            # This sequence represents the textbox call with
            # the text being a variable (Notice boxes do this)
            if (op1.type is InstructionType.STRING
                and op2.type is InstructionType.REFERENCE
                and op3.type is InstructionType.SYSCALL
                and op3.function_index == 0x45
            ):
                name = [self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)]
                names.setdefault(name[0], []).append(op1.position + 1)
                for param in params:
                    text = self.bytes_to_text(theirsce, param.offset + theirsce.strings_offset)
                    lines.append(LineEntry(name, text, param.position + 1))
                used = True
                params.clear()
                skip()
                continue

            # This sequence represents a regular textbox call
            # where both fields are a string (everything else, save for skits)
            if (op1.type is InstructionType.STRING
                and op2.type is InstructionType.STRING
                and op3.type is InstructionType.SYSCALL
                and op3.function_index == 0x45
            ):
                name = [self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)]
                names.setdefault(name[0], []).append(op1.position + 1)
                text = self.bytes_to_text(theirsce, op2.offset + theirsce.strings_offset)
                lines.append(LineEntry(name, text, op2.position + 1))
                skip()
                continue

            # Any other string in assorted code calls
            if op1.type is InstructionType.STRING:
                text = self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)
                lines.append(LineEntry([], text, op1.position + 1))
                continue

            # (Reconstructed) remember stray references so a later textbox
            # call can use them as speaker names; drop the ones already used.
            if op1.type is InstructionType.REFERENCE:
                if used:
                    params.clear()
                    used = False
                params.append(op1)
                continue

        return names, lines

    def extract_story_pointers_plain(self, theirsce: Theirsce):
        """Collect the file positions of every STRING opcode pointer and the
        absolute offsets of the strings they target."""
        pointers_offset = []
        texts_offset = []

        for opcode in theirsce.walk_code():
            if opcode.type == self.string_opcode:
                pointers_offset.append(theirsce.tell() - 2)  # Maybe check this later
                texts_offset.append(opcode.offset + theirsce.strings_offset)

        return pointers_offset, texts_offset

    def bytes_to_text(self, theirsce: Theirsce, offset=-1, end_strings=b"\x00"):
        """Decode the in-game byte string at *offset* (or the current
        position) into text, using the TAGS and TBL tables from the JSON."""
        finalText = ""
        tags = self.jsonTblTags['TAGS']
        chars = self.jsonTblTags['TBL']

        if offset > 0:
            theirsce.seek(offset, 0)

        # NOTE(review): this pre-read discards one byte before the loop —
        # it was present in the recovered source; confirm the string format
        # really has a throwaway lead byte before "fixing" it.
        b = theirsce.read(1)
        while True:
            b = theirsce.read(1)
            if b == end_strings:
                break
            b = ord(b)

            # Custom Encoded Text (two-byte codes looked up in the TBL)
            if (0x99 <= b <= 0x9F) or (0xE0 <= b <= 0xEB):
                c = (b << 8) | theirsce.read_uint8()
                finalText += chars.get(c, "{%02X}{%02X}" % (c >> 8, c & 0xFF))
                continue

            if b == 0x1:
                finalText += "\n"
                continue

            # ASCII text
            if chr(b) in self.PRINTABLE_CHARS:
                finalText += chr(b)
                continue

            # cp932 text
            if 0xA0 < b < 0xE0:
                finalText += struct.pack("B", b).decode("cp932")
                continue

            # 0x81 0x40 is the cp932 full-width space
            if b == 0x81:
                next_b = theirsce.read(1)
                if next_b == b"\x40":
                    finalText += " "
                else:
                    finalText += "{%02X}" % b
                    finalText += "{%02X}" % ord(next_b)
                continue

            # Simple Tags (opcode byte + 32-bit parameter)
            if 0x3 <= b <= 0xF:
                parameter = theirsce.read_uint32()
                tag_name = tags.get(b, f"{b:02X}")
                tag_param = self.jsonTblTags.get(tag_name.upper(), {}).get(parameter, None)
                if tag_param is not None:
                    finalText += tag_param
                else:
                    finalText += f"<{tag_name}:{self.hex2(parameter)}>"
                continue

            # Variable tags (same as above but using rsce bytecode as parameter)
            if 0x13 <= b <= 0x1A:
                tag_name = f"unk{b:02X}"
                parameter = "".join([f"{c:02X}" for c in theirsce.read_tag_bytes()])
                finalText += f"<{tag_name}:{parameter}>"
                continue

            # None of the above
            finalText += "{%02X}" % b

        return finalText

    def get_Node_Bytes(self, entry_node):
        """Serialize one XML Entry node back to in-game bytes, honouring the
        entry's Status and re-attaching its voice tag."""
        # Grab the fields from the Entry in the XML
        status = entry_node.find("Status").text
        japanese_text = entry_node.find("JapaneseText").text
        english_text = entry_node.find("EnglishText").text

        # Use the English text only for permitted statuses, falling back to Japanese
        if status in self.list_status_insertion:
            final_text = english_text or japanese_text or ''
        else:
            final_text = japanese_text or ''

        voiceId_node = entry_node.find("VoiceId")
        if voiceId_node is not None:
            # NOTE(review): the tag literal was stripped by the mangling
            # (only ''.format(...) survived); '<voice:{}>' is the evident
            # original, mirroring the split in make_strings_section.
            final_text = '<voice:{}>'.format(voiceId_node.text) + final_text

        # Convert the text values to bytes using TBL, TAGS, COLORS, ...
        return self.text_to_bytes(final_text)

    def get_New_Theirsce(self, theirsce, scpk_file_name, destination):
        """Rebuild a THEIRSCE stream with the translated text from its XML
        file and return the patched stream.

        NOTE(review): everything after the header parsing was destroyed in
        the mangled source; the insertion loop below is a best-effort
        reconstruction and MUST be verified against version control.
        """
        # To store the new text offsets and the pointers to update
        new_text_offsets = dict()

        # Grab strings_offset for pointers
        theirsce.read(12)
        strings_offset = struct.unpack("<I", theirsce.read(4))[0]

        # Load the XML holding the translations for this file.
        xml_path = os.path.join(destination, scpk_file_name.replace(".scpk", ".xml"))
        root = etree.parse(xml_path).getroot()

        # Append each translated string at the end of the stream, remembering
        # where it landed for every pointer that referenced the original.
        pointers_offset, _ = self.extract_story_pointers_plain(theirsce)
        theirsce.seek(0, io.SEEK_END)
        for pointer_offset, entry_node in zip(pointers_offset, root.iter("Entry")):
            new_text_offsets[pointer_offset] = theirsce.tell() - strings_offset
            theirsce.write(self.get_Node_Bytes(entry_node) + b"\x00")

        # Rewrite every pointer with the offset of its new string.
        for pointer_offset, text_offset in new_text_offsets.items():
            theirsce.seek(pointer_offset)
            # NOTE(review): pointer width assumed 16-bit (pointers sit 2
            # bytes before tell() in extract_story_pointers_plain) — confirm.
            theirsce.write(struct.pack("<H", text_offset))

        return theirsce

    def get_datbin_file_data(self) -> dict[int, int]:
        """Read the DAT.BIN pointer table from the original ELF and return a
        mapping of raw pointer value -> file size."""
        with open(self.elf_original, "rb") as elf:
            elf.seek(self.POINTERS_BEGIN, 0)
            blob = elf.read(self.POINTERS_END - self.POINTERS_BEGIN)
            pointers = struct.unpack(f"<{len(blob)//4}L", blob)

        file_data: dict[int, int] = {}
        # Each size is the distance to the next pointer, low bits excluded.
        for c, n in zip(pointers, pointers[1:]):
            remainder = c & self.LOW_BITS
            start = c & self.HIGH_BITS
            end = (n & self.HIGH_BITS) - remainder
            file_data[c] = end - start

        return file_data

    def extract_main_archive(self) -> None:
        """Split DAT.BIN into individual files under dat_archive_extract.

        NOTE(review): the code after the compression check was destroyed in
        the mangled source; the naming/output logic below is a best-effort
        reconstruction — verify against version control.
        """
        print("Extracting DAT bin files...")

        with open(self.dat_bin_original, "rb") as f:
            for i, (offset, size) in enumerate(tqdm(
                    self.get_datbin_file_data().items(),
                    desc="Extracting files", unit="file")):
                # Ignore 0 byte files
                if size == 0:
                    continue
                f.seek(offset, 0)
                data = f.read(size)

                if comptolib.is_compressed(data):
                    c_type = struct.unpack("<b", data[0:1])[0]
                    data = comptolib.decompress_data(data)
                    # Keep the compression version in the name so packing can
                    # re-compress with the same scheme.
                    extension = self.get_extension(data)
                    file_name = "%05d.%d.%s" % (i, c_type, extension)
                else:
                    extension = self.get_extension(data)
                    file_name = "%05d.%s" % (i, extension)

                folder = os.path.join(self.dat_archive_extract, extension.upper())
                self.mkdir(folder)
                with open(os.path.join(folder, file_name), "wb") as output:
                    output.write(data)

    # NOTE(review): the original name and opening of this method were lost in
    # the mangled source; "pack_Main_Archive" follows this class's naming
    # style — verify against version control.
    def pack_Main_Archive(self):
        """Repack every extracted/edited file into a new DAT.BIN and update
        the pointer table inside the new SLPS_254.50."""
        sectors = [0]      # cumulative start offset of each packed file
        remainders = []    # 0x40-alignment padding written after each file
        buffer = 0

        with open(self.dat_bin_new, "wb") as output_dat:
            # Gather every file found under the DAT extraction folder.
            file_list = []
            for path, _subdirs, filenames in os.walk(self.dat_archive_extract):
                if len(filenames) > 0:
                    file_list.extend([os.path.join(path, file) for file in filenames])

            # (kept from the recovered source; appears unused — review)
            list_test = [os.path.splitext(os.path.basename(ele))[0] for ele in file_list]

            previous = -1
            dummies = 0
            for file in tqdm(sorted(file_list, key=self.get_file_name)):
                size = 0
                remainder = 0
                current = int(re.search(self.VALID_FILE_NAME, file).group(1))

                # Emit empty slots for any missing file indices so pointer
                # positions stay aligned with the original archive.
                if current != previous + 1:
                    while previous < current - 1:
                        remainders.append(remainder)
                        buffer += size + remainder
                        sectors.append(buffer)
                        previous += 1
                        dummies += 1

                file_name = self.get_file_name(file)
                # Story and skit archives are taken from their "New" folders.
                if ".scpk" in file:
                    path = os.path.join(self.story_XML_patch, 'New', '{}.scpk'.format(file_name))
                    print(path)
                elif ".pak2" in file:
                    path = os.path.join(self.skit_XML_patch, 'New', '{}.pak2'.format(file_name))
                    print(path)
                else:
                    path = file

                with open(path, "rb") as f2:
                    data = f2.read()

                # Re-compress when the file name carries a compression version.
                comp_type = re.search(self.VALID_FILE_NAME, file).group(2)
                if comp_type is not None:
                    data = comptolib.compress_data(data, version=int(comp_type))
                output_dat.write(data)
                size = len(data)

                # Pad the archive to the next 0x40 boundary.
                remainder = 0x40 - (size % 0x40)
                if remainder == 0x40:
                    remainder = 0
                output_dat.write(b"\x00" * remainder)

                remainders.append(remainder)
                buffer += size + remainder
                sectors.append(buffer)
                previous += 1

        # Use the new SLPS updated and update the pointers for the SCPK
        with open("../Data/{}/Disc/New/SLPS_254.50".format(self.repo_name), "r+b") as output_elf:
            output_elf.seek(self.POINTERS_BEGIN)
            for i in range(len(sectors) - 1):
                # NOTE(review): the source was truncated inside this call;
                # writing start + padding mirrors how get_datbin_file_data
                # decodes the pointers (HIGH_BITS offset | LOW_BITS remainder)
                # — verify against version control.
                output_elf.write(struct.pack("<L", sectors[i] + remainders[i]))