import io
import json
import os
import re
import shutil
import struct
from dataclasses import dataclass
from itertools import tee
from pathlib import Path

import lxml.etree as etree
import pandas as pd
from tqdm import tqdm

from scpk import Scpk
import comptolib as comptolib
import pak2 as pak2lib
from theirsce import Theirsce
from theirsce_instructions import (AluOperation, InstructionType,
                                   TheirsceBaseInstruction)

from .ToolsTales import ToolsTales


@dataclass
class LineEntry:
    names: list[str]
    text: str
    offset: int


@dataclass
class NameEntry:
    index: int
    offsets: list[int]


VARIABLE_NAME = "[VARIABLE]"


class ToolsTOR(ToolsTales):

    POINTERS_BEGIN = 0xD76B0  # Offset to DAT.BIN pointer list start in SLPS_254.50 file
    POINTERS_END = 0xE60C8    # Offset to DAT.BIN pointer list end in SLPS_254.50 file
    HIGH_BITS = 0xFFFFFFC0
    LOW_BITS = 0x3F

    # Paths used by the tool
    dat_bin_original = '../Data/Tales-Of-Rebirth/Disc/Original/DAT.BIN'
    dat_bin_new = '../Data/Tales-Of-Rebirth/Disc/New/DAT.BIN'
    elf_original = '../Data/Tales-Of-Rebirth/Disc/Original/SLPS_254.50'
    elf_new = '../Data/Tales-Of-Rebirth/Disc/New/SLPS_254.50'
    story_XML_new = '../Tales-Of-Rebirth/Data/TOR/Story/'   # Story XML files will be extracted here
    story_XML_patch = '../Data/Tales-Of-Rebirth/Story/'     # Story XML files will be extracted here
    skit_XML_patch = '../Data/Tales-Of-Rebirth/Skits/'      # Skit XML files will be extracted here
    skit_XML_new = '../Tales-Of-Rebirth/Data/TOR/Skits/'
    dat_archive_extract = '../Data/Tales-Of-Rebirth/DAT/'

    def __init__(self, tbl):
        super().__init__("TOR", tbl, "Tales-Of-Rebirth")

        with open("../{}/Data/{}/Misc/{}".format(self.repo_name, self.gameName, self.tblFile), encoding="utf-8") as f:
            jsonRaw = json.load(f)

        for k, v in jsonRaw.items():
            self.jsonTblTags[k] = {int(k2, 16): v2 for k2, v2 in v.items()}

        for k, v in self.jsonTblTags.items():
            self.ijsonTblTags[k] = {v2: k2 for k2, v2 in v.items()}

        self.id = 1

        # byteCode
        self.story_byte_code = b"\xF8"
        self.string_opcode = InstructionType.STRING
        self.list_status_insertion = ['Done', 'Proofreading', 'Editing']

        self.mkdir('../Data/{}/DAT'.format(self.repo_name))

    # Replace n occurrences of a string starting from the right
    def rreplace(self, s, old, new, occurrence):
        li = s.rsplit(old, occurrence)
        return new.join(li)

    def add_line_break(self, text):
        temp = ""
        currentLineSize = 0
        text_size = len(text)
        max_size = 32

        split_space = text.split(" ")
        for word in split_space:
            currentLineSize += (len(word) + 1)
            if currentLineSize <= max_size:
                temp = temp + word + ' '
            else:
                temp = temp + '\n' + word + ' '
                currentLineSize = 0

        temp = temp.replace(" \n", "\n")
        temp = self.rreplace(temp, " ", "", 1)
        return temp

    def clean_text(self, text):
        text = re.sub(r"\n ", "\n", text)
        text = re.sub(r"\n", "", text)
        text = re.sub(r"(<\w+:?\w+>)", "", text)
        text = re.sub(r"\[\w+=*\w+\]", "", text)
        text = re.sub(r" ", "", text)
        text = re.sub(u'\u3000', '', text)
        text = re.sub(r" ", "", text)
        return text

    # Extract/Transform Lauren's translation
    def extract_Lauren_Translation(self):

        # Load Lauren's googlesheet data inside a dataframe
        df = self.extract_Google_Sheets("1-XwzS7F0SaLlXwv1KS6RcTEYYORH2DDb1bMRy5VM5oo", "Story")

        # 1) Make some renaming and transformations
        df = df.rename(columns={"KEY": "File", "Japanese": "JapaneseText", "Lauren's Script": "EnglishText"})

        # 2) Filter only relevant rows and columns from the googlesheet
        df = df.loc[(df['EnglishText'] != "") & (df['JapaneseText'] != ""), :]
        df = df[['File', 'JapaneseText', 'EnglishText']]

        # 3) Make some transformations to the JapaneseText so we can better match with XML
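        # (Assumption for illustration: a KEY of the form "XXXXX_nn" is expected to
        #  point at story file "XXXXX.xml", and the cleaned Japanese string is the
        #  key used to match sheet rows against the XML entries.)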
        df['File'] = df['File'].apply(lambda x: x.split("_")[0] + ".xml")
        df['JapaneseText'] = df['JapaneseText'].apply(lambda x: self.clean_text(x))

        return df

    # Transfer Lauren's translation
    def transfer_Lauren_Translation(self):
        df_lauren = self.extract_Lauren_Translation()

        # Distinct list of XML files
        xml_files = list(set(df_lauren['File'].tolist()))

        for file in xml_files:
            cond = df_lauren['File'] == file
            lauren_translations = dict(df_lauren[cond][['JapaneseText', 'EnglishText']].values)

            file_path = self.story_XML_new + 'XML/' + file
            if os.path.exists(file_path):
                tree = etree.parse(file_path)
                root = tree.getroot()
                need_save = False

                for key, item in lauren_translations.items():
                    for entry_node in root.iter("Entry"):
                        xml_jap = entry_node.find("JapaneseText").text or ''
                        xml_eng = entry_node.find("EnglishText").text or ''
                        xml_jap_cleaned = self.clean_text(xml_jap)

                        if key == xml_jap_cleaned:
                            item = self.add_line_break(item)

                            if xml_eng != item:
                                entry_node.find("EnglishText").text = item
                                need_save = True

                                if entry_node.find("Status").text == "To Do":
                                    entry_node.find("Status").text = "Editing"
                        # else:
                        #     print("File: {} - {}".format(file, key))

                if need_save:
                    txt = etree.tostring(root, encoding="UTF-8", pretty_print=True, xml_declaration=False)
                    with open(file_path, 'wb') as xml_file:
                        xml_file.write(txt)
            else:
                print("File {} skipped because file is not found".format(file))

    # Extract the story files
    def extract_All_Story(self, replace=False):
        print("Extracting Story files")

        i = 1
        self.mkdir(self.story_XML_patch + "XML")

        listFiles = [self.dat_archive_extract + 'SCPK/' + ele
                     for ele in os.listdir(os.path.join(self.dat_archive_extract, "SCPK"))]

        for scpk_file in listFiles:
            # Copy the original SCPK file to the folder used for the new version
            file_name = self.get_file_name(scpk_file)
            shutil.copy(self.dat_archive_extract + "SCPK/" + file_name + '.scpk',
                        self.story_XML_patch + "New/" + file_name + '.scpk')

            theirsce = self.get_theirsce_from_scpk(scpk_file)
            self.extract_TheirSce_XML(theirsce, scpk_file, self.story_XML_patch, "Story", replace)
            self.id = 1

            print("Writing file %05d.." % i, end="\r")  # Not healthy
            i += 1

        print("Writing file %05d..." % (i-1))

    # Extract all the skits files
    def extract_All_Skits(self, replace=False):
        i = 1
        os.makedirs(self.skit_XML_patch + "XML", exist_ok=True)

        list_pak2_files = [self.dat_archive_extract + "PAK2/" + ele
                           for ele in os.listdir(self.dat_archive_extract + "PAK2")]

        for file_path in list_pak2_files:
            if os.path.isfile(file_path) and file_path.endswith(".pak2"):
                # Copy the original PAK2 file to the folder used for the new version
                file_name = self.get_file_name(file_path)
                shutil.copy(self.dat_archive_extract + "PAK2/" + file_name + '.pak2',
                            self.skit_XML_patch + "New/" + file_name + '.pak2')

                with open(file_path, "rb") as pak:
                    data = pak.read()

                theirsce = io.BytesIO(pak2lib.get_theirsce_from_pak2(data))
                self.extract_TheirSce_XML(theirsce, re.sub(r"\.\d+", "", file_path), self.skit_XML_patch, "Skits", replace)

                print("Writing file %05d" % i, end="\r")
                i += 1

        print("Writing file %05d..." % (i-1))
        return
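    # Usage sketch (not part of the class API): assumes the relative folder layout
    # hard-coded above and a TBL json accepted by ToolsTales; the file name below
    # is illustrative only.
    #
    #   tool = ToolsTOR("TBL_All.json")
    #   tool.extract_All_Story(replace=False)    # SCPK -> THEIRSCE -> XML in story_XML_patch
    #   tool.extract_All_Skits(replace=False)    # PAK2 -> THEIRSCE -> XML in skit_XML_patch
    #   tool.transfer_Lauren_Translation()       # pull the Google Sheet rows into the story XML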
    def get_theirsce_from_scpk(self, scpk_file_name, debug=False) -> bytes:
        with open(scpk_file_name, "rb") as scpk:
            header = scpk.read(4)

            if header != b"SCPK":
                # sys.exit(f"{file} is not a .scpk file!")
                raise ValueError("File is not a .scpk file!")

            scpk.read(4)
            nbFiles = struct.unpack("<I", scpk.read(4))[0]

                if len(params) >= 1:
                    name = [self.bytes_to_text(theirsce, p.offset + theirsce.strings_offset) for p in params]
                    [names.setdefault(n, []).append(p.position + 1) for n, p in zip(name, params)]
                elif len(params) == 0:
                    name = []

                text = self.bytes_to_text(theirsce, op2.offset + theirsce.strings_offset)
                lines.append(LineEntry(name, text, op2.position + 1))
                #print(f"{params}: {text}")
                used = True
                skip()
                continue

            # This sequence represents the textbox call with
            # the text being a variable (Notice boxes do this)
            if (op1.type is InstructionType.STRING
                and op2.type is InstructionType.REFERENCE
                and op3.type is InstructionType.SYSCALL
                and op3.function_index == 0x45
            ):
                name = [self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)]
                names.setdefault(name[0], []).append(op1.position + 1)

                for param in params:
                    text = self.bytes_to_text(theirsce, param.offset + theirsce.strings_offset)
                    lines.append(LineEntry(name, text, param.position + 1))
                    #print(f"{text}: {name}")

                used = True
                params.clear()
                skip()
                continue

            # This sequence represents a regular textbox call
            # where both fields are a string (everything else, save for skits)
            if (op1.type is InstructionType.STRING
                and op2.type is InstructionType.STRING
                and op3.type is InstructionType.SYSCALL
                and op3.function_index == 0x45
            ):
                name = [self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)]
                names.setdefault(name[0], []).append(op1.position + 1)

                text = self.bytes_to_text(theirsce, op2.offset + theirsce.strings_offset)
                lines.append(LineEntry(name, text, op2.position + 1))
                #print(f"{name}: {text}")
                skip()
                continue

            # Any other string in assorted code calls
            if op1.type is InstructionType.STRING:
                #print(theirsce.read_string_at(op1.offset + theirsce.strings_offset))
                text = self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)
                lines.append(LineEntry([], text, op1.position + 1))
                continue

        return names, lines

    def extract_story_pointers_plain(self, theirsce: Theirsce):
        pointers_offset = []
        texts_offset = []

        for opcode in theirsce.walk_code():
            if opcode.type == self.string_opcode:
                pointers_offset.append(theirsce.tell() - 2)  # Maybe check this later
                texts_offset.append(opcode.offset + theirsce.strings_offset)

        return pointers_offset, texts_offset

    # Convert a bytes object to text using TAGS and TBL in the json file
    def bytes_to_text(self, theirsce: Theirsce, offset=-1, end_strings=b"\x00"):
        finalText = ""
        tags = self.jsonTblTags['TAGS']
        chars = self.jsonTblTags['TBL']

        if (offset > 0):
            theirsce.seek(offset, 0)
        b = theirsce.read(1)

        while True:
            b = theirsce.read(1)
            if b == end_strings:
                break

            b = ord(b)

            # Custom Encoded Text
            if (0x99 <= b <= 0x9F) or (0xE0 <= b <= 0xEB):
                c = (b << 8) | theirsce.read_uint8()
                finalText += chars.get(c, "{%02X}{%02X}" % (c >> 8, c & 0xFF))
                continue

            if b == 0x1:
                finalText += ("\n")
                continue

            # ASCII text
            if chr(b) in self.PRINTABLE_CHARS:
                finalText += chr(b)
                continue

            # cp932 text
            if 0xA0 < b < 0xE0:
                finalText += struct.pack("B", b).decode("cp932")
                continue

            if b == 0x81:
                next_b = theirsce.read(1)
                if next_b == b"\x40":
                    finalText += " "
                else:
                    finalText += "{%02X}" % b
                    finalText += "{%02X}" % ord(next_b)
                continue

            # Simple Tags
            if 0x3 <= b <= 0xF:
                parameter = theirsce.read_uint32()
                tag_name = tags.get(b, f"{b:02X}")
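                # The 4-byte parameter may resolve to a named value in the matching
                # sub-table of the TBL json (COLORS and similar tables referenced
                # elsewhere in this file); otherwise the tag is kept as <name:hex>.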
                tag_param = self.jsonTblTags.get(tag_name.upper(), {}).get(parameter, None)
                if tag_param is not None:
                    finalText += tag_param
                else:
                    finalText += f"<{tag_name}:{self.hex2(parameter)}>"
                continue

            # Variable tags (same as above but using rsce bytecode as parameter)
            if 0x13 <= b <= 0x1A:
                tag_name = f"unk{b:02X}"
                parameter = "".join([f"{c:02X}" for c in theirsce.read_tag_bytes()])
                finalText += f"<{tag_name}:{parameter}>"
                continue

            # None of the above
            finalText += "{%02X}" % b

        return finalText

    def get_Node_Bytes(self, entry_node):
        # Grab the fields from the Entry in the XML
        status = entry_node.find("Status").text
        japanese_text = entry_node.find("JapaneseText").text
        english_text = entry_node.find("EnglishText").text

        # Use the English text when the status allows insertion and it is non empty,
        # otherwise fall back to the Japanese text
        final_text = ''
        if (status in self.list_status_insertion):
            final_text = english_text or japanese_text or ''
        else:
            final_text = japanese_text or ''

        voiceId_node = entry_node.find("VoiceId")
        if (voiceId_node != None):
            final_text = '<voice:{}>'.format(voiceId_node.text) + final_text

        # Convert the text values to bytes using TBL, TAGS, COLORS, ...
        bytes_entry = self.text_to_bytes(final_text)

        return bytes_entry

    def get_New_Theirsce(self, theirsce, scpk_file_name, destination):
        # To store the new text_offset and pointers to update
        new_text_offsets = dict()

        # Grab strings_offset for pointers
        theirsce.read(12)
        strings_offset = struct.unpack("<I", theirsce.read(4))[0]

            if len(filenames) > 0:
                file_list.extend([os.path.join(path, file) for file in filenames])

        list_test = [os.path.splitext(os.path.basename(ele))[0] for ele in file_list]

        previous = -1
        dummies = 0
        for file in sorted(file_list, key=self.get_file_name):
            size = 0
            remainder = 0
            current = int(re.search(self.VALID_FILE_NAME, file).group(1))

            if current != previous + 1:
                while previous < current - 1:
                    remainders.append(remainder)
                    buffer += size + remainder
                    sectors.append(buffer)
                    previous += 1
                    dummies += 1

            file_name = self.get_file_name(file)
            if ".scpk" in file:
                path = os.path.join(self.story_XML_patch, 'New', '{}.scpk'.format(file_name))
                print(path)
            elif ".pak2" in file:
                path = os.path.join(self.skit_XML_patch, 'New', '{}.pak2'.format(file_name))
                print(path)
            else:
                path = file

            with open(path, "rb") as f2:
                data = f2.read()
            #data = f2.read()

            comp_type = re.search(self.VALID_FILE_NAME, file).group(2)
            if comp_type != None:
                data = comptolib.compress_data(data, version=int(comp_type))

            output_dat.write(data)
            size = len(data)
            #print("file: {} size: {}".format(file, size))

            remainder = 0x40 - (size % 0x40)
            if remainder == 0x40:
                remainder = 0
            output_dat.write(b"\x00" * remainder)

            remainders.append(remainder)
            buffer += size + remainder
            sectors.append(buffer)
            previous += 1

            #print(
            #    "Writing file %05d/%05d..." % (current - dummies, len(file_list)), end="\r"
            #)
            print("Writing file %05d/%05d..." % (current - dummies, len(file_list)))

        # Open the new SLPS and update the DAT.BIN pointers for the repacked files
        with open("../Data/{}/Disc/New/SLPS_254.50".format(self.repo_name), "r+b") as output_elf:
            output_elf.seek(self.POINTERS_BEGIN)

            for i in range(len(sectors) - 1):
                output_elf.write(struct.pack("