from ToolsTales import ToolsTales
import subprocess
from dicttoxml import dicttoxml
import json
import struct
import shutil
import os
import re
import pandas as pd
import xml.etree.ElementTree as ET
import lxml.etree as etree
import comptolib
from xml.dom import minidom
from pathlib import Path
import string
import io
import pak2 as pak2lib
from theirsce import Theirsce
from theirsce_instructions import InstructionType, TheirsceStringInstruction

# NOTE(review): this file reached review in a mangled state -- all newlines
# were collapsed and every span that looked like an HTML tag ("<...>") was
# stripped, deleting real code (struct formats, comparisons, whole method
# bodies).  The layout below is a reconstruction; every guessed span is
# flagged with "TODO confirm" and every unrecoverable region with an
# explicit gap marker.  Cross-check the whole file against version control.


class ToolsTOR(ToolsTales):
    """Tales of Rebirth (PS2) specific extraction/insertion tooling.

    Handles the DAT.BIN archive, SCPK story files, PAK2 skit files and the
    THEIRSCE script format, plus pointer updates inside SLPS_254.50.
    """

    POINTERS_BEGIN = 0xD76B0  # Offset to DAT.BIN pointer list start in SLPS_254.50 file
    POINTERS_END = 0xE60C8    # Offset to DAT.BIN pointer list end in SLPS_254.50 file
    HIGH_BITS = 0xFFFFFFC0    # Pointer mask: 0x40-aligned file offset
    LOW_BITS = 0x3F           # Pointer mask: padding remainder in the low 6 bits

    # Paths used
    dat_bin_original = '../Data/Tales-Of-Rebirth/Disc/Original/DAT.BIN'
    dat_bin_new = '../Data/Tales-Of-Rebirth/Disc/New/DAT.BIN'
    elf_original = '../Data/Tales-Of-Rebirth/Disc/Original/SLPS_254.50'
    elf_new = '../Data/Tales-Of-Rebirth/Disc/New/SLPS_254.50'
    story_XML_new = '../Tales-Of-Rebirth/Data/TOR/Story/'  # Story XML files will be extracted here
    story_XML_patch = '../Data/Tales-Of-Rebirth/Story/'    # Story XML files will be extracted here
    skit_XML_patch = '../Data/Tales-Of-Rebirth/Skits/'     # Skits XML files will be extracted here
    dat_archive_extract = '../Data/Tales-Of-Rebirth/DAT/'

    def __init__(self, tbl):
        super().__init__("TOR", tbl, "Tales-Of-Rebirth")

        with open("../{}/Data/{}/Misc/{}".format(self.repo_name, self.gameName, self.tblFile), encoding="utf-8") as f:
            jsonRaw = json.load(f)

        # Every table except "TBL" is keyed by hex strings -> convert keys to int.
        self.jsonTblTags = {
            k1: {int(k2, 16) if (k1 != "TBL") else k2: v2 for k2, v2 in jsonRaw[k1].items()}
            for k1, v1 in jsonRaw.items()
        }

        # Inverted lookups (text -> big-endian 2-byte code / tag id).
        self.itable = dict([[i, struct.pack(">H", int(j))] for j, i in self.jsonTblTags['TBL'].items()])
        self.itags = dict([[i, j] for j, i in self.jsonTblTags['TAGS'].items()])
        if "NAME" in self.jsonTblTags.keys():
            self.inames = dict([[i, j] for j, i in self.jsonTblTags['NAME'].items()])
        if "COLOR" in self.jsonTblTags.keys():
            self.icolors = dict([[i, j] for j, i in self.jsonTblTags['COLOR'].items()])

        self.id = 1

        # byteCode
        self.story_byte_code = b"\xF8"
        self.string_opcode = InstructionType.STRING
        self.list_status_insertion = ['Done', 'Proofreading']
        self.mkdir('../Data/{}/DAT'.format(self.repo_name))

    # Replace n occurences of a string starting from the right
    def rreplace(self, s, old, new, occurrence):
        li = s.rsplit(old, occurrence)
        return new.join(li)

    def add_line_break(self, text):
        """Word-wrap *text* to lines of at most 32 characters.

        Splits on single spaces; the trailing space added while joining is
        stripped again at the end.
        """
        temp = ""
        currentLineSize = 0
        max_size = 32

        split_space = text.split(" ")
        for word in split_space:
            currentLineSize += (len(word) + 1)
            if currentLineSize <= max_size:
                temp = temp + word + ' '
            else:
                temp = temp + '\n' + word + ' '
                # FIX: the original reset the counter to 0, forgetting the
                # word just placed on the new line, so lines could overflow.
                currentLineSize = len(word) + 1

        temp = temp.replace(" \n", "\n")
        temp = self.rreplace(temp, " ", "", 1)
        return temp

    def clean_text(self, text):
        """Strip line breaks, tags ("<...>", "[...]") and all spacing
        (ASCII and ideographic) so Japanese strings can be compared."""
        text = re.sub(r"\n ", "\n", text)
        text = re.sub(r"\n", "", text)
        text = re.sub(r"(<\w+:?\w+>)", "", text)
        text = re.sub(r"\[\w+=*\w+\]", "", text)
        text = re.sub(r" ", "", text)
        text = re.sub(u'\u3000', '', text)
        text = re.sub(r" ", "", text)
        return text

    # Extract/Transform Lauren translation
    def extract_Lauren_Translation(self):
        """Pull Lauren's translations from Google Sheets into a DataFrame
        with columns File / JapaneseText / EnglishText."""

        # Load Lauren's googlesheet data inside a dataframe
        df = self.extract_Google_Sheets("1-XwzS7F0SaLlXwv1KS6RcTEYYORH2DDb1bMRy5VM5oo", "Story")

        # 1) Make some renaming and transformations
        df = df.rename(columns={"KEY": "File", "Japanese": "JapaneseText", "Lauren's Script": "EnglishText"})

        # 2) Filter only relevant rows and columns from the googlesheet
        df = df.loc[(df['EnglishText'] != "") & (df['JapaneseText'] != ""), :]
        df = df[['File', 'JapaneseText', 'EnglishText']]

        # 3) Make some transformations to the JapaneseText so we can better match with XML
        df['File'] = df['File'].apply(lambda x: x.split("_")[0] + ".xml")
        df['JapaneseText'] = df['JapaneseText'].apply(lambda x: self.clean_text(x))

        return df

    # Transfer Lauren translation
    def transfer_Lauren_Translation(self):
        """Merge Lauren's translations into the story XML files, matching
        entries by their cleaned Japanese text."""
        df_lauren = self.extract_Lauren_Translation()

        # Distinct list of XMLs file
        xml_files = list(set(df_lauren['File'].tolist()))
        for file in xml_files:
            cond = df_lauren['File'] == file
            lauren_translations = dict(df_lauren[cond][['JapaneseText', 'EnglishText']].values)

            file_path = self.story_XML_new + 'XML/' + file
            if os.path.exists(file_path):
                tree = etree.parse(file_path)
                root = tree.getroot()
                need_save = False

                for key, item in lauren_translations.items():
                    for entry_node in root.iter("Entry"):
                        xml_jap = entry_node.find("JapaneseText").text or ''
                        xml_eng = entry_node.find("EnglishText").text or ''
                        xml_jap_cleaned = self.clean_text(xml_jap)

                        if key == xml_jap_cleaned:
                            # NOTE(review): the split pattern was destroyed in
                            # the mangling ("re.split(r"()", ...)" survived);
                            # reconstructed from clean_text's tag pattern --
                            # TODO confirm against version control.
                            split_text = re.split(r"(<\w+:?\w+>)", xml_eng)
                            item = self.add_line_break(item)

                            # Keep a leading tag (e.g. voice id) of the
                            # existing English text in front of the new text.
                            if len(split_text) >= 2:
                                item = split_text[1] + item

                            if xml_eng != item:
                                entry_node.find("EnglishText").text = item
                                need_save = True
                                if entry_node.find("Status").text == "To Do":
                                    entry_node.find("Status").text = "Editing"
                            # else:
                            #     print("File: {} - {}".format(file, key))

                if need_save:
                    txt = etree.tostring(root, encoding="UTF-8", pretty_print=True, xml_declaration=False)
                    with open(file_path, 'wb') as xml_file:
                        xml_file.write(txt)
            else:
                print("File {} skipped because file is not found".format(file))

    # Extract the story files
    def extract_All_Story(self, replace=False):
        """Extract every SCPK story file to XML under story_XML_patch."""
        print("Extracting Story files")
        print(replace)
        i = 1
        self.mkdir(self.story_XML_patch + "XML")

        listFiles = [self.dat_archive_extract + 'SCPK/' + ele
                     for ele in os.listdir(os.path.join(self.dat_archive_extract, "SCPK"))]
        for scpk_file in listFiles:
            theirsce = self.get_theirsce_from_scpk(scpk_file)
            self.extract_TheirSce_XML(theirsce, scpk_file, self.story_XML_patch, "Story", replace)
            self.id = 1
            print("Writing file %05d.." % i, end="\r")  # Not healthy
            i += 1
        print("Writing file %05d..." % (i - 1))

    # Extract all the skits files
    def extract_All_Skits(self, replace):
        """Extract every PAK2 skit file to XML under skit_XML_patch."""
        i = 1
        os.makedirs(self.skit_XML_patch + "XML", exist_ok=True)

        list_pak2_files = [self.dat_archive_extract + "PAK2/" + ele
                           for ele in os.listdir(self.dat_archive_extract + "PAK2")]
        for file_path in list_pak2_files:
            if os.path.isfile(file_path) and file_path.endswith(".pak2"):
                with open(file_path, "rb") as pak:
                    data = pak.read()
                theirsce = io.BytesIO(pak2lib.get_theirsce_from_pak2(data))
                # Drop the numeric extension part of the file name.
                self.extract_TheirSce_XML(theirsce, re.sub(r"\.\d+", "", file_path),
                                          self.skit_XML_patch, "Skits", replace)
                print("Writing file %05d" % i, end="\r")
                i += 1
        print("Writing file %05d..." % (i - 1))
        return

    def get_theirsce_from_scpk(self, scpk_file_name, debug=False) -> bytes:
        """Return the THEIRSCE script embedded in a .scpk archive.

        NOTE(review): everything after the header checks was destroyed when
        this file was mangled; the body below is a best-effort
        reconstruction (little-endian counts/sizes, decompress each entry,
        return the one starting with b"THEIRSCE") -- verify against VCS.
        """
        with open(scpk_file_name, "rb") as scpk:
            header = scpk.read(4)
            if header != b"SCPK":
                # sys.exit(f"{file} is not a .scpk file!")
                raise ValueError("File is not a .scpk file!")
            scpk.read(4)
            nbFiles = struct.unpack("<I", scpk.read(4))[0]  # TODO confirm format, original text lost
            scpk.read(4)
            file_size = [struct.unpack("<I", scpk.read(4))[0] for _ in range(nbFiles)]
            for size in file_size:
                data = scpk.read(size)
                if comptolib.is_compressed(data):  # TODO confirm helper name, original text lost
                    data = comptolib.decompress_data(data)
                if data[:8] == b"THEIRSCE":
                    return io.BytesIO(data)
        raise ValueError("No THEIRSCE script found in {}".format(scpk_file_name))

    # ------------------------------------------------------------------
    # NOTE(review): one or more method definitions that originally sat here
    # (including, presumably, extract_TheirSce_XML which is called above)
    # were destroyed when this file was mangled.  Restore them from version
    # control; do not ship this reconstruction as-is.
    # ------------------------------------------------------------------

    def bytes_to_text(self, fileRead, offset=-1, end_strings=b"\x00"):
        """Decode in-game encoded text starting at *offset* into a tagged
        unicode string.

        NOTE(review): the signature and first lines of this method were lost
        in the mangling; name, parameters and return value are reconstructed
        -- TODO confirm against version control.
        """
        finalText = ''
        if (offset > 0):
            fileRead.seek(offset, 0)
        pos = fileRead.tell()

        b = fileRead.read(1)
        while b != end_strings:
            b = ord(b)
            if (b >= 0x99 and b <= 0x9F) or (b >= 0xE0 and b <= 0xEB):
                # Two-byte table character: look it up in the TBL, or emit
                # the raw bytes as {XX}{YY} when unmapped.
                c = (b << 8) + ord(fileRead.read(1))
                try:
                    finalText += (self.jsonTblTags['TBL'][str(c)])
                except KeyError:
                    b_u = (c >> 8) & 0xff
                    b_l = c & 0xff
                    finalText += ("{%02X}" % b_u)
                    finalText += ("{%02X}" % b_l)
            elif b == 0x1:
                finalText += ("\n")
            elif b in (0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xB, 0xC, 0xD, 0xE, 0xF):
                # Opcode with a 4-byte argument, rendered as a <name:arg> tag.
                b2 = struct.unpack("<L", fileRead.read(4))[0]  # TODO confirm format, original text lost
                # NOTE(review): the tag lookup was destroyed in the mangling;
                # reconstructed from the surviving fallback branch below.
                if b in self.jsonTblTags['TAGS']:
                    tag_name = self.jsonTblTags['TAGS'][b]
                    finalText += ("<%s:%08X>" % (tag_name, b2))
                else:
                    finalText += "<%02X:%08X>" % (b, b2)
            elif chr(b) in self.PRINTABLE_CHARS:
                finalText += chr(b)
            elif b >= 0xA1 and b < 0xE0:
                # Half-width katakana range, decode via cp932.
                finalText += struct.pack("B", b).decode("cp932")
            elif b in (0x13, 0x17, 0x1A):
                # Unknown opcode: dump its bytes up to the 0x80 terminator.
                tag_name = f"Unk{b:02X}"
                hex_value = ""
                while fileRead.read(1) != b"\x80":
                    fileRead.seek(fileRead.tell() - 1)
                    mark = fileRead.read(1)
                    hex_value += mark.hex()
                    # FIX: the original compared bytes to the str "\x38",
                    # which is always False; b"\x38" is intended.
                    if mark == b"\x38":
                        hex_value += f"{struct.unpack('<L', fileRead.read(4))[0]:08X}"  # TODO confirm, original text lost
                finalText += f"<{tag_name}:{hex_value}>"
            b = fileRead.read(1)

        return finalText, pos  # TODO confirm return value, original text lost

    # ------------------------------------------------------------------
    # NOTE(review): further definitions between the text decoder above and
    # the archive-packing method below were destroyed in the mangling --
    # restore from version control.
    # ------------------------------------------------------------------

    def pack_Main_Archive(self):
        """Repack every extracted file into a new DAT.BIN (0x40-aligned
        entries) and rewrite the pointer table inside SLPS_254.50.

        NOTE(review): the method name and the initialisation before the
        os.walk loop were lost in the mangling and are reconstructed --
        TODO confirm against version control.
        """
        sectors = [0]
        remainders = []
        buffer = 0

        with open(self.dat_bin_new, "wb") as output_dat:
            file_list = []
            for path, subdirs, filenames in os.walk(self.dat_archive_extract):
                if len(filenames) > 0:
                    file_list.extend([os.path.join(path, file) for file in filenames])

            list_test = [os.path.splitext(os.path.basename(ele))[0] for ele in file_list]
            previous = -1
            dummies = 0

            for file in sorted(file_list, key=self.get_file_name):
                size = 0
                remainder = 0
                current = int(re.search(self.VALID_FILE_NAME, file).group(1))

                # Emit empty slots for any missing file indices so pointers
                # keep their positions.
                if current != previous + 1:
                    while previous < current - 1:
                        remainders.append(remainder)
                        buffer += size + remainder
                        sectors.append(buffer)
                        previous += 1
                        dummies += 1

                file_name = self.get_file_name(file)
                if ".scpk" in file:
                    print(file)
                    data = self.pack_Story_File(file_name + ".scpk")
                # FIX: this was a plain "if", so the "else" branch re-read the
                # raw file and discarded the pack_Story_File() result for
                # every .scpk entry.
                elif ".pak2" in file:
                    print(file)
                    data = self.pack_Skit_File(file_name + ".pak2")
                else:
                    with open(file, "rb") as f2:
                        data = f2.read()

                # Re-compress entries whose name carries a compression version.
                comp_type = re.search(self.VALID_FILE_NAME, file).group(2)
                if comp_type is not None:
                    data = comptolib.compress_data(data, version=int(comp_type))
                output_dat.write(data)
                size = len(data)

                # Pad each entry to a 0x40 boundary.
                remainder = 0x40 - (size % 0x40)
                if remainder == 0x40:
                    remainder = 0
                output_dat.write(b"\x00" * remainder)

                remainders.append(remainder)
                buffer += size + remainder
                sectors.append(buffer)
                previous += 1

                print("Writing file %05d/%05d..." % (current - dummies, len(file_list)))

        # Use the new SLPS updated and update the pointers for the SCPK
        with open("../Data/{}/Disc/New/SLPS_254.50".format(self.repo_name), "r+b") as output_elf:
            output_elf.seek(self.POINTERS_BEGIN)
            for i in range(len(sectors) - 1):
                # Pointer = 0x40-aligned offset (HIGH_BITS) plus the padding
                # remainder in the low 6 bits (LOW_BITS).
                # NOTE(review): SOURCE is truncated mid-statement here; the
                # packed expression is reconstructed from the HIGH_BITS /
                # LOW_BITS scheme -- TODO confirm against version control.
                output_elf.write(struct.pack("<L", sectors[i] + remainders[i]))