pythonlib/games/ToolsTOR.py

import io
import json
import os
import re
import shutil
import struct
from dataclasses import dataclass
from itertools import tee
from pathlib import Path

import lxml.etree as etree
import pandas as pd
from tqdm import tqdm
from pythonlib.formats.scpk import Scpk

import pythonlib.utils.comptolib as comptolib
import pythonlib.formats.pak2 as pak2lib
from pythonlib.formats.theirsce import Theirsce
from pythonlib.formats.theirsce_instructions import (AluOperation, InstructionType,
                                                     TheirsceBaseInstruction)
from .ToolsTales import ToolsTales


@dataclass
class LineEntry:
    """One string collected while walking a THEIRSCE script."""
    # Speaker names attached to this line; empty when no speaker was detected
    names: list[str]
    # Decoded text of the string
    text: str
    # Value stored in the XML "PointerOffset" field (opcode position + 1)
    offset: int

@dataclass
class NameEntry:
    """Speaker record used to build the XML "Speakers" section."""
    # Numeric id written to the speaker "Id" field and referenced by "SpeakerId"
    index: int
    # Offsets written (comma-joined) to the speaker "PointerOffset" field
    offsets: list[int]


VARIABLE_NAME = "[VARIABLE]"

class ToolsTOR(ToolsTales):
    
    POINTERS_BEGIN = 0xD76B0                                            # Offset to DAT.BIN pointer list start in SLPS_254.50 file
    POINTERS_END   = 0xE60C8                                            # Offset to DAT.BIN pointer list end in SLPS_254.50 file
    HIGH_BITS      = 0xFFFFFFC0                                         # Mask keeping the 0x40-aligned offset part of a pointer
    LOW_BITS       = 0x3F                                               # Mask keeping the low 6 bits of a pointer (treated as a padding remainder)
    
    
    # Paths used by the tool
    # fmt: off
    dat_bin_original    = '../Data/Tales-Of-Rebirth/Disc/Original/DAT.BIN'
    dat_bin_new         = '../Data/Tales-Of-Rebirth/Disc/New/DAT.BIN'
    elf_original        = '../Data/Tales-Of-Rebirth/Disc/Original/SLPS_254.50'
    elf_new             = '../Data/Tales-Of-Rebirth/Disc/New/SLPS_254.50'
    story_XML_new       = '../Tales-Of-Rebirth/Data/TOR/Story/'                        # Story XML files are read from here (XML/ subfolder) when translating and repacking
    story_XML_patch     = '../Data/Tales-Of-Rebirth/Story/'               # Story XML files are extracted to XML/ here; repacked SCPK files go to New/
    skit_XML_patch      = '../Data/Tales-Of-Rebirth/Skits/'                        # Skit XML files are extracted to XML/ here; repacked PAK2 files go to New/
    skit_XML_new        = '../Tales-Of-Rebirth/Data/TOR/Skits/'                    # Skit XML files are read from here (XML/ subfolder) when repacking
    dat_archive_extract = '../Data/Tales-Of-Rebirth/DAT/'                          # DAT.BIN contents are extracted here, one subfolder per file extension
    # fmt: on
    
    def __init__(self, tbl):
        """Set up the Tales of Rebirth tool and load its character/tag tables.

        The JSON table file is read from the repository's Misc folder; its
        hexadecimal string keys are converted to integers, and a reverse
        (value -> code) lookup is built for every table.
        """
        super().__init__("TOR", tbl, "Tales-Of-Rebirth")

        table_path = "../{}/Data/{}/Misc/{}".format(self.repo_name, self.gameName, self.tblFile)
        with open(table_path, encoding="utf-8") as table_file:
            raw_tables = json.load(table_file)

        # Codes are stored as hexadecimal strings in the JSON file
        for section, mapping in raw_tables.items():
            self.jsonTblTags[section] = {int(code, 16): value for code, value in mapping.items()}

        # Inverted tables: value -> code
        for section, mapping in self.jsonTblTags.items():
            self.ijsonTblTags[section] = {value: code for code, value in mapping.items()}

        # Running entry id used while building the XML files
        self.id = 1

        # Bytecode settings
        self.story_byte_code = b"\xF8"
        self.string_opcode = InstructionType.STRING

        # Entries with one of these statuses are reinserted using their English text
        self.list_status_insertion = ['Done', 'Proofreading', 'Editing']

        self.mkdir('../Data/{}/DAT'.format(self.repo_name))

    # Replace n occurences of a string starting from the right
    def rreplace(self, s, old, new, occurrence):
        """Return ``s`` with its last ``occurrence`` matches of ``old`` replaced by ``new``."""
        # rsplit cuts from the right, so joining the pieces with `new`
        # replaces only the right-most matches
        return new.join(s.rsplit(old, occurrence))

    def add_line_break(self, text):
        """Wrap ``text`` by inserting newlines between space-separated words.

        Every word is counted with one trailing space, and a line may hold at
        most 32 counted characters. A word that does not fit starts a new
        line. A single word longer than the limit is kept whole on its own
        line.

        Returns the wrapped text without trailing space.
        """
        max_size = 32
        wrapped = ""
        line_size = 0

        for word in text.split(" "):
            word_size = len(word) + 1  # the word plus its trailing space

            if line_size + word_size <= max_size:
                wrapped += word + ' '
                line_size += word_size
            else:
                # Do not open the text with an empty line when the very first
                # word is already too long
                if wrapped:
                    wrapped += '\n'
                wrapped += word + ' '
                # The new line already contains this word, so count it;
                # resetting to 0 would let the line overflow the limit
                line_size = word_size

        # Drop the space left in front of each inserted line break
        wrapped = wrapped.replace(" \n", "\n")

        # The text always ends with the space added after the last word
        return wrapped[:-1]
    def clean_text(self, text):
        """Strip line breaks, tags and spacing from ``text`` for comparison purposes.

        Removes newlines, ``<name:value>`` style tags, ``[name=value]`` style
        tags, spaces and ideographic spaces (U+3000).
        """
        # Applied strictly in this order, each match replaced by nothing
        removals = (
            r"\n",
            r"(<\w+:?\w+>)",
            r"\[\w+=*\w+\]",
            r" ",
            u'\u3000',
            r" ",
        )

        cleaned = re.sub(r"\n ", "\n", text)
        for pattern in removals:
            cleaned = re.sub(pattern, "", cleaned)
        return cleaned

    # Extract/Transform Lauren translation
    def extract_Lauren_Translation(self):

        # Load Lauren's googlesheet data inside a dataframe
        df = self.extract_Google_Sheets("1-XwzS7F0SaLlXwv1KS6RcTEYYORH2DDb1bMRy5VM5oo", "Story")

        # 1) Make some renaming and transformations
        df = df.rename(columns={"KEY": "File", "Japanese": "JapaneseText", "Lauren's Script": "EnglishText"})

        # 2) Filter only relevant rows and columns from the googlesheet
        df = df.loc[(df['EnglishText'] != "") & (df['JapaneseText'] != ""), :]
        df = df[['File', 'JapaneseText', 'EnglishText']]

        # 3) Make some transformations to the JapaneseText so we can better match with XML
        df['File'] = df['File'].apply(lambda x: x.split("_")[0] + ".xml")
        df['JapaneseText'] = df['JapaneseText'].apply(lambda x: self.clean_text(x))
        return df

    # Transfer Lauren translation
    def transfer_Lauren_Translation(self):
        """Copy Lauren's English lines into the matching story XML entries.

        For every XML file named in the sheet, entries whose cleaned Japanese
        text equals a sheet row receive the line-wrapped English text. An
        updated entry with status "To Do" moves to "Editing". A file is only
        rewritten when at least one entry changed; files missing on disk are
        reported and skipped.
        """
        df_lauren = self.extract_Lauren_Translation()

        # Distinct XML files referenced by the sheet
        for file in set(df_lauren['File'].tolist()):
            file_path = self.story_XML_new + 'XML/' + file

            if not os.path.exists(file_path):
                print("File {} skipped because file is not found".format(file))
                continue

            rows = df_lauren[df_lauren['File'] == file]
            lauren_translations = dict(rows[['JapaneseText', 'EnglishText']].values)

            tree = etree.parse(file_path)
            root = tree.getroot()

            # Clean every entry's Japanese text once, instead of once per
            # translation, and group the entries by that cleaned text
            entries_by_text = {}
            for entry_node in root.iter("Entry"):
                xml_jap = entry_node.find("JapaneseText").text or ''
                entries_by_text.setdefault(self.clean_text(xml_jap), []).append(entry_node)

            need_save = False
            for japanese, english in lauren_translations.items():
                matches = entries_by_text.get(japanese)
                if not matches:
                    continue

                # Wrap once per translation: wrapping already-wrapped text
                # again for each further matching entry could change the
                # line breaks
                wrapped = self.add_line_break(english)

                for entry_node in matches:
                    english_node = entry_node.find("EnglishText")
                    if (english_node.text or '') == wrapped:
                        continue

                    english_node.text = wrapped
                    need_save = True

                    status_node = entry_node.find("Status")
                    if status_node.text == "To Do":
                        status_node.text = "Editing"

            if need_save:
                txt = etree.tostring(root, encoding="UTF-8", pretty_print=True, xml_declaration=False)

                with open(file_path, 'wb') as xml_file:
                    xml_file.write(txt)

    # Extract the story files
    def extract_all_story(self, replace=False) -> None:
        """Convert every extracted ``*.scpk`` story file to an XML file.

        The XML files are written to the ``XML`` folder under
        ``story_XML_patch`` and keep the base name of their SCPK file.
        ``replace`` is accepted for interface compatibility and is not used.
        """
        print("Extracting Story files...")

        # TODO: use pathlib for everything
        folder_path = Path(self.story_XML_patch) / "XML"
        scpk_path = Path(self.dat_archive_extract) / "SCPK"

        # Make sure the destination exists before writing into it
        folder_path.mkdir(parents=True, exist_ok=True)

        for file in tqdm(scpk_path.glob("*.scpk")):
            # Entry ids restart at 1 in every file; resetting before the
            # extraction keeps the first file independent of earlier calls
            self.id = 1

            theirsce = Theirsce(Scpk(file).rsce)
            xml_text = self.get_xml_from_theirsce(theirsce, "Story")

            with open(folder_path / file.with_suffix(".xml").name, "wb") as xml:
                xml.write(xml_text)

        # Leave the counter reset for whatever runs next
        self.id = 1

            
    # Extract all the skits files
    def extract_all_skits(self, replace=False) -> None:
        """Convert every extracted ``*.pak2`` skit file to an XML file.

        The XML files are written to the ``XML`` folder under
        ``skit_XML_patch`` and keep the base name of their PAK2 file.
        ``replace`` is accepted for interface compatibility and is not used.
        """
        print("Extracting Skit files...")

        # TODO: use pathlib for everything
        folder_path = Path(self.skit_XML_patch) / "XML"
        pak2_path = Path(self.dat_archive_extract) / "PAK2"

        # Make sure the destination exists before writing into it
        folder_path.mkdir(parents=True, exist_ok=True)

        for file in tqdm(pak2_path.glob("*.pak2")):
            # Entry ids restart at 1 in every file, as the story extraction
            # does; without this the ids keep growing from file to file
            self.id = 1

            with open(file, "rb") as pak:
                theirsce = pak2lib.get_theirsce_from_pak2(pak.read())

            xml_text = self.get_xml_from_theirsce(Theirsce(theirsce), "Skits")

            with open(folder_path / file.with_suffix(".xml").name, "wb") as xml:
                xml.write(xml_text)


    # Extract THEIRSCE to XML
    def get_xml_from_theirsce(self, rsce: Theirsce, section: str) -> bytes:
        """Build the XML document for one THEIRSCE script.

        The root ``SceneText`` element holds a ``Speakers`` block (section
        name "Speaker") followed by a ``Strings`` block whose section name is
        ``section``.

        Returns the pretty-printed document encoded as UTF-8 bytes.
        """
        names, lines = self.extract_lines_with_speaker(rsce)

        # Swap each raw offsets list for a NameEntry; ids count up from -1
        speaker_id = -1
        for speaker in names:
            names[speaker] = NameEntry(speaker_id, names[speaker])
            speaker_id += 1

        root = etree.Element("SceneText")

        # Speakers block
        speakers_node = etree.SubElement(root, 'Speakers')
        etree.SubElement(speakers_node, 'Section').text = "Speaker"
        self.make_speakers_section(speakers_node, names)

        # Strings block
        strings_node = etree.SubElement(root, 'Strings')
        etree.SubElement(strings_node, 'Section').text = section
        self.make_strings_section(strings_node, lines, names)

        return etree.tostring(root, encoding="UTF-8", pretty_print=True)

    
    def make_strings_section(self, root, lines: list[LineEntry], names: dict[str, NameEntry]):
        """Append one ``Entry`` element per line to ``root``.

        Each entry receives PointerOffset, an optional VoiceId, JapaneseText,
        empty EnglishText and Notes, an optional SpeakerId, a running Id
        (``self.id``, incremented per entry) and a Status.
        """
        for line in lines:
            entry_node = etree.SubElement(root, "Entry")
            etree.SubElement(entry_node, "PointerOffset").text = str(line.offset)

            # Split the text on tags and drop the empty pieces
            chunks = [part for part in re.split(self.COMMON_TAG, line.text) if part]

            if len(chunks) > 1 and chunks[0].startswith("<voice:"):
                # A leading voice tag is stored separately from the text
                etree.SubElement(entry_node, "VoiceId").text = chunks[0][1:-1].split(":")[1]
                japanese = ''.join(chunks[1:])
            else:
                japanese = line.text
            etree.SubElement(entry_node, "JapaneseText").text = japanese

            etree.SubElement(entry_node, "EnglishText")
            etree.SubElement(entry_node, "Notes")

            if line.names:
                speaker_ids = ','.join(str(names[n].index) for n in line.names)
                etree.SubElement(entry_node, "SpeakerId").text = speaker_ids

            etree.SubElement(entry_node, "Id").text = str(self.id)
            self.id += 1

            # Empty strings need no translation work
            etree.SubElement(entry_node, "Status").text = 'Done' if line.text == '' else 'To Do'
        
    
    def make_speakers_section(self, root, names: dict[str, NameEntry]):
        """Append one ``Entry`` element per speaker name to ``root``.

        PointerOffset holds the comma-joined offsets of the name and is left
        empty when the name has none.
        """
        for speaker, info in names.items():
            entry_node = etree.SubElement(root, "Entry")

            pointer_node = etree.SubElement(entry_node, "PointerOffset")
            if info.offsets:
                pointer_node.text = ",".join(map(str, info.offsets))

            etree.SubElement(entry_node, "JapaneseText").text = str(speaker)
            etree.SubElement(entry_node, "EnglishText")
            etree.SubElement(entry_node, "Notes")
            etree.SubElement(entry_node, "Id").text = str(info.index)
            etree.SubElement(entry_node, "Status").text = "To Do"

    
    def extract_lines_with_speaker(self, theirsce: Theirsce) -> tuple[dict[str, list[int]], list[LineEntry]]:
        """Collect the strings of a script together with their speaker names.

        The opcodes produced by ``theirsce.walk_code()`` are scanned through a
        sliding window of three consecutive opcodes and matched against a few
        known sequences (string assignment to a variable, and the three
        flavours of the 0x45 syscall).

        Returns a ``(names, lines)`` tuple:
          * ``names`` maps each speaker name to the list of ``position + 1``
            values of the opcodes holding it (``VARIABLE_NAME`` is always
            present, with an empty list unless a string decodes to it);
          * ``lines`` is the list of ``LineEntry`` found, in walk order.
        """
        # This will do a bit of everything thanks to the "nice"
        # architecture of the Theirsce class :)
    
        # Debug
        # sections = []
        # for _, section in enumerate(theirsce.sections):
        #     for _, sub in enumerate(section):
        #         sections.append(sub.off)

        # Set up the three-way opcode generator: `d` is the INVALID filler
        # returned once an iterator is exhausted, and a/b/c are advanced by
        # 1/2/3 opcodes so that zip() yields three consecutive opcodes.
        # Note that the first opcode of walk_code() is consumed here and is
        # never seen as op1.
        d = TheirsceBaseInstruction(); d.type = InstructionType.INVALID
        a,b,c = tee(theirsce.walk_code(), 3)
        next(a, d)
        next(b, d); next(b, d)
        next(c, d); next(c, d); next(c, d)

        # Helper function, in the future I'll
        # just use a list of opcodes.
        # Advances the window by two extra opcodes, i.e. past a matched
        # three-opcode sequence.
        def skip():
            next(a, d); next(a, d)
            next(b, d); next(b, d)
            next(c, d); next(c, d)
        
        
        names = {VARIABLE_NAME: []}
        lines = []
        # STRING opcodes assigned to local variables and not consumed yet
        params = []
        # Set once a textbox call involving a variable has been seen
        # NOTE(review): never reset to False afterwards - confirm this is intended
        used = False
        for op1, op2, op3 in zip(a,b,c):
            # Debug
            # if theirsce.tell() in sections:
            #     print()
            #     print("SECTION: ")

            # BREAK marks the start of a local function,
            # so local params are no longer in scope
            if op1.type is InstructionType.BREAK:
                if used == False:
                    # Pending strings are emitted as speaker-less lines
                    for param in params:
                        text = self.bytes_to_text(theirsce, param.offset + theirsce.strings_offset)
                        lines.append(LineEntry([], text, op1.position + 1))
                params.clear()

                continue

            # This sequence marks the simple act of assigning
            # a string to a local variable, so we can detect
            # when they are used later in a function call
            if (op1.type is InstructionType.REFERENCE
                and op2.type is InstructionType.STRING 
                and op3.type is InstructionType.ALU
                and op3.operation == AluOperation.ASSIGNMENT 
                ):
                params.append(op2)
                skip()
                continue

            # This sequence represents the textbox call with
            # the name being a variable (NPCs do this)
            if (op1.type is InstructionType.REFERENCE
                and op2.type is InstructionType.STRING 
                and op3.type is InstructionType.SYSCALL
                and op3.function_index == 0x45
                ):
                if len(params) >= 1:
                    # Every pending string is treated as a possible speaker name
                    name = [self.bytes_to_text(theirsce, p.offset + theirsce.strings_offset) for p in params]
                    [names.setdefault(n, []).append(p.position + 1) for n, p in zip(name, params)]
                elif len(params) == 0:
                    name = []
                text = self.bytes_to_text(theirsce, op2.offset + theirsce.strings_offset)
                lines.append(LineEntry(name, text, op2.position + 1))
                #print(f"{params}: {text}")
                used = True
                skip()
                continue
            
            # This sequence represents the textbox call with
            # the text being a variable (Notice boxes do this)
            if (op1.type is InstructionType.STRING
                and op2.type is InstructionType.REFERENCE 
                and op3.type is InstructionType.SYSCALL
                and op3.function_index == 0x45
                ):
                name = [self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)]
                names.setdefault(name[0], []).append(op1.position + 1)
                # Every pending string becomes a line spoken by that name
                for param in params:
                    text = self.bytes_to_text(theirsce, param.offset + theirsce.strings_offset)
                    lines.append(LineEntry(name, text, param.position + 1))
                    #print(f"{text}: {name}")
                used = True
                params.clear()
                skip()
                continue
            
            # This sequence represents a regular textbox call
            # where both fields are a string (everything else, save for skits)
            if (op1.type is InstructionType.STRING
                and op2.type is InstructionType.STRING
                and op3.type is InstructionType.SYSCALL
                and op3.function_index == 0x45
                ):
                name = [self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)]
                names.setdefault(name[0], []).append(op1.position + 1)
                text = self.bytes_to_text(theirsce, op2.offset + theirsce.strings_offset)
                lines.append(LineEntry(name, text, op2.position + 1))
                #print(f"{name}: {text}")
                skip()
                continue
            
            # Any other string in assorted code calls
            if op1.type is InstructionType.STRING:
                #print(theirsce.read_string_at(op1.offset + theirsce.strings_offset))
                text = self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)
                lines.append(LineEntry([], text, op1.position + 1))
                continue
        
        return names, lines


    def extract_story_pointers_plain(self, theirsce: Theirsce):
        """List the pointer and text offsets of every string opcode in a script.

        Returns ``(pointers_offset, texts_offset)``: for each opcode whose
        type equals ``self.string_opcode``, the stream position after reading
        it minus 2, and its offset added to ``theirsce.strings_offset``.
        """
        pointers_offset = []
        texts_offset = []

        for opcode in theirsce.walk_code():
            if opcode.type != self.string_opcode:
                continue

            # Maybe check this later
            pointers_offset.append(theirsce.tell() - 2)
            texts_offset.append(opcode.offset + theirsce.strings_offset)

        return pointers_offset, texts_offset

    #Convert a bytes object to text using TAGS and TBL in the json file
    def bytes_to_text(self, theirsce: Theirsce, offset=-1, end_strings = b"\x00"):
        """Decode one string from ``theirsce`` using the TBL and TAGS tables.

        When ``offset`` is greater than 0 the stream is first moved there.
        One byte is then read and discarded, and decoding continues byte by
        byte until ``end_strings`` is read.

        Bytes that match no rule are rendered as ``{XX}`` hexadecimal
        placeholders. Returns the decoded text.
        """
        tags = self.jsonTblTags['TAGS']
        chars = self.jsonTblTags['TBL']
        pieces = []

        if offset > 0:
            theirsce.seek(offset, 0)

        # The byte found at the starting position is not part of the text
        theirsce.read(1)

        while True:
            raw = theirsce.read(1)
            if raw == end_strings:
                break
            b = ord(raw)

            if (0x99 <= b <= 0x9F) or (0xE0 <= b <= 0xEB):
                # Custom encoded text: two-byte code looked up in the TBL table
                c = (b << 8) | theirsce.read_uint8()
                pieces.append(chars.get(c, "{%02X}{%02X}" % (c >> 8, c & 0xFF)))

            elif b == 0x1:
                # Line break
                pieces.append("\n")

            elif chr(b) in self.PRINTABLE_CHARS:
                # ASCII text
                pieces.append(chr(b))

            elif 0xA0 < b < 0xE0:
                # Single-byte cp932 text
                pieces.append(struct.pack("B", b).decode("cp932"))

            elif b == 0x81:
                # 0x81 0x40 is the ideographic space; other pairs stay as hex
                next_b = theirsce.read(1)
                if next_b == b"\x40":
                    pieces.append("　")
                else:
                    pieces.append("{%02X}" % b)
                    pieces.append("{%02X}" % ord(next_b))

            elif 0x3 <= b <= 0xF:
                # Simple tags: tag byte followed by a 32-bit parameter
                parameter = theirsce.read_uint32()

                tag_name = tags.get(b, f"{b:02X}")
                tag_param = self.jsonTblTags.get(tag_name.upper(), {}).get(parameter, None)

                if tag_param is not None:
                    pieces.append(tag_param)
                else:
                    pieces.append(f"<{tag_name}:{self.hex2(parameter)}>")

            elif 0x13 <= b <= 0x1A:
                # Variable tags (same as above but using rsce bytecode as parameter)
                tag_name = f"unk{b:02X}"
                parameter = "".join([f"{c:02X}" for c in theirsce.read_tag_bytes()])
                pieces.append(f"<{tag_name}:{parameter}>")

            else:
                # None of the above
                pieces.append("{%02X}" % b)

        return "".join(pieces)
    
    def get_Node_Bytes(self, entry_node):
        """Return the encoded bytes to reinsert for one XML ``Entry``.

        The English text is used when the entry status is listed in
        ``self.list_status_insertion`` and the English text is not empty;
        otherwise the Japanese text is used. A ``VoiceId`` child, when
        present, is put back in front of the text as a ``<voice:...>`` tag.
        """
        # Grab the fields from the Entry in the XML
        status = entry_node.find("Status").text
        japanese_text = entry_node.find("JapaneseText").text
        english_text = entry_node.find("EnglishText").text

        original = japanese_text or ''
        if status in self.list_status_insertion:
            final_text = english_text or original
        else:
            final_text = original

        voice_node = entry_node.find("VoiceId")
        if voice_node is not None:
            final_text = '<voice:{}>'.format(voice_node.text) + final_text

        # Convert the text to bytes using TBL, TAGS, COLORS, ...
        return self.text_to_bytes(final_text)
    
    def get_New_Theirsce(self, theirsce, scpk_file_name, destination):
        """Rewrite the strings of a THEIRSCE stream from its XML file.

        ``theirsce`` is a seekable binary stream. The XML is read from
        ``destination + "XML/"``, named after ``scpk_file_name``. Entries
        whose Id or PointerOffset is "-1" are ignored. The remaining entries
        are written one after another, each followed by a zero byte, starting
        one byte after the strings offset read from the header. Every pointer
        listed in an entry's PointerOffset is then overwritten with the new
        16-bit offset relative to the strings offset.

        Returns the same stream object.
        """
        # The strings offset is the 32-bit value found after 12 header bytes
        theirsce.read(12)
        (strings_offset,) = struct.unpack("<L", theirsce.read(4))

        # Read the XML for the corresponding THEIRSCE
        xml_path = destination + "XML/" + self.get_file_name(scpk_file_name) + '.xml'
        root = etree.parse(xml_path).getroot()

        # Go to the start of the dialog and reinsert every usable Entry
        theirsce.seek(strings_offset + 1)
        nodes = [
            node for node in root.iter('Entry')
            if node.find('Id').text != "-1" and node.find('PointerOffset').text != "-1"
        ]

        # PointerOffset text -> position where the entry's new text starts
        new_text_offsets = {}
        for entry_node in nodes:
            new_text_offsets[entry_node.find("PointerOffset").text] = theirsce.tell()
            theirsce.write(self.get_Node_Bytes(entry_node) + b'\x00')

        # Update the pointers based on the new position of each entry
        for pointer_offset, text_offset in new_text_offsets.items():
            pointers_list = pointer_offset.split(",")
            new_value = text_offset - strings_offset

            for pointer in pointers_list:
                theirsce.seek(int(pointer))
                theirsce.write(struct.pack("<H", new_value))

        return theirsce
            
    #Repack SCPK files for Story
    def pack_Story_File(self, scpk_file_name):
        """Rebuild one story SCPK file with the text taken from its XML.

        Every compressed file of the original archive is decompressed; the
        THEIRSCE script gets its strings replaced through
        ``get_New_Theirsce`` and everything is recompressed with the
        compression type it had. Files that are not compressed are copied
        unchanged.

        The new archive is written to the ``New`` folder under
        ``story_XML_patch`` and also returned as bytes.
        """
        original_path = self.dat_archive_extract + "SCPK/" + scpk_file_name
        new_path = self.story_XML_patch + "New/" + scpk_file_name

        # Copy the original SCPK file to the folder used for the new version
        shutil.copy(original_path, new_path)

        # The original archive is only read, so plain read access is enough
        with open(original_path, 'rb') as scpk:
            # Header: 8 bytes skipped, file count, 4 bytes skipped, then one size per file
            scpk.read(8)
            nb_files = struct.unpack("<L", scpk.read(4))[0]
            scpk.read(4)
            file_sizes = struct.unpack(f"<{nb_files}L", scpk.read(4 * nb_files))

            # The file contents follow the size table, in the same order
            chunks = []
            for fsize in file_sizes:
                data_compressed = scpk.read(fsize)

                if comptolib.is_compressed(data_compressed):
                    # The first byte is reused as the compression version
                    c_type = struct.unpack("<b", data_compressed[:1])[0]
                    data_uncompressed = comptolib.decompress_data(data_compressed)

                    if data_uncompressed[:8] == b"THEIRSCE":
                        # Update the THEIRSCE uncompressed file from the XML
                        theirsce = self.get_New_Theirsce(io.BytesIO(data_uncompressed), scpk_file_name, self.story_XML_new)
                        theirsce.seek(0)
                        data_uncompressed = theirsce.read()

                    data_compressed = comptolib.compress_data(data_uncompressed, version=c_type)

                chunks.append(data_compressed)

        # Write down the new SCPK from scratch: header, size table, contents
        header = b"\x53\x43\x50\x4B\x01\x00\x0F\x00" + struct.pack("<L", len(chunks)) + b"\x00" * 4
        size_table = b"".join(struct.pack("<L", len(chunk)) for chunk in chunks)
        new_scpk = header + size_table + b"".join(chunks)

        with open(new_path, "wb") as f:
            f.write(new_scpk)

        return new_scpk
    
    def pack_Skit_File(self, pak2_file):
        """Rebuild one skit PAK2 file with the text taken from its XML.

        The THEIRSCE chunk of the original PAK2 is replaced by the one
        produced by ``get_New_Theirsce`` and the result is written to the
        ``New`` folder under ``skit_XML_patch``.
        """
        new_folder = self.skit_XML_patch + "New"
        new_path = new_folder + "/" + pak2_file

        # The destination folder has to be prepared before anything is
        # copied into it (previously this ran after the copy)
        self.mkdir(new_folder)

        # Copy the original PAK2 file to the folder used for the new version
        shutil.copy(self.dat_archive_extract + "PAK2/" + pak2_file, new_path)

        pak2_file_path = os.path.join(self.dat_archive_extract, "PAK2", pak2_file)
        with open(pak2_file_path, "rb") as f_pak2:
            pak2_data = f_pak2.read()

        # Create the pak2 object
        pak2_obj = pak2lib.get_data(pak2_data)

        # Generate the new Theirsce based on the XML and replace the original one
        xml_name = os.path.basename(pak2_file_path).split(".")[0]
        theirsce_io = self.get_New_Theirsce(io.BytesIO(pak2_obj.chunks.theirsce), xml_name, self.skit_XML_new)
        theirsce_io.seek(0)
        pak2_obj.chunks.theirsce = theirsce_io.read()

        with open(new_path, "wb") as f2:
            f2.write(pak2lib.create_pak2(pak2_obj))

        return

    def pack_All_Skits(self):
        """Repack every file found in the ``New`` folder under ``skit_XML_patch``."""
        print("Recreating Skits files")

        new_folder = self.skit_XML_patch + "New/"
        for pak2_file in os.listdir(new_folder):
            self.pack_Skit_File(pak2_file)
            print("Writing file {} ...".format(pak2_file))

    def debug_Story_Skits(self, section, file_name, text=False):
        """Dump the THEIRSCE of a story or skit file for debugging.

        Writes the raw script to ``../<file_name>.theirsce`` and an Excel
        sheet listing the pointer offset (hexadecimal, without "0x") of every
        extracted line. The decoded text is added in a ``Jap_Text`` column
        when ``text`` is true.

        Only the data available on the extracted lines is exported; the
        previous ``Text_Offset`` column was never filled and made this method
        fail with a KeyError.
        """
        if section == "Story":
            theirsce = self.get_theirsce_from_scpk(self.dat_archive_extract + 'SCPK/' + self.get_file_name(file_name) + '.scpk')
        else:
            with open(self.dat_archive_extract + "PAK2/" + file_name.split(".")[0] + '.3.pak2', "rb") as pak:
                data = pak.read()
            theirsce = io.BytesIO(pak2lib.get_theirsce_from_pak2(data))

        rsce = Theirsce(path=theirsce)
        _, lines = self.extract_lines_with_speaker(rsce)

        with open('../{}.theirsce'.format(file_name), 'wb') as f:
            f.write(theirsce.getvalue())

        # Build the columns from the same list so their lengths always match
        columns = {}
        if text:
            columns["Jap_Text"] = [line.text for line in lines]
        columns["Pointers_Offset"] = [hex(line.offset)[2:] for line in lines]

        df = pd.DataFrame(columns)
        df.to_excel('../{}.xlsx'.format(self.get_file_name(file_name)), index=False)

            
    def get_datbin_file_data(self) -> dict[int, int]:

        with open(self.elf_original , "rb") as elf:
            elf.seek(self.POINTERS_BEGIN, 0)
            blob = elf.read(self.POINTERS_END-self.POINTERS_BEGIN)
            
        pointers = struct.unpack(f"<{len(blob)//4}L", blob)
        file_data: dict[int, int] = {}
        for c, n in zip(pointers, pointers[1:]):
            remainder = c & self.LOW_BITS
            start = c & self.HIGH_BITS
            end = (n & self.HIGH_BITS) - remainder
            file_data[c] = end - start
        
        return file_data

    # Extract the file DAT.BIN to the different directorties
    def extract_main_archive(self) -> None:
        """Extract every file of the original DAT.BIN archive.

        Each non-empty file is written under ``dat_archive_extract`` in a
        folder named after its upper-cased extension. Files are named with
        their five-digit index in the pointer list; compressed files are
        stored decompressed and carry their compression type in the name
        (``<index>.<type>.<extension>``).
        """
        print("Extracting DAT bin files...")
        with open(self.dat_bin_original, "rb") as f:
            for i, (offset, size) in enumerate(tqdm(self.get_datbin_file_data().items(), desc="Extracting files", unit="file")):

                # Ignore 0 byte files
                if size == 0:
                    continue

                # The low bits of a pointer are not part of the position:
                # sizes are computed from the masked start, so read from there
                f.seek(offset & self.HIGH_BITS, 0)
                data = f.read(size)

                if comptolib.is_compressed(data):
                    # The first byte is kept as the compression type
                    c_type = struct.unpack("<b", data[:1])[0]
                    data = comptolib.decompress_data(data)
                    extension = self.get_extension(data)
                    fname = f"{i:05d}.{c_type}.{extension}"
                else:
                    extension = self.get_extension(data)
                    fname = f"{i:05d}.{extension}"

                # TODO: use pathlib for everything
                final_path = Path(self.dat_archive_extract) / extension.upper()
                final_path.mkdir(parents=True, exist_ok=True)

                with open(final_path / fname, "wb") as output:
                    output.write(data)
        
        
    def pack_main_archive(self):
        """Rebuild DAT.BIN and update its pointer list in the new executable.

        All files found under ``dat_archive_extract`` are written in file
        name order to ``dat_bin_new``, each padded with zero bytes to a
        multiple of 0x40. ``.scpk`` and ``.pak2`` files are taken from the
        ``New`` folders of the story and skit patch paths instead. Files
        whose name carries a compression type are compressed again before
        being written. Missing indexes produce empty entries.

        The original executable is copied to ``elf_new`` and one pointer
        (file start plus padding size) per entry is written at
        ``POINTERS_BEGIN``.
        """
        sectors = [0]       # start position of every entry, plus the final end position
        remainders = []     # padding size of every entry
        buffer = 0          # bytes written so far

        # Copy the original SLPS to Disc/New
        shutil.copy(self.elf_original, self.elf_new)

        output_dat_path = self.dat_bin_new
        with open(output_dat_path, "wb") as output_dat:

            print("Packing files into %s..." % os.path.basename(output_dat_path))

            # Make a list with all the files of DAT.bin
            file_list = []
            for folder, _subdirs, filenames in os.walk(self.dat_archive_extract):
                file_list.extend(os.path.join(folder, name) for name in filenames)

            previous = -1

            for file in tqdm(sorted(file_list, key=self.get_file_name)):

                name_match = re.search(self.VALID_FILE_NAME, file)
                current = int(name_match.group(1))

                # Indexes missing between two files become empty entries
                while previous < current - 1:
                    remainders.append(0)
                    sectors.append(buffer)
                    previous += 1

                file_name = self.get_file_name(file)

                # Story and skit files are taken from their repacked version
                if ".scpk" in file:
                    path = os.path.join(self.story_XML_patch, 'New', '{}.scpk'.format(file_name))
                    print(path)

                elif ".pak2" in file:
                    path = os.path.join(self.skit_XML_patch, 'New', '{}.pak2'.format(file_name))
                    print(path)
                else:
                    path = file

                with open(path, "rb") as f2:
                    data = f2.read()

                # Second group of the file name pattern: compression type, if any
                comp_type = name_match.group(2)
                if comp_type is not None:
                    data = comptolib.compress_data(data, version=int(comp_type))

                output_dat.write(data)
                size = len(data)

                # Pad to the next multiple of 0x40
                remainder = 0x40 - (size % 0x40)
                if remainder == 0x40:
                    remainder = 0
                output_dat.write(b"\x00" * remainder)

                remainders.append(remainder)
                buffer += size + remainder
                sectors.append(buffer)
                previous += 1

        # Update the pointers in the executable that was just copied
        with open(self.elf_new, "r+b") as output_elf:
            output_elf.seek(self.POINTERS_BEGIN)

            for sector, remainder in zip(sectors, remainders):
                output_elf.write(struct.pack("<L", sector + remainder))
    
        
    def pack_All_Story(self):
        """Repack every story file listed in the ``New`` folder of the story patch directory.

        Each entry name returned by ``os.listdir`` is passed to
        ``self.pack_Story_File`` and then reported on stdout.
        """
        print("Recreating Story files")

        # The directory listing is taken once, before any file is packed.
        new_story_dir = self.story_XML_patch + "New/"
        for entry_name in os.listdir(new_story_dir):
            self.pack_Story_File(entry_name)
            print("Writing file {} ...".format(entry_name))
            
    def insert_All(self):
        """Run the insertion step by rebuilding the main archive.

        This only delegates to ``self.pack_main_archive()``.
        NOTE(review): the earlier comment here said "Updates SCPK based on
        XMLs data", but no SCPK packing is invoked from this method -- confirm
        whether ``pack_All_Story`` is expected to be run separately beforehand.
        """
        self.pack_main_archive()
-												Formatting changes

											
										
										
											2023-05-19 07:30:53 -05:00
+								import io
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
+								import json
 								import os
 								import re
-												Formatting changes

											
										
										
											2023-05-19 07:30:53 -05:00
+								import shutil
 								import struct
 								from dataclasses import dataclass
 								from itertools import tee
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
+								from pathlib import Path
-												Formatting changes

											
										
										
											2023-05-19 07:30:53 -05:00
 								import lxml.etree as etree
 								import pandas as pd
 								from tqdm import tqdm
-												Folder structure

Trying to make it more package-y

											
										
										
											2023-05-19 08:07:41 -05:00
+								from pythonlib.formats.scpk import Scpk
-												Formatting changes

											
										
										
											2023-05-19 07:30:53 -05:00
-												Folder structure

Trying to make it more package-y

											
										
										
											2023-05-19 08:07:41 -05:00
+								import pythonlib.utils.comptolib as comptolib
 								import pythonlib.formats.pak2 as pak2lib
 								from pythonlib.formats.theirsce import Theirsce
 								from pythonlib.formats.theirsce_instructions import (AluOperation, InstructionType,
-												Formatting changes

											
										
										
											2023-05-19 07:30:53 -05:00
+								                                                     TheirsceBaseInstruction)
 								from .ToolsTales import ToolsTales
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
 								@dataclass
 								class LineEntry:
 								    names: list[str]
 								    text: str
 								    offset: int
 								@dataclass
 								class NameEntry:
 								    index: int
 								    offsets: list[int]
 								VARIABLE_NAME = "[VARIABLE]"
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
 								class ToolsTOR(ToolsTales):
 								    POINTERS_BEGIN = 0xD76B0                                            # Offset to DAT.BIN pointer list start in SLPS_254.50 file
 								    POINTERS_END   = 0xE60C8                                            # Offset to DAT.BIN pointer list end in SLPS_254.50 file
 								    HIGH_BITS      = 0xFFFFFFC0
 								    LOW_BITS       = 0x3F
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
 								    # Paths used
-												Folder structure

Trying to make it more package-y

											
										
										
											2023-05-19 08:07:41 -05:00
+								    # fmt: off
-												Formatting changes

											
										
										
											2023-05-19 07:30:53 -05:00
+								    dat_bin_original    = '../Data/Tales-Of-Rebirth/Disc/Original/DAT.BIN'
 								    dat_bin_new         = '../Data/Tales-Of-Rebirth/Disc/New/DAT.BIN'
 								    elf_original        = '../Data/Tales-Of-Rebirth/Disc/Original/SLPS_254.50'
 								    elf_new             = '../Data/Tales-Of-Rebirth/Disc/New/SLPS_254.50'
 								    story_XML_new       = '../Tales-Of-Rebirth/Data/TOR/Story/'                        #Story XML files will be extracted here
 								    story_XML_patch     = '../Data/Tales-Of-Rebirth/Story/'               #Story XML files will be extracted here
 								    skit_XML_patch      = '../Data/Tales-Of-Rebirth/Skits/'                        #Skits XML files will be extracted here
 								    skit_XML_new        = '../Tales-Of-Rebirth/Data/TOR/Skits/'
 								    dat_archive_extract = '../Data/Tales-Of-Rebirth/DAT/'
-												Folder structure

Trying to make it more package-y

											
										
										
											2023-05-19 08:07:41 -05:00
+								    # fmt: on
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
 								    def __init__(self, tbl):
-												Updating extract_Iso function to use pycdlib

											
										
										
											2022-10-02 08:18:37 -04:00
+								        super().__init__("TOR", tbl, "Tales-Of-Rebirth")
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
-												Fix TBL_All path

											
										
										
											2022-09-02 20:56:48 -04:00
+								        with open("../{}/Data/{}/Misc/{}".format(self.repo_name, self.gameName, self.tblFile), encoding="utf-8") as f:
-												Rework tag acquisition and text conversion

											
										
										
											2023-05-16 00:05:37 -05:00
+								            jsonRaw = json.load(f)
 								        for k, v in jsonRaw.items():
 								            self.jsonTblTags[k] = {int(k2, 16): v2 for k2, v2 in v.items()}
-												Move functions to TOR instead of parent class

											
										
										
											2022-07-16 15:11:15 -04:00
-												Rework tag acquisition and text conversion

											
										
										
											2023-05-16 00:05:37 -05:00
+								        for k, v in self.jsonTblTags.items():
 								            self.ijsonTblTags[k] = {v2: k2 for k2, v2 in v.items()}
-												Move functions to TOR instead of parent class

											
										
										
											2022-07-16 15:11:15 -04:00
+								        self.id = 1
-												Formatting changes

											
										
										
											2023-05-19 07:30:53 -05:00
+								        # byteCode
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								        self.story_byte_code = b"\xF8"
-												Theirsce specific text extraction, part 1

											
										
										
											2023-01-06 22:39:17 -05:00
+								        self.string_opcode = InstructionType.STRING
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
+								        self.list_status_insertion = ['Done', 'Proofreading', 'Editing']
-												Update Scripts to extract and insert the menu

											
										
										
											2022-02-10 20:13:15 -05:00
-												Add make_dirs call when instantiating class

Create folders for Rebirth

											
										
										
											2022-09-18 18:22:59 -04:00
+								        self.mkdir('../Data/{}/DAT'.format(self.repo_name))
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
-												Add functions to transfer Lauren translations

											
										
										
											2023-01-06 22:16:30 -05:00
+								    # Replace n occurrences of a string starting from the right
 								    def rreplace(self, s, old, new, occurrence):
 								        li = s.rsplit(old, occurrence)
 								        return new.join(li)
 								    def add_line_break(self, text):
-												Formatting changes

											
										
										
											2023-05-19 07:30:53 -05:00
+								        temp = ""
 								        currentLineSize = 0
-												Add functions to transfer Lauren translations

											
										
										
											2023-01-06 22:16:30 -05:00
 								        text_size = len(text)
 								        max_size = 32
 								        split_space = text.split(" ")
 								        for word in split_space:
 								            currentLineSize += (len(word) + 1)
 								            if currentLineSize <= max_size:
 								                temp = temp + word + ' '
 								            else:
 								                temp = temp + '\n' + word + ' '
 								                currentLineSize = 0
 								        temp = temp.replace(" \n", "\n")
 								        temp = self.rreplace(temp, " ", "", 1)
 								        return temp
 								    def clean_text(self, text):
 								        text = re.sub(r"\n ", "\n", text)
 								        text = re.sub(r"\n", "", text)
 								        text = re.sub(r"(<\w+:?\w+>)", "", text)
 								        text = re.sub(r"\[\w+=*\w+\]", "", text)
 								        text = re.sub(r" ", "", text)
 								        text = re.sub(u'\u3000', '', text)
 								        text = re.sub(r" ", "", text)
 								        return text
 								    # Extract/Transform Lauren translation
 								    def extract_Lauren_Translation(self):
 								        # Load Lauren's googlesheet data inside a dataframe
 								        df = self.extract_Google_Sheets("1-XwzS7F0SaLlXwv1KS6RcTEYYORH2DDb1bMRy5VM5oo", "Story")
 								        # 1) Make some renaming and transformations
 								        df = df.rename(columns={"KEY": "File", "Japanese": "JapaneseText", "Lauren's Script": "EnglishText"})
 								        # 2) Filter only relevant rows and columns from the googlesheet
 								        df = df.loc[(df['EnglishText'] != "") & (df['JapaneseText'] != ""), :]
 								        df = df[['File', 'JapaneseText', 'EnglishText']]
 								        # 3) Make some transformations to the JapaneseText so we can better match with XML
 								        df['File'] = df['File'].apply(lambda x: x.split("_")[0] + ".xml")
 								        df['JapaneseText'] = df['JapaneseText'].apply(lambda x: self.clean_text(x))
 								        return df
 								    # Transfer Lauren translation
 								    def transfer_Lauren_Translation(self):
 								        df_lauren = self.extract_Lauren_Translation()
 								        # Distinct list of XMLs file
 								        xml_files = list(set(df_lauren['File'].tolist()))
 								        for file in xml_files:
 								            cond = df_lauren['File'] == file
 								            lauren_translations = dict(df_lauren[cond][['JapaneseText', 'EnglishText']].values)
 								            file_path = self.story_XML_new + 'XML/' + file
 								            if os.path.exists(file_path):
 								                tree = etree.parse(file_path)
 								                root = tree.getroot()
 								                need_save = False
 								                for key, item in lauren_translations.items():
 								                    for entry_node in root.iter("Entry"):
 								                        xml_jap = entry_node.find("JapaneseText").text or ''
 								                        xml_eng = entry_node.find("EnglishText").text or ''
 								                        xml_jap_cleaned = self.clean_text(xml_jap)
 								                        if key == xml_jap_cleaned:
 								                            item = self.add_line_break(item)
 								                            if xml_eng != item:
 								                                entry_node.find("EnglishText").text = item
 								                                need_save = True
 								                                if entry_node.find("Status").text == "To Do":
 								                                    entry_node.find("Status").text = "Editing"
 								                        # else:
 								                        #    print("File: {} - {}".format(file, key))
 								                if need_save:
 								                    txt = etree.tostring(root, encoding="UTF-8", pretty_print=True, xml_declaration=False)
 								                    with open(file_path, 'wb') as xml_file:
 								                        xml_file.write(txt)
 								            else:
 								                print("File {} skipped because file is not found".format(file))
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
+								    # Extract the story files
-												Rewrite skit and story extraction

No more in-between copies when extracting, also moved the xml saving to the extracting functions and progress bars!

											
										
										
											2023-05-19 07:55:15 -05:00
+								    def extract_all_story(self, replace=False) -> None:
 								        print("Extracting Story files...")
-												Add a replace flag

Add a replace tag

											
										
										
											2022-12-21 19:10:18 -05:00
-												Rewrite skit and story extraction

No more in-between copies when extracting, also moved the xml saving to the extracting functions and progress bars!

											
										
										
											2023-05-19 07:55:15 -05:00
+								        # TODO: use pathlib for everything
 								        folder_path = Path(self.story_XML_patch) / "XML"
 								        scpk_path = Path(self.dat_archive_extract) / "SCPK"
 								        for file in tqdm(scpk_path.glob("*.scpk")):
 								            theirsce = Theirsce(Scpk(file).rsce)
 								            xml_text = self.get_xml_from_theirsce(theirsce, "Story")
-												Remove Section and Duplicate

											
										
										
											2022-07-04 16:10:43 -04:00
+								            self.id = 1
-												Rewrite skit and story extraction

No more in-between copies when extracting, also moved the xml saving to the extracting functions and progress bars!

											
										
										
											2023-05-19 07:55:15 -05:00
 								            with open(folder_path / file.with_suffix(".xml").name, "wb") as xml:
 								                xml.write(xml_text)
-												Create new function to extract all skits

											
										
										
											2022-08-13 00:13:24 -04:00
 								    # Extract all the skits files
-												Capitalization memes

											
										
										
											2023-05-19 08:23:47 -05:00
+								    def extract_all_skits(self, replace=False) -> None:
-												Rewrite skit and story extraction

No more in-between copies when extracting, also moved the xml saving to the extracting functions and progress bars!

											
										
										
											2023-05-19 07:55:15 -05:00
+								        print("Extracting Skit files...")
 								        # TODO: use pathlib for everything
 								        folder_path = Path(self.skit_XML_patch) / "XML"
 								        pak2_path = Path(self.dat_archive_extract) / "PAK2"
 								        for file in tqdm(pak2_path.glob("*.pak2")):
 								            with open(file, "rb") as pak:
 								                theirsce = pak2lib.get_theirsce_from_pak2(pak.read())
 								            xml_text = self.get_xml_from_theirsce(Theirsce(theirsce), "Skits")
 								            with open(folder_path / file.with_suffix(".xml").name, "wb") as xml:
 								                xml.write(xml_text)
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
+								    # Extract THEIRSCE to XML
-												Rewrite skit and story extraction

No more in-between copies when extracting, also moved the xml saving to the extracting functions and progress bars!

											
										
										
											2023-05-19 07:55:15 -05:00
+								    def get_xml_from_theirsce(self, rsce: Theirsce, section: str) -> bytes:
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
 								        #Create the XML file
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								        # root = etree.Element('SceneText')
 								        # etree.SubElement(root, "OriginalName").text = file_name
-												Theirsce specific text extraction, part 1

											
										
										
											2023-01-06 22:39:17 -05:00
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								        #pointers_offset, texts_offset = self.extract_Story_Pointers(rsce)
 								        names, lines = self.extract_lines_with_speaker(rsce)
 								        for i, (k, v) in enumerate(names.items(), -1):
 								            names[k] = NameEntry(i, v)
-												correct Menu reinsertion of Skits Name

											
										
										
											2022-02-11 21:16:19 -05:00
 								        #Remove duplicates
-												Remove Section and Duplicate

											
										
										
											2022-07-04 16:10:43 -04:00
+								        #list_informations = self.remove_duplicates(["Story"] * len(pointers_offset), pointers_offset, text_list)
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								        # list_lines = ( ['Story', line.offset, line.text] for line in lines)
 								        # list_names = ( ['Story', line.offset, line.text] for i, (k, v) in enumerate(found_names.items()))
-												Adjust extract_All_Story_Files

Add Section and Destination parameters

- update Extract_Theirsce_XML
   - Add theirsce file as parameter
   - Add destination and section
- update extract_All_Story_Files so it works with new parameters

											
										
										
											2022-08-12 21:49:23 -04:00
+								        #Build the XML Structure with the information
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
 								        root = etree.Element("SceneText")
 								        speakers_node = etree.SubElement(root, 'Speakers')
-												Fix "Speaker" and voice_id replace

											
										
										
											2023-01-07 22:00:46 -05:00
+								        etree.SubElement(speakers_node, 'Section').text = "Speaker"
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								        strings_node = etree.SubElement(root, 'Strings')
 								        etree.SubElement(strings_node, 'Section').text = section
 								        self.make_speakers_section(speakers_node, names)
 								        self.make_strings_section(strings_node, lines, names)
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
-												Rewrite skit and story extraction

No more in-between copies when extracting, also moved the xml saving to the extracting functions and progress bars!

											
										
										
											2023-05-19 07:55:15 -05:00
+								        # Return XML string
 								        return etree.tostring(root, encoding="UTF-8", pretty_print=True)
-												Add a replace flag

Add a replace tag

											
										
										
											2022-12-21 19:10:18 -05:00
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
 								    def make_strings_section(self, root, lines: list[LineEntry], names: dict[str, NameEntry]):
 								        pass
 								        for line in lines:
 								            entry_node = etree.SubElement(root, "Entry")
 								            etree.SubElement(entry_node,"PointerOffset").text = str(line.offset)
 								            text_split = list(filter(None, re.split(self.COMMON_TAG, line.text)))
 								            if len(text_split) > 1 and text_split[0].startswith("<voice:"):
 								                etree.SubElement(entry_node,"VoiceId").text  = text_split[0][1:-1].split(":")[1]
 								                etree.SubElement(entry_node, "JapaneseText").text = ''.join(text_split[1:])
 								            else:
 								                etree.SubElement(entry_node, "JapaneseText").text = line.text
 								            etree.SubElement(entry_node,"EnglishText")
 								            etree.SubElement(entry_node,"Notes")
-												Add a replace flag

Add a replace tag

											
										
										
											2022-12-21 19:10:18 -05:00
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								            if line.names:
 								                etree.SubElement(entry_node,"SpeakerId").text = ','.join([str(names[n].index) for n in line.names])
 								            etree.SubElement(entry_node,"Id").text = str(self.id)
 								            self.id = self.id + 1
 								            if line.text == '':
 								                statusText = 'Done'
 								            else:
 								                statusText = 'To Do'
 								            etree.SubElement(entry_node,"Status").text        = statusText
 								    def make_speakers_section(self, root, names: dict[str, NameEntry]):
 								        for k, v in names.items():
 								            entry_node = etree.SubElement(root, "Entry")
-												Make PointerOffset self-closing

											
										
										
											2023-01-06 23:16:52 -05:00
+								            if v.offsets:
 								                etree.SubElement(entry_node,"PointerOffset").text = ",".join([str(off) for off in v.offsets])
 								            else:
 								                etree.SubElement(entry_node,"PointerOffset")
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								            etree.SubElement(entry_node,"JapaneseText").text  = str(k)
 								            etree.SubElement(entry_node,"EnglishText")
 								            etree.SubElement(entry_node,"Notes")
 								            etree.SubElement(entry_node,"Id").text            = str(v.index)
 								            etree.SubElement(entry_node,"Status").text         = "To Do"
 								    def extract_lines_with_speaker(self, theirsce: Theirsce):
 								        # This will do a bit of everything thanks to the "nice"
 								        # architecture of the Theirsce class :)
 								        # Debug
 								        # sections = []
 								        # for _, section in enumerate(theirsce.sections):
 								        #     for _, sub in enumerate(section):
 								        #         sections.append(sub.off)
 								        # Setup three-way opcode generator
 								        d = TheirsceBaseInstruction(); d.type = InstructionType.INVALID
 								        a,b,c = tee(theirsce.walk_code(), 3)
 								        next(a, d)
 								        next(b, d); next(b, d)
 								        next(c, d); next(c, d); next(c, d)
 								        # Helper function, in the future I'll
 								        # just use a list of opcodes
 								        def skip():
 								            next(a, d); next(a, d)
 								            next(b, d); next(b, d)
 								            next(c, d); next(c, d)
 								        names = {VARIABLE_NAME: []}
 								        lines = []
 								        params = []
 								        used = False
 								        for op1, op2, op3 in zip(a,b,c):
 								            # Debug
 								            # if theirsce.tell() in sections:
 								            #     print()
 								            #     print("SECTION: ")
 								            # BREAK marks start of a local function
 								            # so local params are no longer in scope
 								            if op1.type is InstructionType.BREAK:
 								                if used == False:
 								                    for param in params:
 								                        text = self.bytes_to_text(theirsce, param.offset + theirsce.strings_offset)
 								                        lines.append(LineEntry([], text, op1.position + 1))
 								                params.clear()
 								                continue
 								            # This sequence mark the simple act of assigning
 								            # a string to a local variable, so we can detect
 								            # when they are used later in a function call
 								            if (op1.type is InstructionType.REFERENCE
 								                and op2.type is InstructionType.STRING
 								                and op3.type is InstructionType.ALU
 								                and op3.operation == AluOperation.ASSIGNMENT
 								                ):
 								                params.append(op2)
 								                skip()
 								                continue
 								            # This sequence represents the textbox call with
 								            # the name being a variable (NPCs do this)
 								            if (op1.type is InstructionType.REFERENCE
 								                and op2.type is InstructionType.STRING
 								                and op3.type is InstructionType.SYSCALL
 								                and op3.function_index == 0x45
 								                ):
 								                if len(params) >= 1:
 								                    name = [self.bytes_to_text(theirsce, p.offset + theirsce.strings_offset) for p in params]
 								                    [names.setdefault(n, []).append(p.position + 1) for n, p in zip(name, params)]
 								                elif len(params) == 0:
 								                    name = []
 								                text = self.bytes_to_text(theirsce, op2.offset + theirsce.strings_offset)
 								                lines.append(LineEntry(name, text, op2.position + 1))
 								                #print(f"{params}: {text}")
 								                used = True
 								                skip()
 								                continue
 								            # This sequence represents the textbox call with
 								            # the text being a variable (Notice boxes do this)
 								            if (op1.type is InstructionType.STRING
 								                and op2.type is InstructionType.REFERENCE
 								                and op3.type is InstructionType.SYSCALL
 								                and op3.function_index == 0x45
 								                ):
 								                name = [self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)]
 								                names.setdefault(name[0], []).append(op1.position + 1)
 								                for param in params:
 								                    text = self.bytes_to_text(theirsce, param.offset + theirsce.strings_offset)
 								                    lines.append(LineEntry(name, text, param.position + 1))
 								                    #print(f"{text}: {name}")
 								                used = True
 								                params.clear()
 								                skip()
 								                continue
 								            # This sequence represents a regular textbox call
 								            # where both fields are an string (everything else, save for skits)
 								            if (op1.type is InstructionType.STRING
 								                and op2.type is InstructionType.STRING
 								                and op3.type is InstructionType.SYSCALL
 								                and op3.function_index == 0x45
 								                ):
 								                name = [self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)]
 								                names.setdefault(name[0], []).append(op1.position + 1)
 								                text = self.bytes_to_text(theirsce, op2.offset + theirsce.strings_offset)
 								                lines.append(LineEntry(name, text, op2.position + 1))
 								                #print(f"{name}: {text}")
 								                skip()
 								                continue
 								            # Any other string in assorted code calls
 								            if op1.type is InstructionType.STRING:
 								                #print(theirsce.read_string_at(op1.offset + theirsce.strings_offset))
 								                text = self.bytes_to_text(theirsce, op1.offset + theirsce.strings_offset)
 								                lines.append(LineEntry([], text, op1.position + 1))
 								                continue
 								        return names, lines
 								    def extract_story_pointers_plain(self, theirsce: Theirsce):
-												Theirsce specific text extraction, part 1

											
										
										
											2023-01-06 22:39:17 -05:00
+								        pointers_offset = []; texts_offset = []
 								        for opcode in theirsce.walk_code():
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								            if opcode.type == self.string_opcode:
-												Theirsce specific text extraction, part 1

											
										
										
											2023-01-06 22:39:17 -05:00
+								                pointers_offset.append(theirsce.tell() - 2) # Maybe check this later
 								                texts_offset.append(opcode.offset + theirsce.strings_offset)
 								        return pointers_offset, texts_offset
-												Move functions to TOR instead of parent class

											
										
										
											2022-07-16 15:11:15 -04:00
+								    #Convert a bytes object to text using TAGS and TBL in the json file
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								    def bytes_to_text(self, theirsce: Theirsce, offset=-1, end_strings = b"\x00"):
-												Rework tag acquisition and text conversion

											
										
										
											2023-05-16 00:05:37 -05:00
+								        finalText = ""
 								        tags = self.jsonTblTags['TAGS']
 								        chars = self.jsonTblTags['TBL']
-												Move functions to TOR instead of parent class

											
										
										
											2022-07-16 15:11:15 -04:00
+								        if (offset > 0):
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								            theirsce.seek(offset, 0)
-												Move functions to TOR instead of parent class

											
										
										
											2022-07-16 15:11:15 -04:00
-												Rework tag acquisition and text conversion

											
										
										
											2023-05-16 00:05:37 -05:00
+								        b = theirsce.read(1)
 								        while True:
 								            b = theirsce.read(1)
 								            if b == end_strings: break
 								            b = ord(b)
 								            # Custom Encoded Text
 								            if (0x99 <= b <= 0x9F) or (0xE0 <= b <= 0xEB):
 								                c = (b << 8) | theirsce.read_uint8()
 								                finalText += chars.get(c, "{%02X}{%02X}" % (c >> 8, c & 0xFF))
 								                continue
 								            if b == 0x1:
 								                finalText += ("\n")
 								                continue
 								            # ASCII text
 								            if chr(b) in self.PRINTABLE_CHARS:
 								                finalText += chr(b)
 								                continue
 								            # cp932 text
 								            if 0xA0 < b < 0xE0:
 								                finalText += struct.pack("B", b).decode("cp932")
 								                continue
 								            if b == 0x81:
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								                next_b = theirsce.read(1)
-												Move functions to TOR instead of parent class

											
										
										
											2022-07-16 15:11:15 -04:00
+								                if next_b == b"\x40":
 								                    finalText += "　"
 								                else:
 								                    finalText += "{%02X}" % b
 								                    finalText += "{%02X}" % ord(next_b)
-												Rework tag acquisition and text conversion

											
										
										
											2023-05-16 00:05:37 -05:00
+								                continue
 								            # Simple Tags
 								            if 0x3 <= b <= 0xF:
 								                parameter = theirsce.read_uint32()
 								                tag_name = tags.get(b, f"{b:02X}")
 								                tag_param = self.jsonTblTags.get(tag_name.upper(), {}).get(parameter, None)
 								                if tag_param is not None:
 								                    finalText += tag_param
 								                else:
 								                    finalText += f"<{tag_name}:{self.hex2(parameter)}>"
 								                continue
 								            # Variable tags (same as above but using rsce bytecode as parameter)
 								            if 0x13 <= b <= 0x1A:
 								                tag_name = f"unk{b:02X}"
 								                parameter = "".join([f"{c:02X}" for c in theirsce.read_tag_bytes()])
 								                finalText += f"<{tag_name}:{parameter}>"
 								                continue
 								            # None of the above
 								            finalText += "{%02X}" % b
-												Move functions to TOR instead of parent class

											
										
										
											2022-07-16 15:11:15 -04:00
-												Update dump process with speaker nodes

											
										
										
											2023-01-06 22:45:40 -05:00
+								        return finalText
-												Move functions to TOR instead of parent class

											
										
										
											2022-07-16 15:11:15 -04:00
-												Replace previous variable by list_status_insertion

											
										
										
											2022-06-28 21:41:25 -04:00
+								    def get_Node_Bytes(self, entry_node):
-												Create new function get_Node_Bytes

Take a node and return Bytes

											
										
										
											2022-06-28 21:39:01 -04:00
 								        #Grab the fields from the Entry in the XML
 								        status = entry_node.find("Status").text
 								        japanese_text = entry_node.find("JapaneseText").text
 								        english_text = entry_node.find("EnglishText").text
 								        #Use the values only for Status = Done and use English if non empty
 								        final_text = ''
-												Replace previous variable by list_status_insertion

											
										
										
											2022-06-28 21:41:25 -04:00
+								        if (status in self.list_status_insertion):
-												Create new function get_Node_Bytes

Take a node and return Bytes

											
										
										
											2022-06-28 21:39:01 -04:00
+								            final_text = english_text or japanese_text or ''
 								        else:
 								            final_text = japanese_text or ''
-												Update TSS functions

update voiceId

											
										
										
											2022-10-22 21:18:01 -04:00
 								        voiceId_node = entry_node.find("VoiceId")
 								        if (voiceId_node != None):
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
+								            final_text = '<voice:{}>'.format(voiceId_node.text) + final_text
-												Adding destination parameter to the  function

											
										
										
											2022-08-15 12:56:58 -04:00
-												Create new function get_Node_Bytes

Take a node and return Bytes

											
										
										
											2022-06-28 21:39:01 -04:00
+								        #Convert the text values to bytes using TBL, TAGS, COLORS, ...
 								        bytes_entry = self.text_to_bytes(final_text)
 								        return bytes_entry
-												Adding destination parameter to the  function

											
										
										
											2022-08-15 12:56:58 -04:00
+								    def get_New_Theirsce(self, theirsce, scpk_file_name, destination):
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
 								        #To store the new text_offset and pointers to update
 								        new_text_offsets = dict()
 								        #Grab strings_offset for pointers
 								        theirsce.read(12)
 								        strings_offset = struct.unpack("<L", theirsce.read(4))[0]
 								        #Read the XML for the corresponding THEIRSCE
-												Adding destination parameter to the  function

											
										
										
											2022-08-15 12:56:58 -04:00
+								        file = destination +"XML/"+ self.get_file_name(scpk_file_name)+'.xml'
-												Remove Section and Duplicate

											
										
										
											2022-07-04 16:10:43 -04:00
+								        #print("XML : {}".format(self.get_file_name(scpk_file_name)+'.xml'))
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
+								        tree = etree.parse(file)
 								        root = tree.getroot()
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
-												Comments and rename getNewTheirsce

											
										
										
											2022-06-28 21:47:33 -04:00
+								        #Go at the start of the dialog
 								        #Loop on every Entry and reinsert
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
+								        theirsce.seek(strings_offset+1)
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
+								        nodes = [ele for ele in root.iter('Entry') if ele.find('Id').text != "-1"]
 								        nodes = [ele for ele in nodes if ele.find('PointerOffset').text != "-1"]
 								        for entry_node in nodes:
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
+								            #Add the PointerOffset and TextOffset
 								            new_text_offsets[entry_node.find("PointerOffset").text] = theirsce.tell()
-												Replace old code by calling get_Node_Bytes

											
										
										
											2022-06-28 21:43:07 -04:00
+								            #Use the node to get the new bytes
 								            bytes_entry = self.get_Node_Bytes(entry_node)
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
 								            #Write to the file
-												Replace old code by calling get_Node_Bytes

											
										
										
											2022-06-28 21:43:07 -04:00
+								            theirsce.write(bytes_entry + b'\x00')
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Comments and rename getNewTheirsce

											
										
										
											2022-06-28 21:47:33 -04:00
+								        #Update the pointers based on the new text_offset of  the entries
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
+								        for pointer_offset, text_offset in new_text_offsets.items():
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								            pointers_list = pointer_offset.split(",")
 								            new_value = text_offset - strings_offset
 								            for pointer in pointers_list:
 								                theirsce.seek(int(pointer))
-												Change pointers to 2 bytes

											
										
										
											2022-06-28 21:31:01 -04:00
+								                theirsce.write( struct.pack("<H", new_value))
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
 								        return theirsce
 								    #Repack SCPK files for Story
-												Other cleaning

											
										
										
											2022-06-28 21:51:09 -04:00
+								    def pack_Story_File(self, scpk_file_name):
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
 								        #Copy the original SCPK file to the folder used for the new version
-												Other cleaning

											
										
										
											2022-06-28 21:51:09 -04:00
+								        shutil.copy( self.dat_archive_extract + "SCPK/" + scpk_file_name, self.story_XML_patch + "New/" + scpk_file_name)
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								        #Open the original SCPK
-												Other cleaning

											
										
										
											2022-06-28 21:51:09 -04:00
+								        with open( self.dat_archive_extract + "SCPK/" + scpk_file_name, 'r+b') as scpk:
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
+								            #Get nb_files and files_size
 								            scpk.read(4)
 								            scpk.read(4)
 								            nb_files = struct.unpack("<L", scpk.read(4))[0]
 								            scpk.read(4)
 								            file_size_dict = dict()
 								            for i in range(nb_files):
 								                pointer_offset = scpk.tell()
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                file_size = struct.unpack("<L", scpk.read(4))[0]
 								                file_size_dict[pointer_offset] = file_size
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
 								            #Extract each files and append to the final data_final
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								            dataFinal = bytearray()
 								            sizes = []
 								            o = io.BytesIO()
 								            i=0
 								            for pointer_offset, fsize in file_size_dict.items():
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                data_compressed = scpk.read(fsize)
-												Folder structure

Trying to make it more package-y

											
										
										
											2023-05-19 08:07:41 -05:00
+								                if comptolib.is_compressed(data_compressed):
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                    c_type = struct.unpack("<b", data_compressed[:1])[0]
 								                    #print("File {}   size: {}    ctype: {}".format(i, fsize,c_type))
 								                    data_uncompressed = comptolib.decompress_data(data_compressed)
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
 								                    if data_uncompressed[:8] == b"THEIRSCE":
 								                        #Only for debug to have  the original THEIRSCE
 								                        #with open("test_original_comp.theirsce", "wb") as f:
 								                        #    print("Size original: {}".format(len(data_uncompressed)))
 								                        #    f.write(data)
 								                        #with open("test_original.theirsce", "wb") as f:
 								                        #    f.write(data_uncompressed)
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                        #Update THEIRSCE uncompressed file
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
+								                        theirsce = self.get_New_Theirsce(io.BytesIO(data_uncompressed), scpk_file_name, self.story_XML_new)
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
+								                        theirsce.seek(0)
 								                        data_new_uncompressed = theirsce.read()
 								                        data_compressed = comptolib.compress_data(data_new_uncompressed, version=c_type)
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
 								                    else:
 								                        data_compressed = comptolib.compress_data(data_uncompressed, version=c_type)
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                #Updating the header of the SCPK file to adjust the size
 								                new_size = len(data_compressed)
 								                #print("File recomp {}   size: {}    ctype: {}".format(i, new_size,c_type))
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
 								                dataFinal += data_compressed
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                sizes.append(new_size)
 								                i=i+1
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
 								        #Write down the new SCPK from scratch
 								        o.write(b"\x53\x43\x50\x4B\x01\x00\x0F\x00")
 								        o.write(struct.pack("<L", len(sizes)))
 								        o.write(b"\x00" * 4)
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								        for i in range(len(sizes)):
 								            o.write(struct.pack("<L", sizes[i]))
 								        o.write(dataFinal)
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
+								        with open(self.story_XML_patch + "New/" + scpk_file_name, "wb") as f:
 								            f.write(o.getvalue())
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
 								        return o.getvalue()
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Create pack_Skit_File function

											
										
										
											2022-08-15 12:55:30 -04:00
+								    def pack_Skit_File(self, pak2_file):
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
 								        # Copy the original PAK2 file to the folder used for the new version
 								        shutil.copy(self.dat_archive_extract + "PAK2/" + pak2_file, self.skit_XML_patch + "New/" + pak2_file)
-												Create pack_Skit_File function

											
										
										
											2022-08-15 12:55:30 -04:00
+								        pak2_file_path = os.path.join(self.dat_archive_extract, "PAK2", pak2_file)
 								        with open(pak2_file_path,"rb") as f_pak2:
 								            pak2_data = f_pak2.read()
 								        #Create the pak2 object
 								        pak2_obj = pak2lib.get_data(pak2_data)
 								        #Generate the new Theirsce based on the XML and replace the original one
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
+								        theirsce_io = self.get_New_Theirsce(io.BytesIO(pak2_obj.chunks.theirsce), os.path.basename(pak2_file_path).split(".")[0], self.skit_XML_new)
-												Create pack_Skit_File function

											
										
										
											2022-08-15 12:55:30 -04:00
+								        theirsce_io.seek(0)
 								        new_data = theirsce_io.read()
 								        pak2_obj.chunks.theirsce = new_data
 								        self.mkdir(self.skit_XML_patch+ "New")
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
+								        with open(self.skit_XML_patch+ "New/" + pak2_file, "wb") as f2:
 								            f2.write(pak2lib.create_pak2(pak2_obj))
-												Create pack_Skit_File function

											
										
										
											2022-08-15 12:55:30 -04:00
-												Folder structure

Trying to make it more package-y

											
										
										
											2023-05-19 08:07:41 -05:00
+								        return
-												Add a replace flag

Add a replace tag

											
										
										
											2022-12-21 19:10:18 -05:00
 								    def pack_All_Skits(self):
 								        print("Recreating Skits files")
 								        listFiles = [ele for ele in os.listdir(self.skit_XML_patch + "New/")]
 								        for pak2_file in listFiles:
 								            self.pack_Skit_File(pak2_file)
 								            print("Writing file {} ...".format(pak2_file))
 								    def debug_Story_Skits(self, section, file_name, text=False):
 								        if section == "Story":
 								            theirsce = self.get_theirsce_from_scpk(self.dat_archive_extract + 'SCPK/' + self.get_file_name(file_name) + '.scpk')
 								        else:
 								            with open(self.dat_archive_extract + "PAK2/" + file_name.split(".")[0] + '.3.pak2', "rb") as pak:
 								                data = pak.read()
 								            theirsce = io.BytesIO(pak2lib.get_theirsce_from_pak2(data))
-												Fix "Speaker" and voice_id replace

											
										
										
											2023-01-07 22:00:46 -05:00
+								        rsce = Theirsce(path=theirsce)
 								        # pointers_offset, texts_offset = self.extract_Story_Pointers(rsce)
 								        names, lines = self.extract_lines_with_speaker(rsce)
 								        for i, (k, v) in enumerate(names.items(), -1):
 								            names[k] = NameEntry(i, v)
-												Add a replace flag

Add a replace tag

											
										
										
											2022-12-21 19:10:18 -05:00
+								        with open('../{}.theirsce'.format(file_name), 'wb') as f:
 								            f.write(theirsce.getvalue())
 								        text_list = []
 								        if text:
-												Fix "Speaker" and voice_id replace

											
										
										
											2023-01-07 22:00:46 -05:00
+								            text_list = [line.text for line in lines]
-												Add a replace flag

Add a replace tag

											
										
										
											2022-12-21 19:10:18 -05:00
-												Fix "Speaker" and voice_id replace

											
										
										
											2023-01-07 22:00:46 -05:00
+								        df = pd.DataFrame({"Jap_Text": text_list})
-												Add a replace flag

Add a replace tag

											
										
										
											2022-12-21 19:10:18 -05:00
+								        df['Text_Offset'] = df['Text_Offset'].apply(lambda x: hex(x)[2:])
 								        df['Pointers_Offset'] = df['Pointers_Offset'].apply(lambda x: hex(x)[2:])
 								        df.to_excel('../{}.xlsx'.format(self.get_file_name(file_name)), index=False)
-												Move and tweak get pointers into talesTOR

code seemed TOR specific anyway

											
										
										
											2023-05-19 07:45:20 -05:00
-												Rewrite DAT.BIN extraction

											
										
										
											2023-05-19 07:48:01 -05:00
+								    def get_datbin_file_data(self) -> dict[int, int]:
-												Move and tweak get pointers into talesTOR

code seemed TOR specific anyway

											
										
										
											2023-05-19 07:45:20 -05:00
 								        with open(self.elf_original , "rb") as elf:
 								            elf.seek(self.POINTERS_BEGIN, 0)
 								            blob = elf.read(self.POINTERS_END-self.POINTERS_BEGIN)
-												Rewrite DAT.BIN extraction

											
										
										
											2023-05-19 07:48:01 -05:00
+								        pointers = struct.unpack(f"<{len(blob)//4}L", blob)
 								        file_data: dict[int, int] = {}
 								        for c, n in zip(pointers, pointers[1:]):
 								            remainder = c & self.LOW_BITS
 								            start = c & self.HIGH_BITS
 								            end = (n & self.HIGH_BITS) - remainder
 								            file_data[c] = end - start
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
-												Rewrite DAT.BIN extraction

											
										
										
											2023-05-19 07:48:01 -05:00
+								        return file_data
 								    # Extract the file DAT.BIN to the different directories
 								    def extract_main_archive(self) -> None:
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
-												Rewrite DAT.BIN extraction

											
										
										
											2023-05-19 07:48:01 -05:00
+								        print("Extracting DAT bin files...")
 								        with open( self.dat_bin_original, "rb") as f:
 								            for i, (offset, size) in enumerate(tqdm(self.get_datbin_file_data().items(), desc="Extracting files", unit="file")):
-												Basic set of files to extract TOR SCPK files

- Basic example in TOR_Test.py to extract the Scenario files to XML

											
										
										
											2022-01-23 08:25:40 -05:00
+								                # Ignore 0 byte files
-												Rewrite DAT.BIN extraction

											
										
										
											2023-05-19 07:48:01 -05:00
+								                if size == 0:
 								                    continue
 								                f.seek(offset, 0)
 								                data = f.read(size)
 								                if comptolib.is_compressed(data):
 								                    c_type = struct.unpack("<b", data[:1])[0]
 								                    data = comptolib.decompress_data(data)
 								                    extension = self.get_extension(data)
 								                    fname = f"{i:05d}.{c_type}.{extension}"
 								                else:
 								                    extension = self.get_extension(data)
 								                    fname = f"{i:05d}.{extension}"
 								                # TODO: use pathlib for everything
 								                final_path = Path(self.dat_archive_extract) / extension.upper()
 								                final_path.mkdir(parents=True, exist_ok=True)
 								                with open(final_path / fname, "wb") as output:
 								                    output.write(data)
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												fixing 11181.pak3 and adding preparing Menu File function

											
										
										
											2022-03-20 10:10:58 -04:00
-												Capitalization memes

											
										
										
											2023-05-19 08:23:47 -05:00
+								    def pack_main_archive(self):
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
+								        sectors = [0]
 								        remainders = []
 								        buffer = 0
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
 								        # Copy the original SLPS to Disc/New
 								        shutil.copy(self.elf_original, self.elf_new)
-												Remove Section and Duplicate

											
										
										
											2022-07-04 16:10:43 -04:00
-												Mass replace for naming convention adjustmentes

											
										
										
											2022-06-28 21:49:19 -04:00
+								        output_dat_path = self.dat_bin_new
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								        with open(output_dat_path, "wb") as output_dat:
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								            print("Packing files into %s..." % os.path.basename(output_dat_path))
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								            #Make a list with all the files of DAT.bin
 								            file_list = []
-												fixing path

											
										
										
											2022-06-27 19:25:41 -04:00
+								            for path, subdir, filenames in os.walk(self.dat_archive_extract):
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                if len(filenames) > 0:
 								                    file_list.extend( [os.path.join(path,file) for file in filenames])
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
 								            list_test = [os.path.splitext(os.path.basename(ele))[0] for ele in file_list]
 								            previous = -1
 								            dummies = 0
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
-												Folder structure

Trying to make it more package-y

											
										
										
											2023-05-19 08:07:41 -05:00
+								            for file in tqdm(sorted(file_list, key=self.get_file_name)):
-												Modify Pack_Main_Archive to repack all story files

											
										
										
											2022-07-03 19:40:42 -04:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                size = 0
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
+								                remainder = 0
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                current = int(re.search(self.VALID_FILE_NAME, file).group(1))
-												Remove Section and Duplicate

											
										
										
											2022-07-04 16:10:43 -04:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                if current != previous + 1:
 								                    while previous < current - 1:
 								                        remainders.append(remainder)
 								                        buffer += size + remainder
 								                        sectors.append(buffer)
 								                        previous += 1
 								                        dummies += 1
 								                file_name = self.get_file_name(file)
-												Modify Pack_Main_Archive to repack all story files

											
										
										
											2022-07-03 19:40:42 -04:00
 								                if ".scpk" in file:
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
+								                    path = os.path.join(self.story_XML_patch, 'New', '{}.scpk'.format(file_name))
 								                    print(path)
 								                elif ".pak2" in file:
 								                    path = os.path.join(self.skit_XML_patch, 'New', '{}.pak2'.format(file_name))
 								                    print(path)
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                else:
-												Fixing different issues during repacking (#105)

* Fix Reinsertion issues with Pointeroffset

* Add batch files

* fix issues with SCPK and PAK2 not being created in folder New

* Adding Editing and Copy SLPS

* Fixing Tags Unkn + VoiceId in Story
											
										
										
											2023-05-14 16:50:41 -04:00
+								                    path = file
 								                with open(path, "rb") as f2:
 								                    data = f2.read()
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								                #data = f2.read()
 								                comp_type = re.search(self.VALID_FILE_NAME, file).group(2)
 								                if comp_type != None:
 								                    data = comptolib.compress_data(data, version=int(comp_type))
 								                output_dat.write(data)
 								                size = len(data)
 								                #print("file: {}   size: {}".format(file, size))
 								                remainder = 0x40 - (size % 0x40)
 								                if remainder == 0x40:
 								                    remainder = 0
 								                output_dat.write(b"\x00" * remainder)
 								                remainders.append(remainder)
 								                buffer += size + remainder
 								                sectors.append(buffer)
 								                previous += 1
-												Use the SLPS updated for SCPK pointers

											
										
										
											2022-08-05 15:22:45 -04:00
+								        #Use the new SLPS updated and update the pointers for the SCPK
-												Small changes to Tools_Executable and to SLPS update

											
										
										
											2022-09-24 07:33:03 -04:00
+								        with open("../Data/{}/Disc/New/SLPS_254.50".format(self.repo_name), "r+b") as output_elf:
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
+								            output_elf.seek(self.POINTERS_BEGIN)
 								            for i in range(len(sectors) - 1):
 								                output_elf.write(struct.pack("<L", sectors[i] + remainders[i]))
-												Updating Scripts and doing test with TOR

											
										
										
											2022-01-30 19:19:43 -05:00
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
-												Add a replace flag

Add a replace tag

											
										
										
											2022-12-21 19:10:18 -05:00
+								    def pack_All_Story(self):
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
 								        print("Recreating Story files")
-												fixing StoryPath for pack_Story_File

											
										
										
											2022-06-27 19:55:39 -04:00
+								        listFiles = [ele for ele in os.listdir( self.story_XML_patch + "New/")]
-												Other cleaning

											
										
										
											2022-06-28 21:51:09 -04:00
+								        for scpk_file in listFiles:
 								            self.pack_Story_File(scpk_file)
 								            print("Writing file {} ...".format(scpk_file))
-												Updates some scripts

											
										
										
											2022-02-18 15:42:52 -05:00
 								    def insert_All(self):
 								        #Updates SCPK based on XMLs data
-												Capitalization memes

											
										
										
											2023-05-19 08:23:47 -05:00
+								        self.pack_main_archive()