Diddy-Kong-Racing/tools/python/rodata_move.py

import os
import re
import argparse
from file_util import FileUtil

C_RODATA_REGEX = r"/[*]+[ ]*[.]rodata[ ]*[*]+/(?:.*\n)+/[*]{33}/"
C_STRING_REGEX = r"const[ ]+char[ ]+(D_[0-9A-Fa-f]{8})[[][^]]*[]][ ]*=[ ]*(\"[^\"]*\")[ ]*;"
C_FLOAT_REGEX = r".*(D_[0-9A-Fa-f]{8})[ ]*=[ ]*{[ ]*([-]?[0-9]+.[0-9]+)[f]?[ ]*}.*"
C_JMPTABLE_REGEX = r"(?:(?:D_)|0x)[0-9A-Fa-f]{8}"

# Debugging variables
NO_OUTPUT = False               # If True, then no files will be changed.
ONLY_THE_FIRST_ASM_FILE = False # If True, then only one asm file will be changed.

ASM_DIRECTORY = 'asm/non_matchings/'
#ASM_DIRECTORY = 'lib/asm/non_matchings/'

def getMatches(string, regex):
    out = []
    matches = re.finditer(regex, string, re.MULTILINE)
    for matchNum, match in enumerate(matches, start=1):
        start = match.start()
        end = match.end()
        out.append((string[start:end], match.groups(), (start, end)))
    return out

def getLineType(line):
    if line.lower().startswith('const char '):
        return 1 # Literal string
    elif line.lower().startswith('const floatliteral '):
        return 2 # Literal float
    elif line.lower().startswith('const doubleliteral '):
        return 3 # Literal double
    elif line.lower().startswith('};'):
        return 4 # Jump Table bottom
    elif line.lower().startswith('const u32 '):
        return 5 # Jump Table top
    else:
        return 0

def getAsmFileReferenceForLabel(filename, label):
    possibleFiles = []
    search = os.popen('fgrep -r "' + label + '"').read().split('\n')
    for line in search:
        if ASM_DIRECTORY in line:
            asmfilePath = line.split(':')[0]
            if asmfilePath not in possibleFiles:
                possibleFiles.append(asmfilePath)
    if len(possibleFiles) > 1:
        raise Exception('Cannot determine asm file for label: ' + label)
    elif len(possibleFiles) == 0:
        raise Exception(label + ' has no asm files associated with it!')
    return possibleFiles[0]

def adjustAsmFile(asmFilepath, refs):
    with open(asmFilepath, 'r') as inFile:
        asmFile = inFile.read()
        if '.late_rodata' in asmFile:
            raise Exception('Cannot add to file "' + asmFilepath + '" since it already has .late_rodata in it!')
        asmLines = asmFile.split('\n')
        replaces = []

        rodata = ''
        lateRodata = ''

        for index, ref in enumerate(refs):
            if ref[0] == 'asciz':
                rodata += 'glabel ' + ref[1] + '\n'
                rodata += '.' + ref[0] + ' ' + ref[2] + '\n'
                strEnd = int(ref[1][-1:], 16) + len(ref[2].replace('\\', '')) - 2 + 1
                if strEnd % 4 != 0:
                    rodata += '.ascii "' + ('\\0' * (4 - (strEnd % 4))) + '" # padding\n'
            elif ref[0] == 'double' or ref[0] == 'float':
                lateRodata += 'glabel ' + ref[1] + '\n'
                lateRodata += '.' + ref[0] + ' ' + ref[2] + '\n'
            elif ref[0] == 'table':
                replaces.append((ref[1], 'jpt' + ref[1][1:]))
                tblWords = '.word L' + ref[2][0]
                for word in ref[2][1:]:
                    tblWords += ', L' + word
                doneWords = []
                for word in ref[2]:
                    if word in doneWords:
                        continue
                    for i in range(0, len(asmLines)):
                        line = asmLines[i]
                        if line.startswith('/* '):
                            lineParts = line.split(' ')
                            if lineParts[2] == word:
                                asmLines.insert(i, 'glabel L' + word)
                                break
                    doneWords.append(word)
                lateRodata += 'glabel ' + ref[1] + '\n'
                lateRodata += tblWords + '\n'
            if index < len(refs) - 1 and ref[0] != 'double' and refs[index + 1][0] == 'double':
                addPadding = False
                if ref[0] == 'float':
                    addPadding = ref[1].endswith('0') or ref[1].endswith('8')
                elif ref[0] == 'table':
                    tableSizeAlignment = len(ref[2]) % 4
                    if ref[1].endswith('0') or ref[1].endswith('8'):
                        addPadding = tableSizeAlignment == 1 or tableSizeAlignment == 3
                    else:
                        addPadding = tableSizeAlignment == 0 or tableSizeAlignment == 2
                if addPadding:
                    lateRodata += '.word 0 # Padding\n' # Insert padding before the next double.
        outData = ''
        if len(rodata) > 0:
            outData += '.rdata\n' + rodata + '\n'
        if len(lateRodata) > 0:
            outData += '.late_rodata\n' + lateRodata + '\n'
        asmLines.insert(0, outData + '.text');
        asmOut = '\n'.join(asmLines)
        for rep in replaces:
            asmOut = asmOut.replace(rep[0], rep[1])
        # print(asmOut)
        if not NO_OUTPUT:
            with open(asmFilepath, 'w') as outFile:
                outFile.write(asmOut)
    #print(asmFilepath)
    #print(refs)

def convertFile(cFilepath):
    filename = cFilepath[cFilepath.rindex('/') + 1:-2]
    print(filename)
    with open(cFilepath, 'r') as inFile:
        cFile = inFile.read()
        match = getMatches(cFile, C_RODATA_REGEX)
        if len(match) == 0:
            print('No .rodata in file "' + cFilepath + '"')
            return
        rodataPart = match[0][0]
        rodataSection = match[0][2]
        rodataLines = rodataPart.split('\n')
        tableBottom = -1
        hasProcessed = False
        stoppedPremature = False
        curLine = len(rodataLines) - 1
        bottomLine = curLine
        asmData = []
        for line in reversed(rodataLines):
            lineType = getLineType(line)
            if lineType == 0:
                pass
            elif lineType == 1: # Literal string
                try:
                    strMatch = getMatches(line, C_STRING_REGEX)[0]
                    strLabel = strMatch[1][0]
                    strValue = strMatch[1][1]
                    # Check if the string has a reference somewhere
                    getAsmFileReferenceForLabel(filename, strLabel)
                    hasProcessed = True
                    bottomLine = curLine
                    asmData.insert(0, ('asciz', strLabel, strValue, bottomLine))
                except:
                    print('Currently cannot process literal string: ')
                    print(line)
                    stoppedPremature = True
                    break
            elif lineType == 2: # Literal float
                floatMatch = getMatches(line, C_FLOAT_REGEX)[0]
                floatLabel = floatMatch[1][0]
                floatValue = floatMatch[1][1]
                if floatValue == '0.0':
                    print('File boundary hit!')
                    stoppedPremature = True
                    break
                hasProcessed = True
                bottomLine = curLine
                asmData.insert(0, ('float', floatLabel, floatValue, bottomLine))
            elif lineType == 3: # Literal double
                floatMatch = getMatches(line, C_FLOAT_REGEX)[0]
                floatLabel = floatMatch[1][0]
                floatValue = floatMatch[1][1]
                if floatValue == '0.0':
                    print('File boundary hit!')
                    stoppedPremature = True
                    break
                hasProcessed = True
                bottomLine = curLine
                asmData.insert(0, ('double', floatLabel, floatValue, bottomLine))
            elif lineType == 4: # Jump table bottom
                tableBottom = curLine
            elif lineType == 5: # Jump table top
                tblMatch = getMatches('\n'.join(rodataLines[curLine:tableBottom+1]), C_JMPTABLE_REGEX)
                tblLabel = tblMatch[0][0]
                tblValues = []
                for val in range(1, len(tblMatch)):
                    tblValues.append(tblMatch[val][0][2:])
                hasProcessed = True
                bottomLine = curLine
                asmData.insert(0, ('table', tblLabel, tblValues, bottomLine))
                tableBottom = -1
            else:
                print(line)
            curLine -= 1
        asmRefs = {}
        asmBottomLine = 999999
        for i in reversed(range(0, len(asmData))):
            data = asmData[i]
            try:
                ref = getAsmFileReferenceForLabel(filename, data[1])
                if ref not in asmRefs:
                    asmRefs[ref] = []
                asmRefs[ref].append(data)
                asmBottomLine = min(asmBottomLine, data[3])
            except Exception as e:
                # raise e
                print(e)
                if asmBottomLine != 999999:
                    print(bottomLine, asmBottomLine)
                    bottomLine = asmBottomLine
                stoppedPremature = True
                break
        for asmFilepath in asmRefs:
            print('Processing: ' + asmFilepath)
            asmRefs[asmFilepath].reverse()
            asmBottomLine = 999999
            adjustAsmFile(asmFilepath, asmRefs[asmFilepath])
            for ref in asmRefs[asmFilepath]:
                asmBottomLine = min(asmBottomLine, ref[3])
            if ONLY_THE_FIRST_ASM_FILE:
                print(bottomLine, asmBottomLine)
                bottomLine = asmBottomLine
                stoppedPremature = True
                break
            # print(asmRefs)
        if hasProcessed and not NO_OUTPUT:
            newRodata = '\n'.join(rodataLines[0:bottomLine] + rodataLines[-2:])
            newFileText = cFile[:rodataSection[0]] + (newRodata if stoppedPremature else '') + cFile[rodataSection[1]:]
            with open(cFilepath, 'w') as outFile:
                outFile.write(newFileText)
                print('Changes were made to ' + cFilepath)
        else:
            print('No changes were made to ' + cFilepath)


parser = argparse.ArgumentParser(description="")
parser.add_argument("file", help=".c file that contains rodata to move.")
parser.add_argument("-s", "--single", help="Only do one ASM function in the file", action='store_true')
args = parser.parse_args()

if args.single is not None and args.single:
    ONLY_THE_FIRST_ASM_FILE = True

if args.file.endswith('.c'):
    convertFile(args.file)
else:
    raise Exception('The file must be a .c file.')

# files = FileUtil.get_filenames_from_directory('src', ('.c',))
#