Files

308 lines
10 KiB
Python
Raw Permalink Normal View History

import logging
2021-09-16 15:31:15 -04:00
import subprocess
2024-01-16 22:42:43 -07:00
import shlex
2022-07-03 10:43:59 -04:00
from pathlib import Path
from typing import List, Dict, Any
from functools import lru_cache
import diff as asm_differ
2022-03-13 13:33:19 -04:00
from coreapp.platforms import DUMMY, Platform
2022-11-14 03:33:08 +00:00
from coreapp.flags import ASMDIFF_FLAG_PREFIX
from django.conf import settings
2022-03-13 13:33:19 -04:00
from .compiler_wrapper import DiffResult, PATH
2022-02-17 18:47:04 +00:00
from .error import AssemblyError, DiffError, NmError, ObjdumpError
from .models.scratch import Assembly
from .sandbox import Sandbox
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Upper bound on a function's size in disassembly lines, passed to the
# asm-differ Config in create_config; the byte limit there is derived from it
# as MAX_FUNC_SIZE_LINES * 4.
MAX_FUNC_SIZE_LINES = 25000
2021-08-01 02:42:11 +09:00
class DiffWrapper:
@staticmethod
def filter_objdump_flags(compiler_flags: str) -> str:
    """Strip base objdump options from a whitespace-separated flag string.

    Flags that are already part of the base objdump configs only clutter the
    compiler settings field, so they are dropped. The surviving flags keep
    their original order and are re-joined with single spaces.
    """
    # TODO: use cfg for this?
    # Flags that are dropped on their own.
    standalone = {
        "--disassemble",
        "--disassemble-zeroes",
        "--line-numbers",
        "--reloc",
    }
    # Flags that are dropped together with their value, whether the value is
    # the following token or fused onto the flag. Currently there are none.
    valued: set[str] = set()
    valued_prefixes = tuple(valued)

    kept = []
    tokens = iter(compiler_flags.split())
    for token in tokens:
        if token in standalone:
            continue
        if token in valued:
            # Swallow the flag's value as well.
            next(tokens, None)
            continue
        if token.startswith(valued_prefixes):
            continue
        kept.append(token)
    return " ".join(kept)
2021-08-01 03:35:42 +09:00
@staticmethod
def create_config(
    arch: asm_differ.ArchSettings, diff_flags: List[str]
) -> asm_differ.Config:
    """Assemble the asm-differ ``Config`` for the given architecture.

    Three entries of ``diff_flags`` influence the result:

    * ``-DIFFno_show_rodata_refs`` turns ``show_rodata_refs`` off,
    * ``-DIFFdifflib`` selects the ``difflib`` algorithm instead of
      ``levenshtein``,
    * ``-DIFFdiff_function_symbols`` turns ``diff_function_symbols`` on.

    Every other option is fixed.
    """

    def requested(flag: str) -> bool:
        return flag in diff_flags

    # Build/objdump options
    build_options = dict(
        diff_obj=True,
        file="",
        make=False,
        source_old_binutils=True,
        diff_section=".text",
        inlines=False,
        max_function_size_lines=MAX_FUNC_SIZE_LINES,
        max_function_size_bytes=MAX_FUNC_SIZE_LINES * 4,
    )

    # Display options
    display_options = dict(
        formatter=asm_differ.PythonFormatter(arch_str=arch.name),
        diff_mode=asm_differ.DiffMode.NORMAL,
        base_shift=0,
        skip_lines=0,
        compress=None,
        show_branches=True,
        show_line_numbers=False,
        show_source=False,
        stop_at_ret=False,
        ignore_large_imms=False,
        ignore_addr_diffs=True,
        algorithm="difflib" if requested("-DIFFdifflib") else "levenshtein",
        reg_categories={},
        show_rodata_refs=not requested("-DIFFno_show_rodata_refs"),
        diff_function_symbols=requested("-DIFFdiff_function_symbols"),
    )

    return asm_differ.Config(arch=arch, **build_options, **display_options)
@staticmethod
def get_objdump_target_function_flags(
    sandbox: Sandbox, target_path: Path, platform: Platform, label: str
) -> List[str]:
    """Return the objdump flags that point the disassembly at ``label``.

    * With an empty ``label`` the result is ``--start-address=0``.
    * If the platform's objdump supports ``--disassemble=<symbol>``, that
      flag is returned.
    * Otherwise the platform's nm command is run on ``target_path`` inside
      ``sandbox`` and the address of the matching symbol is returned as
      ``--start-address=<addr>``; ``--start-address=0`` is the fallback when
      the symbol is not listed.

    Raises:
        NmError: if the platform has no nm command, or nm times out or
            exits with an error.
    """
    fallback = ["--start-address=0"]

    if not label:
        return fallback

    if platform.supports_objdump_disassemble:
        return [f"--disassemble={label}"]

    nm_cmd = platform.nm_cmd
    if not nm_cmd:
        raise NmError(f"No nm command for {platform.id}")

    try:
        command = [nm_cmd, sandbox.rewrite_path(target_path)]
        environment = {
            "PATH": PATH,
            "COMPILER_BASE_PATH": sandbox.rewrite_path(settings.COMPILER_BASE_PATH),
        }
        nm_proc = sandbox.run_subprocess(
            command,
            shell=True,
            env=environment,
            timeout=settings.OBJDUMP_TIMEOUT_SECONDS,
        )
    except subprocess.TimeoutExpired:
        raise NmError("Timeout expired")
    except subprocess.CalledProcessError as e:
        raise NmError.from_process_error(e)

    nm_output = nm_proc.stdout
    if not nm_output:
        return fallback

    # e.g.
    # 00000000 T osEepromRead
    #          U osMemSize
    for entry in nm_output.splitlines():
        fields = entry.split()
        if len(fields) != 3:
            # Lines without all three columns (e.g. undefined symbols) carry
            # no address to use.
            continue
        address, _symbol_type, symbol = fields
        if symbol == label:
            return [f"--start-address={int(address, 16)}"]

    return fallback
@staticmethod
def parse_objdump_flags(diff_flags: List[str]) -> List[str]:
    """Select the entries of ``diff_flags`` that are recognised objdump flags.

    A flag is kept when it is exactly ``-Mno-aliases`` or ``--reloc``, or when
    it starts with ``-Mreg-names=`` or ``--disassemble=``. Order is preserved.
    """
    exact_flags = ("-Mno-aliases", "--reloc")
    flag_prefixes = ("-Mreg-names=", "--disassemble=")
    return [
        candidate
        for candidate in diff_flags
        if candidate in exact_flags or candidate.startswith(flag_prefixes)
    ]
# NOTE: @staticmethod must be the outermost decorator. Stacking lru_cache on
# top of a staticmethod object only works where staticmethod objects are
# callable (Python 3.10+) and does not yield a real static method.
@staticmethod
@lru_cache()
def run_objdump(
    target_data: bytes,
    platform: Platform,
    arch_flags: tuple[str, ...],
    label: str,
    objdump_flags: tuple[str, ...],
) -> str:
    """Disassemble ``target_data`` with the platform's objdump.

    The object is written to ``out.s`` inside a fresh ``Sandbox`` and objdump
    is run on it there. Results are memoised with ``lru_cache``, which is why
    the flag sequences are taken as tuples: every argument has to be hashable.

    Args:
        target_data: raw bytes of the object to disassemble.
        platform: supplies the objdump command and the platform id.
        arch_flags: architecture-specific flags appended to the command.
        label: symbol to disassemble; used only when ``objdump_flags`` does
            not already carry a ``--disassemble=<symbol>`` entry.
        objdump_flags: extra flags; entries starting with
            ``ASMDIFF_FLAG_PREFIX`` are ignored.

    Returns:
        The stdout of the objdump process.

    Raises:
        ObjdumpError: if the platform has no objdump command, or objdump
            times out or exits with an error.
        NmError: propagated from ``get_objdump_target_function_flags``.
    """
    # Fail fast, before creating a sandbox or running nm.
    objdump_cmd = platform.objdump_cmd
    if not objdump_cmd:
        raise ObjdumpError(f"No objdump command for {platform.id}")

    flags = [
        flag for flag in objdump_flags if not flag.startswith(ASMDIFF_FLAG_PREFIX)
    ]
    flags += [
        "--disassemble-zeroes",
        "--line-numbers",
    ]

    # --reloc can cause issues with DOS disasm?
    if platform.id != "msdos":
        flags.append("--reloc")

    with Sandbox() as sandbox:
        target_path = sandbox.path / "out.s"
        target_path.write_bytes(target_data)

        # If the flags contain `--disassemble=[symbol]`,
        # use that instead of `--start-address`.
        if not any(flag.startswith("--disassemble=") for flag in flags):
            flags.append("--disassemble")
            flags += DiffWrapper.get_objdump_target_function_flags(
                sandbox, target_path, platform, label
            )

        flags += arch_flags

        try:
            objdump_proc = sandbox.run_subprocess(
                objdump_cmd.split()
                + [shlex.quote(flag) for flag in flags]
                + [sandbox.rewrite_path(target_path)],
                shell=True,
                env={
                    "PATH": PATH,
                    "COMPILER_BASE_PATH": sandbox.rewrite_path(
                        settings.COMPILER_BASE_PATH
                    ),
                },
                timeout=settings.OBJDUMP_TIMEOUT_SECONDS,
            )
        except subprocess.TimeoutExpired as e:
            raise ObjdumpError("Timeout expired") from e
        except subprocess.CalledProcessError as e:
            raise ObjdumpError.from_process_error(e) from e

        out = objdump_proc.stdout

    return out
2021-08-26 14:49:33 -04:00
@staticmethod
def get_dump(
    elf_object: bytes,
    platform: Platform,
    diff_label: str,
    config: asm_differ.Config,
    diff_flags: List[str],
) -> str:
    """Run objdump on ``elf_object`` and preprocess the output for asm-differ.

    Args:
        elf_object: raw bytes of the object to dump; must not be empty.
        platform: platform whose objdump is used.
        diff_label: symbol to disassemble.
        config: asm-differ config; its ``arch.arch_flags`` are forwarded to
            objdump and the config itself to the preprocessing step.
        diff_flags: extra objdump flags.

    Returns:
        The preprocessed objdump output.

    Raises:
        AssemblyError: if ``elf_object`` is empty.
        ObjdumpError: if objdump produced no output (or failed in
            ``run_objdump``).
        DiffError: if preprocessing the dump fails for any reason.
    """
    if not elf_object:
        raise AssemblyError("Asm empty")

    # run_objdump is cached, so the flag lists are passed as hashable tuples.
    basedump = DiffWrapper.run_objdump(
        elf_object,
        platform,
        tuple(config.arch.arch_flags),
        diff_label,
        tuple(diff_flags),
    )
    if not basedump:
        raise ObjdumpError("Error running objdump")

    # Preprocess the dump
    try:
        return asm_differ.preprocess_objdump_out(None, elf_object, basedump, config)
    except Exception as e:
        # A single handler suffices: AssertionError is a subclass of
        # Exception and was handled identically.
        logger.exception("Error preprocessing dump: %s", e)
        raise DiffError(f"Error preprocessing dump: {e}") from e
@staticmethod
def run_diff(
    base_lines: list[str], my_lines: list[str], config: Any
) -> Dict[str, Any]:
    """Diff two processed line lists and return the formatter's raw result.

    ``base_lines`` and ``my_lines`` are diffed with asm-differ, the diff is
    aligned, and the aligned table is rendered by ``config.formatter``.
    """
    diff_rows = asm_differ.do_diff(base_lines, my_lines, config)
    # The same diff is supplied for both inputs of align_diffs.
    aligned_table = asm_differ.align_diffs(diff_rows, diff_rows, config)
    formatter = config.formatter
    return formatter.raw(aligned_table)
@staticmethod
def diff(
    target_assembly: Assembly,
    platform: Platform,
    diff_label: str,
    compiled_elf: bytes,
    diff_flags: List[str],
) -> DiffResult:
    """Diff the target assembly against a freshly compiled object.

    Args:
        target_assembly: assembly whose ``elf_object`` is the diff base.
        platform: platform supplying the arch and the objdump/nm tooling.
        diff_label: symbol to diff.
        compiled_elf: raw bytes of the compiled object to compare.
        diff_flags: user-supplied flags; objdump flags are extracted from
            them and ``-DIFF*`` entries configure asm-differ.

    Returns:
        A ``DiffResult`` wrapping the formatter output. When a
        ``--disassemble=`` flag was given and the base has lines but the
        compiled side has none, ``errors`` carries a warning.

    Raises:
        DiffError: if dumping the target assembly or running asm-differ
            fails.
    """
    if platform == DUMMY:
        # Todo produce diff for dummy
        return DiffResult({"rows": ["a", "b"]})

    try:
        arch = asm_differ.get_arch(platform.arch or "")
    except ValueError:
        logger.error(
            "Unsupported arch: %s. Continuing assuming mips", platform.arch
        )
        arch = asm_differ.get_arch("mips")

    objdump_flags = DiffWrapper.parse_objdump_flags(diff_flags)
    config = DiffWrapper.create_config(arch, diff_flags)

    try:
        basedump = DiffWrapper.get_dump(
            bytes(target_assembly.elf_object),
            platform,
            diff_label,
            config,
            objdump_flags,
        )
    except Exception as e:
        logger.exception("Error dumping target assembly: %s", e)
        raise DiffError(f"Error dumping target assembly: {e}") from e

    try:
        mydump = DiffWrapper.get_dump(
            compiled_elf, platform, diff_label, config, objdump_flags
        )
    except Exception:
        # Deliberate best effort: a failure to dump the compiled object is
        # not fatal; the target is diffed against an empty dump instead.
        mydump = ""

    try:
        base_lines = asm_differ.process(basedump, config)
        my_lines = asm_differ.process(mydump, config)
        result = DiffWrapper.run_diff(base_lines, my_lines, config)
        diff_result = DiffResult(result)
        disassembles_symbol = any(
            flag.startswith("--disassemble=") for flag in objdump_flags
        )
        if disassembles_symbol and base_lines and not my_lines:
            diff_result.errors = (
                "Warning: No diff rows. Is your function signature correct?"
            )
    except Exception as e:
        logger.exception("Error running asm-differ: %s", e)
        raise DiffError(f"Error running asm-differ: {e}") from e

    return diff_result