// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"

#include <algorithm>
#include <set>

#include "ext/xxhash.h"
#include "Common/Profiler/Profiler.h"
#include "Common/Log.h"
#include "Common/Serialize/Serializer.h"
#include "Common/StringUtils.h"
#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HLE/sceKernelMemory.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/IR/IRRegCache.h"
#include "Core/MIPS/IR/IRInterpreter.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/IR/IRNativeCommon.h"
#include "Core/MIPS/JitCommon/JitCommon.h"
#include "Core/Reporting.h"
#include "Common/TimeUtil.h"
#include "Core/MIPS/MIPSTracer.h"

namespace MIPSComp {

IRJit::IRJit(MIPSState *mipsState, bool actualJit) : frontend_(mipsState->HasDefaultPrefix()), mips_(mipsState), blocks_(actualJit) {
	// u32 size = 128 * 1024;
	InitIR();

	compileToNative_ = actualJit;

	// If this IRJit instance will be used to drive a "JIT using IR", don't optimize for interpretation.
	jo.optimizeForInterpreter = !actualJit;

	IROptions opts{};
	opts.disableFlags = g_Config.uJitDisableFlags;
#if PPSSPP_ARCH(RISCV64)
	// Assume RISC-V always has very slow unaligned memory accesses.
	opts.unalignedLoadStore = false;
	opts.unalignedLoadStoreVec4 = true;
	opts.preferVec4 = cpu_info.RiscV_V;
#elif PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64)
	opts.unalignedLoadStore = (opts.disableFlags & (uint32_t)JitDisable::LSU_UNALIGNED) == 0;
	opts.unalignedLoadStoreVec4 = true;
	opts.preferVec4 = true;
#else
	opts.unalignedLoadStore = (opts.disableFlags & (uint32_t)JitDisable::LSU_UNALIGNED) == 0;
	// TODO: Could allow on x86 pretty easily...
	opts.unalignedLoadStoreVec4 = false;
	opts.preferVec4 = true;
#endif
	opts.optimizeForInterpreter = jo.optimizeForInterpreter;

	frontend_.SetOptions(opts);
}

IRJit::~IRJit() {
}

void IRJit::DoState(PointerWrap &p) {
	frontend_.DoState(p);
}

void IRJit::UpdateFCR31() {
}

void IRJit::ClearCache() {
	INFO_LOG(Log::JIT, "IRJit: Clearing the block cache!");
	blocks_.Clear();
}

void IRJit::InvalidateCacheAt(u32 em_address, int length) {
	std::vector<int> numbers = blocks_.FindInvalidatedBlockNumbers(em_address, length);
	if (numbers.empty()) {
		return;
	}
	DEBUG_LOG(Log::JIT, "Invalidating IR block cache at %08x (%d bytes): %d blocks", em_address, length, (int)numbers.size());
	for (int block_num : numbers) {
		auto block = blocks_.GetBlock(block_num);
		// TODO: We are invalidating a lot of blocks that are already invalid (yu gi oh).
		// INFO_LOG(Log::JIT, "Block at %08x invalidated: valid: %d", block->GetOriginalStart(), block->IsValid());

		// If we're a native JIT (IR->JIT, not just IR interpreter), we write native offsets into the blocks.
		int cookie = compileToNative_ ? block->GetNativeOffset() : block->GetIRArenaOffset();
		blocks_.RemoveBlockFromPageLookup(block_num);
		block->Destroy(cookie);
	}
}
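// How the "cookie" above works: when a block is finalized, its first MIPS opcode in
// memory is overwritten with an emuhack word whose low 24 bits identify the block.
// An illustrative sketch of the encoding (the constants and masks are real, the
// lines themselves are not code from this file):
//
//   u32 emuhack = MIPS_EMUHACK_OPCODE | cookie;  // cookie must fit in 24 bits
//   u32 cookie  = emuhack & 0x00FFFFFF;          // recovered at dispatch time
//
// The cookie is the block's native code offset when compileToNative_ is set,
// otherwise its IR arena offset (see IRBlockCache::FindByCookie below). Destroy()
// is handed the same cookie so it can verify the emuhack word before restoring the
// original opcode.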
void IRJit::Compile(u32 em_address) {
	_dbg_assert_(compilerEnabled_);

	PROFILE_THIS_SCOPE("jitc");

	if (g_Config.bPreloadFunctions) {
		// Look to see if we've preloaded this block.
		int block_num = blocks_.FindPreloadBlock(em_address);
		if (block_num != -1) {
			IRBlock *block = blocks_.GetBlock(block_num);
			// Okay, let's link and finalize the block now.
			int cookie = compileToNative_ ? block->GetNativeOffset() : block->GetIRArenaOffset();
			block->Finalize(cookie);
			if (block->IsValid()) {
				// Success, we're done.
				FinalizeNativeBlock(&blocks_, block_num);
				return;
			}
		}
	}

	std::vector<IRInst> instructions;
	u32 mipsBytes;
	if (!CompileBlock(em_address, instructions, mipsBytes, false)) {
		// Ran out of block numbers - need to reset.
		ERROR_LOG(Log::JIT, "Ran out of block numbers, clearing cache");
		ClearCache();
		CompileBlock(em_address, instructions, mipsBytes, false);
	}

	if (frontend_.CheckRounding(em_address)) {
		// Our assumptions are all wrong so it's clean-slate time.
		ClearCache();
		CompileBlock(em_address, instructions, mipsBytes, false);
	}
}

// WARNING! This can be called from IRInterpret / the JIT, through the function preload stuff!
bool IRJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload) {
	_dbg_assert_(compilerEnabled_);

	frontend_.DoJit(em_address, instructions, mipsBytes, preload);
	if (instructions.empty()) {
		_dbg_assert_(preload);
		// We return true when preloading so it doesn't abort.
		return preload;
	}

	int block_num = blocks_.AllocateBlock(em_address, mipsBytes, instructions);
	if ((block_num & ~MIPS_EMUHACK_VALUE_MASK) != 0) {
		WARN_LOG(Log::JIT, "Failed to allocate block for %08x (%d instructions)", em_address, (int)instructions.size());
		// Out of block numbers. Caller will handle.
		return false;
	}

	IRBlock *b = blocks_.GetBlock(block_num);
	if (preload || mipsTracer.tracing_enabled) {
		// Hash, then only update page stats, don't link yet.
		// TODO: Should we always hash? Then we can reuse blocks.
		b->UpdateHash();
	}

	if (!CompileNativeBlock(&blocks_, block_num, preload))
		return false;

	if (mipsTracer.tracing_enabled) {
		mipsTracer.prepare_block(b, blocks_);
	}

	// Updates stats, also patches the first MIPS instruction into an emuhack if 'preload == false'.
	blocks_.FinalizeBlock(block_num, preload);
	if (!preload)
		FinalizeNativeBlock(&blocks_, block_num);
	return true;
}
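// The overall flow of CompileBlock() above, summarized (all names are from this file;
// CompileNativeBlock/FinalizeNativeBlock are virtual hooks that are expected to only
// do real work when a native backend drives this IRJit):
//
//   frontend_.DoJit(...)        // translate MIPS -> IR instructions
//   blocks_.AllocateBlock(...)  // append the IR to the arena, get a block number
//   CompileNativeBlock(...)     // optionally generate native code from the IR
//   blocks_.FinalizeBlock(...)  // register in the page map; patch the emuhack
//                               // word into RAM unless preloading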
void IRJit::CompileFunction(u32 start_address, u32 length) {
	_dbg_assert_(compilerEnabled_);

	PROFILE_THIS_SCOPE("jitc");

	// Note: we don't actually write emuhacks yet, so we can validate hashes.
	// This way, if the game changes the code afterward, we'll catch it even without icache invalidation.

	// We may go up and down from branches, so track all block starts done here.
	std::set<u32> doneAddresses;
	std::vector<u32> pendingAddresses;
	pendingAddresses.reserve(16);
	pendingAddresses.push_back(start_address);
	while (!pendingAddresses.empty()) {
		u32 em_address = pendingAddresses.back();
		pendingAddresses.pop_back();

		// To be safe, also check if a real block is there. This can be a runtime module load.
		u32 inst = Memory::ReadUnchecked_U32(em_address);
		if (MIPS_IS_RUNBLOCK(inst) || doneAddresses.find(em_address) != doneAddresses.end()) {
			// Already compiled this address.
			continue;
		}

		std::vector<IRInst> instructions;
		u32 mipsBytes;
		if (!CompileBlock(em_address, instructions, mipsBytes, true)) {
			// Ran out of block numbers - let's hope there's no more code it needs to run.
			// Will flush when actually compiling.
			ERROR_LOG(Log::JIT, "Ran out of block numbers while compiling function");
			return;
		}

		doneAddresses.insert(em_address);

		for (const IRInst &inst : instructions) {
			u32 exit = 0;

			switch (inst.op) {
			case IROp::ExitToConst:
			case IROp::ExitToConstIfEq:
			case IROp::ExitToConstIfNeq:
			case IROp::ExitToConstIfGtZ:
			case IROp::ExitToConstIfGeZ:
			case IROp::ExitToConstIfLtZ:
			case IROp::ExitToConstIfLeZ:
			case IROp::ExitToConstIfFpTrue:
			case IROp::ExitToConstIfFpFalse:
				exit = inst.constant;
				break;

			case IROp::ExitToPC:
			case IROp::Break:
				// Don't add any, we'll do block end anyway (for jal, etc.)
				exit = 0;
				break;

			default:
				exit = 0;
				break;
			}

			// Only follow jumps internal to the function.
			if (exit != 0 && exit >= start_address && exit < start_address + length) {
				// Even if it's a duplicate, we check at loop start.
				pendingAddresses.push_back(exit);
			}
		}

		// Also include the code after the block, for jal returns.
		if (em_address + mipsBytes < start_address + length) {
			pendingAddresses.push_back(em_address + mipsBytes);
		}
	}
}

void IRJit::RunLoopUntil(u64 globalticks) {
	PROFILE_THIS_SCOPE("jit");

	// ApplyRoundingMode(true);
	// IR Dispatcher
	while (true) {
		// RestoreRoundingMode(true);
		CoreTiming::Advance();
		// ApplyRoundingMode(true);
		if (coreState != 0) {
			break;
		}

		MIPSState *mips = mips_;

#ifdef _DEBUG
		compilerEnabled_ = false;
#endif
		while (mips->downcount >= 0) {
			u32 inst = Memory::ReadUnchecked_U32(mips->pc);
			u32 opcode = inst & 0xFF000000;
			if (opcode == MIPS_EMUHACK_OPCODE) {
				u32 offset = inst & 0x00FFFFFF;  // Alternatively, inst - opcode
				const IRInst *instPtr = blocks_.GetArenaPtr() + offset;
				// The first op is always Downcount (except when using breakpoints), handled
				// here to save one dispatch inside IRInterpret.
				// This branch is very cpu-branch-predictor-friendly so this still beats the dispatch.
				if (instPtr->op == IROp::Downcount) {
					mips->downcount -= instPtr->constant;
					instPtr++;
				}
#ifdef IR_PROFILING
				IRBlock *block = blocks_.GetBlock(blocks_.GetBlockNumFromOffset(offset));
				Instant start = Instant::Now();
				mips->pc = IRInterpret(mips, instPtr);
				int64_t elapsedNanos = start.ElapsedNanos();
				block->profileStats_.executions += 1;
				block->profileStats_.totalNanos += elapsedNanos;
#else
				mips->pc = IRInterpret(mips, instPtr);
#endif
				// Note: this will "jump to zero" on a badly constructed block missing exits.
				if (!Memory::IsValid4AlignedAddress(mips->pc)) {
					int blockNum = blocks_.GetBlockNumFromIRArenaOffset(offset);
					IRBlock *block = blocks_.GetBlockUnchecked(blockNum);
					Core_ExecException(mips->pc, block->GetOriginalStart(), ExecExceptionType::JUMP);
					break;
				}
			} else {
				// RestoreRoundingMode(true);
#ifdef _DEBUG
				compilerEnabled_ = true;
#endif
				Compile(mips->pc);
#ifdef _DEBUG
				compilerEnabled_ = false;
#endif
				// ApplyRoundingMode(true);
			}
		}

#ifdef _DEBUG
		compilerEnabled_ = true;
#endif
	}

	// RestoreRoundingMode(true);
}

bool IRJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
	// Used in the native disassembly viewer.
	return false;
}

void IRJit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
	Crash();
}

void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
	Crash();
}
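// Dispatch walkthrough for the fast path in RunLoopUntil() above, with a made-up
// arena offset of 0x1234 for illustration:
//
//   u32 inst   = Memory::ReadUnchecked_U32(mips->pc);  // == MIPS_EMUHACK_OPCODE | 0x001234
//   u32 offset = inst & 0x00FFFFFF;                    // == 0x1234
//   const IRInst *instPtr = blocks_.GetArenaPtr() + offset;  // first IRInst of the block
//
// The leading IROp::Downcount is consumed inline to save a dispatch, then
// IRInterpret() runs the rest of the block and returns the next pc, which is either
// another emuhack'd block start (fast path again) or uncompiled code (Compile()).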
void IRBlockCache::Clear() {
	for (int i = 0; i < (int)blocks_.size(); ++i) {
		int cookie = compileToNative_ ? blocks_[i].GetNativeOffset() : blocks_[i].GetIRArenaOffset();
		blocks_[i].Destroy(cookie);
	}
	blocks_.clear();
	byPage_.clear();
	arena_.clear();
	arena_.shrink_to_fit();
}

IRBlockCache::IRBlockCache(bool compileToNative) : compileToNative_(compileToNative) {}

int IRBlockCache::AllocateBlock(int emAddr, u32 origSize, const std::vector<IRInst> &insts) {
	// We have 24 bits to represent offsets with.
	const u32 MAX_ARENA_SIZE = 0x1000000 - 1;
	int offset = (int)arena_.size();
	if (offset >= (int)MAX_ARENA_SIZE) {
		WARN_LOG(Log::JIT, "Filled JIT arena, restarting");
		return -1;
	}
	// TODO: Use memcpy.
	for (int i = 0; i < (int)insts.size(); i++) {
		arena_.push_back(insts[i]);
	}
	int newBlockIndex = (int)blocks_.size();
	blocks_.push_back(IRBlock(emAddr, origSize, offset, (u32)insts.size()));
	return newBlockIndex;
}

int IRBlockCache::GetBlockNumFromIRArenaOffset(int offset) const {
	// Block offsets are always in rising order (we don't go back and replace them when invalidated). So we can binary search.
	int low = 0;
	int high = (int)blocks_.size() - 1;
	int found = -1;
	while (low <= high) {
		int mid = low + (high - low) / 2;
		const int blockOffset = blocks_[mid].GetIRArenaOffset();
		if (blockOffset == offset) {
			found = mid;
			break;
		}
		if (blockOffset < offset) {
			low = mid + 1;
		} else {
			high = mid - 1;
		}
	}

#ifndef _DEBUG
	return found;
#else
	// In debug builds, cross-check the binary search result with a linear scan.
	// TODO: Optimize if we need to call this often.
	for (int i = 0; i < (int)blocks_.size(); i++) {
		if (blocks_[i].GetIRArenaOffset() == offset) {
			_dbg_assert_(i == found);
			return i;
		}
	}
#endif
	_dbg_assert_(found == -1);
	return -1;
}

std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 lengthInBytes) {
	u32 startPage = AddressToPage(address);
	u32 endPage = AddressToPage(address + lengthInBytes);

	std::vector<int> found;
	for (u32 page = startPage; page <= endPage; ++page) {
		const auto iter = byPage_.find(page);
		if (iter == byPage_.end())
			continue;

		const std::vector<int> &blocksInPage = iter->second;
		for (int i : blocksInPage) {
			if (blocks_[i].OverlapsRange(address, lengthInBytes)) {
				// We now try to remove these during invalidation.
				found.push_back(i);
			}
		}
	}

	return found;
}

void IRBlockCache::FinalizeBlock(int blockIndex, bool preload) {
	// TODO: What's different about preload blocks?
	IRBlock &block = blocks_[blockIndex];
	if (!preload) {
		int cookie = compileToNative_ ? block.GetNativeOffset() : block.GetIRArenaOffset();
		block.Finalize(cookie);
	}

	u32 startAddr, size;
	block.GetRange(&startAddr, &size);

	u32 startPage = AddressToPage(startAddr);
	u32 endPage = AddressToPage(startAddr + size);

	for (u32 page = startPage; page <= endPage; ++page) {
		byPage_[page].push_back(blockIndex);
	}
}
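// Worked example for the byPage_ lookup used above (addresses hypothetical):
// AddressToPage() masks to the low 30 bits and shifts right by 10, giving 1 KB pages.
//
//   AddressToPage(0x08804000) == (0x08804000 & 0x3FFFFFFF) >> 10 == 0x22010
//
// A block spanning 0x088043F8..0x08804410 touches pages 0x22010 and 0x22011, so
// FinalizeBlock() pushes its index into both buckets and an invalidation hitting
// either page will find it.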
// Call this after Destroy()-ing the block.
void IRBlockCache::RemoveBlockFromPageLookup(int blockIndex) {
	// We need to remove the block from the byPage lookup.
	IRBlock &block = blocks_[blockIndex];

	u32 startAddr, size;
	block.GetRange(&startAddr, &size);

	u32 startPage = AddressToPage(startAddr);
	u32 endPage = AddressToPage(startAddr + size);

	for (u32 page = startPage; page <= endPage; ++page) {
		auto iter = std::find(byPage_[page].begin(), byPage_[page].end(), blockIndex);
		if (iter != byPage_[page].end()) {
			byPage_[page].erase(iter);
		} else if (block.IsValid()) {
			// If it was previously invalidated, we don't care, hence the above check.
			WARN_LOG(Log::JIT, "RemoveBlock: Block at %08x was not found where expected in byPage table.", startAddr);
		}
	}

	// Additionally, we'd like to zap the block in the IR arena.
	// However, this breaks if calling sceKernelIcacheClearAll(), since as soon as we return, we'll be executing garbage.
	/*
	IRInst bad{ IROp::Bad };
	for (int off = block.GetIRArenaOffset(); off < (int)(block.GetIRArenaOffset() + block.GetNumIRInstructions()); off++) {
		arena_[off] = bad;
	}
	*/
}

u32 IRBlockCache::AddressToPage(u32 addr) const {
	// Use relatively small pages since basic blocks are typically small.
	return (addr & 0x3FFFFFFF) >> 10;
}

int IRBlockCache::FindPreloadBlock(u32 em_address) {
	u32 page = AddressToPage(em_address);
	auto iter = byPage_.find(page);
	if (iter == byPage_.end())
		return -1;

	const std::vector<int> &blocksInPage = iter->second;
	for (int i : blocksInPage) {
		if (blocks_[i].GetOriginalStart() == em_address) {
			if (blocks_[i].HashMatches()) {
				return i;
			}
		}
	}

	return -1;
}

int IRBlockCache::FindByCookie(int cookie) {
	if (blocks_.empty())
		return -1;

	// TODO: Maybe a flag to determine native offset mode?
	if (!compileToNative_) {
		return GetBlockNumFromIRArenaOffset(cookie);
	}

	// TODO: This could also use a binary search.
	for (int i = 0; i < GetNumBlocks(); ++i) {
		int offset = blocks_[i].GetNativeOffset();
		if (offset == cookie)
			return i;
	}
	return -1;
}

std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {
	std::vector<u32> result;
	result.resize(blocks_.size());

	for (int number = 0; number < (int)blocks_.size(); ++number) {
		IRBlock &b = blocks_[number];
		int cookie = compileToNative_ ? b.GetNativeOffset() : b.GetIRArenaOffset();
		if (b.IsValid() && b.RestoreOriginalFirstOp(cookie)) {
			result[number] = number;
		} else {
			result[number] = 0;
		}
	}

	return result;
}

void IRBlockCache::RestoreSavedEmuHackOps(const std::vector<u32> &saved) {
	if ((int)blocks_.size() != (int)saved.size()) {
		ERROR_LOG(Log::JIT, "RestoreSavedEmuHackOps: Wrong saved block size.");
		return;
	}

	for (int number = 0; number < (int)blocks_.size(); ++number) {
		IRBlock &b = blocks_[number];
		// Only write it back if we restored it.
		if (b.IsValid() && saved[number] != 0 && b.HasOriginalFirstOp()) {
			int cookie = compileToNative_ ? b.GetNativeOffset() : b.GetIRArenaOffset();
			b.Finalize(cookie);
		}
	}
}

JitBlockDebugInfo IRBlockCache::GetBlockDebugInfo(int blockNum) const {
	const IRBlock &ir = blocks_[blockNum];
	JitBlockDebugInfo debugInfo{};
	uint32_t start, size;
	ir.GetRange(&start, &size);
	debugInfo.originalAddress = start;  // TODO

	debugInfo.origDisasm.reserve(size / 4);
	for (u32 addr = start; addr < start + size; addr += 4) {
		char temp[256];
		MIPSDisAsm(Memory::Read_Instruction(addr), addr, temp, sizeof(temp), true);
		std::string mipsDis = temp;
		debugInfo.origDisasm.push_back(mipsDis);
	}

	debugInfo.irDisasm.reserve(ir.GetNumIRInstructions());
	const IRInst *instructions = GetBlockInstructionPtr(ir);
	for (int i = 0; i < ir.GetNumIRInstructions(); i++) {
		IRInst inst = instructions[i];
		char buffer[256];
		DisassembleIR(buffer, sizeof(buffer), inst);
		debugInfo.irDisasm.push_back(buffer);
	}
	return debugInfo;
}
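// SaveAndClearEmuHackOps() / RestoreSavedEmuHackOps() above are meant to be used as
// a pair around code that needs to see unpatched MIPS memory. A sketch of the
// intended usage, inferred from this file (the actual call sites live elsewhere):
//
//   std::vector<u32> saved = blocks.SaveAndClearEmuHackOps();  // un-patch first ops
//   // ... hash or serialize raw MIPS memory here ...
//   blocks.RestoreSavedEmuHackOps(saved);  // re-patch only what was un-patched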
void IRBlockCache::ComputeStats(BlockCacheStats &bcStats) const {
	double totalBloat = 0.0;
	double maxBloat = 0.0;
	double minBloat = 1000000000.0;
	for (const auto &b : blocks_) {
		// We count bloat in instructions, not bytes (that would be * sizeof(IRInst)).
		double codeSize = (double)b.GetNumIRInstructions() * 4;
		if (codeSize == 0)
			continue;
		u32 origAddr, mipsBytes;
		b.GetRange(&origAddr, &mipsBytes);
		double origSize = (double)mipsBytes;
		double bloat = codeSize / origSize;
		if (bloat < minBloat) {
			minBloat = bloat;
			bcStats.minBloatBlock = origAddr;
		}
		if (bloat > maxBloat) {
			maxBloat = bloat;
			bcStats.maxBloatBlock = origAddr;
		}
		totalBloat += bloat;
	}
	bcStats.numBlocks = (int)blocks_.size();
	bcStats.minBloat = minBloat;
	bcStats.maxBloat = maxBloat;
	bcStats.avgBloat = totalBloat / (double)blocks_.size();
}

int IRBlockCache::GetBlockNumberFromStartAddress(u32 em_address, bool realBlocksOnly) const {
	u32 page = AddressToPage(em_address);

	const auto iter = byPage_.find(page);
	if (iter == byPage_.end())
		return -1;

	const std::vector<int> &blocksInPage = iter->second;
	int best = -1;
	for (int i : blocksInPage) {
		if (blocks_[i].GetOriginalStart() == em_address) {
			best = i;
			if (blocks_[i].IsValid()) {
				return i;
			}
		}
	}
	return best;
}

bool IRBlock::HasOriginalFirstOp() const {
	return Memory::ReadUnchecked_U32(origAddr_) == origFirstOpcode_.encoding;
}

bool IRBlock::RestoreOriginalFirstOp(int cookie) {
	const u32 emuhack = MIPS_EMUHACK_OPCODE | cookie;
	if (Memory::ReadUnchecked_U32(origAddr_) == emuhack) {
		Memory::Write_Opcode_JIT(origAddr_, origFirstOpcode_);
		return true;
	}
	return false;
}

void IRBlock::Finalize(int cookie) {
	// Check that it wasn't invalidated, in case this is after a preload.
	// TODO: Allow reusing blocks when the code matches hash_ again, instead.
	if (origAddr_) {
		origFirstOpcode_ = Memory::Read_Opcode_JIT(origAddr_);
		MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | cookie);
		Memory::Write_Opcode_JIT(origAddr_, opcode);
	} else {
		WARN_LOG(Log::JIT, "Finalizing invalid block (cookie: %d)", cookie);
	}
}

void IRBlock::Destroy(int cookie) {
	if (origAddr_) {
		MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | cookie);
		u32 memOp = Memory::ReadUnchecked_U32(origAddr_);
		if (memOp == opcode.encoding) {
			Memory::Write_Opcode_JIT(origAddr_, origFirstOpcode_);
		} else {
			// NOTE: This is not an error. Just interesting to log.
			DEBUG_LOG(Log::JIT, "IRBlock::Destroy: Note: Block at %08x was overwritten - checked for %08x, got %08x when restoring the MIPS op to %08x", origAddr_, opcode.encoding, memOp, origFirstOpcode_.encoding);
		}
		// TODO: Also wipe the block in the IR opcode arena.

		// Let's mark this invalid so we don't try to clear it again.
		origAddr_ = 0;
	}
}

u64 IRBlock::CalculateHash() const {
	if (origAddr_) {
		// This is unfortunate. In case there are emuhacks, we have to make a copy.
		// If we could hash while reading we could avoid this.
		std::vector<u32> buffer;
		buffer.resize(origSize_ / 4);
		size_t pos = 0;
		for (u32 off = 0; off < origSize_; off += 4) {
			// Let's actually hash the replacement, if any.
			MIPSOpcode instr = Memory::ReadUnchecked_Instruction(origAddr_ + off, false);
			buffer[pos++] = instr.encoding;
		}
		return XXH3_64bits(&buffer[0], origSize_);
	}
	return 0;
}

bool IRBlock::OverlapsRange(u32 addr, u32 size) const {
	addr &= 0x3FFFFFFF;
	u32 origAddr = origAddr_ & 0x3FFFFFFF;
	return addr + size > origAddr && addr < origAddr + origSize_;
}

MIPSOpcode IRJit::GetOriginalOp(MIPSOpcode op) {
	IRBlock *b = blocks_.GetBlock(blocks_.FindByCookie(op.encoding & 0xFFFFFF));
	if (b) {
		return b->GetOriginalFirstOp();
	}
	return op;
}

}  // namespace MIPSComp