#include "ArmEmitterTest.h"

#include <cstring>

#include "Common/ArmEmitter.h"
#include "Common/CPUDetect.h"

// NOTE(review): not referenced anywhere else in the visible file - confirm before removing.
static bool functionWasCalled;

using namespace ArmGen;

// Small code block used to emit a hand-written ARM/NEON test routine and
// remember where it starts so it can be called afterwards.
class TestCode : public ArmGen::ARMXCodeBlock {
public:
	TestCode();

	// Emits the test routine and records its entry point in testCodePtr.
	void Generate();

	// Entry point of the routine emitted by Generate(); null until Generate() runs.
	const u8 *testCodePtr = nullptr;
	// Never assigned in this file; kept null so it is at least not indeterminate.
	const u8 *testCodePtr2 = nullptr;
};

TestCode::TestCode() {
	AllocCodeSpace(0x10000);
}

// Float scratch data. abc, a and b are only referenced by the commented-out
// experiments in Generate(); c is written by the live code and logged by ArmEmitterTest().
static float abc[256] = {1.0f, 2.0f, 0.0f};
static float a[4] = {1.0f, 2.0f, 3.0f, 4.5f};
static float b[4] = {1.0f, 1.0f, 1.0f, 0.5f};
static float c[4] = {0.0f, 0.0f, 0.0f, 0.0f};

// Integer data: x is the input, y and z are store targets of the emitted routine.
static u32 x[4] = {0x04030201, 0x08070605, 0x0, 0x0};
static u32 y[4] = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
static u32 z[4] = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};

// Emits the test routine: push R11/LR, load from x, run a VMOVL/VMOVL/VCVT
// chain, store the intermediate registers to y, z and c, then return by
// popping into PC. Also logs the last instruction word emitted before the
// return sequence.
void TestCode::Generate() {
	testCodePtr = this->GetCodePtr();
	// Sonic1 commented that R11 is the frame pointer in debug mode, whatever "debug mode" means.
	PUSH(2, R11, R_LR);

	// Load the three pointers
	/*
	MOVP2R(R0, a);
	MOVP2R(R1, b);
	MOVP2R(R2, c);

	// Load from two, do the operation, write to the third.
	VLD1(F_32, D0, R0, 2); // Load 2 doubles
	VLD1(F_32, D2, R1, 2); // Load another 2 doubles
	// VADD(F_32, Q2, Q0, Q1); // Add them, seeing them as floating point quads
	VMUL_scalar(F_32, Q2, Q0, DScalar(D3, 1)); // Multiply a quad by a scalar (ultra efficient for matrix mul! limitation: Scalar has to come out of D0-D15)
	ADD(R1, R1, 12);
	VLD1_all_lanes(F_32, Q2, R1, true);
	ADD(R0, R0, 12);
	VLD1_lane(F_32, D4, R0, 1, true);
	// VMUL(F_32, Q2, Q0, Q1);
	VST1(F_32, D4, R2, 2);
	*/

	// Let's try some integer stuff
	MOVP2R(R0, x);
	MOVP2R(R1, y);
	MOVP2R(R2, z);
	MOVP2R(R3, c);
	VLD1(I_32, D0, R0, 1); // Load 1 double
	// NOTE(review): presumably widens unsigned 8-bit -> 16-bit -> 32-bit and then
	// converts to float - confirm against the emitter / ARM documentation.
	VMOVL(I_8 | I_UNSIGNED, Q1, D0);
	VMOVL(I_16 | I_UNSIGNED, Q2, D2);
	VCVT(F_32 | I_SIGNED, Q3, Q2);
	// Store each stage so it can be inspected from the log: D2 -> y, D4 -> z, D6 -> c.
	VST1(I_32, D2, R1, 2);
	VST1(I_32, D4, R2, 2);
	VST1(I_32, D6, R3, 2);
	PLD(R1, 32);

	// Read back the four bytes just before the current code pointer (presumably
	// the PLD emitted above). memcpy avoids casting away const and type-punning
	// the byte buffer through a u32 pointer.
	u32 word = 0;
	std::memcpy(&word, GetCodePtr() - 4, sizeof(word));
	INFO_LOG(Log::System, "Instruction Word: %08x", word);

	// This works!

	// c will later be logged.

	/*
	MOVP2R(R11, &abc[0]);
	MOVI2R(R1, 0x3f800000);
	STR(R11, R1, 4 * (32 + 31));
	VLDR(S0, R11, 0);
	VLDR(S1, R11, 4);
	VADD(S12, S0, S1);
	VSTR(S0, R11, 4 * (32 + 31));
	VSTR(S12, R11, 4 * (32 + 31));
	*/
	//VSTR(S2, R0, 8);
	POP(2, R11, R_PC); // Yup, this is how you return.

	FlushLitPool();
	FlushIcache();

	//VLDR(S1, R0, 4);
	//VADD(S2, S0, S1);
	//VSTR(S2, R0, 8);
	//QuickCallFunction(R3, (void*)&TestLeaf);

	//ARMABI_CallFunctionCCC((void*)&TestLeaf, 0x1, 0x100, 0x1337);
	//ARMABI_CallFunctionCCC((void*)&TestLeaf, 0x2, 0x100, 0x31337);
	//ARMABI_CallFunctionCCC((void*)&TestLeaf, 0x3, 0x100, 0x1337);
}

// Calls ptr as a function taking no arguments and returning u32, and returns its result.
u32 CallPtr(const void *ptr) {
	return ((u32(*)())ptr)();
}

extern void DisassembleArm(const u8 *data, int size);

// Entry point of the emitter test. Currently disabled by the unconditional
// return at the top; everything after it is kept for when it is re-enabled.
void ArmEmitterTest() {
	// Disabled for now.
	return;

	// If I commit with it enabled by accident, let's not blow up.
	if (!cpu_info.bNEON)
		return;

	for (int i = 0; i < 6; i++) {
		INFO_LOG(Log::System, "--------------------------");
	}
	INFO_LOG(Log::System, "--------------------------");
	INFO_LOG(Log::System, "Running ARM emitter test!");
	INFO_LOG(Log::System, "--------------------------");

	TestCode gen;
	gen.ReserveCodeSpace(0x1000);
	// Only used by the commented-out DisassembleArm call at the end.
	const u8 *codeStart = gen.GetCodePtr();
	gen.Generate();

	// Only used by the commented-out log line below.
	u32 retval = CallPtr(gen.testCodePtr);
	// INFO_LOG(Log::System, "ARM emitter test 1 passed if %f == 3.0! retval = %08x", abc[32 + 31], retval);
	INFO_LOG(Log::System, "x: %08x %08x %08x %08x", x[0], x[1], x[2], x[3]);
	INFO_LOG(Log::System, "y: %08x %08x %08x %08x", y[0], y[1], y[2], y[3]);
	INFO_LOG(Log::System, "z: %08x %08x %08x %08x", z[0], z[1], z[2], z[3]);
	INFO_LOG(Log::System, "c: %f %f %f %f", c[0], c[1], c[2], c[3]);
	for (int i = 0; i < 6; i++) {
		INFO_LOG(Log::System, "--------------------------");
	}
	// DisassembleArm(codeStart, gen.GetCodePtr()-codeStart);
}