// Copyright (c) 2012- PPSSPP Project. // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, version 2.0 or later versions. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License 2.0 for more details. // A copy of the GPL 2.0 should have been included with the program. // If not, see http://www.gnu.org/licenses/ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include #include "GPU/Common/TextureDecoder.h" #ifndef ARM #error Should not be compiled on non-ARM. #endif static const u16 MEMORY_ALIGNED16(QuickTexHashInitial[8]) = {0x0001U, 0x0083U, 0x4309U, 0x4d9bU, 0xb651U, 0x4b73U, 0x9bd9U, 0xc00bU}; u32 QuickTexHashNEON(const void *checkp, u32 size) { u32 check = 0; __builtin_prefetch(checkp, 0, 0); if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) { uint32x4_t cursor = vdupq_n_u32(0); uint32x4_t cursor2 = vld1q_u32((const u32 *)QuickTexHashInitial); uint32x4_t update = vdupq_n_u32(0x24552455U); const u32 *p = (const u32 *)checkp; for (u32 i = 0; i < size / 16; i += 4) { cursor = vmlaq_u32(cursor, vld1q_u32(&p[4 * 0]), cursor2); cursor = veorq_u32(cursor, vld1q_u32(&p[4 * 1])); cursor = vaddq_u32(cursor, vld1q_u32(&p[4 * 2])); cursor = veorq_u32(cursor, vmulq_u32(vld1q_u32(&p[4 * 3]), cursor2)); cursor2 = vaddq_u32(cursor2, update); p += 4 * 4; } cursor = vaddq_u32(cursor, cursor2); check = vgetq_lane_u32(cursor, 0) + vgetq_lane_u32(cursor, 1) + vgetq_lane_u32(cursor, 2) + vgetq_lane_u32(cursor, 3); } else { const u32 size_u32 = size / 4; const u32 *p = (const u32 *)checkp; for (u32 i = 0; i < size_u32; i += 4) { check += p[i + 0]; check ^= p[i + 1]; check += p[i + 2]; check ^= p[i + 3]; } } return check; }