From 0474ff5c238bd79110faedf85a304fc4128bc8a4 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 11 Feb 2018 08:12:51 -0800 Subject: [PATCH] GLES: Use aligned memory for textures. We use SSE on them, and they used to be aligned, and must be aligned in PSP RAM, so keep them aligned. This only really affects 32-bit, since allocs will typically be aligned on 64-bit anyway. Fixes #10601. --- GPU/GLES/DepalettizeShaderGLES.cpp | 2 +- GPU/GLES/DrawEngineGLES.cpp | 6 +++--- GPU/GLES/FramebufferManagerGLES.cpp | 2 +- GPU/GLES/TextureCacheGLES.cpp | 10 +++++----- ext/native/thin3d/GLQueueRunner.cpp | 7 ++++++- ext/native/thin3d/GLQueueRunner.h | 5 +++++ ext/native/thin3d/GLRenderManager.h | 3 ++- 7 files changed, 23 insertions(+), 12 deletions(-) diff --git a/GPU/GLES/DepalettizeShaderGLES.cpp b/GPU/GLES/DepalettizeShaderGLES.cpp index 49e1b36f3c..77614c93df 100644 --- a/GPU/GLES/DepalettizeShaderGLES.cpp +++ b/GPU/GLES/DepalettizeShaderGLES.cpp @@ -95,7 +95,7 @@ GLRTexture *DepalShaderCacheGLES::GetClutTexture(GEPaletteFormat clutFormat, con uint8_t *clutCopy = new uint8_t[1024]; memcpy(clutCopy, rawClut, 1024); - render_->TextureImage(tex->texture, 0, texturePixels, 1, components, components2, dstFmt, clutCopy, false); + render_->TextureImage(tex->texture, 0, texturePixels, 1, components, components2, dstFmt, clutCopy, GLRAllocType::NEW, false); tex->lastFrame = gpuStats.numFlips; texCache_[clutId] = tex; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 5988ee5fa1..bb0676719c 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -777,7 +777,7 @@ void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float uint8_t *pos_data = new uint8_t[size * sizeof(float) * 4]; memcpy(pos_data, pos, size * sizeof(float) * 4); data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D); - renderManager_->TextureImage(data_tex[0], 0, size, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, pos_data, false); + renderManager_->TextureImage(data_tex[0], 0, size, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, pos_data, GLRAllocType::NEW, false); renderManager_->FinalizeTexture(data_tex[0], 0, false); renderManager_->BindTexture(4, data_tex[0]); @@ -788,7 +788,7 @@ void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float uint8_t *tex_data = new uint8_t[size * sizeof(float) * 4]; memcpy(tex_data, pos, size * sizeof(float) * 4); data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D); - renderManager_->TextureImage(data_tex[1], 0, size, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, tex_data, false); + renderManager_->TextureImage(data_tex[1], 0, size, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, tex_data, GLRAllocType::NEW, false); renderManager_->FinalizeTexture(data_tex[1], 0, false); renderManager_->BindTexture(5, data_tex[1]); } @@ -800,7 +800,7 @@ void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float uint8_t *col_data = new uint8_t[sizeColor * sizeof(float) * 4]; memcpy(col_data, col, sizeColor * sizeof(float) * 4); - renderManager_->TextureImage(data_tex[2], 0, sizeColor, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, col_data, false); + renderManager_->TextureImage(data_tex[2], 0, sizeColor, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, col_data, GLRAllocType::NEW, false); renderManager_->FinalizeTexture(data_tex[2], 0, false); renderManager_->BindTexture(6, data_tex[2]); } diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index dd0a0c2473..351579744f 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -367,7 +367,7 @@ void FramebufferManagerGLES::MakePixelTexture(const u8 *srcPixels, GEBufferForma break; } } - render_->TextureImage(drawPixelsTex_, 0, texWidth, height, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, convBuf, false); + render_->TextureImage(drawPixelsTex_, 0, texWidth, height, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, convBuf, GLRAllocType::NEW, false); render_->FinalizeTexture(drawPixelsTex_, 0, false); // TODO: Return instead? diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index d16f14f372..1c1ff3551d 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -714,7 +714,7 @@ u8 *TextureCacheGLES::DecodeTextureLevelOld(GETextureFormat format, GEPaletteFor decPitch = bufw * pixelSize; } - uint8_t *texBuf = new uint8_t[std::max(w, bufw) * h * pixelSize]; + uint8_t *texBuf = (uint8_t *)AllocateAlignedMemory(std::max(w, bufw) * h * pixelSize, 16); DecodeTextureLevel(texBuf, decPitch, format, clutformat, texaddr, level, bufw, true, false, false); return texBuf; } @@ -755,7 +755,7 @@ void TextureCacheGLES::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &r PROFILE_THIS_SCOPE("replacetex"); int bpp = replaced.Format(level) == ReplacedTextureFormat::F_8888 ? 4 : 2; - uint8_t *rearrange = new uint8_t[w * h * bpp]; + uint8_t *rearrange = (uint8_t *)AllocateAlignedMemory(w * h * bpp, 16); replaced.Load(level, rearrange, bpp * w); pixelData = rearrange; @@ -785,7 +785,7 @@ void TextureCacheGLES::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &r } if (scaleFactor > 1) { - uint8_t *rearrange = new uint8_t[w * scaleFactor * h * scaleFactor * 4]; + uint8_t *rearrange = (uint8_t *)AllocateAlignedMemory(w * scaleFactor * h * scaleFactor * 4, 16); scaler.ScaleAlways((u32 *)rearrange, (u32 *)pixelData, dstFmt, w, h, scaleFactor); pixelData = rearrange; delete [] finalBuf; @@ -817,9 +817,9 @@ void TextureCacheGLES::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &r } else { PROFILE_THIS_SCOPE("loadtex"); if (IsFakeMipmapChange()) - render_->TextureImage(entry.textureName, 0, w, h, components, components2, dstFmt, pixelData); + render_->TextureImage(entry.textureName, 0, w, h, components, components2, dstFmt, pixelData, GLRAllocType::ALIGNED); else - render_->TextureImage(entry.textureName, level, w, h, components, components2, dstFmt, pixelData); + render_->TextureImage(entry.textureName, level, w, h, components, components2, dstFmt, pixelData, GLRAllocType::ALIGNED); } } diff --git a/ext/native/thin3d/GLQueueRunner.cpp b/ext/native/thin3d/GLQueueRunner.cpp index e37b411e69..1053752af1 100644 --- a/ext/native/thin3d/GLQueueRunner.cpp +++ b/ext/native/thin3d/GLQueueRunner.cpp @@ -1,3 +1,4 @@ +#include "Common/MemoryUtil.h" #include "Core/Reporting.h" #include "GLQueueRunner.h" #include "GLRenderManager.h" @@ -241,7 +242,11 @@ void GLQueueRunner::RunInitSteps(const std::vector &steps) { // For things to show in RenderDoc, need to split into glTexImage2D(..., nullptr) and glTexSubImage. glTexImage2D(tex->target, step.texture_image.level, step.texture_image.internalFormat, step.texture_image.width, step.texture_image.height, 0, step.texture_image.format, step.texture_image.type, step.texture_image.data); allocatedTextures = true; - delete[] step.texture_image.data; + if (step.texture_image.allocType == GLRAllocType::ALIGNED) { + FreeAlignedMemory(step.texture_image.data); + } else { + delete[] step.texture_image.data; + } CHECK_GL_ERROR_IF_DEBUG(); tex->wrapS = GL_CLAMP_TO_EDGE; tex->wrapT = GL_CLAMP_TO_EDGE; diff --git a/ext/native/thin3d/GLQueueRunner.h b/ext/native/thin3d/GLQueueRunner.h index 63a17503c4..9119b0d05f 100644 --- a/ext/native/thin3d/GLQueueRunner.h +++ b/ext/native/thin3d/GLQueueRunner.h @@ -19,6 +19,10 @@ struct GLOffset2D { int x, y; }; +enum class GLRAllocType { + NEW, + ALIGNED, +}; class GLRShader; class GLRTexture; @@ -242,6 +246,7 @@ struct GLRInitStep { int level; int width; int height; + GLRAllocType allocType; bool linearFilter; uint8_t *data; // owned, delete[]-d } texture_image; diff --git a/ext/native/thin3d/GLRenderManager.h b/ext/native/thin3d/GLRenderManager.h index 2e50f392a5..c3b473e605 100644 --- a/ext/native/thin3d/GLRenderManager.h +++ b/ext/native/thin3d/GLRenderManager.h @@ -337,7 +337,7 @@ public: } // Takes ownership over the data pointer and delete[]-s it. - void TextureImage(GLRTexture *texture, int level, int width, int height, GLenum internalFormat, GLenum format, GLenum type, uint8_t *data, bool linearFilter = false) { + void TextureImage(GLRTexture *texture, int level, int width, int height, GLenum internalFormat, GLenum format, GLenum type, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW, bool linearFilter = false) { GLRInitStep step{ GLRInitStepType::TEXTURE_IMAGE }; step.texture_image.texture = texture; step.texture_image.data = data; @@ -347,6 +347,7 @@ public: step.texture_image.level = level; step.texture_image.width = width; step.texture_image.height = height; + step.texture_image.allocType = allocType; step.texture_image.linearFilter = linearFilter; initSteps_.push_back(step); }