/* -*- Mode: c++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40; -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "GLUploadHelpers.h" #include "GLContext.h" #include "mozilla/gfx/2D.h" #include "mozilla/gfx/Tools.h" // For BytesPerPixel #include "nsRegion.h" namespace mozilla { using namespace gfx; namespace gl { /* These two techniques are suggested by "Bit Twiddling Hacks" */ /** * Returns true if |aNumber| is a power of two * 0 is incorreclty considered a power of two */ static bool IsPowerOfTwo(int aNumber) { return (aNumber & (aNumber - 1)) == 0; } /** * Returns the first integer greater than |aNumber| which is a power of two * Undefined for |aNumber| < 0 */ static int NextPowerOfTwo(int aNumber) { #if defined(__arm__) return 1 << (32 - __builtin_clz(aNumber - 1)); #else --aNumber; aNumber |= aNumber >> 1; aNumber |= aNumber >> 2; aNumber |= aNumber >> 4; aNumber |= aNumber >> 8; aNumber |= aNumber >> 16; return ++aNumber; #endif } static unsigned int DataOffset(const nsIntPoint &aPoint, int32_t aStride, SurfaceFormat aFormat) { unsigned int data = aPoint.y * aStride; data += aPoint.x * BytesPerPixel(aFormat); return data; } static GLint GetAddressAlignment(ptrdiff_t aAddress) { if (!(aAddress & 0x7)) { return 8; } else if (!(aAddress & 0x3)) { return 4; } else if (!(aAddress & 0x1)) { return 2; } else { return 1; } } // Take texture data in a given buffer and copy it into a larger buffer, // padding out the edge pixels for filtering if necessary static void CopyAndPadTextureData(const GLvoid* srcBuffer, GLvoid* dstBuffer, GLsizei srcWidth, GLsizei srcHeight, GLsizei dstWidth, GLsizei dstHeight, GLsizei stride, GLint pixelsize) { unsigned char *rowDest = static_cast(dstBuffer); const unsigned char *source = static_cast(srcBuffer); for (GLsizei h = 0; h < srcHeight; ++h) { memcpy(rowDest, source, srcWidth * pixelsize); rowDest += dstWidth * pixelsize; source += stride; } GLsizei padHeight = srcHeight; // Pad out an extra row of pixels so that edge filtering doesn't use garbage data if (dstHeight > srcHeight) { memcpy(rowDest, source - stride, srcWidth * pixelsize); padHeight++; } // Pad out an extra column of pixels if (dstWidth > srcWidth) { rowDest = static_cast(dstBuffer) + srcWidth * pixelsize; for (GLsizei h = 0; h < padHeight; ++h) { memcpy(rowDest, rowDest - pixelsize, pixelsize); rowDest += dstWidth * pixelsize; } } } // In both of these cases (for the Adreno at least) it is impossible // to determine good or bad driver versions for POT texture uploads, // so blacklist them all. Newer drivers use a different rendering // string in the form "Adreno (TM) 200" and the drivers we've seen so // far work fine with NPOT textures, so don't blacklist those until we // have evidence of any problems with them. bool CanUploadSubTextures(GLContext* gl) { if (!gl->WorkAroundDriverBugs()) return true; // There are certain GPUs that we don't want to use glTexSubImage2D on // because that function can be very slow and/or buggy if (gl->Renderer() == GLRenderer::Adreno200 || gl->Renderer() == GLRenderer::Adreno205) { return false; } // On PowerVR glTexSubImage does a readback, so it will be slower // than just doing a glTexImage2D() directly. i.e. 26ms vs 10ms if (gl->Renderer() == GLRenderer::SGX540 || gl->Renderer() == GLRenderer::SGX530) { return false; } return true; } static void TexSubImage2DWithUnpackSubimageGLES(GLContext* gl, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLsizei stride, GLint pixelsize, GLenum format, GLenum type, const GLvoid* pixels) { gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); // When using GL_UNPACK_ROW_LENGTH, we need to work around a Tegra // driver crash where the driver apparently tries to read // (stride - width * pixelsize) bytes past the end of the last input // row. We only upload the first height-1 rows using GL_UNPACK_ROW_LENGTH, // and then we upload the final row separately. See bug 697990. int rowLength = stride/pixelsize; gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, rowLength); gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height-1, format, type, pixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, 0); gl->fTexSubImage2D(target, level, xoffset, yoffset+height-1, width, 1, format, type, (const unsigned char *)pixels+(height-1)*stride); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } static void TexSubImage2DWithoutUnpackSubimage(GLContext* gl, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLsizei stride, GLint pixelsize, GLenum format, GLenum type, const GLvoid* pixels) { // Not using the whole row of texture data and GL_UNPACK_ROW_LENGTH // isn't supported. We make a copy of the texture data we're using, // such that we're using the whole row of data in the copy. This turns // out to be more efficient than uploading row-by-row; see bug 698197. unsigned char *newPixels = new unsigned char[width*height*pixelsize]; unsigned char *rowDest = newPixels; const unsigned char *rowSource = (const unsigned char *)pixels; for (int h = 0; h < height; h++) { memcpy(rowDest, rowSource, width*pixelsize); rowDest += width*pixelsize; rowSource += stride; } stride = width*pixelsize; gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)newPixels), GetAddressAlignment((ptrdiff_t)stride))); gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format, type, newPixels); delete [] newPixels; gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } static void TexSubImage2DHelper(GLContext *gl, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLsizei stride, GLint pixelsize, GLenum format, GLenum type, const GLvoid* pixels) { if (gl->IsGLES2()) { if (stride == width * pixelsize) { gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format, type, pixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } else if (gl->IsExtensionSupported(GLContext::EXT_unpack_subimage)) { TexSubImage2DWithUnpackSubimageGLES(gl, target, level, xoffset, yoffset, width, height, stride, pixelsize, format, type, pixels); } else { TexSubImage2DWithoutUnpackSubimage(gl, target, level, xoffset, yoffset, width, height, stride, pixelsize, format, type, pixels); } } else { // desktop GL (non-ES) path gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); int rowLength = stride/pixelsize; gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, rowLength); gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format, type, pixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, 0); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } } static void TexImage2DHelper(GLContext *gl, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei stride, GLint pixelsize, GLint border, GLenum format, GLenum type, const GLvoid *pixels) { if (gl->IsGLES2()) { NS_ASSERTION(format == (GLenum)internalformat, "format and internalformat not the same for glTexImage2D on GLES2"); if (!CanUploadNonPowerOfTwo(gl) && (stride != width * pixelsize || !IsPowerOfTwo(width) || !IsPowerOfTwo(height))) { // Pad out texture width and height to the next power of two // as we don't support/want non power of two texture uploads GLsizei paddedWidth = NextPowerOfTwo(width); GLsizei paddedHeight = NextPowerOfTwo(height); GLvoid* paddedPixels = new unsigned char[paddedWidth * paddedHeight * pixelsize]; // Pad out texture data to be in a POT sized buffer for uploading to // a POT sized texture CopyAndPadTextureData(pixels, paddedPixels, width, height, paddedWidth, paddedHeight, stride, pixelsize); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)paddedPixels), GetAddressAlignment((ptrdiff_t)paddedWidth * pixelsize))); gl->fTexImage2D(target, border, internalformat, paddedWidth, paddedHeight, border, format, type, paddedPixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); delete[] static_cast(paddedPixels); return; } if (stride == width * pixelsize) { gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); gl->fTexImage2D(target, border, internalformat, width, height, border, format, type, pixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } else { // Use GLES-specific workarounds for GL_UNPACK_ROW_LENGTH; these are // implemented in TexSubImage2D. gl->fTexImage2D(target, border, internalformat, width, height, border, format, type, nullptr); TexSubImage2DHelper(gl, target, level, 0, 0, width, height, stride, pixelsize, format, type, pixels); } } else { // desktop GL (non-ES) path gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); int rowLength = stride/pixelsize; gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, rowLength); gl->fTexImage2D(target, level, internalformat, width, height, border, format, type, pixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, 0); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } } SurfaceFormat UploadImageDataToTexture(GLContext* gl, unsigned char* aData, int32_t aStride, SurfaceFormat aFormat, const nsIntRegion& aDstRegion, GLuint& aTexture, bool aOverwrite, bool aPixelBuffer, GLenum aTextureUnit, GLenum aTextureTarget) { bool textureInited = aOverwrite ? false : true; gl->MakeCurrent(); gl->fActiveTexture(aTextureUnit); if (!aTexture) { gl->fGenTextures(1, &aTexture); gl->fBindTexture(aTextureTarget, aTexture); gl->fTexParameteri(aTextureTarget, LOCAL_GL_TEXTURE_MIN_FILTER, LOCAL_GL_LINEAR); gl->fTexParameteri(aTextureTarget, LOCAL_GL_TEXTURE_MAG_FILTER, LOCAL_GL_LINEAR); gl->fTexParameteri(aTextureTarget, LOCAL_GL_TEXTURE_WRAP_S, LOCAL_GL_CLAMP_TO_EDGE); gl->fTexParameteri(aTextureTarget, LOCAL_GL_TEXTURE_WRAP_T, LOCAL_GL_CLAMP_TO_EDGE); textureInited = false; } else { gl->fBindTexture(aTextureTarget, aTexture); } nsIntRegion paintRegion; if (!textureInited) { paintRegion = nsIntRegion(aDstRegion.GetBounds()); } else { paintRegion = aDstRegion; } GLenum format = 0; GLenum internalFormat = 0; GLenum type = 0; int32_t pixelSize = BytesPerPixel(aFormat); SurfaceFormat surfaceFormat = gfx::SurfaceFormat::UNKNOWN; MOZ_ASSERT(gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA || gl->GetPreferredARGB32Format() == LOCAL_GL_RGBA); switch (aFormat) { case SurfaceFormat::B8G8R8A8: if (gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA) { format = LOCAL_GL_BGRA; surfaceFormat = SurfaceFormat::R8G8B8A8; type = LOCAL_GL_UNSIGNED_INT_8_8_8_8_REV; } else { format = LOCAL_GL_RGBA; surfaceFormat = SurfaceFormat::B8G8R8A8; type = LOCAL_GL_UNSIGNED_BYTE; } internalFormat = LOCAL_GL_RGBA; break; case SurfaceFormat::B8G8R8X8: // Treat BGRX surfaces as BGRA except for the surface // format used. if (gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA) { format = LOCAL_GL_BGRA; surfaceFormat = SurfaceFormat::R8G8B8X8; type = LOCAL_GL_UNSIGNED_INT_8_8_8_8_REV; } else { format = LOCAL_GL_RGBA; surfaceFormat = SurfaceFormat::B8G8R8X8; type = LOCAL_GL_UNSIGNED_BYTE; } internalFormat = LOCAL_GL_RGBA; break; case SurfaceFormat::R5G6B5: internalFormat = format = LOCAL_GL_RGB; type = LOCAL_GL_UNSIGNED_SHORT_5_6_5; surfaceFormat = SurfaceFormat::R5G6B5; break; case SurfaceFormat::A8: internalFormat = format = LOCAL_GL_LUMINANCE; type = LOCAL_GL_UNSIGNED_BYTE; // We don't have a specific luminance shader surfaceFormat = SurfaceFormat::A8; break; default: NS_ASSERTION(false, "Unhandled image surface format!"); } nsIntRegionRectIterator iter(paintRegion); const nsIntRect *iterRect; // Top left point of the region's bounding rectangle. nsIntPoint topLeft = paintRegion.GetBounds().TopLeft(); while ((iterRect = iter.Next())) { // The inital data pointer is at the top left point of the region's // bounding rectangle. We need to find the offset of this rect // within the region and adjust the data pointer accordingly. unsigned char *rectData = aData + DataOffset(iterRect->TopLeft() - topLeft, aStride, aFormat); NS_ASSERTION(textureInited || (iterRect->x == 0 && iterRect->y == 0), "Must be uploading to the origin when we don't have an existing texture"); if (textureInited && CanUploadSubTextures(gl)) { TexSubImage2DHelper(gl, aTextureTarget, 0, iterRect->x, iterRect->y, iterRect->width, iterRect->height, aStride, pixelSize, format, type, rectData); } else { TexImage2DHelper(gl, aTextureTarget, 0, internalFormat, iterRect->width, iterRect->height, aStride, pixelSize, 0, format, type, rectData); } } return surfaceFormat; } SurfaceFormat UploadSurfaceToTexture(GLContext* gl, DataSourceSurface *aSurface, const nsIntRegion& aDstRegion, GLuint& aTexture, bool aOverwrite, const nsIntPoint& aSrcPoint, bool aPixelBuffer, GLenum aTextureUnit, GLenum aTextureTarget) { unsigned char* data = aPixelBuffer ? nullptr : aSurface->GetData(); int32_t stride = aSurface->Stride(); SurfaceFormat format = aSurface->GetFormat(); data += DataOffset(aSrcPoint, stride, format); return UploadImageDataToTexture(gl, data, stride, format, aDstRegion, aTexture, aOverwrite, aPixelBuffer, aTextureUnit, aTextureTarget); } bool CanUploadNonPowerOfTwo(GLContext* gl) { if (!gl->WorkAroundDriverBugs()) return true; // Some GPUs driver crash when uploading non power of two 565 textures. return gl->Renderer() != GLRenderer::Adreno200 && gl->Renderer() != GLRenderer::Adreno205; } } }