From 826a893d647b7eab3da9c73a165343e9f59acefe Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 7 May 2012 13:05:32 -0400 Subject: [PATCH] Bug 749711 - Lots of WebGL texture conversion fixes and improvements - r=jgilbert * Templatize pack/unpack routines ** Inside anonymous namespace in a naive attempt to not hammer PGO linker memory usage. * Support conversions changing texel size * Support conversion from integer to float formats * Support RGBA32F properly * Avoid compiling useless paths (code size down to 17k from 44k) --- content/canvas/src/Makefile.in | 1 + content/canvas/src/WebGLContext.h | 59 +- content/canvas/src/WebGLContextGL.cpp | 305 ++------ content/canvas/src/WebGLTexelConversions.cpp | 382 +++++++++ content/canvas/src/WebGLTexelConversions.h | 771 ++++++++++++------- 5 files changed, 991 insertions(+), 527 deletions(-) create mode 100644 content/canvas/src/WebGLTexelConversions.cpp diff --git a/content/canvas/src/Makefile.in b/content/canvas/src/Makefile.in index 8919c2c4b91..662a8b61b79 100644 --- a/content/canvas/src/Makefile.in +++ b/content/canvas/src/Makefile.in @@ -82,6 +82,7 @@ CPPSRCS += \ WebGLExtensionStandardDerivatives.cpp \ WebGLExtensionTextureFilterAnisotropic.cpp \ WebGLExtensionLoseContext.cpp \ + WebGLTexelConversions.cpp \ $(NULL) DEFINES += -DUSE_ANGLE diff --git a/content/canvas/src/WebGLContext.h b/content/canvas/src/WebGLContext.h index 8ccb337bebf..40a027aeea4 100644 --- a/content/canvas/src/WebGLContext.h +++ b/content/canvas/src/WebGLContext.h @@ -128,16 +128,51 @@ struct BackbufferClearingStatus { enum { NotClearedSinceLastPresented, ClearedToDefaultValues, HasBeenDrawnTo }; }; -struct WebGLTexelFormat { - enum { Generic, Auto, RGBA8, RGB8, RGBX8, BGRA8, BGR8, BGRX8, RGBA5551, RGBA4444, RGB565, R8, RA8, A8, - RGBA32F, RGB32F, A32F, R32F, RA32F }; +namespace WebGLTexelConversions { + +/* + * The formats that may participate, either as source or destination formats, + * in WebGL texture conversions. 
This includes: + * - all the formats accepted by WebGL.texImage2D, e.g. RGBA4444 + * - additional formats provided by extensions, e.g. RGB32F + * - additional source formats, depending on browser details, used when uploading + * textures from DOM elements. See gfxImageSurface::Format(). + */ +enum WebGLTexelFormat +{ + // dummy error code returned by GetWebGLTexelFormat in error cases, + // after assertion failure (so this never happens in debug builds) + BadFormat, + // dummy pseudo-format meaning "use the other format". + // For example, if SrcFormat=Auto and DstFormat=RGB8, then the source + // is implicitly treated as being RGB8 itself. + Auto, + // 1-channel formats + R8, + A8, + R32F, // used for OES_texture_float extension + A32F, // used for OES_texture_float extension + // 2-channel formats + RA8, + RA32F, + // 3-channel formats + RGB8, + BGRX8, // used for DOM elements. Source format only. + RGB565, + RGB32F, // used for OES_texture_float extension + // 4-channel formats + RGBA8, + BGRA8, // used for DOM elements + RGBA5551, + RGBA4444, + RGBA32F // used for OES_texture_float extension }; -struct WebGLTexelPremultiplicationOp { - enum { Generic, None, Premultiply, Unmultiply }; -}; +} // end namespace WebGLTexelConversions -int GetWebGLTexelFormat(GLenum format, GLenum type); +using WebGLTexelConversions::WebGLTexelFormat; + +WebGLTexelFormat GetWebGLTexelFormat(GLenum format, GLenum type); // Zero is not an integer power of two. 
inline bool is_pot_assuming_nonnegative(WebGLsizei x) @@ -1205,26 +1240,26 @@ protected: WebGLenum format, WebGLenum type, void *data, PRUint32 byteLength, int jsArrayType, - int srcFormat, bool srcPremultiplied); + WebGLTexelFormat srcFormat, bool srcPremultiplied); void TexSubImage2D_base(WebGLenum target, WebGLint level, WebGLint xoffset, WebGLint yoffset, WebGLsizei width, WebGLsizei height, WebGLsizei srcStrideOrZero, WebGLenum format, WebGLenum type, void *pixels, PRUint32 byteLength, int jsArrayType, - int srcFormat, bool srcPremultiplied); + WebGLTexelFormat srcFormat, bool srcPremultiplied); void TexParameter_base(WebGLenum target, WebGLenum pname, WebGLint *intParamPtr, WebGLfloat *floatParamPtr); void ConvertImage(size_t width, size_t height, size_t srcStride, size_t dstStride, const PRUint8*src, PRUint8 *dst, - int srcFormat, bool srcPremultiplied, - int dstFormat, bool dstPremultiplied, + WebGLTexelFormat srcFormat, bool srcPremultiplied, + WebGLTexelFormat dstFormat, bool dstPremultiplied, size_t dstTexelSize); nsresult DOMElementToImageSurface(dom::Element* imageOrCanvas, gfxImageSurface **imageOut, - int *format); + WebGLTexelFormat *format); void CopyTexSubImage2D_base(WebGLenum target, WebGLint level, diff --git a/content/canvas/src/WebGLContextGL.cpp b/content/canvas/src/WebGLContextGL.cpp index c6f1ce6a2e2..1ef346251ee 100644 --- a/content/canvas/src/WebGLContextGL.cpp +++ b/content/canvas/src/WebGLContextGL.cpp @@ -4296,198 +4296,9 @@ WebGLContext::StencilOpSeparate(WebGLenum face, WebGLenum sfail, WebGLenum dpfai gl->fStencilOpSeparate(face, sfail, dpfail, dppass); } -struct WebGLImageConverter -{ - bool flip; - size_t width, height, srcStride, dstStride, srcTexelSize, dstTexelSize; - const PRUint8 *src; - PRUint8 *dst; - - WebGLImageConverter() - { - memset(this, 0, sizeof(WebGLImageConverter)); - } - - template - void run() - { - // Note -- even though the functions take UnpackType, the - // pointers below are all in terms of PRUint8; 
otherwise - // pointer math starts getting tricky. - for (size_t src_row = 0; src_row < height; ++src_row) { - size_t dst_row = flip ? (height - 1 - src_row) : src_row; - PRUint8 *dst_row_ptr = dst + dst_row * dstStride; - const PRUint8 *src_row_ptr = src + src_row * srcStride; - const PRUint8 *src_row_end = src_row_ptr + width * srcTexelSize; // != src_row_ptr + byteStride - while (src_row_ptr != src_row_end) { - UnpackType tmp[4]; - unpackingFunc(reinterpret_cast(src_row_ptr), tmp); - packingFunc(tmp, reinterpret_cast(dst_row_ptr)); - src_row_ptr += srcTexelSize; - dst_row_ptr += dstTexelSize; - } - } - } -}; - -void -WebGLContext::ConvertImage(size_t width, size_t height, size_t srcStride, size_t dstStride, - const PRUint8*src, PRUint8 *dst, - int srcFormat, bool srcPremultiplied, - int dstFormat, bool dstPremultiplied, - size_t dstTexelSize) -{ - if (width <= 0 || height <= 0) - return; - - if (srcFormat == dstFormat && - srcPremultiplied == dstPremultiplied) - { - // fast exit path: we just have to memcpy all the rows. - // - // The case where absolutely nothing needs to be done is supposed to have - // been handled earlier (in TexImage2D_base, etc). - // - // So the case we're handling here is when even though no format conversion is needed, - // we still might have to flip vertically and/or to adjust to a different stride. - - NS_ASSERTION(mPixelStoreFlipY || srcStride != dstStride, "Performance trap -- should handle this case earlier, to avoid memcpy"); - - size_t row_size = width * dstTexelSize; // doesn't matter, src and dst formats agree - const PRUint8* src_row = src; - const PRUint8* src_end = src + height * srcStride; - - PRUint8* dst_row = mPixelStoreFlipY ? dst + (height-1) * dstStride : dst; - ptrdiff_t dstStrideSigned(dstStride); - ptrdiff_t dst_delta = mPixelStoreFlipY ? 
-dstStrideSigned : dstStrideSigned; - - while(src_row != src_end) { - memcpy(dst_row, src_row, row_size); - src_row += srcStride; - dst_row += dst_delta; - } - return; - } - - WebGLImageConverter converter; - converter.flip = mPixelStoreFlipY; - converter.width = width; - converter.height = height; - converter.srcStride = srcStride; - converter.dstStride = dstStride; - converter.dstTexelSize = dstTexelSize; - converter.src = src; - converter.dst = dst; - - int premultiplicationOp = (!srcPremultiplied && dstPremultiplied) ? WebGLTexelPremultiplicationOp::Premultiply - : (srcPremultiplied && !dstPremultiplied) ? WebGLTexelPremultiplicationOp::Unmultiply - : WebGLTexelPremultiplicationOp::None; - -#define HANDLE_DSTFORMAT(format, SrcType, DstType, unpackFunc, packFunc) \ - case WebGLTexelFormat::format: \ - switch (premultiplicationOp) { \ - case WebGLTexelPremultiplicationOp::Premultiply: \ - converter.run(); \ - break; \ - case WebGLTexelPremultiplicationOp::Unmultiply: \ - converter.run(); \ - break; \ - default: \ - converter.run(); \ - break; \ - } \ - break; - -#define HANDLE_SRCFORMAT(format, size, SrcType, unpackFunc) \ - case WebGLTexelFormat::format: \ - converter.srcTexelSize = size; \ - switch (dstFormat) { \ - HANDLE_DSTFORMAT(RGBA8, SrcType, PRUint8, unpackFunc, packRGBA8ToRGBA8) \ - HANDLE_DSTFORMAT(RGB8, SrcType, PRUint8, unpackFunc, packRGBA8ToRGB8) \ - HANDLE_DSTFORMAT(R8, SrcType, PRUint8, unpackFunc, packRGBA8ToR8) \ - HANDLE_DSTFORMAT(RA8, SrcType, PRUint8, unpackFunc, packRGBA8ToRA8) \ - HANDLE_DSTFORMAT(RGBA5551, SrcType, PRUint16, unpackFunc, packRGBA8ToUnsignedShort5551) \ - HANDLE_DSTFORMAT(RGBA4444, SrcType, PRUint16, unpackFunc, packRGBA8ToUnsignedShort4444) \ - HANDLE_DSTFORMAT(RGB565, SrcType, PRUint16, unpackFunc, packRGBA8ToUnsignedShort565) \ - /* A8 needs to be special-cased as it doesn't have color channels to premultiply */ \ - case WebGLTexelFormat::A8: \ - converter.run(); \ - break; \ - default: \ - NS_ASSERTION(false, "Coding 
error?! Should never reach this point."); \ - return; \ - } \ - break; - -#define HANDLE_FLOAT_DSTFORMAT(format, unpackFunc, packFunc) \ - case WebGLTexelFormat::format: \ - switch (premultiplicationOp) { \ - case WebGLTexelPremultiplicationOp::Premultiply: \ - converter.run(); \ - break; \ - case WebGLTexelPremultiplicationOp::Unmultiply: \ - NS_ASSERTION(false, "Floating point can't be un-premultiplied -- we have no premultiplied source data!"); \ - break; \ - default: \ - converter.run(); \ - break; \ - } \ - break; - -#define HANDLE_FLOAT_SRCFORMAT(format, size, unpackFunc) \ - case WebGLTexelFormat::format: \ - converter.srcTexelSize = size; \ - switch (dstFormat) { \ - HANDLE_FLOAT_DSTFORMAT(RGB32F, unpackFunc, packRGBA32FToRGB32F) \ - HANDLE_FLOAT_DSTFORMAT(A32F, unpackFunc, packRGBA32FToA32F) \ - HANDLE_FLOAT_DSTFORMAT(R32F, unpackFunc, packRGBA32FToR32F) \ - HANDLE_FLOAT_DSTFORMAT(RA32F, unpackFunc, packRGBA32FToRA32F) \ - default: \ - NS_ASSERTION(false, "Coding error?! Should never reach this point."); \ - return; \ - } \ - break; - - switch (srcFormat) { - HANDLE_SRCFORMAT(RGBA8, 4, PRUint8, unpackRGBA8ToRGBA8) - HANDLE_SRCFORMAT(RGBX8, 4, PRUint8, unpackRGB8ToRGBA8) - HANDLE_SRCFORMAT(RGB8, 3, PRUint8, unpackRGB8ToRGBA8) - HANDLE_SRCFORMAT(BGRA8, 4, PRUint8, unpackBGRA8ToRGBA8) - HANDLE_SRCFORMAT(BGRX8, 4, PRUint8, unpackBGR8ToRGBA8) - HANDLE_SRCFORMAT(BGR8, 3, PRUint8, unpackBGR8ToRGBA8) - HANDLE_SRCFORMAT(R8, 1, PRUint8, unpackR8ToRGBA8) - HANDLE_SRCFORMAT(A8, 1, PRUint8, unpackA8ToRGBA8) - HANDLE_SRCFORMAT(RA8, 2, PRUint8, unpackRA8ToRGBA8) - HANDLE_SRCFORMAT(RGBA5551, 2, PRUint16, unpackRGBA5551ToRGBA8) - HANDLE_SRCFORMAT(RGBA4444, 2, PRUint16, unpackRGBA4444ToRGBA8) - HANDLE_SRCFORMAT(RGB565, 2, PRUint16, unpackRGB565ToRGBA8) - HANDLE_FLOAT_SRCFORMAT(RGB32F, 12, unpackRGB32FToRGBA32F) - HANDLE_FLOAT_SRCFORMAT(RA32F, 8, unpackRA32FToRGBA32F) - HANDLE_FLOAT_SRCFORMAT(R32F, 4, unpackR32FToRGBA32F) - HANDLE_FLOAT_SRCFORMAT(A32F, 4, 
unpackA32FToRGBA32F) - default: - NS_ASSERTION(false, "Coding error?! Should never reach this point."); - return; - } -} - nsresult WebGLContext::DOMElementToImageSurface(Element* imageOrCanvas, - gfxImageSurface **imageOut, int *format) + gfxImageSurface **imageOut, WebGLTexelFormat *format) { if (!imageOrCanvas) { return NS_ERROR_FAILURE; @@ -4556,16 +4367,16 @@ WebGLContext::DOMElementToImageSurface(Element* imageOrCanvas, switch (surf->Format()) { case gfxASurface::ImageFormatARGB32: - *format = WebGLTexelFormat::BGRA8; // careful, our ARGB means BGRA + *format = WebGLTexelConversions::BGRA8; // careful, our ARGB means BGRA break; case gfxASurface::ImageFormatRGB24: - *format = WebGLTexelFormat::BGRX8; // careful, our RGB24 is not tightly packed. Whence BGRX8. + *format = WebGLTexelConversions::BGRX8; // careful, our RGB24 is not tightly packed. Whence BGRX8. break; case gfxASurface::ImageFormatA8: - *format = WebGLTexelFormat::A8; + *format = WebGLTexelConversions::A8; break; case gfxASurface::ImageFormatRGB16_565: - *format = WebGLTexelFormat::RGB565; + *format = WebGLTexelConversions::RGB565; break; default: NS_ASSERTION(false, "Unsupported image format. 
Unimplemented."); @@ -5644,7 +5455,7 @@ WebGLContext::TexImage2D_base(WebGLenum target, WebGLint level, WebGLenum intern WebGLenum format, WebGLenum type, void *data, PRUint32 byteLength, int jsArrayType, // a TypedArray format enum, or -1 if not relevant - int srcFormat, bool srcPremultiplied) + WebGLTexelFormat srcFormat, bool srcPremultiplied) { switch (target) { case LOCAL_GL_TEXTURE_2D: @@ -5699,14 +5510,19 @@ WebGLContext::TexImage2D_base(WebGLenum target, WebGLint level, WebGLenum intern if (border != 0) return ErrorInvalidValue("TexImage2D: border must be 0"); - PRUint32 texelSize = 0; - if (!ValidateTexFormatAndType(format, type, jsArrayType, &texelSize, "texImage2D")) + PRUint32 dstTexelSize = 0; + if (!ValidateTexFormatAndType(format, type, jsArrayType, &dstTexelSize, "texImage2D")) return; - CheckedUint32 checked_neededByteLength = - GetImageSize(height, width, texelSize, mPixelStoreUnpackAlignment); + WebGLTexelFormat dstFormat = GetWebGLTexelFormat(format, type); + WebGLTexelFormat actualSrcFormat = srcFormat == WebGLTexelConversions::Auto ? dstFormat : srcFormat; - CheckedUint32 checked_plainRowSize = CheckedUint32(width) * texelSize; + PRUint32 srcTexelSize = WebGLTexelConversions::TexelBytesForFormat(actualSrcFormat); + + CheckedUint32 checked_neededByteLength = + GetImageSize(height, width, srcTexelSize, mPixelStoreUnpackAlignment); + + CheckedUint32 checked_plainRowSize = CheckedUint32(width) * srcTexelSize; CheckedUint32 checked_alignedRowSize = RoundedToNextMultipleOf(checked_plainRowSize.value(), mPixelStoreUnpackAlignment); @@ -5734,11 +5550,9 @@ WebGLContext::TexImage2D_base(WebGLenum target, WebGLint level, WebGLenum intern GLenum error = LOCAL_GL_NO_ERROR; if (byteLength) { - int dstFormat = GetWebGLTexelFormat(format, type); - int actualSrcFormat = srcFormat == WebGLTexelFormat::Auto ? dstFormat : srcFormat; size_t srcStride = srcStrideOrZero ? 
srcStrideOrZero : checked_alignedRowSize.value(); - size_t dstPlainRowSize = texelSize * width; + size_t dstPlainRowSize = dstTexelSize * width; size_t unpackAlignment = mPixelStoreUnpackAlignment; size_t dstStride = ((dstPlainRowSize + unpackAlignment-1) / unpackAlignment) * unpackAlignment; @@ -5753,11 +5567,12 @@ WebGLContext::TexImage2D_base(WebGLenum target, WebGLint level, WebGLenum intern } else { - nsAutoArrayPtr convertedData(new PRUint8[bytesNeeded]); + size_t convertedDataSize = height * dstStride; + nsAutoArrayPtr convertedData(new PRUint8[convertedDataSize]); ConvertImage(width, height, srcStride, dstStride, (PRUint8*)data, convertedData, actualSrcFormat, srcPremultiplied, - dstFormat, mPixelStorePremultiplyAlpha, texelSize); + dstFormat, mPixelStorePremultiplyAlpha, dstTexelSize); error = CheckedTexImage2D(target, level, internalformat, width, height, border, format, type, convertedData); } @@ -5814,7 +5629,7 @@ WebGLContext::TexImage2D(JSContext* cx, WebGLenum target, WebGLint level, pixels ? pixels->mData : 0, pixels ? pixels->mLength : 0, pixels ? (int)JS_GetTypedArrayType(pixels->mObj, cx) : -1, - WebGLTexelFormat::Auto, false); + WebGLTexelConversions::Auto, false); } NS_IMETHODIMP @@ -5832,7 +5647,7 @@ WebGLContext::TexImage2D_imageData(WebGLenum target, WebGLint level, WebGLenum i pixels ? JS_GetArrayBufferViewData(pixels, cx) : 0, pixels ? 
JS_GetArrayBufferViewByteLength(pixels, cx) : 0, -1, - WebGLTexelFormat::RGBA8, false); + WebGLTexelConversions::RGBA8, false); return NS_OK; } @@ -5853,7 +5668,7 @@ WebGLContext::TexImage2D(JSContext* cx, WebGLenum target, WebGLint level, return TexImage2D_base(target, level, internalformat, pixels->GetWidth(), pixels->GetHeight(), 4*pixels->GetWidth(), 0, format, type, arr.mData, arr.mLength, -1, - WebGLTexelFormat::RGBA8, false); + WebGLTexelConversions::RGBA8, false); } @@ -5877,7 +5692,7 @@ WebGLContext::TexImage2D(JSContext* /* unused */, WebGLenum target, nsRefPtr isurf; - int srcFormat; + WebGLTexelFormat srcFormat; rv = DOMElementToImageSurface(elt, getter_AddRefs(isurf), &srcFormat); if (rv.Failed()) return; @@ -5908,7 +5723,7 @@ WebGLContext::TexSubImage2D_base(WebGLenum target, WebGLint level, WebGLenum format, WebGLenum type, void *pixels, PRUint32 byteLength, int jsArrayType, - int srcFormat, bool srcPremultiplied) + WebGLTexelFormat srcFormat, bool srcPremultiplied) { switch (target) { case LOCAL_GL_TEXTURE_2D: @@ -5943,17 +5758,22 @@ WebGLContext::TexSubImage2D_base(WebGLenum target, WebGLint level, return ErrorInvalidValue("texSubImage2D: with level > 0, width and height must be powers of two"); } - PRUint32 texelSize = 0; - if (!ValidateTexFormatAndType(format, type, jsArrayType, &texelSize, "texSubImage2D")) + PRUint32 dstTexelSize = 0; + if (!ValidateTexFormatAndType(format, type, jsArrayType, &dstTexelSize, "texSubImage2D")) return; + WebGLTexelFormat dstFormat = GetWebGLTexelFormat(format, type); + WebGLTexelFormat actualSrcFormat = srcFormat == WebGLTexelConversions::Auto ? 
dstFormat : srcFormat; + + PRUint32 srcTexelSize = WebGLTexelConversions::TexelBytesForFormat(actualSrcFormat); + if (width == 0 || height == 0) return; // ES 2.0 says it has no effect, we better return right now CheckedUint32 checked_neededByteLength = - GetImageSize(height, width, texelSize, mPixelStoreUnpackAlignment); + GetImageSize(height, width, srcTexelSize, mPixelStoreUnpackAlignment); - CheckedUint32 checked_plainRowSize = CheckedUint32(width) * texelSize; + CheckedUint32 checked_plainRowSize = CheckedUint32(width) * srcTexelSize; CheckedUint32 checked_alignedRowSize = RoundedToNextMultipleOf(checked_plainRowSize.value(), mPixelStoreUnpackAlignment); @@ -5986,11 +5806,9 @@ WebGLContext::TexSubImage2D_base(WebGLenum target, WebGLint level, MakeContextCurrent(); - int dstFormat = GetWebGLTexelFormat(format, type); - int actualSrcFormat = srcFormat == WebGLTexelFormat::Auto ? dstFormat : srcFormat; size_t srcStride = srcStrideOrZero ? srcStrideOrZero : checked_alignedRowSize.value(); - size_t dstPlainRowSize = texelSize * width; + size_t dstPlainRowSize = dstTexelSize * width; // There are checks above to ensure that this won't overflow. 
size_t dstStride = RoundedToNextMultipleOf(dstPlainRowSize, mPixelStoreUnpackAlignment).value(); @@ -6004,11 +5822,12 @@ WebGLContext::TexSubImage2D_base(WebGLenum target, WebGLint level, } else { - nsAutoArrayPtr convertedData(new PRUint8[bytesNeeded]); + size_t convertedDataSize = height * dstStride; + nsAutoArrayPtr convertedData(new PRUint8[convertedDataSize]); ConvertImage(width, height, srcStride, dstStride, (const PRUint8*)pixels, convertedData, actualSrcFormat, srcPremultiplied, - dstFormat, mPixelStorePremultiplyAlpha, texelSize); + dstFormat, mPixelStorePremultiplyAlpha, dstTexelSize); gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format, type, convertedData); } @@ -6051,7 +5870,7 @@ WebGLContext::TexSubImage2D(JSContext* cx, WebGLenum target, WebGLint level, width, height, 0, format, type, pixels->mData, pixels->mLength, JS_GetTypedArrayType(pixels->mObj, cx), - WebGLTexelFormat::Auto, false); + WebGLTexelConversions::Auto, false); } NS_IMETHODIMP @@ -6075,7 +5894,7 @@ WebGLContext::TexSubImage2D_imageData(WebGLenum target, WebGLint level, width, height, 4*width, format, type, JS_GetArrayBufferViewData(pixels, cx), JS_GetArrayBufferViewByteLength(pixels, cx), -1, - WebGLTexelFormat::RGBA8, false); + WebGLTexelConversions::RGBA8, false); return NS_OK; } @@ -6097,7 +5916,7 @@ WebGLContext::TexSubImage2D(JSContext* cx, WebGLenum target, WebGLint level, 4*pixels->GetWidth(), format, type, arr.mData, arr.mLength, -1, - WebGLTexelFormat::RGBA8, false); + WebGLTexelConversions::RGBA8, false); } NS_IMETHODIMP @@ -6122,7 +5941,7 @@ WebGLContext::TexSubImage2D(JSContext* /* unused */, WebGLenum target, nsRefPtr isurf; - int srcFormat; + WebGLTexelFormat srcFormat; rv = DOMElementToImageSurface(elt, getter_AddRefs(isurf), &srcFormat); if (rv.Failed()) return; @@ -6232,52 +6051,52 @@ BaseTypeAndSizeFromUniformType(WebGLenum uType, WebGLenum *baseType, WebGLint *u } -int mozilla::GetWebGLTexelFormat(GLenum format, GLenum type) +WebGLTexelFormat 
mozilla::GetWebGLTexelFormat(GLenum format, GLenum type) { if (type == LOCAL_GL_UNSIGNED_BYTE) { switch (format) { case LOCAL_GL_RGBA: - return WebGLTexelFormat::RGBA8; + return WebGLTexelConversions::RGBA8; case LOCAL_GL_RGB: - return WebGLTexelFormat::RGB8; + return WebGLTexelConversions::RGB8; case LOCAL_GL_ALPHA: - return WebGLTexelFormat::A8; + return WebGLTexelConversions::A8; case LOCAL_GL_LUMINANCE: - return WebGLTexelFormat::R8; + return WebGLTexelConversions::R8; case LOCAL_GL_LUMINANCE_ALPHA: - return WebGLTexelFormat::RA8; + return WebGLTexelConversions::RA8; default: - NS_ASSERTION(false, "Coding mistake?! Should never reach this point."); - return WebGLTexelFormat::Generic; + NS_ABORT_IF_FALSE(false, "Coding mistake?! Should never reach this point."); + return WebGLTexelConversions::BadFormat; } } else if (type == LOCAL_GL_FLOAT) { // OES_texture_float switch (format) { case LOCAL_GL_RGBA: - return WebGLTexelFormat::RGBA32F; + return WebGLTexelConversions::RGBA32F; case LOCAL_GL_RGB: - return WebGLTexelFormat::RGB32F; + return WebGLTexelConversions::RGB32F; case LOCAL_GL_ALPHA: - return WebGLTexelFormat::A32F; + return WebGLTexelConversions::A32F; case LOCAL_GL_LUMINANCE: - return WebGLTexelFormat::R32F; + return WebGLTexelConversions::R32F; case LOCAL_GL_LUMINANCE_ALPHA: - return WebGLTexelFormat::RA32F; + return WebGLTexelConversions::RA32F; default: - NS_ASSERTION(false, "Coding mistake?! Should never reach this point."); - return WebGLTexelFormat::Generic; + NS_ABORT_IF_FALSE(false, "Coding mistake?! 
Should never reach this point."); + return WebGLTexelConversions::BadFormat; } } else { switch (type) { case LOCAL_GL_UNSIGNED_SHORT_4_4_4_4: - return WebGLTexelFormat::RGBA4444; + return WebGLTexelConversions::RGBA4444; case LOCAL_GL_UNSIGNED_SHORT_5_5_5_1: - return WebGLTexelFormat::RGBA5551; + return WebGLTexelConversions::RGBA5551; case LOCAL_GL_UNSIGNED_SHORT_5_6_5: - return WebGLTexelFormat::RGB565; + return WebGLTexelConversions::RGB565; default: - NS_ASSERTION(false, "Coding mistake?! Should never reach this point."); - return WebGLTexelFormat::Generic; + NS_ABORT_IF_FALSE(false, "Coding mistake?! Should never reach this point."); + return WebGLTexelConversions::BadFormat; } } } diff --git a/content/canvas/src/WebGLTexelConversions.cpp b/content/canvas/src/WebGLTexelConversions.cpp new file mode 100644 index 00000000000..04036cdaaed --- /dev/null +++ b/content/canvas/src/WebGLTexelConversions.cpp @@ -0,0 +1,382 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "WebGLTexelConversions.h" + +namespace mozilla { + +using namespace WebGLTexelConversions; + +namespace { + +/** @class WebGLImageConverter + * + * This class is just a helper to implement WebGLContext::ConvertImage below. + * + * Design comments: + * + * WebGLContext::ConvertImage has to handle hundreds of format conversion paths. + * It is important to minimize executable code size here. Instead of passing around + * a large number of function parameters hundreds of times, we create a + * WebGLImageConverter object once, storing these parameters, and then we call + * the run() method on it. 
+ */ +class WebGLImageConverter +{ + const size_t mWidth, mHeight; + const void* const mSrcStart; + void* const mDstStart; + const ptrdiff_t mSrcStride, mDstStride; + bool mAlreadyRun; + bool mSuccess; + + /* + * Returns sizeof(texel)/sizeof(type). The point is that we will iterate over + * texels with typed pointers and this value will tell us by how much we need + * to increment these pointers to advance to the next texel. + */ + template + static size_t NumElementsPerTexelForFormat() { + switch (Format) { + case R8: + case A8: + case R32F: + case A32F: + case RGBA5551: + case RGBA4444: + case RGB565: + return 1; + case RA8: + case RA32F: + return 2; + case RGB8: + case RGB32F: + return 3; + case RGBA8: + case BGRA8: + case BGRX8: + case RGBA32F: + return 4; + default: + NS_ABORT_IF_FALSE(false, "Unknown texel format. Coding mistake?"); + return 0; + } + } + + /* + * This is the completely format-specific templatized conversion function, + * that will be instantiated hundreds of times for all different combinations. + * It is important to avoid generating useless code here. In particular, many + * instantiations of this function template will never be called, so we try + * to return immediately in these cases to allow the compiler to avoid generating + * useless code. + */ + template + void run() + { + // check for never-called cases. We early-return to allow the compiler + // to avoid generating this code. It would be tempting to abort() instead, + // as returning early does leave the destination surface with uninitialized + // data, but that would not allow the compiler to avoid generating this code. + // So instead, we return early, so Success() will return false, and the caller + // must check that and abort in that case. See WebGLContext::ConvertImage. + + if (SrcFormat == DstFormat && + PremultiplicationOp == NoPremultiplicationOp) + { + // Should have used a fast exit path earlier, rather than entering this function. 
+ // we explicitly return here to allow the compiler to avoid generating this code + return; + } + + // Only textures uploaded from DOM elements or ImageData can allow DstFormat != SrcFormat. + // DOM elements can only give BGRA8, BGRX8, A8, RGB565 formats. See DOMElementToImageSurface. + // ImageData is always RGBA8. So all other SrcFormat will always satisfy DstFormat==SrcFormat, + // so we can avoid compiling the code for all the unreachable paths. + const bool CanSrcFormatComeFromDOMElementOrImageData + = SrcFormat == BGRA8 || + SrcFormat == BGRX8 || + SrcFormat == A8 || + SrcFormat == RGB565 || + SrcFormat == RGBA8; + if (!CanSrcFormatComeFromDOMElementOrImageData && + SrcFormat != DstFormat) + { + return; + } + + // Likewise, only textures uploaded from DOM elements or ImageData can possibly have to be unpremultiplied. + if (!CanSrcFormatComeFromDOMElementOrImageData && + PremultiplicationOp == Unpremultiply) + { + return; + } + + // there is no point in premultiplication/unpremultiplication + // in the following cases: + // - the source format has no alpha + // - the source format has no color + // - the destination format has no color + if (!HasAlpha(SrcFormat) || + !HasColor(SrcFormat) || + !HasColor(DstFormat)) + { + + if (PremultiplicationOp != NoPremultiplicationOp) + { + return; + } + } + + // end of early return cases. + + NS_ABORT_IF_FALSE(!mAlreadyRun, "converter should be run only once!"); + mAlreadyRun = true; + + // gather some compile-time meta-data about the formats at hand. 
+ + typedef + typename DataTypeForFormat::Type + SrcType; + typedef + typename DataTypeForFormat::Type + DstType; + + const int IntermediateSrcFormat + = IntermediateFormat::Value; + const int IntermediateDstFormat + = IntermediateFormat::Value; + typedef + typename DataTypeForFormat::Type + IntermediateSrcType; + typedef + typename DataTypeForFormat::Type + IntermediateDstType; + + const size_t NumElementsPerSrcTexel = NumElementsPerTexelForFormat(); + const size_t NumElementsPerDstTexel = NumElementsPerTexelForFormat(); + const size_t MaxElementsPerTexel = 4; + NS_ABORT_IF_FALSE(NumElementsPerSrcTexel <= MaxElementsPerTexel, "unhandled format"); + NS_ABORT_IF_FALSE(NumElementsPerDstTexel <= MaxElementsPerTexel, "unhandled format"); + + // we assume that the strides are multiples of the sizeof of respective types. + // this assumption will allow us to iterate over src and dst images using typed + // pointers, e.g. uint8_t* or uint16_t* or float*, instead of untyped pointers. + // So this assumption allows us to write cleaner and safer code, but it might + // not be true forever and if it eventually becomes wrong, we'll have to revert + // to always iterating using uint8_t* pointers regardless of the types at hand. + NS_ABORT_IF_FALSE(mSrcStride % sizeof(SrcType) == 0 && + mDstStride % sizeof(DstType) == 0, + "Unsupported: texture stride is not a multiple of sizeof(type)"); + const ptrdiff_t srcStrideInElements = mSrcStride / sizeof(SrcType); + const ptrdiff_t dstStrideInElements = mDstStride / sizeof(DstType); + + const SrcType *srcRowStart = static_cast(mSrcStart); + DstType *dstRowStart = static_cast(mDstStart); + + // the loop performing the texture format conversion + for (size_t i = 0; i < mHeight; ++i) { + const SrcType *srcRowEnd = srcRowStart + mWidth * NumElementsPerSrcTexel; + const SrcType *srcPtr = srcRowStart; + DstType *dstPtr = dstRowStart; + while (srcPtr != srcRowEnd) { + // convert a single texel. 
We proceed in 3 steps: unpack the source texel + // to the corresponding intermediate format (e.g. unpack RGB565 to RGBA8), + // convert the resulting data type to the destination type (e.g. convert + // from RGBA8 to RGBA32F), and finally pack the destination texel + // (e.g. pack RGBA32F to RGB32F). + IntermediateSrcType unpackedSrc[MaxElementsPerTexel]; + IntermediateDstType unpackedDst[MaxElementsPerTexel]; + + // unpack a src texel to corresponding intermediate src format. + // for example, unpack RGB565 to RGBA8 + unpack(srcPtr, unpackedSrc); + // convert the data type to the destination type, if needed. + // for example, convert RGBA8 to RGBA32F + convertType(unpackedSrc, unpackedDst); + // pack the destination texel. + // for example, pack RGBA32F to RGB32F + pack(unpackedDst, dstPtr); + + srcPtr += NumElementsPerSrcTexel; + dstPtr += NumElementsPerDstTexel; + } + srcRowStart += srcStrideInElements; + dstRowStart += dstStrideInElements; + } + + mSuccess = true; + return; + } + + template + void run(WebGLTexelPremultiplicationOp premultiplicationOp) + { + #define WEBGLIMAGECONVERTER_CASE_PREMULTIPLICATIONOP(PremultiplicationOp) \ + case PremultiplicationOp: \ + return run(); + + switch (premultiplicationOp) { + WEBGLIMAGECONVERTER_CASE_PREMULTIPLICATIONOP(NoPremultiplicationOp) + WEBGLIMAGECONVERTER_CASE_PREMULTIPLICATIONOP(Premultiply) + WEBGLIMAGECONVERTER_CASE_PREMULTIPLICATIONOP(Unpremultiply) + default: + NS_ABORT_IF_FALSE(false, "unhandled case. 
Coding mistake?"); + } + + #undef WEBGLIMAGECONVERTER_CASE_PREMULTIPLICATIONOP + } + + template + void run(WebGLTexelFormat dstFormat, + WebGLTexelPremultiplicationOp premultiplicationOp) + { + #define WEBGLIMAGECONVERTER_CASE_DSTFORMAT(DstFormat) \ + case DstFormat: \ + return run(premultiplicationOp); + + switch (dstFormat) { + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(R8) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(A8) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(R32F) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(A32F) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(RA8) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(RA32F) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(RGB8) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(RGB565) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(RGB32F) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(RGBA8) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(RGBA5551) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(RGBA4444) + WEBGLIMAGECONVERTER_CASE_DSTFORMAT(RGBA32F) + default: + NS_ABORT_IF_FALSE(false, "unhandled case. Coding mistake?"); + } + + #undef WEBGLIMAGECONVERTER_CASE_DSTFORMAT + } + +public: + + void run(WebGLTexelFormat srcFormat, + WebGLTexelFormat dstFormat, + WebGLTexelPremultiplicationOp premultiplicationOp) + { + #define WEBGLIMAGECONVERTER_CASE_SRCFORMAT(SrcFormat) \ + case SrcFormat: \ + return run(dstFormat, premultiplicationOp); + + switch (srcFormat) { + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(R8) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(A8) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(R32F) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(A32F) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(RA8) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(RA32F) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(RGB8) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(BGRX8) // source format only + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(RGB565) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(RGB32F) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(RGBA8) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(BGRA8) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(RGBA5551) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(RGBA4444) + WEBGLIMAGECONVERTER_CASE_SRCFORMAT(RGBA32F) + 
default: + NS_ABORT_IF_FALSE(false, "unhandled case. Coding mistake?"); + } + + #undef WEBGLIMAGECONVERTER_CASE_SRCFORMAT + } + + WebGLImageConverter(size_t width, size_t height, + const void* srcStart, void* dstStart, + ptrdiff_t srcStride, ptrdiff_t dstStride) + : mWidth(width), mHeight(height), + mSrcStart(srcStart), mDstStart(dstStart), + mSrcStride(srcStride), mDstStride(dstStride), + mAlreadyRun(false), mSuccess(false) + {} + + bool Success() const { + return mSuccess; + } +}; + +} // end anonymous namespace + +void +WebGLContext::ConvertImage(size_t width, size_t height, size_t srcStride, size_t dstStride, + const uint8_t* src, uint8_t *dst, + WebGLTexelFormat srcFormat, bool srcPremultiplied, + WebGLTexelFormat dstFormat, bool dstPremultiplied, + size_t dstTexelSize) +{ + if (width <= 0 || height <= 0) + return; + + const bool FormatsRequireNoPremultiplicationOp = + !HasAlpha(srcFormat) || + !HasColor(srcFormat) || + !HasColor(dstFormat); + + if (srcFormat == dstFormat && + (FormatsRequireNoPremultiplicationOp || srcPremultiplied == dstPremultiplied)) + { + // fast exit path: we just have to memcpy all the rows. + // + // The case where absolutely nothing needs to be done is supposed to have + // been handled earlier (in TexImage2D_base, etc). + // + // So the case we're handling here is when even though no format conversion is needed, + // we still might have to flip vertically and/or to adjust to a different stride. + + NS_ABORT_IF_FALSE(mPixelStoreFlipY || srcStride != dstStride, "Performance trap -- should handle this case earlier, to avoid memcpy"); + + size_t row_size = width * dstTexelSize; // doesn't matter, src and dst formats agree + const uint8_t* ptr = src; + const uint8_t* src_end = src + height * srcStride; + + uint8_t* dst_row = mPixelStoreFlipY + ? dst + (height-1) * dstStride + : dst; + ptrdiff_t dstStrideSigned(dstStride); + ptrdiff_t dst_delta = mPixelStoreFlipY ? 
-dstStrideSigned : dstStrideSigned; + + while(ptr != src_end) { + memcpy(dst_row, ptr, row_size); + ptr += srcStride; + dst_row += dst_delta; + } + return; + } + + uint8_t* dstStart = dst; + ptrdiff_t signedDstStride = dstStride; + if (mPixelStoreFlipY) { + dstStart = dst + (height - 1) * dstStride; + signedDstStride = -dstStride; + } + + WebGLImageConverter converter(width, height, src, dstStart, srcStride, signedDstStride); + + const WebGLTexelPremultiplicationOp premultiplicationOp + = FormatsRequireNoPremultiplicationOp ? NoPremultiplicationOp + : (!srcPremultiplied && dstPremultiplied) ? Premultiply + : (srcPremultiplied && !dstPremultiplied) ? Unpremultiply + : NoPremultiplicationOp; + + converter.run(srcFormat, dstFormat, premultiplicationOp); + + if (!converter.Success()) { + // the dst image may be left uninitialized, so we better not try to + // continue even in release builds. This should never happen anyway, + // and would be a bug in our code. + NS_RUNTIMEABORT("programming mistake in WebGL texture conversions"); + } +} + +} // end namespace mozilla diff --git a/content/canvas/src/WebGLTexelConversions.h b/content/canvas/src/WebGLTexelConversions.h index 2ef798dfa71..015b8ad2197 100644 --- a/content/canvas/src/WebGLTexelConversions.h +++ b/content/canvas/src/WebGLTexelConversions.h @@ -25,17 +25,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -// the pixel conversions code here is originally from this file: -// http://trac.webkit.org/browser/trunk/WebCore/platform/graphics/GraphicsContext3D.cpp - -// Keep as much as possible unchanged to ease sharing code with the WebKit guys. -// Changes: -// * added BGR8 path, we need it in Mozilla to load textures from DOMElements -// * enclosing in a namespace WebGLTexelConversions to make it clear it is, in profilers and in symbol table dumps -// * added __restrict keywords. 
Although non-standard, this is very well supported across all compilers -// that I know of (GCC/LLVM/MSC/ICC/XLC...) -// * optimized scaleFactor computation in Unmultiply functions (1 div instead of 2) - #ifndef WEBGLTEXELCONVERSIONS_H_ #define WEBGLTEXELCONVERSIONS_H_ @@ -44,6 +33,7 @@ #endif #include "WebGLContext.h" +#include "mozilla/StandardInteger.h" #if defined _MSC_VER #define FORCE_INLINE __forceinline @@ -57,410 +47,647 @@ namespace mozilla { namespace WebGLTexelConversions { +enum WebGLTexelPremultiplicationOp +{ + NoPremultiplicationOp, + Premultiply, + Unpremultiply +}; + +template +struct IsFloatFormat +{ + static const bool Value = + Format == RGBA32F || + Format == RGB32F || + Format == RA32F || + Format == R32F || + Format == A32F; +}; + +template +struct Is16bppFormat +{ + static const bool Value = + Format == RGBA4444 || + Format == RGBA5551 || + Format == RGB565; +}; + +template::Value, + bool Is16bpp = Is16bppFormat::Value> +struct DataTypeForFormat +{ + typedef uint8_t Type; +}; + +template +struct DataTypeForFormat +{ + typedef float Type; +}; + +template +struct DataTypeForFormat +{ + typedef uint16_t Type; +}; + +template +struct IntermediateFormat +{ + static const int Value = IsFloatFormat::Value ? 
RGBA32F : RGBA8; +}; + +inline size_t TexelBytesForFormat(int format) { + switch (format) { + case WebGLTexelConversions::R8: + case WebGLTexelConversions::A8: + return 1; + case WebGLTexelConversions::RA8: + case WebGLTexelConversions::RGBA5551: + case WebGLTexelConversions::RGBA4444: + case WebGLTexelConversions::RGB565: + return 2; + case WebGLTexelConversions::RGB8: + return 3; + case WebGLTexelConversions::RGBA8: + case WebGLTexelConversions::BGRA8: + case WebGLTexelConversions::BGRX8: + case WebGLTexelConversions::R32F: + case WebGLTexelConversions::A32F: + return 4; + case WebGLTexelConversions::RA32F: + return 8; + case WebGLTexelConversions::RGB32F: + return 12; + case WebGLTexelConversions::RGBA32F: + return 16; + default: + NS_ABORT_IF_FALSE(false, "Unknown texel format. Coding mistake?"); + return 0; + } +} + +FORCE_INLINE bool HasAlpha(int format) { + return format == A8 || + format == A32F || + format == RA8 || + format == RA32F || + format == RGBA8 || + format == BGRA8 || + format == RGBA32F || + format == RGBA4444 || + format == RGBA5551; +} + +FORCE_INLINE bool HasColor(int format) { + return format == R8 || + format == R32F || + format == RA8 || + format == RA32F || + format == RGB8 || + format == BGRX8 || + format == RGB565 || + format == RGB32F || + format == RGBA8 || + format == BGRA8 || + format == RGBA32F || + format == RGBA4444 || + format == RGBA5551; +} + + /****** BEGIN CODE SHARED WITH WEBKIT ******/ +// the pack/unpack functions here are originally from this file: +// http://trac.webkit.org/browser/trunk/WebCore/platform/graphics/GraphicsContext3D.cpp + //---------------------------------------------------------------------- // Pixel unpacking routines. 
-FORCE_INLINE void unpackRGBA8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template +FORCE_INLINE void +unpack(const typename DataTypeForFormat::Type* __restrict src, + typename DataTypeForFormat::Value>::Type* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[1]; - destination[2] = source[2]; - destination[3] = source[3]; + NS_ABORT_IF_FALSE(false, "Unimplemented texture format conversion"); } -FORCE_INLINE void unpackRGB8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +unpack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[1]; - destination[2] = source[2]; - destination[3] = 0xFF; + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; } -FORCE_INLINE void unpackBGRA8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +unpack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - destination[0] = source[2]; - destination[1] = source[1]; - destination[2] = source[0]; - destination[3] = source[3]; + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = 0xFF; } -FORCE_INLINE void unpackBGR8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +unpack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - destination[0] = source[2]; - destination[1] = source[1]; - destination[2] = source[0]; - destination[3] = 0xFF; + dst[0] = src[2]; + dst[1] = src[1]; + dst[2] = src[0]; + dst[3] = src[3]; } -FORCE_INLINE void unpackRGBA5551ToRGBA8(const uint16_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +unpack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - uint16_t packedValue = source[0]; - uint8_t r = packedValue >> 11; + dst[0] = src[2]; + dst[1] = src[1]; + dst[2] = src[0]; + dst[3] = 0xFF; +} + 
+template<> FORCE_INLINE void +unpack(const uint16_t* __restrict src, uint8_t* __restrict dst) +{ + uint16_t packedValue = src[0]; + uint8_t r = (packedValue >> 11) & 0x1F; uint8_t g = (packedValue >> 6) & 0x1F; uint8_t b = (packedValue >> 1) & 0x1F; - destination[0] = (r << 3) | (r & 0x7); - destination[1] = (g << 3) | (g & 0x7); - destination[2] = (b << 3) | (b & 0x7); - destination[3] = (packedValue & 0x1) ? 0xFF : 0x0; + dst[0] = (r << 3) | (r & 0x7); + dst[1] = (g << 3) | (g & 0x7); + dst[2] = (b << 3) | (b & 0x7); + dst[3] = (packedValue & 0x1) ? 0xFF : 0; } -FORCE_INLINE void unpackRGBA4444ToRGBA8(const uint16_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +unpack(const uint16_t* __restrict src, uint8_t* __restrict dst) { - uint16_t packedValue = source[0]; - uint8_t r = packedValue >> 12; + uint16_t packedValue = src[0]; + uint8_t r = (packedValue >> 12) & 0x0F; uint8_t g = (packedValue >> 8) & 0x0F; uint8_t b = (packedValue >> 4) & 0x0F; uint8_t a = packedValue & 0x0F; - destination[0] = r << 4 | r; - destination[1] = g << 4 | g; - destination[2] = b << 4 | b; - destination[3] = a << 4 | a; + dst[0] = (r << 4) | r; + dst[1] = (g << 4) | g; + dst[2] = (b << 4) | b; + dst[3] = (a << 4) | a; } -FORCE_INLINE void unpackRGB565ToRGBA8(const uint16_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +unpack(const uint16_t* __restrict src, uint8_t* __restrict dst) { - uint16_t packedValue = source[0]; - uint8_t r = packedValue >> 11; + uint16_t packedValue = src[0]; + uint8_t r = (packedValue >> 11) & 0x1F; uint8_t g = (packedValue >> 5) & 0x3F; uint8_t b = packedValue & 0x1F; - destination[0] = (r << 3) | (r & 0x7); - destination[1] = (g << 2) | (g & 0x3); - destination[2] = (b << 3) | (b & 0x7); - destination[3] = 0xFF; + dst[0] = (r << 3) | (r & 0x7); + dst[1] = (g << 2) | (g & 0x3); + dst[2] = (b << 3) | (b & 0x7); + dst[3] = 0xFF; } -FORCE_INLINE void unpackR8ToRGBA8(const uint8_t* 
__restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +unpack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[0]; - destination[2] = source[0]; - destination[3] = 0xFF; + dst[0] = src[0]; + dst[1] = src[0]; + dst[2] = src[0]; + dst[3] = 0xFF; } -FORCE_INLINE void unpackRA8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +unpack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[0]; - destination[2] = source[0]; - destination[3] = source[1]; + dst[0] = src[0]; + dst[1] = src[0]; + dst[2] = src[0]; + dst[3] = src[1]; } -FORCE_INLINE void unpackA8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +unpack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - destination[0] = 0x0; - destination[1] = 0x0; - destination[2] = 0x0; - destination[3] = source[0]; + dst[0] = 0; + dst[1] = 0; + dst[2] = 0; + dst[3] = src[0]; } -FORCE_INLINE void unpackRGB32FToRGBA32F(const float* __restrict source, float* __restrict destination) +template<> FORCE_INLINE void +unpack(const float* __restrict src, float* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[1]; - destination[2] = source[2]; - destination[3] = 1; + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; } -FORCE_INLINE void unpackR32FToRGBA32F(const float* __restrict source, float* __restrict destination) +template<> FORCE_INLINE void +unpack(const float* __restrict src, float* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[0]; - destination[2] = source[0]; - destination[3] = 1; + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = 1.0f; } -FORCE_INLINE void unpackRA32FToRGBA32F(const float* __restrict source, float* __restrict destination) +template<> FORCE_INLINE void 
+unpack(const float* __restrict src, float* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[0]; - destination[2] = source[0]; - destination[3] = source[1]; + dst[0] = src[0]; + dst[1] = src[0]; + dst[2] = src[0]; + dst[3] = 1.0f; } -FORCE_INLINE void unpackA32FToRGBA32F(const float* __restrict source, float* __restrict destination) +template<> FORCE_INLINE void +unpack(const float* __restrict src, float* __restrict dst) { - destination[0] = 0; - destination[1] = 0; - destination[2] = 0; - destination[3] = source[0]; + dst[0] = src[0]; + dst[1] = src[0]; + dst[2] = src[0]; + dst[3] = src[1]; +} + +template<> FORCE_INLINE void +unpack(const float* __restrict src, float* __restrict dst) +{ + dst[0] = 0; + dst[1] = 0; + dst[2] = 0; + dst[3] = src[0]; } //---------------------------------------------------------------------- // Pixel packing routines. // -FORCE_INLINE void packRGBA8ToA8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template +FORCE_INLINE void +pack(const typename DataTypeForFormat::Value>::Type* __restrict src, + typename DataTypeForFormat::Type* __restrict dst) { - destination[0] = source[3]; + NS_ABORT_IF_FALSE(false, "Unimplemented texture format conversion"); } -FORCE_INLINE void packRGBA8ToR8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - destination[0] = source[0]; + dst[0] = src[3]; } -FORCE_INLINE void packRGBA8ToR8Premultiply(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - float scaleFactor = source[3] / 255.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - destination[0] = sourceR; + dst[0] = src[3]; +} + +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) +{ + dst[0] = src[3]; +} + +template<> 
FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) +{ + dst[0] = src[0]; +} + +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) +{ + float scaleFactor = src[3] / 255.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + dst[0] = srcR; +} + +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) +{ + float scaleFactor = src[3] ? 255.0f / src[3] : 1.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + dst[0] = srcR; +} + +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) +{ + dst[0] = src[0]; + dst[1] = src[3]; +} + +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) +{ + float scaleFactor = src[3] / 255.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + dst[0] = srcR; + dst[1] = src[3]; } // FIXME: this routine is lossy and must be removed. -FORCE_INLINE void packRGBA8ToR8Unmultiply(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - destination[0] = sourceR; + float scaleFactor = src[3] ? 
255.0f / src[3] : 1.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + dst[0] = srcR; + dst[1] = src[3]; } -FORCE_INLINE void packRGBA8ToRA8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[3]; + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; } -FORCE_INLINE void packRGBA8ToRA8Premultiply(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - float scaleFactor = source[3] / 255.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - destination[0] = sourceR; - destination[1] = source[3]; + float scaleFactor = src[3] / 255.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + uint8_t srcG = static_cast(src[1] * scaleFactor); + uint8_t srcB = static_cast(src[2] * scaleFactor); + dst[0] = srcR; + dst[1] = srcG; + dst[2] = srcB; +} + +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) +{ + float scaleFactor = src[3] ? 
255.0f / src[3] : 1.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + uint8_t srcG = static_cast(src[1] * scaleFactor); + uint8_t srcB = static_cast(src[2] * scaleFactor); + dst[0] = srcR; + dst[1] = srcG; + dst[2] = srcB; +} + +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) +{ + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) +{ + float scaleFactor = src[3] / 255.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + uint8_t srcG = static_cast(src[1] * scaleFactor); + uint8_t srcB = static_cast(src[2] * scaleFactor); + dst[0] = srcR; + dst[1] = srcG; + dst[2] = srcB; + dst[3] = src[3]; } // FIXME: this routine is lossy and must be removed. -FORCE_INLINE void packRGBA8ToRA8Unmultiply(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint8_t* __restrict dst) { - float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - destination[0] = sourceR; - destination[1] = source[3]; + float scaleFactor = src[3] ? 
255.0f / src[3] : 1.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + uint8_t srcG = static_cast(src[1] * scaleFactor); + uint8_t srcB = static_cast(src[2] * scaleFactor); + dst[0] = srcR; + dst[1] = srcG; + dst[2] = srcB; + dst[3] = src[3]; } -FORCE_INLINE void packRGBA8ToRGB8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint16_t* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[1]; - destination[2] = source[2]; + *dst = ( ((src[0] & 0xF0) << 8) + | ((src[1] & 0xF0) << 4) + | (src[2] & 0xF0) + | (src[3] >> 4) ); } -FORCE_INLINE void packRGBA8ToRGB8Premultiply(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint16_t* __restrict dst) { - float scaleFactor = source[3] / 255.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - uint8_t sourceG = static_cast(static_cast(source[1]) * scaleFactor); - uint8_t sourceB = static_cast(static_cast(source[2]) * scaleFactor); - destination[0] = sourceR; - destination[1] = sourceG; - destination[2] = sourceB; + float scaleFactor = src[3] / 255.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + uint8_t srcG = static_cast(src[1] * scaleFactor); + uint8_t srcB = static_cast(src[2] * scaleFactor); + *dst = ( ((srcR & 0xF0) << 8) + | ((srcG & 0xF0) << 4) + | (srcB & 0xF0) + | (src[3] >> 4)); } // FIXME: this routine is lossy and must be removed. -FORCE_INLINE void packRGBA8ToRGB8Unmultiply(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint16_t* __restrict dst) { - float scaleFactor = source[3] ? 
255.0f / source[3] : 1.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - uint8_t sourceG = static_cast(static_cast(source[1]) * scaleFactor); - uint8_t sourceB = static_cast(static_cast(source[2]) * scaleFactor); - destination[0] = sourceR; - destination[1] = sourceG; - destination[2] = sourceB; + float scaleFactor = src[3] ? 255.0f / src[3] : 1.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + uint8_t srcG = static_cast(src[1] * scaleFactor); + uint8_t srcB = static_cast(src[2] * scaleFactor); + *dst = ( ((srcR & 0xF0) << 8) + | ((srcG & 0xF0) << 4) + | (srcB & 0xF0) + | (src[3] >> 4)); } -// This is only used when the source format is different than kSourceFormatRGBA8. -FORCE_INLINE void packRGBA8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint16_t* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[1]; - destination[2] = source[2]; - destination[3] = source[3]; + *dst = ( ((src[0] & 0xF8) << 8) + | ((src[1] & 0xF8) << 3) + | ((src[2] & 0xF8) >> 2) + | (src[3] >> 7)); } -FORCE_INLINE void packRGBA8ToRGBA8Premultiply(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint16_t* __restrict dst) { - float scaleFactor = source[3] / 255.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - uint8_t sourceG = static_cast(static_cast(source[1]) * scaleFactor); - uint8_t sourceB = static_cast(static_cast(source[2]) * scaleFactor); - destination[0] = sourceR; - destination[1] = sourceG; - destination[2] = sourceB; - destination[3] = source[3]; + float scaleFactor = src[3] / 255.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + uint8_t srcG = static_cast(src[1] * scaleFactor); + uint8_t srcB = static_cast(src[2] * scaleFactor); + *dst = ( ((srcR & 0xF8) << 8) + | ((srcG & 0xF8) << 3) + | ((srcB & 0xF8) >> 2) + 
| (src[3] >> 7)); } // FIXME: this routine is lossy and must be removed. -FORCE_INLINE void packRGBA8ToRGBA8Unmultiply(const uint8_t* __restrict source, uint8_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint16_t* __restrict dst) { - float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - uint8_t sourceG = static_cast(static_cast(source[1]) * scaleFactor); - uint8_t sourceB = static_cast(static_cast(source[2]) * scaleFactor); - destination[0] = sourceR; - destination[1] = sourceG; - destination[2] = sourceB; - destination[3] = source[3]; + float scaleFactor = src[3] ? 255.0f / src[3] : 1.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + uint8_t srcG = static_cast(src[1] * scaleFactor); + uint8_t srcB = static_cast(src[2] * scaleFactor); + *dst = ( ((srcR & 0xF8) << 8) + | ((srcG & 0xF8) << 3) + | ((srcB & 0xF8) >> 2) + | (src[3] >> 7)); } -FORCE_INLINE void packRGBA8ToUnsignedShort4444(const uint8_t* __restrict source, uint16_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint16_t* __restrict dst) { - *destination = (((source[0] & 0xF0) << 8) - | ((source[1] & 0xF0) << 4) - | (source[2] & 0xF0) - | (source[3] >> 4)); + *dst = ( ((src[0] & 0xF8) << 8) + | ((src[1] & 0xFC) << 3) + | ((src[2] & 0xF8) >> 3)); } -FORCE_INLINE void packRGBA8ToUnsignedShort4444Premultiply(const uint8_t* __restrict source, uint16_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint16_t* __restrict dst) { - float scaleFactor = source[3] / 255.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - uint8_t sourceG = static_cast(static_cast(source[1]) * scaleFactor); - uint8_t sourceB = static_cast(static_cast(source[2]) * scaleFactor); - *destination = (((sourceR & 0xF0) << 8) - | ((sourceG & 0xF0) << 4) - | (sourceB & 0xF0) - | (source[3] >> 4)); + 
float scaleFactor = src[3] / 255.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + uint8_t srcG = static_cast(src[1] * scaleFactor); + uint8_t srcB = static_cast(src[2] * scaleFactor); + *dst = ( ((srcR & 0xF8) << 8) + | ((srcG & 0xFC) << 3) + | ((srcB & 0xF8) >> 3)); } // FIXME: this routine is lossy and must be removed. -FORCE_INLINE void packRGBA8ToUnsignedShort4444Unmultiply(const uint8_t* __restrict source, uint16_t* __restrict destination) +template<> FORCE_INLINE void +pack(const uint8_t* __restrict src, uint16_t* __restrict dst) { - float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - uint8_t sourceG = static_cast(static_cast(source[1]) * scaleFactor); - uint8_t sourceB = static_cast(static_cast(source[2]) * scaleFactor); - *destination = (((sourceR & 0xF0) << 8) - | ((sourceG & 0xF0) << 4) - | (sourceB & 0xF0) - | (source[3] >> 4)); + float scaleFactor = src[3] ? 255.0f / src[3] : 1.0f; + uint8_t srcR = static_cast(src[0] * scaleFactor); + uint8_t srcG = static_cast(src[1] * scaleFactor); + uint8_t srcB = static_cast(src[2] * scaleFactor); + *dst = ( ((srcR & 0xF8) << 8) + | ((srcG & 0xFC) << 3) + | ((srcB & 0xF8) >> 3)); } -FORCE_INLINE void packRGBA8ToUnsignedShort5551(const uint8_t* __restrict source, uint16_t* __restrict destination) +template<> FORCE_INLINE void +pack(const float* __restrict src, float* __restrict dst) { - *destination = (((source[0] & 0xF8) << 8) - | ((source[1] & 0xF8) << 3) - | ((source[2] & 0xF8) >> 2) - | (source[3] >> 7)); + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; } -FORCE_INLINE void packRGBA8ToUnsignedShort5551Premultiply(const uint8_t* __restrict source, uint16_t* __restrict destination) +template<> FORCE_INLINE void +pack(const float* __restrict src, float* __restrict dst) { - float scaleFactor = source[3] / 255.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - uint8_t sourceG = 
static_cast(static_cast(source[1]) * scaleFactor); - uint8_t sourceB = static_cast(static_cast(source[2]) * scaleFactor); - *destination = (((sourceR & 0xF8) << 8) - | ((sourceG & 0xF8) << 3) - | ((sourceB & 0xF8) >> 2) - | (source[3] >> 7)); + float scaleFactor = src[3]; + dst[0] = src[0] * scaleFactor; + dst[1] = src[1] * scaleFactor; + dst[2] = src[2] * scaleFactor; } -// FIXME: this routine is lossy and must be removed. -FORCE_INLINE void packRGBA8ToUnsignedShort5551Unmultiply(const uint8_t* __restrict source, uint16_t* __restrict destination) +template<> FORCE_INLINE void +pack(const float* __restrict src, float* __restrict dst) { - float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - uint8_t sourceG = static_cast(static_cast(source[1]) * scaleFactor); - uint8_t sourceB = static_cast(static_cast(source[2]) * scaleFactor); - *destination = (((sourceR & 0xF8) << 8) - | ((sourceG & 0xF8) << 3) - | ((sourceB & 0xF8) >> 2) - | (source[3] >> 7)); + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; } -FORCE_INLINE void packRGBA8ToUnsignedShort565(const uint8_t* __restrict source, uint16_t* __restrict destination) +template<> FORCE_INLINE void +pack(const float* __restrict src, float* __restrict dst) { - *destination = (((source[0] & 0xF8) << 8) - | ((source[1] & 0xFC) << 3) - | ((source[2] & 0xF8) >> 3)); + float scaleFactor = src[3]; + dst[0] = src[0] * scaleFactor; + dst[1] = src[1] * scaleFactor; + dst[2] = src[2] * scaleFactor; + dst[3] = src[3]; } -FORCE_INLINE void packRGBA8ToUnsignedShort565Premultiply(const uint8_t* __restrict source, uint16_t* __restrict destination) +template<> FORCE_INLINE void +pack(const float* __restrict src, float* __restrict dst) { - float scaleFactor = source[3] / 255.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - uint8_t sourceG = static_cast(static_cast(source[1]) * scaleFactor); - uint8_t sourceB = 
static_cast(static_cast(source[2]) * scaleFactor); - *destination = (((sourceR & 0xF8) << 8) - | ((sourceG & 0xFC) << 3) - | ((sourceB & 0xF8) >> 3)); + dst[0] = src[3]; } -// FIXME: this routine is lossy and must be removed. -FORCE_INLINE void packRGBA8ToUnsignedShort565Unmultiply(const uint8_t* __restrict source, uint16_t* __restrict destination) +template<> FORCE_INLINE void +pack(const float* __restrict src, float* __restrict dst) { - float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f; - uint8_t sourceR = static_cast(static_cast(source[0]) * scaleFactor); - uint8_t sourceG = static_cast(static_cast(source[1]) * scaleFactor); - uint8_t sourceB = static_cast(static_cast(source[2]) * scaleFactor); - *destination = (((sourceR & 0xF8) << 8) - | ((sourceG & 0xFC) << 3) - | ((sourceB & 0xF8) >> 3)); + dst[0] = src[3]; } -FORCE_INLINE void packRGBA32FToRGB32F(const float* __restrict source, float* __restrict destination) +template<> FORCE_INLINE void +pack(const float* __restrict src, float* __restrict dst) { - destination[0] = source[0]; - destination[1] = source[1]; - destination[2] = source[2]; + dst[0] = src[0]; } -FORCE_INLINE void packRGBA32FToRGB32FPremultiply(const float* __restrict source, float* __restrict destination) +template<> FORCE_INLINE void +pack(const float* __restrict src, float* __restrict dst) { - float scaleFactor = source[3]; - destination[0] = source[0] * scaleFactor; - destination[1] = source[1] * scaleFactor; - destination[2] = source[2] * scaleFactor; + float scaleFactor = src[3]; + dst[0] = src[0] * scaleFactor; } -FORCE_INLINE void packRGBA32FToRGBA32FPremultiply(const float* __restrict source, float* __restrict destination) +template<> FORCE_INLINE void +pack(const float* __restrict src, float* __restrict dst) { - float scaleFactor = source[3]; - destination[0] = source[0] * scaleFactor; - destination[1] = source[1] * scaleFactor; - destination[2] = source[2] * scaleFactor; - destination[3] = source[3]; + dst[0] = src[0]; + dst[1] 
= src[3]; } -FORCE_INLINE void packRGBA32FToA32F(const float* __restrict source, float* __restrict destination) +template<> FORCE_INLINE void +pack(const float* __restrict src, float* __restrict dst) { - destination[0] = source[3]; -} - -// identical to above, to avoid special-casing -FORCE_INLINE void packRGBA32FToA32FPremultiply(const float* __restrict source, float* __restrict destination) -{ - destination[0] = source[3]; -} - -FORCE_INLINE void packRGBA32FToR32F(const float* __restrict source, float* __restrict destination) -{ - destination[0] = source[0]; -} - -FORCE_INLINE void packRGBA32FToR32FPremultiply(const float* __restrict source, float* __restrict destination) -{ - float scaleFactor = source[3]; - destination[0] = source[0] * scaleFactor; -} - - -FORCE_INLINE void packRGBA32FToRA32F(const float* __restrict source, float* __restrict destination) -{ - destination[0] = source[0]; - destination[1] = source[3]; -} - -FORCE_INLINE void packRGBA32FToRA32FPremultiply(const float* __restrict source, float* __restrict destination) -{ - float scaleFactor = source[3]; - destination[0] = source[0] * scaleFactor; - destination[1] = scaleFactor; + float scaleFactor = src[3]; + dst[0] = src[0] * scaleFactor; + dst[1] = scaleFactor; } /****** END CODE SHARED WITH WEBKIT ******/ +template FORCE_INLINE void +convertType(const SrcType* __restrict src, DstType* __restrict dst) +{ + NS_ABORT_IF_FALSE(false, "Unimplemented texture format conversion"); +} + +template<> FORCE_INLINE void +convertType(const uint8_t* __restrict src, uint8_t* __restrict dst) +{ + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +template<> FORCE_INLINE void +convertType(const float* __restrict src, float* __restrict dst) +{ + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +template<> FORCE_INLINE void +convertType(const uint8_t* __restrict src, float* __restrict dst) +{ + const float scaleFactor = 1.f / 255.0f; + dst[0] = src[0] * 
scaleFactor; + dst[1] = src[1] * scaleFactor; + dst[2] = src[2] * scaleFactor; + dst[3] = src[3] * scaleFactor; +} + +#undef FORCE_INLINE + } // end namespace WebGLTexelConversions } // end namespace mozilla