Bug 749711 - Lots of WebGL texture conversion fixes and improvements - r=jgilbert

* Templatize pack/unpack routines
  ** Inside anonymous namespace in a naive attempt to not hammer PGO linker memory usage.
* Support conversions changing texel size
* Support conversion from integer to float formats
* Support RGBA32F properly
* Avoid compiling useless paths (code size down to 17k from 44k)
2024-09-13 09:24:08 -07:00 · 2012-05-07 13:05:32 -04:00 · 2012-05-07 13:05:32 -04:00 · 826a893d64
commit 826a893d64
parent 0253660d2a
5 changed files with 991 additions and 527 deletions
--- a/content/canvas/src/Makefile.in
+++ b/content/canvas/src/Makefile.in
@ -82,6 +82,7 @@ CPPSRCS += \
 	WebGLExtensionStandardDerivatives.cpp \
 	WebGLExtensionTextureFilterAnisotropic.cpp \
 	WebGLExtensionLoseContext.cpp \
 	WebGLTexelConversions.cpp \
 	$(NULL)
 DEFINES += -DUSE_ANGLE
--- a/content/canvas/src/WebGLContext.h
+++ b/content/canvas/src/WebGLContext.h
@ -128,16 +128,51 @@ struct BackbufferClearingStatus {
    enum { NotClearedSinceLastPresented, ClearedToDefaultValues, HasBeenDrawnTo };
 };
-struct WebGLTexelFormat {
+namespace WebGLTexelConversions {
-    enum { Generic, Auto, RGBA8, RGB8, RGBX8, BGRA8, BGR8, BGRX8, RGBA5551, RGBA4444, RGB565, R8, RA8, A8,
+
-           RGBA32F, RGB32F, A32F, R32F, RA32F };
+/*
 * The formats that may participate, either as source or destination formats,
 * in WebGL texture conversions. This includes:
 *  - all the formats accepted by WebGL.texImage2D, e.g. RGBA4444
 *  - additional formats provided by extensions, e.g. RGB32F
 *  - additional source formats, depending on browser details, used when uploading
 *    textures from DOM elements. See gfxImageSurface::Format().
 */
 // Texel format identifiers used as the source/destination selectors of the
 // WebGL texel conversion code (see GetWebGLTexelFormat and
 // WebGLContext::ConvertImage). Naming convention, as far as the names and
 // their users show: channel letters followed by the per-channel storage,
 // e.g. "8" for one byte per channel, "32F" for one float per channel.
 enum WebGLTexelFormat
 {
    // dummy error code returned by GetWebGLTexelFormat in error cases,
    // after assertion failure (so this never happens in debug builds)
    BadFormat,
    // dummy pseudo-format meaning "use the other format".
    // For example, if SrcFormat=Auto and DstFormat=RGB8, then the source
    // is implicitly treated as being RGB8 itself.
    Auto,
    // 1-channel formats
    R8,   // what GetWebGLTexelFormat returns for LUMINANCE + UNSIGNED_BYTE
    A8,   // what GetWebGLTexelFormat returns for ALPHA + UNSIGNED_BYTE
    R32F, // used for OES_texture_float extension
    A32F, // used for OES_texture_float extension
    // 2-channel formats
    RA8,  // what GetWebGLTexelFormat returns for LUMINANCE_ALPHA + UNSIGNED_BYTE
    RA32F,
    // 3-channel formats
    RGB8,
    BGRX8, // used for DOM elements. Source format only.
           // Not tightly packed: 4 bytes per texel even though only 3 channels are used.
    RGB565, // whole texel packed into a single 16-bit value
    RGB32F, // used for OES_texture_float extension
    // 4-channel formats
    RGBA8,
    BGRA8, // used for DOM elements
    RGBA5551, // whole texel packed into a single 16-bit value
    RGBA4444, // whole texel packed into a single 16-bit value
    RGBA32F // used for OES_texture_float extension
 };
-struct WebGLTexelPremultiplicationOp {
+} // end namespace WebGLTexelConversions
    enum { Generic, None, Premultiply, Unmultiply };
 };
-int GetWebGLTexelFormat(GLenum format, GLenum type);
+using WebGLTexelConversions::WebGLTexelFormat;
 WebGLTexelFormat GetWebGLTexelFormat(GLenum format, GLenum type);
 // Zero is not an integer power of two.
 inline bool is_pot_assuming_nonnegative(WebGLsizei x)
@ -1205,26 +1240,26 @@ protected:
                         WebGLenum format, WebGLenum type,
                         void *data, PRUint32 byteLength,
                         int jsArrayType,
-                         int srcFormat, bool srcPremultiplied);
+                         WebGLTexelFormat srcFormat, bool srcPremultiplied);
    void TexSubImage2D_base(WebGLenum target, WebGLint level,
                            WebGLint xoffset, WebGLint yoffset,
                            WebGLsizei width, WebGLsizei height, WebGLsizei srcStrideOrZero,
                            WebGLenum format, WebGLenum type,
                            void *pixels, PRUint32 byteLength,
                            int jsArrayType,
-                            int srcFormat, bool srcPremultiplied);
+                            WebGLTexelFormat srcFormat, bool srcPremultiplied);
    void TexParameter_base(WebGLenum target, WebGLenum pname,
                           WebGLint *intParamPtr, WebGLfloat *floatParamPtr);
    void ConvertImage(size_t width, size_t height, size_t srcStride, size_t dstStride,
                      const PRUint8*src, PRUint8 *dst,
-                      int srcFormat, bool srcPremultiplied,
+                      WebGLTexelFormat srcFormat, bool srcPremultiplied,
-                      int dstFormat, bool dstPremultiplied,
+                      WebGLTexelFormat dstFormat, bool dstPremultiplied,
                      size_t dstTexelSize);
    nsresult DOMElementToImageSurface(dom::Element* imageOrCanvas,
                                      gfxImageSurface **imageOut,
-                                      int *format);
+                                      WebGLTexelFormat *format);
    void CopyTexSubImage2D_base(WebGLenum target,
                                WebGLint level,
--- a/content/canvas/src/WebGLContextGL.cpp
+++ b/content/canvas/src/WebGLContextGL.cpp
@ -4296,198 +4296,9 @@ WebGLContext::StencilOpSeparate(WebGLenum face, WebGLenum sfail, WebGLenum dpfai
    gl->fStencilOpSeparate(face, sfail, dpfail, dppass);
 }
 struct WebGLImageConverter
 {
    bool flip;
    size_t width, height, srcStride, dstStride, srcTexelSize, dstTexelSize;
    const PRUint8 *src;
    PRUint8 *dst;
    WebGLImageConverter()
    {
        memset(this, 0, sizeof(WebGLImageConverter));
    }
    template<typename SrcType, typename DstType, typename UnpackType,
         void unpackingFunc(const SrcType*, UnpackType*),
         void packingFunc(const UnpackType*, DstType*)>
    void run()
    {
        // Note -- even though the functions take UnpackType, the
        // pointers below are all in terms of PRUint8; otherwise
        // pointer math starts getting tricky.
        for (size_t src_row = 0; src_row < height; ++src_row) {
            size_t dst_row = flip ? (height - 1 - src_row) : src_row;
            PRUint8 *dst_row_ptr = dst + dst_row * dstStride;
            const PRUint8 *src_row_ptr = src + src_row * srcStride;
            const PRUint8 *src_row_end = src_row_ptr + width * srcTexelSize; // != src_row_ptr + byteStride
            while (src_row_ptr != src_row_end) {
                UnpackType tmp[4];
                unpackingFunc(reinterpret_cast<const SrcType*>(src_row_ptr), tmp);
                packingFunc(tmp, reinterpret_cast<DstType*>(dst_row_ptr));
                src_row_ptr += srcTexelSize;
                dst_row_ptr += dstTexelSize;
            }
        }
    }
 };
 /*
  * Converts a width x height image from srcFormat/srcPremultiplied at `src`
  * (rows srcStride bytes apart) into dstFormat/dstPremultiplied at `dst`
  * (rows dstStride bytes apart, dstTexelSize bytes per texel). Rows are
  * written in reverse vertical order when mPixelStoreFlipY is set.
  *
  * The conversion path is selected by a three-level switch generated by the
  * macros below: source format -> destination format -> premultiplication op.
  * Combinations not listed there hit an NS_ASSERTION and return without
  * writing to `dst`.
  */
 void
 WebGLContext::ConvertImage(size_t width, size_t height, size_t srcStride, size_t dstStride,
                           const PRUint8*src, PRUint8 *dst,
                           int srcFormat, bool srcPremultiplied,
                           int dstFormat, bool dstPremultiplied,
                           size_t dstTexelSize)
 {
    // width and height are unsigned (size_t), so this only catches zero.
    if (width <= 0 || height <= 0)
        return;
    if (srcFormat == dstFormat &&
        srcPremultiplied == dstPremultiplied)
    {
        // fast exit path: we just have to memcpy all the rows.
        //
        // The case where absolutely nothing needs to be done is supposed to have
        // been handled earlier (in TexImage2D_base, etc).
        //
        // So the case we're handling here is when even though no format conversion is needed,
        // we still might have to flip vertically and/or to adjust to a different stride.
        NS_ASSERTION(mPixelStoreFlipY || srcStride != dstStride, "Performance trap -- should handle this case earlier, to avoid memcpy");
        size_t row_size = width * dstTexelSize; // doesn't matter, src and dst formats agree
        const PRUint8* src_row = src;
        const PRUint8* src_end = src + height * srcStride;
        // When flipping, start at the last destination row and walk backwards.
        PRUint8* dst_row = mPixelStoreFlipY ? dst + (height-1) * dstStride : dst;
        // Signed copy of the stride so that it can be negated for the flipped case.
        ptrdiff_t dstStrideSigned(dstStride);
        ptrdiff_t dst_delta = mPixelStoreFlipY ? -dstStrideSigned : dstStrideSigned;
        while(src_row != src_end) {
            memcpy(dst_row, src_row, row_size);
            src_row += srcStride;
            dst_row += dst_delta;
        }
        return;
    }
    // General path: hand the geometry to the converter; srcTexelSize is filled
    // in later by the HANDLE_*SRCFORMAT case that matches srcFormat.
    WebGLImageConverter converter;
    converter.flip = mPixelStoreFlipY;
    converter.width = width;
    converter.height = height;
    converter.srcStride = srcStride;
    converter.dstStride = dstStride;
    converter.dstTexelSize = dstTexelSize;
    converter.src = src;
    converter.dst = dst;
    // Premultiply when only the destination is premultiplied, unmultiply when
    // only the source is, otherwise leave the color channels alone.
    int premultiplicationOp = (!srcPremultiplied && dstPremultiplied) ? WebGLTexelPremultiplicationOp::Premultiply
                            : (srcPremultiplied && !dstPremultiplied) ? WebGLTexelPremultiplicationOp::Unmultiply
                            : WebGLTexelPremultiplicationOp::None;
    // HANDLE_DSTFORMAT: one `case` of the destination-format switch for the
    // integer formats. Picks the packFunc variant (plain, ...Premultiply or
    // ...Unmultiply) according to premultiplicationOp; the intermediate
    // representation is PRUint8.
 #define HANDLE_DSTFORMAT(format, SrcType, DstType, unpackFunc, packFunc) \
        case WebGLTexelFormat::format: \
            switch (premultiplicationOp) { \
                case WebGLTexelPremultiplicationOp::Premultiply: \
                    converter.run<SrcType, DstType, PRUint8,          \
                                  WebGLTexelConversions::unpackFunc, \
                                  WebGLTexelConversions::packFunc##Premultiply>(); \
                break; \
                case WebGLTexelPremultiplicationOp::Unmultiply: \
                    converter.run<SrcType, DstType, PRUint8, \
                                  WebGLTexelConversions::unpackFunc, \
                                  WebGLTexelConversions::packFunc##Unmultiply>(); \
                break; \
                default: \
                    converter.run<SrcType, DstType, PRUint8, \
                                  WebGLTexelConversions::unpackFunc, \
                                  WebGLTexelConversions::packFunc>(); \
                break; \
            } \
            break;
    // HANDLE_SRCFORMAT: one `case` of the source-format switch for the integer
    // formats. Records the source texel size in bytes, then switches over the
    // supported integer destination formats. The A8 destination always uses
    // the plain pack function, whatever premultiplicationOp is.
 #define HANDLE_SRCFORMAT(format, size, SrcType, unpackFunc) \
        case WebGLTexelFormat::format: \
            converter.srcTexelSize = size; \
            switch (dstFormat) { \
                HANDLE_DSTFORMAT(RGBA8,    SrcType, PRUint8,  unpackFunc, packRGBA8ToRGBA8) \
                HANDLE_DSTFORMAT(RGB8,     SrcType, PRUint8,  unpackFunc, packRGBA8ToRGB8) \
                HANDLE_DSTFORMAT(R8,       SrcType, PRUint8,  unpackFunc, packRGBA8ToR8) \
                HANDLE_DSTFORMAT(RA8,      SrcType, PRUint8,  unpackFunc, packRGBA8ToRA8) \
                HANDLE_DSTFORMAT(RGBA5551, SrcType, PRUint16, unpackFunc, packRGBA8ToUnsignedShort5551) \
                HANDLE_DSTFORMAT(RGBA4444, SrcType, PRUint16, unpackFunc, packRGBA8ToUnsignedShort4444) \
                HANDLE_DSTFORMAT(RGB565,   SrcType, PRUint16, unpackFunc, packRGBA8ToUnsignedShort565) \
                /* A8 needs to be special-cased as it doesn't have color channels to premultiply */ \
                case WebGLTexelFormat::A8: \
                    converter.run<SrcType, PRUint8, PRUint8,          \
                                  WebGLTexelConversions::unpackFunc, \
                                  WebGLTexelConversions::packRGBA8ToA8>(); \
                    break; \
                default: \
                    NS_ASSERTION(false, "Coding error?! Should never reach this point."); \
                    return; \
            } \
            break;
    // HANDLE_FLOAT_DSTFORMAT: float counterpart of HANDLE_DSTFORMAT, with a
    // float intermediate. There is no Unmultiply variant: that case only
    // asserts and performs no conversion.
 #define HANDLE_FLOAT_DSTFORMAT(format, unpackFunc, packFunc) \
        case WebGLTexelFormat::format: \
            switch (premultiplicationOp) { \
                case WebGLTexelPremultiplicationOp::Premultiply: \
                    converter.run<float, float, float,                \
                                  WebGLTexelConversions::unpackFunc, \
                                  WebGLTexelConversions::packFunc##Premultiply>(); \
                break; \
                case WebGLTexelPremultiplicationOp::Unmultiply: \
                    NS_ASSERTION(false, "Floating point can't be un-premultiplied -- we have no premultiplied source data!"); \
                break; \
                default: \
                    converter.run<float, float, float,                \
                                  WebGLTexelConversions::unpackFunc, \
                                  WebGLTexelConversions::packFunc>(); \
                break; \
            } \
            break;
    // HANDLE_FLOAT_SRCFORMAT: float counterpart of HANDLE_SRCFORMAT. Only
    // RGB32F, A32F, R32F and RA32F are accepted as destinations here; any
    // other destination (including the integer formats) asserts and returns.
 #define HANDLE_FLOAT_SRCFORMAT(format, size, unpackFunc)                \
        case WebGLTexelFormat::format:                                  \
            converter.srcTexelSize = size;                              \
            switch (dstFormat) {                                        \
                HANDLE_FLOAT_DSTFORMAT(RGB32F, unpackFunc, packRGBA32FToRGB32F) \
                HANDLE_FLOAT_DSTFORMAT(A32F,   unpackFunc, packRGBA32FToA32F) \
                HANDLE_FLOAT_DSTFORMAT(R32F,   unpackFunc, packRGBA32FToR32F) \
                HANDLE_FLOAT_DSTFORMAT(RA32F,  unpackFunc, packRGBA32FToRA32F) \
                default: \
                    NS_ASSERTION(false, "Coding error?! Should never reach this point."); \
                    return; \
            } \
            break;
    // Top-level dispatch on the source format. The second macro argument is
    // the source texel size in bytes. Note that the padded X formats reuse the
    // 3-channel unpackers (RGBX8 -> unpackRGB8ToRGBA8, BGRX8 -> unpackBGR8ToRGBA8)
    // while advancing 4 bytes per texel, and that RGBA32F has no case here.
    switch (srcFormat) {
        HANDLE_SRCFORMAT(RGBA8,    4, PRUint8,  unpackRGBA8ToRGBA8)
        HANDLE_SRCFORMAT(RGBX8,    4, PRUint8,  unpackRGB8ToRGBA8)
        HANDLE_SRCFORMAT(RGB8,     3, PRUint8,  unpackRGB8ToRGBA8)
        HANDLE_SRCFORMAT(BGRA8,    4, PRUint8,  unpackBGRA8ToRGBA8)
        HANDLE_SRCFORMAT(BGRX8,    4, PRUint8,  unpackBGR8ToRGBA8)
        HANDLE_SRCFORMAT(BGR8,     3, PRUint8,  unpackBGR8ToRGBA8)
        HANDLE_SRCFORMAT(R8,       1, PRUint8,  unpackR8ToRGBA8)
        HANDLE_SRCFORMAT(A8,       1, PRUint8,  unpackA8ToRGBA8)
        HANDLE_SRCFORMAT(RA8,      2, PRUint8,  unpackRA8ToRGBA8)
        HANDLE_SRCFORMAT(RGBA5551, 2, PRUint16, unpackRGBA5551ToRGBA8)
        HANDLE_SRCFORMAT(RGBA4444, 2, PRUint16, unpackRGBA4444ToRGBA8)
        HANDLE_SRCFORMAT(RGB565,   2, PRUint16, unpackRGB565ToRGBA8)
        HANDLE_FLOAT_SRCFORMAT(RGB32F,  12, unpackRGB32FToRGBA32F)
        HANDLE_FLOAT_SRCFORMAT(RA32F,    8, unpackRA32FToRGBA32F)
        HANDLE_FLOAT_SRCFORMAT(R32F,     4, unpackR32FToRGBA32F)
        HANDLE_FLOAT_SRCFORMAT(A32F,     4, unpackA32FToRGBA32F)
        default:
            NS_ASSERTION(false, "Coding error?! Should never reach this point.");
            return;
    }
 }
 nsresult
 WebGLContext::DOMElementToImageSurface(Element* imageOrCanvas,
-                                       gfxImageSurface **imageOut, int *format)
+                                       gfxImageSurface **imageOut, WebGLTexelFormat *format)
 {
    if (!imageOrCanvas) {
        return NS_ERROR_FAILURE;
@ -4556,16 +4367,16 @@ WebGLContext::DOMElementToImageSurface(Element* imageOrCanvas,
    switch (surf->Format()) {
        case gfxASurface::ImageFormatARGB32:
-            *format = WebGLTexelFormat::BGRA8; // careful, our ARGB means BGRA
+            *format = WebGLTexelConversions::BGRA8; // careful, our ARGB means BGRA
            break;
        case gfxASurface::ImageFormatRGB24:
-            *format = WebGLTexelFormat::BGRX8; // careful, our RGB24 is not tightly packed. Whence BGRX8.
+            *format = WebGLTexelConversions::BGRX8; // careful, our RGB24 is not tightly packed. Whence BGRX8.
            break;
        case gfxASurface::ImageFormatA8:
-            *format = WebGLTexelFormat::A8;
+            *format = WebGLTexelConversions::A8;
            break;
        case gfxASurface::ImageFormatRGB16_565:
-            *format = WebGLTexelFormat::RGB565;
+            *format = WebGLTexelConversions::RGB565;
            break;
        default:
            NS_ASSERTION(false, "Unsupported image format. Unimplemented.");
@ -5644,7 +5455,7 @@ WebGLContext::TexImage2D_base(WebGLenum target, WebGLint level, WebGLenum intern
                              WebGLenum format, WebGLenum type,
                              void *data, PRUint32 byteLength,
                              int jsArrayType, // a TypedArray format enum, or -1 if not relevant
-                              int srcFormat, bool srcPremultiplied)
+                              WebGLTexelFormat srcFormat, bool srcPremultiplied)
 {
    switch (target) {
        case LOCAL_GL_TEXTURE_2D:
@ -5699,14 +5510,19 @@ WebGLContext::TexImage2D_base(WebGLenum target, WebGLint level, WebGLenum intern
    if (border != 0)
        return ErrorInvalidValue("TexImage2D: border must be 0");
-    PRUint32 texelSize = 0;
+    PRUint32 dstTexelSize = 0;
-    if (!ValidateTexFormatAndType(format, type, jsArrayType, &texelSize, "texImage2D"))
+    if (!ValidateTexFormatAndType(format, type, jsArrayType, &dstTexelSize, "texImage2D"))
        return;
-    CheckedUint32 checked_neededByteLength = 
+    WebGLTexelFormat dstFormat = GetWebGLTexelFormat(format, type);
-        GetImageSize(height, width, texelSize, mPixelStoreUnpackAlignment); 
+    WebGLTexelFormat actualSrcFormat = srcFormat == WebGLTexelConversions::Auto ? dstFormat : srcFormat;
-    CheckedUint32 checked_plainRowSize = CheckedUint32(width) * texelSize;
+    PRUint32 srcTexelSize = WebGLTexelConversions::TexelBytesForFormat(actualSrcFormat);
    CheckedUint32 checked_neededByteLength = 
        GetImageSize(height, width, srcTexelSize, mPixelStoreUnpackAlignment);
    CheckedUint32 checked_plainRowSize = CheckedUint32(width) * srcTexelSize;
    CheckedUint32 checked_alignedRowSize =
        RoundedToNextMultipleOf(checked_plainRowSize.value(), mPixelStoreUnpackAlignment);
@ -5734,11 +5550,9 @@ WebGLContext::TexImage2D_base(WebGLenum target, WebGLint level, WebGLenum intern
    GLenum error = LOCAL_GL_NO_ERROR;
    if (byteLength) {
        int dstFormat = GetWebGLTexelFormat(format, type);
        int actualSrcFormat = srcFormat == WebGLTexelFormat::Auto ? dstFormat : srcFormat;
        size_t srcStride = srcStrideOrZero ? srcStrideOrZero : checked_alignedRowSize.value();
-        size_t dstPlainRowSize = texelSize * width;
+        size_t dstPlainRowSize = dstTexelSize * width;
        size_t unpackAlignment = mPixelStoreUnpackAlignment;
        size_t dstStride = ((dstPlainRowSize + unpackAlignment-1) / unpackAlignment) * unpackAlignment;
@ -5753,11 +5567,12 @@ WebGLContext::TexImage2D_base(WebGLenum target, WebGLint level, WebGLenum intern
        }
        else
        {
-            nsAutoArrayPtr<PRUint8> convertedData(new PRUint8[bytesNeeded]);
+            size_t convertedDataSize = height * dstStride;
            nsAutoArrayPtr<PRUint8> convertedData(new PRUint8[convertedDataSize]);
            ConvertImage(width, height, srcStride, dstStride,
                        (PRUint8*)data, convertedData,
                        actualSrcFormat, srcPremultiplied,
-                        dstFormat, mPixelStorePremultiplyAlpha, texelSize);
+                        dstFormat, mPixelStorePremultiplyAlpha, dstTexelSize);
            error = CheckedTexImage2D(target, level, internalformat,
                                      width, height, border, format, type, convertedData);
        }
@ -5814,7 +5629,7 @@ WebGLContext::TexImage2D(JSContext* cx, WebGLenum target, WebGLint level,
                           pixels ? pixels->mData : 0,
                           pixels ? pixels->mLength : 0,
                           pixels ? (int)JS_GetTypedArrayType(pixels->mObj, cx) : -1,
-                           WebGLTexelFormat::Auto, false);
+                           WebGLTexelConversions::Auto, false);
 }
 NS_IMETHODIMP
@ -5832,7 +5647,7 @@ WebGLContext::TexImage2D_imageData(WebGLenum target, WebGLint level, WebGLenum i
                    pixels ? JS_GetArrayBufferViewData(pixels, cx) : 0,
                    pixels ? JS_GetArrayBufferViewByteLength(pixels, cx) : 0,
                    -1,
-                    WebGLTexelFormat::RGBA8, false);
+                    WebGLTexelConversions::RGBA8, false);
    return NS_OK;
 }
@ -5853,7 +5668,7 @@ WebGLContext::TexImage2D(JSContext* cx, WebGLenum target, WebGLint level,
    return TexImage2D_base(target, level, internalformat, pixels->GetWidth(),
                           pixels->GetHeight(), 4*pixels->GetWidth(), 0,
                           format, type, arr.mData, arr.mLength, -1,
-                           WebGLTexelFormat::RGBA8, false);
+                           WebGLTexelConversions::RGBA8, false);
 }
@ -5877,7 +5692,7 @@ WebGLContext::TexImage2D(JSContext* /* unused */, WebGLenum target,
    nsRefPtr<gfxImageSurface> isurf;
-    int srcFormat;
+    WebGLTexelFormat srcFormat;
    rv = DOMElementToImageSurface(elt, getter_AddRefs(isurf), &srcFormat);
    if (rv.Failed())
        return;
@ -5908,7 +5723,7 @@ WebGLContext::TexSubImage2D_base(WebGLenum target, WebGLint level,
                                 WebGLenum format, WebGLenum type,
                                 void *pixels, PRUint32 byteLength,
                                 int jsArrayType,
-                                 int srcFormat, bool srcPremultiplied)
+                                 WebGLTexelFormat srcFormat, bool srcPremultiplied)
 {
    switch (target) {
        case LOCAL_GL_TEXTURE_2D:
@ -5943,17 +5758,22 @@ WebGLContext::TexSubImage2D_base(WebGLenum target, WebGLint level,
            return ErrorInvalidValue("texSubImage2D: with level > 0, width and height must be powers of two");
    }
-    PRUint32 texelSize = 0;
+    PRUint32 dstTexelSize = 0;
-    if (!ValidateTexFormatAndType(format, type, jsArrayType, &texelSize, "texSubImage2D"))
+    if (!ValidateTexFormatAndType(format, type, jsArrayType, &dstTexelSize, "texSubImage2D"))
        return;
    WebGLTexelFormat dstFormat = GetWebGLTexelFormat(format, type);
    WebGLTexelFormat actualSrcFormat = srcFormat == WebGLTexelConversions::Auto ? dstFormat : srcFormat;
    PRUint32 srcTexelSize = WebGLTexelConversions::TexelBytesForFormat(actualSrcFormat);
    if (width == 0 || height == 0)
        return; // ES 2.0 says it has no effect, we better return right now
    CheckedUint32 checked_neededByteLength = 
-        GetImageSize(height, width, texelSize, mPixelStoreUnpackAlignment);
+        GetImageSize(height, width, srcTexelSize, mPixelStoreUnpackAlignment);
-    CheckedUint32 checked_plainRowSize = CheckedUint32(width) * texelSize;
+    CheckedUint32 checked_plainRowSize = CheckedUint32(width) * srcTexelSize;
    CheckedUint32 checked_alignedRowSize = 
        RoundedToNextMultipleOf(checked_plainRowSize.value(), mPixelStoreUnpackAlignment);
@ -5986,11 +5806,9 @@ WebGLContext::TexSubImage2D_base(WebGLenum target, WebGLint level,
    MakeContextCurrent();
    int dstFormat = GetWebGLTexelFormat(format, type);
    int actualSrcFormat = srcFormat == WebGLTexelFormat::Auto ? dstFormat : srcFormat;
    size_t srcStride = srcStrideOrZero ? srcStrideOrZero : checked_alignedRowSize.value();
-    size_t dstPlainRowSize = texelSize * width;
+    size_t dstPlainRowSize = dstTexelSize * width;
    // There are checks above to ensure that this won't overflow.
    size_t dstStride = RoundedToNextMultipleOf(dstPlainRowSize, mPixelStoreUnpackAlignment).value();
@ -6004,11 +5822,12 @@ WebGLContext::TexSubImage2D_base(WebGLenum target, WebGLint level,
    }
    else
    {
-        nsAutoArrayPtr<PRUint8> convertedData(new PRUint8[bytesNeeded]);
+        size_t convertedDataSize = height * dstStride;
        nsAutoArrayPtr<PRUint8> convertedData(new PRUint8[convertedDataSize]);
        ConvertImage(width, height, srcStride, dstStride,
                    (const PRUint8*)pixels, convertedData,
                    actualSrcFormat, srcPremultiplied,
-                    dstFormat, mPixelStorePremultiplyAlpha, texelSize);
+                    dstFormat, mPixelStorePremultiplyAlpha, dstTexelSize);
        gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format, type, convertedData);
    }
@ -6051,7 +5870,7 @@ WebGLContext::TexSubImage2D(JSContext* cx, WebGLenum target, WebGLint level,
                              width, height, 0, format, type,
                              pixels->mData, pixels->mLength,
                              JS_GetTypedArrayType(pixels->mObj, cx),
-                              WebGLTexelFormat::Auto, false);
+                              WebGLTexelConversions::Auto, false);
 }
 NS_IMETHODIMP
@ -6075,7 +5894,7 @@ WebGLContext::TexSubImage2D_imageData(WebGLenum target, WebGLint level,
                       width, height, 4*width, format, type,
                       JS_GetArrayBufferViewData(pixels, cx), JS_GetArrayBufferViewByteLength(pixels, cx),
                       -1,
-                       WebGLTexelFormat::RGBA8, false);
+                       WebGLTexelConversions::RGBA8, false);
    return NS_OK;
 }
@ -6097,7 +5916,7 @@ WebGLContext::TexSubImage2D(JSContext* cx, WebGLenum target, WebGLint level,
                              4*pixels->GetWidth(), format, type,
                              arr.mData, arr.mLength,
                              -1,
-                              WebGLTexelFormat::RGBA8, false);
+                              WebGLTexelConversions::RGBA8, false);
 }
 NS_IMETHODIMP
@ -6122,7 +5941,7 @@ WebGLContext::TexSubImage2D(JSContext* /* unused */, WebGLenum target,
    nsRefPtr<gfxImageSurface> isurf;
-    int srcFormat;
+    WebGLTexelFormat srcFormat;
    rv = DOMElementToImageSurface(elt, getter_AddRefs(isurf), &srcFormat);
    if (rv.Failed())
        return;
@ -6232,52 +6051,52 @@ BaseTypeAndSizeFromUniformType(WebGLenum uType, WebGLenum *baseType, WebGLint *u
 }
-int mozilla::GetWebGLTexelFormat(GLenum format, GLenum type)
+WebGLTexelFormat mozilla::GetWebGLTexelFormat(GLenum format, GLenum type)
 {
    if (type == LOCAL_GL_UNSIGNED_BYTE) {
        switch (format) {
            case LOCAL_GL_RGBA:
-                return WebGLTexelFormat::RGBA8;
+                return WebGLTexelConversions::RGBA8;
            case LOCAL_GL_RGB:
-                return WebGLTexelFormat::RGB8;
+                return WebGLTexelConversions::RGB8;
            case LOCAL_GL_ALPHA:
-                return WebGLTexelFormat::A8;
+                return WebGLTexelConversions::A8;
            case LOCAL_GL_LUMINANCE:
-                return WebGLTexelFormat::R8;
+                return WebGLTexelConversions::R8;
            case LOCAL_GL_LUMINANCE_ALPHA:
-                return WebGLTexelFormat::RA8;
+                return WebGLTexelConversions::RA8;
            default:
-                NS_ASSERTION(false, "Coding mistake?! Should never reach this point.");
+                NS_ABORT_IF_FALSE(false, "Coding mistake?! Should never reach this point.");
-                return WebGLTexelFormat::Generic;
+                return WebGLTexelConversions::BadFormat;
        }
    } else if (type == LOCAL_GL_FLOAT) {
        // OES_texture_float
        switch (format) {
            case LOCAL_GL_RGBA:
-                return WebGLTexelFormat::RGBA32F;
+                return WebGLTexelConversions::RGBA32F;
            case LOCAL_GL_RGB:
-                return WebGLTexelFormat::RGB32F;
+                return WebGLTexelConversions::RGB32F;
            case LOCAL_GL_ALPHA:
-                return WebGLTexelFormat::A32F;
+                return WebGLTexelConversions::A32F;
            case LOCAL_GL_LUMINANCE:
-                return WebGLTexelFormat::R32F;
+                return WebGLTexelConversions::R32F;
            case LOCAL_GL_LUMINANCE_ALPHA:
-                return WebGLTexelFormat::RA32F;
+                return WebGLTexelConversions::RA32F;
            default:
-                NS_ASSERTION(false, "Coding mistake?! Should never reach this point.");
+                NS_ABORT_IF_FALSE(false, "Coding mistake?! Should never reach this point.");
-                return WebGLTexelFormat::Generic;
+                return WebGLTexelConversions::BadFormat;
        }
    } else {
        switch (type) {
            case LOCAL_GL_UNSIGNED_SHORT_4_4_4_4:
-                return WebGLTexelFormat::RGBA4444;
+                return WebGLTexelConversions::RGBA4444;
            case LOCAL_GL_UNSIGNED_SHORT_5_5_5_1:
-                return WebGLTexelFormat::RGBA5551;
+                return WebGLTexelConversions::RGBA5551;
            case LOCAL_GL_UNSIGNED_SHORT_5_6_5:
-                return WebGLTexelFormat::RGB565;
+                return WebGLTexelConversions::RGB565;
            default:
-                NS_ASSERTION(false, "Coding mistake?! Should never reach this point.");
+                NS_ABORT_IF_FALSE(false, "Coding mistake?! Should never reach this point.");
-                return WebGLTexelFormat::Generic;
+                return WebGLTexelConversions::BadFormat;
        }
    }
 }
--- a/content/canvas/src/WebGLTexelConversions.cpp
+++ b/content/canvas/src/WebGLTexelConversions.cpp
@ -0,0 +1,382 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include "WebGLTexelConversions.h"
 namespace mozilla {
 using namespace WebGLTexelConversions;
 namespace {
 /** @class WebGLImageConverter
 *
 * This class is just a helper to implement WebGLContext::ConvertImage below.
 *
 * Design comments:
 * 
 * WebGLContext::ConvertImage has to handle hundreds of format conversion paths.
 * It is important to minimize executable code size here. Instead of passing around
 * a large number of function parameters hundreds of times, we create a
 * WebGLImageConverter object once, storing these parameters, and then we call
 * the run() method on it.
 */
 class WebGLImageConverter
 {
    const size_t mWidth, mHeight;
    const void* const mSrcStart;
    void* const mDstStart;
    const ptrdiff_t mSrcStride, mDstStride;
    bool mAlreadyRun;
    bool mSuccess;
    /*
     * Number of elements of the format's underlying data type that make up one
     * texel, i.e. sizeof(texel)/sizeof(type). The conversion loop walks images
     * with typed pointers; this is the amount by which such a pointer has to be
     * advanced to step to the next texel.
     *
     * Formats not listed below trip a debug assertion and yield 0.
     */
    template<int Format>
    static size_t NumElementsPerTexelForFormat() {
        size_t numElements = 0;
        switch (Format) {
            // one element per texel
            case R8:
            case A8:
            case R32F:
            case A32F:
            case RGBA5551:
            case RGBA4444:
            case RGB565:
                numElements = 1;
                break;
            // two elements per texel
            case RA8:
            case RA32F:
                numElements = 2;
                break;
            // three elements per texel
            case RGB8:
            case RGB32F:
                numElements = 3;
                break;
            // four elements per texel
            case RGBA8:
            case BGRA8:
            case BGRX8:
            case RGBA32F:
                numElements = 4;
                break;
            default:
                NS_ABORT_IF_FALSE(false, "Unknown texel format. Coding mistake?");
                break;
        }
        return numElements;
    }
    /*
     * This is the completely format-specific templatized conversion function,
     * that will be instantiated hundreds of times for all different combinations.
     * It is important to avoid generating useless code here. In particular, many
     * instantiations of this function template will never be called, so we try
     * to return immediately in these cases to allow the compiler to avoid generating
     * useless code.
     */
    template<WebGLTexelFormat SrcFormat,
             WebGLTexelFormat DstFormat,
             WebGLTexelPremultiplicationOp PremultiplicationOp>
    void run()
    {
        // check for never-called cases. We early-return to allow the compiler
        // to avoid generating this code. It would be tempting to abort() instead,
        // as returning early does leave the destination surface with uninitialized
        // data, but that would not allow the compiler to avoid generating this code.
        // So instead, we return early, so Success() will return false, and the caller
        // must check that and abort in that case. See WebGLContext::ConvertImage.
        if (SrcFormat == DstFormat &&
            PremultiplicationOp == NoPremultiplicationOp)
        {
            // Should have used a fast exit path earlier, rather than entering this function.
            // we explicitly return here to allow the compiler to avoid generating this code
            return;
        }
        // Only textures uploaded from DOM elements or ImageData can allow DstFormat != SrcFormat.
        // DOM elements can only give BGRA8, BGRX8, A8, RGB565 formats. See DOMElementToImageSurface.
        // ImageData is always RGBA8. So all other SrcFormat will always satisfy DstFormat==SrcFormat,
        // so we can avoid compiling the code for all the unreachable paths.
        const bool CanSrcFormatComeFromDOMElementOrImageData
            = SrcFormat == BGRA8 ||
              SrcFormat == BGRX8 ||
              SrcFormat == A8 ||
              SrcFormat == RGB565 ||
              SrcFormat == RGBA8;
        if (!CanSrcFormatComeFromDOMElementOrImageData &&
            SrcFormat != DstFormat)
        {
            return;
        }
        // Likewise, only textures uploaded from DOM elements or ImageData can possibly have to be unpremultiplied.
        if (!CanSrcFormatComeFromDOMElementOrImageData &&
            PremultiplicationOp == Unpremultiply)
        {
            return;
        }
        // there is no point in premultiplication/unpremultiplication
        // in the following cases:
        //  - the source format has no alpha
        //  - the source format has no color
        //  - the destination format has no color
        if (!HasAlpha(SrcFormat) ||
            !HasColor(SrcFormat) ||
            !HasColor(DstFormat))
        {
            if (PremultiplicationOp != NoPremultiplicationOp)
            {
                return;
            }
        }
        // end of early return cases.
        NS_ABORT_IF_FALSE(!mAlreadyRun, "converter should be run only once!");
        mAlreadyRun = true;
        // gather some compile-time meta-data about the formats at hand.
        typedef
            typename DataTypeForFormat<SrcFormat>::Type
            SrcType;
        typedef
            typename DataTypeForFormat<DstFormat>::Type
            DstType;
        const int IntermediateSrcFormat
            = IntermediateFormat<SrcFormat>::Value;
        const int IntermediateDstFormat
            = IntermediateFormat<DstFormat>::Value;
        typedef
            typename DataTypeForFormat<IntermediateSrcFormat>::Type
            IntermediateSrcType;
        typedef
            typename DataTypeForFormat<IntermediateDstFormat>::Type
            IntermediateDstType;
        const size_t NumElementsPerSrcTexel = NumElementsPerTexelForFormat<SrcFormat>();
        const size_t NumElementsPerDstTexel = NumElementsPerTexelForFormat<DstFormat>();
        const size_t MaxElementsPerTexel = 4;
        NS_ABORT_IF_FALSE(NumElementsPerSrcTexel <= MaxElementsPerTexel, "unhandled format");
        NS_ABORT_IF_FALSE(NumElementsPerDstTexel <= MaxElementsPerTexel, "unhandled format");
        // we assume that the strides are multiples of the sizeof of respective types.
        // this assumption will allow us to iterate over src and dst images using typed
        // pointers, e.g. uint8_t* or uint16_t* or float*, instead of untyped pointers.
        // So this assumption allows us to write cleaner and safer code, but it might
        // not be true forever and if it eventually becomes wrong, we'll have to revert
        // to always iterating using uint8_t* pointers regardless of the types at hand.
        NS_ABORT_IF_FALSE(mSrcStride % sizeof(SrcType) == 0 &&
                          mDstStride % sizeof(DstType) == 0,
                          "Unsupported: texture stride is not a multiple of sizeof(type)");
        const ptrdiff_t srcStrideInElements = mSrcStride / sizeof(SrcType);
        const ptrdiff_t dstStrideInElements = mDstStride / sizeof(DstType);
        const SrcType *srcRowStart = static_cast<const SrcType*>(mSrcStart);
        DstType *dstRowStart = static_cast<DstType*>(mDstStart);
        // the loop performing the texture format conversion
        for (size_t i = 0; i < mHeight; ++i) {
            const SrcType *srcRowEnd = srcRowStart + mWidth * NumElementsPerSrcTexel;
            const SrcType *srcPtr = srcRowStart;
            DstType *dstPtr = dstRowStart;
            while (srcPtr != srcRowEnd) {
                // convert a single texel. We proceed in 3 steps: unpack the source texel
                // so the corresponding interchange format (e.g. unpack RGB565 to RGBA8),
                // convert the resulting data type to the destination type (e.g. convert
                // from RGBA8 to RGBA32F), and finally pack the destination texel
                // (e.g. pack RGBA32F to RGB32F).
                IntermediateSrcType unpackedSrc[MaxElementsPerTexel];
                IntermediateDstType unpackedDst[MaxElementsPerTexel];
                // unpack a src texel to corresponding intermediate src format.
                // for example, unpack RGB565 to RGBA8
                unpack<SrcFormat>(srcPtr, unpackedSrc);
                // convert the data type to the destination type, if needed.
                // for example, convert RGBA8 to RGBA32F
                convertType(unpackedSrc, unpackedDst);
                // pack the destination texel.
                // for example, pack RGBA32F to RGB32F
                pack<DstFormat, PremultiplicationOp>(unpackedDst, dstPtr);
                srcPtr += NumElementsPerSrcTexel;
                dstPtr += NumElementsPerDstTexel;
            }
            srcRowStart += srcStrideInElements;
            dstRowStart += dstStrideInElements;
        }
        mSuccess = true;
        return;
    }
    /*
     * Last runtime-to-compile-time dispatch stage: turns the runtime
     * premultiplicationOp value into the third template argument of the
     * fully templatized run() above. An unexpected value only trips a debug
     * assertion; nothing is converted in that case.
     */
    template<WebGLTexelFormat SrcFormat, WebGLTexelFormat DstFormat>
    void run(WebGLTexelPremultiplicationOp premultiplicationOp)
    {
        switch (premultiplicationOp) {
            case NoPremultiplicationOp:
                run<SrcFormat, DstFormat, NoPremultiplicationOp>();
                return;
            case Premultiply:
                run<SrcFormat, DstFormat, Premultiply>();
                return;
            case Unpremultiply:
                run<SrcFormat, DstFormat, Unpremultiply>();
                return;
            default:
                NS_ABORT_IF_FALSE(false, "unhandled case. Coding mistake?");
        }
    }
    /*
     * Middle dispatch stage: the source format is already a template argument;
     * this turns the runtime dstFormat value into the second template argument
     * and forwards premultiplicationOp to the next stage. Formats that are not
     * valid destinations only trip a debug assertion.
     */
    template<WebGLTexelFormat SrcFormat>
    void run(WebGLTexelFormat dstFormat,
             WebGLTexelPremultiplicationOp premultiplicationOp)
    {
        switch (dstFormat) {
            case R8:
                run<SrcFormat, R8>(premultiplicationOp);
                return;
            case A8:
                run<SrcFormat, A8>(premultiplicationOp);
                return;
            case R32F:
                run<SrcFormat, R32F>(premultiplicationOp);
                return;
            case A32F:
                run<SrcFormat, A32F>(premultiplicationOp);
                return;
            case RA8:
                run<SrcFormat, RA8>(premultiplicationOp);
                return;
            case RA32F:
                run<SrcFormat, RA32F>(premultiplicationOp);
                return;
            case RGB8:
                run<SrcFormat, RGB8>(premultiplicationOp);
                return;
            case RGB565:
                run<SrcFormat, RGB565>(premultiplicationOp);
                return;
            case RGB32F:
                run<SrcFormat, RGB32F>(premultiplicationOp);
                return;
            case RGBA8:
                run<SrcFormat, RGBA8>(premultiplicationOp);
                return;
            case RGBA5551:
                run<SrcFormat, RGBA5551>(premultiplicationOp);
                return;
            case RGBA4444:
                run<SrcFormat, RGBA4444>(premultiplicationOp);
                return;
            case RGBA32F:
                run<SrcFormat, RGBA32F>(premultiplicationOp);
                return;
            default:
                NS_ABORT_IF_FALSE(false, "unhandled case. Coding mistake?");
        }
    }
 public:
    /*
     * Public entry point of the converter. Turns the runtime srcFormat value
     * into a template argument and forwards dstFormat and premultiplicationOp
     * to the next dispatch stage. Callers are expected to check Success()
     * afterwards: an unsupported combination leaves it false.
     */
    void run(WebGLTexelFormat srcFormat,
             WebGLTexelFormat dstFormat,
             WebGLTexelPremultiplicationOp premultiplicationOp)
    {
        switch (srcFormat) {
            case R8:
                run<R8>(dstFormat, premultiplicationOp);
                return;
            case A8:
                run<A8>(dstFormat, premultiplicationOp);
                return;
            case R32F:
                run<R32F>(dstFormat, premultiplicationOp);
                return;
            case A32F:
                run<A32F>(dstFormat, premultiplicationOp);
                return;
            case RA8:
                run<RA8>(dstFormat, premultiplicationOp);
                return;
            case RA32F:
                run<RA32F>(dstFormat, premultiplicationOp);
                return;
            case RGB8:
                run<RGB8>(dstFormat, premultiplicationOp);
                return;
            case BGRX8: // source format only
                run<BGRX8>(dstFormat, premultiplicationOp);
                return;
            case RGB565:
                run<RGB565>(dstFormat, premultiplicationOp);
                return;
            case RGB32F:
                run<RGB32F>(dstFormat, premultiplicationOp);
                return;
            case RGBA8:
                run<RGBA8>(dstFormat, premultiplicationOp);
                return;
            case BGRA8:
                run<BGRA8>(dstFormat, premultiplicationOp);
                return;
            case RGBA5551:
                run<RGBA5551>(dstFormat, premultiplicationOp);
                return;
            case RGBA4444:
                run<RGBA4444>(dstFormat, premultiplicationOp);
                return;
            case RGBA32F:
                run<RGBA32F>(dstFormat, premultiplicationOp);
                return;
            default:
                NS_ABORT_IF_FALSE(false, "unhandled case. Coding mistake?");
        }
    }
    /*
     * Stores the image geometry and buffers for a single conversion.
     * Strides are in bytes and signed; run() divides them by the element size.
     * The converter starts out as "not yet run" and "not successful".
     */
    WebGLImageConverter(size_t width, size_t height,
                        const void* srcStart, void* dstStart,
                        ptrdiff_t srcStride, ptrdiff_t dstStride)
        : mWidth(width)
        , mHeight(height)
        , mSrcStart(srcStart)
        , mDstStart(dstStart)
        , mSrcStride(srcStride)
        , mDstStride(dstStride)
        , mAlreadyRun(false)
        , mSuccess(false)
    {
    }
    // True only once run() has actually carried out a conversion; false if
    // run() was never called or bailed out through one of its early returns.
    bool Success() const
    {
        return mSuccess;
    }
 };
 } // end anonymous namespace
 /*
  * Copies/converts a width x height image from src (srcFormat, rows srcStride
  * bytes apart) into dst (dstFormat, rows dstStride bytes apart).
  *
  * When mPixelStoreFlipY is set, destination rows are written bottom-up.
  * When no format or premultiplication change is needed, rows are simply
  * memcpy'd (dstTexelSize bytes per texel). Otherwise the work is handed to
  * WebGLImageConverter, and failure of the converter is treated as a fatal
  * programming error.
  */
 void
 WebGLContext::ConvertImage(size_t width, size_t height, size_t srcStride, size_t dstStride,
                           const uint8_t* src, uint8_t *dst,
                           WebGLTexelFormat srcFormat, bool srcPremultiplied,
                           WebGLTexelFormat dstFormat, bool dstPremultiplied,
                           size_t dstTexelSize)
 {
    // width and height are unsigned, so an empty image simply means zero.
    if (width == 0 || height == 0)
        return;

    // Destination traversal, shared by both paths below. The stride is turned
    // into a signed value *before* being negated: negating the unsigned
    // dstStride directly would rely on unsigned wraparound.
    const ptrdiff_t dstStrideSigned = static_cast<ptrdiff_t>(dstStride);
    uint8_t* const dstStart = mPixelStoreFlipY
                              ? dst + (height - 1) * dstStride
                              : dst;
    const ptrdiff_t dstDelta = mPixelStoreFlipY ? -dstStrideSigned : dstStrideSigned;

    const bool FormatsRequireNoPremultiplicationOp =
        !HasAlpha(srcFormat) ||
        !HasColor(srcFormat) ||
        !HasColor(dstFormat);

    if (srcFormat == dstFormat &&
        (FormatsRequireNoPremultiplicationOp || srcPremultiplied == dstPremultiplied))
    {
        // fast exit path: we just have to memcpy all the rows.
        //
        // The case where absolutely nothing needs to be done is supposed to have
        // been handled earlier (in TexImage2D_base, etc).
        //
        // So the case we're handling here is when even though no format conversion is needed,
        // we still might have to flip vertically and/or to adjust to a different stride.

        NS_ABORT_IF_FALSE(mPixelStoreFlipY || srcStride != dstStride, "Performance trap -- should handle this case earlier, to avoid memcpy");

        const size_t rowSize = width * dstTexelSize; // doesn't matter, src and dst formats agree
        const uint8_t* srcRow = src;
        uint8_t* dstRow = dstStart;
        // iterate by row count rather than by comparing against an end pointer,
        // so termination does not depend on the value of srcStride.
        for (size_t row = 0; row < height; ++row) {
            memcpy(dstRow, srcRow, rowSize);
            srcRow += srcStride;
            dstRow += dstDelta;
        }
        return;
    }

    WebGLImageConverter converter(width, height, src, dstStart,
                                  static_cast<ptrdiff_t>(srcStride), dstDelta);

    // Pick the alpha operation: none if the formats make it pointless,
    // otherwise whatever bridges the source and destination alpha conventions.
    const WebGLTexelPremultiplicationOp premultiplicationOp
        = FormatsRequireNoPremultiplicationOp     ? NoPremultiplicationOp
        : (!srcPremultiplied && dstPremultiplied) ? Premultiply
        : (srcPremultiplied && !dstPremultiplied) ? Unpremultiply
                                                  : NoPremultiplicationOp;

    converter.run(srcFormat, dstFormat, premultiplicationOp);

    if (!converter.Success()) {
        // the dst image may be left uninitialized, so we better not try to
        // continue even in release builds. This should never happen anyway,
        // and would be a bug in our code.
        NS_RUNTIMEABORT("programming mistake in WebGL texture conversions");
    }
 }
 } // end namespace mozilla 
--- a/content/canvas/src/WebGLTexelConversions.h
+++ b/content/canvas/src/WebGLTexelConversions.h
@ -25,17 +25,6 @@
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 // the pixel conversions code here is originally from this file:
 //   http://trac.webkit.org/browser/trunk/WebCore/platform/graphics/GraphicsContext3D.cpp
 // Keep as much as possible unchanged to ease sharing code with the WebKit guys.
 // Changes:
 //  * added BGR8 path, we need it in Mozilla to load textures from DOMElements
 //  * enclosed in a namespace WebGLTexelConversions so that this code is clearly identifiable in profilers and in symbol table dumps
 //  * added __restrict keywords. Although non-standard, this is very well supported across all compilers
 //    that I know of (GCC/LLVM/MSC/ICC/XLC...)
 //  * optimized scaleFactor computation in Unmultiply functions (1 div instead of 2)
 #ifndef WEBGLTEXELCONVERSIONS_H_
 #define WEBGLTEXELCONVERSIONS_H_
@ -44,6 +33,7 @@
 #endif
 #include "WebGLContext.h"
 #include "mozilla/StandardInteger.h"
 #if defined _MSC_VER
 #define FORCE_INLINE __forceinline
@ -57,410 +47,647 @@ namespace mozilla {
 namespace WebGLTexelConversions {
 // Alpha operation applied to color channels while packing a destination texel.
 // Used as the PremultiplicationOp template argument of pack<>().
 enum WebGLTexelPremultiplicationOp
 {
    // leave color channels as they are
    NoPremultiplicationOp,
    // scale color channels by alpha/255 (see the pack<..., Premultiply> routines)
    Premultiply,
    // scale color channels by 255/alpha, or by 1 when alpha is 0
    // (see the pack<..., Unpremultiply> routines)
    Unpremultiply
 };
 // Compile-time predicate: Value is true exactly for the 32-bit float texel
 // formats (the *32F enumerators).
 template<int Format>
 struct IsFloatFormat
 {
    static const bool Value =
        (Format == R32F)   || (Format == A32F)  ||
        (Format == RA32F)  ||
        (Format == RGB32F) || (Format == RGBA32F);
 };
 // Compile-time predicate: Value is true exactly for the packed formats that
 // store a whole texel in 16 bits (RGB565, RGBA5551, RGBA4444).
 template<int Format>
 struct Is16bppFormat
 {
    static const bool Value =
        (Format == RGB565) || (Format == RGBA5551) || (Format == RGBA4444);
 };
 // Maps a texel format to the C++ type used to iterate over its data.
 // The two defaulted bool parameters are computed from Format and select one
 // of the partial specializations below; callers only pass Format.
 // Primary template: neither float nor 16bpp, so data is addressed as bytes.
 template<int Format,
         bool IsFloat = IsFloatFormat<Format>::Value,
         bool Is16bpp = Is16bppFormat<Format>::Value>
 struct DataTypeForFormat
 {
    typedef uint8_t Type;
 };
 // Float formats: data is addressed as floats.
 template<int Format>
 struct DataTypeForFormat<Format, true, false>
 {
    typedef float Type;
 };
 // Packed 16-bit formats: one uint16_t holds a whole texel.
 template<int Format>
 struct DataTypeForFormat<Format, false, true>
 {
    typedef uint16_t Type;
 };
 // The unpacked 4-channel format a texel of the given Format passes through
 // during conversion: RGBA32F for float formats, RGBA8 for everything else.
 // This is the destination type of unpack<>() and the source type of pack<>().
 template<int Format>
 struct IntermediateFormat
 {
    static const int Value = IsFloatFormat<Format>::Value ? RGBA32F : RGBA8;
 };
 inline size_t TexelBytesForFormat(int format) {
    switch (format) {
        case WebGLTexelConversions::R8:
        case WebGLTexelConversions::A8:
            return 1;
        case WebGLTexelConversions::RA8:
        case WebGLTexelConversions::RGBA5551:
        case WebGLTexelConversions::RGBA4444:
        case WebGLTexelConversions::RGB565:
            return 2;
        case WebGLTexelConversions::RGB8:
            return 3;
        case WebGLTexelConversions::RGBA8:
        case WebGLTexelConversions::BGRA8:
        case WebGLTexelConversions::BGRX8:
        case WebGLTexelConversions::R32F:
        case WebGLTexelConversions::A32F:
            return 4;
        case WebGLTexelConversions::RA32F:
            return 8;
        case WebGLTexelConversions::RGB32F:
            return 12;
        case WebGLTexelConversions::RGBA32F:
            return 16;
        default:
            NS_ABORT_IF_FALSE(false, "Unknown texel format. Coding mistake?");
            return 0;
    }
 }
 // Whether the given texel format carries an alpha channel.
 // Any value not listed (including non-format values) is reported as false.
 FORCE_INLINE bool HasAlpha(int format) {
    switch (format) {
        case A8:
        case A32F:
        case RA8:
        case RA32F:
        case RGBA8:
        case BGRA8:
        case RGBA32F:
        case RGBA4444:
        case RGBA5551:
            return true;
        default:
            return false;
    }
 }
 // Whether the given texel format carries any color (non-alpha) channel.
 // Any value not listed (including the alpha-only formats) is reported as false.
 FORCE_INLINE bool HasColor(int format) {
    switch (format) {
        case R8:
        case R32F:
        case RA8:
        case RA32F:
        case RGB8:
        case BGRX8:
        case RGB565:
        case RGB32F:
        case RGBA8:
        case BGRA8:
        case RGBA32F:
        case RGBA4444:
        case RGBA5551:
            return true;
        default:
            return false;
    }
 }
 /****** BEGIN CODE SHARED WITH WEBKIT ******/
 // the pack/unpack functions here are originally from this file:
 //   http://trac.webkit.org/browser/trunk/WebCore/platform/graphics/GraphicsContext3D.cpp
 //----------------------------------------------------------------------
 // Pixel unpacking routines.
-FORCE_INLINE void unpackRGBA8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<int Format>
 FORCE_INLINE void
 unpack(const typename DataTypeForFormat<Format>::Type* __restrict src,
       typename DataTypeForFormat<IntermediateFormat<Format>::Value>::Type* __restrict dst)
 {
-    destination[0] = source[0];
+    NS_ABORT_IF_FALSE(false, "Unimplemented texture format conversion");
    destination[1] = source[1];
    destination[2] = source[2];
    destination[3] = source[3];
 }
-FORCE_INLINE void unpackRGB8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 unpack<RGBA8>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    destination[0] = source[0];
+    dst[0] = src[0];
-    destination[1] = source[1];
+    dst[1] = src[1];
-    destination[2] = source[2];
+    dst[2] = src[2];
-    destination[3] = 0xFF;
+    dst[3] = src[3];
 }
-FORCE_INLINE void unpackBGRA8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 unpack<RGB8>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    destination[0] = source[2];
+    dst[0] = src[0];
-    destination[1] = source[1];
+    dst[1] = src[1];
-    destination[2] = source[0];
+    dst[2] = src[2];
-    destination[3] = source[3];
+    dst[3] = 0xFF;
 }
-FORCE_INLINE void unpackBGR8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 unpack<BGRA8>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    destination[0] = source[2];
+    dst[0] = src[2];
-    destination[1] = source[1];
+    dst[1] = src[1];
-    destination[2] = source[0];
+    dst[2] = src[0];
-    destination[3] = 0xFF;
+    dst[3] = src[3];
 }
-FORCE_INLINE void unpackRGBA5551ToRGBA8(const uint16_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 unpack<BGRX8>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    uint16_t packedValue = source[0];
+    dst[0] = src[2];
-    uint8_t r = packedValue >> 11;
+    dst[1] = src[1];
    dst[2] = src[0];
    dst[3] = 0xFF;
 }
 template<> FORCE_INLINE void
 unpack<RGBA5551>(const uint16_t* __restrict src, uint8_t* __restrict dst)
 {
    uint16_t packedValue = src[0];
    uint8_t r = (packedValue >> 11) & 0x1F;
    uint8_t g = (packedValue >> 6) & 0x1F;
    uint8_t b = (packedValue >> 1) & 0x1F;
-    destination[0] = (r << 3) | (r & 0x7);
+    dst[0] = (r << 3) | (r & 0x7);
-    destination[1] = (g << 3) | (g & 0x7);
+    dst[1] = (g << 3) | (g & 0x7);
-    destination[2] = (b << 3) | (b & 0x7);
+    dst[2] = (b << 3) | (b & 0x7);
-    destination[3] = (packedValue & 0x1) ? 0xFF : 0x0;
+    dst[3] = (packedValue & 0x1) ? 0xFF : 0;
 }
-FORCE_INLINE void unpackRGBA4444ToRGBA8(const uint16_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 unpack<RGBA4444>(const uint16_t* __restrict src, uint8_t* __restrict dst)
 {
-    uint16_t packedValue = source[0];
+    uint16_t packedValue = src[0];
-    uint8_t r = packedValue >> 12;
+    uint8_t r = (packedValue >> 12) & 0x0F;
    uint8_t g = (packedValue >> 8) & 0x0F;
    uint8_t b = (packedValue >> 4) & 0x0F;
    uint8_t a = packedValue & 0x0F;
-    destination[0] = r << 4 | r;
+    dst[0] = (r << 4) | r;
-    destination[1] = g << 4 | g;
+    dst[1] = (g << 4) | g;
-    destination[2] = b << 4 | b;
+    dst[2] = (b << 4) | b;
-    destination[3] = a << 4 | a;
+    dst[3] = (a << 4) | a;
 }
-FORCE_INLINE void unpackRGB565ToRGBA8(const uint16_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 unpack<RGB565>(const uint16_t* __restrict src, uint8_t* __restrict dst)
 {
-    uint16_t packedValue = source[0];
+    uint16_t packedValue = src[0];
-    uint8_t r = packedValue >> 11;
+    uint8_t r = (packedValue >> 11) & 0x1F;
    uint8_t g = (packedValue >> 5) & 0x3F;
    uint8_t b = packedValue & 0x1F;
-    destination[0] = (r << 3) | (r & 0x7);
+    dst[0] = (r << 3) | (r & 0x7);
-    destination[1] = (g << 2) | (g & 0x3);
+    dst[1] = (g << 2) | (g & 0x3);
-    destination[2] = (b << 3) | (b & 0x7);
+    dst[2] = (b << 3) | (b & 0x7);
-    destination[3] = 0xFF;
+    dst[3] = 0xFF;
 }
-FORCE_INLINE void unpackR8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 unpack<R8>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    destination[0] = source[0];
+    dst[0] = src[0];
-    destination[1] = source[0];
+    dst[1] = src[0];
-    destination[2] = source[0];
+    dst[2] = src[0];
-    destination[3] = 0xFF;
+    dst[3] = 0xFF;
 }
-FORCE_INLINE void unpackRA8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 unpack<RA8>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    destination[0] = source[0];
+    dst[0] = src[0];
-    destination[1] = source[0];
+    dst[1] = src[0];
-    destination[2] = source[0];
+    dst[2] = src[0];
-    destination[3] = source[1];
+    dst[3] = src[1];
 }
-FORCE_INLINE void unpackA8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 unpack<A8>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    destination[0] = 0x0;
+    dst[0] = 0;
-    destination[1] = 0x0;
+    dst[1] = 0;
-    destination[2] = 0x0;
+    dst[2] = 0;
-    destination[3] = source[0];
+    dst[3] = src[0];
 }
-FORCE_INLINE void unpackRGB32FToRGBA32F(const float* __restrict source, float* __restrict destination)
+template<> FORCE_INLINE void
 unpack<RGBA32F>(const float* __restrict src, float* __restrict dst)
 {
-    destination[0] = source[0];
+    dst[0] = src[0];
-    destination[1] = source[1];
+    dst[1] = src[1];
-    destination[2] = source[2];
+    dst[2] = src[2];
-    destination[3] = 1;
+    dst[3] = src[3];
 }
-FORCE_INLINE void unpackR32FToRGBA32F(const float* __restrict source, float* __restrict destination)
+template<> FORCE_INLINE void
 unpack<RGB32F>(const float* __restrict src, float* __restrict dst)
 {
-    destination[0] = source[0];
+    dst[0] = src[0];
-    destination[1] = source[0];
+    dst[1] = src[1];
-    destination[2] = source[0];
+    dst[2] = src[2];
-    destination[3] = 1;
+    dst[3] = 1.0f;
 }
-FORCE_INLINE void unpackRA32FToRGBA32F(const float* __restrict source, float* __restrict destination)
+template<> FORCE_INLINE void
 unpack<R32F>(const float* __restrict src, float* __restrict dst)
 {
-    destination[0] = source[0];
+    dst[0] = src[0];
-    destination[1] = source[0];
+    dst[1] = src[0];
-    destination[2] = source[0];
+    dst[2] = src[0];
-    destination[3] = source[1];
+    dst[3] = 1.0f;
 }
-FORCE_INLINE void unpackA32FToRGBA32F(const float* __restrict source, float* __restrict destination)
+template<> FORCE_INLINE void
 unpack<RA32F>(const float* __restrict src, float* __restrict dst)
 {
-    destination[0] = 0;
+    dst[0] = src[0];
-    destination[1] = 0;
+    dst[1] = src[0];
-    destination[2] = 0;
+    dst[2] = src[0];
-    destination[3] = source[0];
+    dst[3] = src[1];
 }
 // Unpacks an alpha-only float texel into 4 floats: the three color
 // channels are zeroed, and the single source value becomes alpha.
 template<> FORCE_INLINE void
 unpack<A32F>(const float* __restrict src, float* __restrict dst)
 {
    const float alpha = src[0];
    for (int channel = 0; channel < 3; ++channel) {
        dst[channel] = 0;
    }
    dst[3] = alpha;
 }
 //----------------------------------------------------------------------
 // Pixel packing routines.
 //
-FORCE_INLINE void packRGBA8ToA8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<int Format, int PremultiplicationOp>
 FORCE_INLINE void
 pack(const typename DataTypeForFormat<IntermediateFormat<Format>::Value>::Type* __restrict src,
     typename DataTypeForFormat<Format>::Type* __restrict dst)
 {
-    destination[0] = source[3];
+    NS_ABORT_IF_FALSE(false, "Unimplemented texture format conversion");
 }
-FORCE_INLINE void packRGBA8ToR8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<A8, NoPremultiplicationOp>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    destination[0] = source[0];
+    dst[0] = src[3];
 }
-FORCE_INLINE void packRGBA8ToR8Premultiply(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<A8, Premultiply>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    float scaleFactor = source[3] / 255.0f;
+    dst[0] = src[3];
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+}
-    destination[0] = sourceR;
+
 // Alpha-only destination: only the 4th (alpha) source channel is written,
 // so the premultiplication op makes no difference here.
 template<> FORCE_INLINE void
 pack<A8, Unpremultiply>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
    const uint8_t alpha = src[3];
    dst[0] = alpha;
 }
 // Single-channel destination: keeps the first source channel as is.
 template<> FORCE_INLINE void
 pack<R8, NoPremultiplicationOp>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
    const uint8_t red = src[0];
    dst[0] = red;
 }
 // Single-channel destination: the first source channel scaled by alpha/255,
 // truncated back to 8 bits.
 template<> FORCE_INLINE void
 pack<R8, Premultiply>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
    const float alphaScale = src[3] / 255.0f;
    dst[0] = static_cast<uint8_t>(src[0] * alphaScale);
 }
 // Single-channel destination: the first source channel scaled by 255/alpha
 // (left unscaled when alpha is 0), truncated back to 8 bits.
 template<> FORCE_INLINE void
 pack<R8, Unpremultiply>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
    float inverseAlphaScale = 1.0f;
    if (src[3]) {
        inverseAlphaScale = 255.0f / src[3];
    }
    dst[0] = static_cast<uint8_t>(src[0] * inverseAlphaScale);
 }
 // Two-channel destination: first source channel followed by alpha, unchanged.
 template<> FORCE_INLINE void
 pack<RA8, NoPremultiplicationOp>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
    const uint8_t red = src[0];
    const uint8_t alpha = src[3];
    dst[0] = red;
    dst[1] = alpha;
 }
 // Two-channel destination: first source channel scaled by alpha/255
 // (truncated to 8 bits), followed by the unchanged alpha.
 template<> FORCE_INLINE void
 pack<RA8, Premultiply>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
    const uint8_t alpha = src[3];
    const float alphaScale = alpha / 255.0f;
    dst[0] = static_cast<uint8_t>(src[0] * alphaScale);
    dst[1] = alpha;
 }
 // FIXME: this routine is lossy and must be removed.
-FORCE_INLINE void packRGBA8ToR8Unmultiply(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RA8, Unpremultiply>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f;
+    float scaleFactor = src[3] ? 255.0f / src[3] : 1.0f;
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    uint8_t srcR = static_cast<uint8_t>(src[0] * scaleFactor);
-    destination[0] = sourceR;
+    dst[0] = srcR;
    dst[1] = src[3];
 }
-FORCE_INLINE void packRGBA8ToRA8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGB8, NoPremultiplicationOp>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    destination[0] = source[0];
+    dst[0] = src[0];
-    destination[1] = source[3];
+    dst[1] = src[1];
    dst[2] = src[2];
 }
-FORCE_INLINE void packRGBA8ToRA8Premultiply(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGB8, Premultiply>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    float scaleFactor = source[3] / 255.0f;
+    float scaleFactor = src[3] / 255.0f;
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    uint8_t srcR = static_cast<uint8_t>(src[0] * scaleFactor);
-    destination[0] = sourceR;
+    uint8_t srcG = static_cast<uint8_t>(src[1] * scaleFactor);
-    destination[1] = source[3];
+    uint8_t srcB = static_cast<uint8_t>(src[2] * scaleFactor);
    dst[0] = srcR;
    dst[1] = srcG;
    dst[2] = srcB;
 }
 // Three-channel destination: each color channel scaled by 255/alpha
 // (left unscaled when alpha is 0) and truncated to 8 bits; alpha is dropped.
 template<> FORCE_INLINE void
 pack<RGB8, Unpremultiply>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
    float inverseAlphaScale = 1.0f;
    if (src[3]) {
        inverseAlphaScale = 255.0f / src[3];
    }
    for (int channel = 0; channel < 3; ++channel) {
        dst[channel] = static_cast<uint8_t>(src[channel] * inverseAlphaScale);
    }
 }
 // Four-channel destination with no alpha operation: a plain 4-byte copy.
 template<> FORCE_INLINE void
 pack<RGBA8, NoPremultiplicationOp>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
    for (int channel = 0; channel < 4; ++channel) {
        dst[channel] = src[channel];
    }
 }
 // Four-channel destination: each color channel scaled by alpha/255 and
 // truncated to 8 bits; alpha itself is copied unchanged.
 template<> FORCE_INLINE void
 pack<RGBA8, Premultiply>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
    const uint8_t alpha = src[3];
    const float alphaScale = alpha / 255.0f;
    for (int channel = 0; channel < 3; ++channel) {
        dst[channel] = static_cast<uint8_t>(src[channel] * alphaScale);
    }
    dst[3] = alpha;
 }
 // FIXME: this routine is lossy and must be removed.
-FORCE_INLINE void packRGBA8ToRA8Unmultiply(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGBA8, Unpremultiply>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
-    float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f;
+    float scaleFactor = src[3] ? 255.0f / src[3] : 1.0f;
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    uint8_t srcR = static_cast<uint8_t>(src[0] * scaleFactor);
-    destination[0] = sourceR;
+    uint8_t srcG = static_cast<uint8_t>(src[1] * scaleFactor);
-    destination[1] = source[3];
+    uint8_t srcB = static_cast<uint8_t>(src[2] * scaleFactor);
    dst[0] = srcR;
    dst[1] = srcG;
    dst[2] = srcB;
    dst[3] = src[3];
 }
-FORCE_INLINE void packRGBA8ToRGB8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGBA4444, NoPremultiplicationOp>(const uint8_t* __restrict src, uint16_t* __restrict dst)
 {
-    destination[0] = source[0];
+    *dst = ( ((src[0] & 0xF0) << 8)
-    destination[1] = source[1];
+           | ((src[1] & 0xF0) << 4)
-    destination[2] = source[2];
+           | (src[2] & 0xF0)
           | (src[3] >> 4) );
 }
-FORCE_INLINE void packRGBA8ToRGB8Premultiply(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGBA4444, Premultiply>(const uint8_t* __restrict src, uint16_t* __restrict dst)
 {
-    float scaleFactor = source[3] / 255.0f;
+    float scaleFactor = src[3] / 255.0f;
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    uint8_t srcR = static_cast<uint8_t>(src[0] * scaleFactor);
-    uint8_t sourceG = static_cast<uint8_t>(static_cast<float>(source[1]) * scaleFactor);
+    uint8_t srcG = static_cast<uint8_t>(src[1] * scaleFactor);
-    uint8_t sourceB = static_cast<uint8_t>(static_cast<float>(source[2]) * scaleFactor);
+    uint8_t srcB = static_cast<uint8_t>(src[2] * scaleFactor);
-    destination[0] = sourceR;
+    *dst = ( ((srcR & 0xF0) << 8)
-    destination[1] = sourceG;
+           | ((srcG & 0xF0) << 4)
-    destination[2] = sourceB;
+           | (srcB & 0xF0)
           | (src[3] >> 4));
 }
 // FIXME: this routine is lossy and must be removed.
-FORCE_INLINE void packRGBA8ToRGB8Unmultiply(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGBA4444, Unpremultiply>(const uint8_t* __restrict src, uint16_t* __restrict dst)
 {
-    float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f;
+    float scaleFactor = src[3] ? 255.0f / src[3] : 1.0f;
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    uint8_t srcR = static_cast<uint8_t>(src[0] * scaleFactor);
-    uint8_t sourceG = static_cast<uint8_t>(static_cast<float>(source[1]) * scaleFactor);
+    uint8_t srcG = static_cast<uint8_t>(src[1] * scaleFactor);
-    uint8_t sourceB = static_cast<uint8_t>(static_cast<float>(source[2]) * scaleFactor);
+    uint8_t srcB = static_cast<uint8_t>(src[2] * scaleFactor);
-    destination[0] = sourceR;
+    *dst = ( ((srcR & 0xF0) << 8)
-    destination[1] = sourceG;
+           | ((srcG & 0xF0) << 4)
-    destination[2] = sourceB;
+           | (srcB & 0xF0)
           | (src[3] >> 4));
 }
-// This is only used when the source format is different than kSourceFormatRGBA8.
+template<> FORCE_INLINE void
-FORCE_INLINE void packRGBA8ToRGBA8(const uint8_t* __restrict source, uint8_t* __restrict destination)
+pack<RGBA5551, NoPremultiplicationOp>(const uint8_t* __restrict src, uint16_t* __restrict dst)
 {
-    destination[0] = source[0];
+    *dst = ( ((src[0] & 0xF8) << 8)
-    destination[1] = source[1];
+           | ((src[1] & 0xF8) << 3)
-    destination[2] = source[2];
+           | ((src[2] & 0xF8) >> 2)
-    destination[3] = source[3];
+           | (src[3] >> 7));
 }
-FORCE_INLINE void packRGBA8ToRGBA8Premultiply(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGBA5551, Premultiply>(const uint8_t* __restrict src, uint16_t* __restrict dst)
 {
-    float scaleFactor = source[3] / 255.0f;
+    float scaleFactor = src[3] / 255.0f;
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    uint8_t srcR = static_cast<uint8_t>(src[0] * scaleFactor);
-    uint8_t sourceG = static_cast<uint8_t>(static_cast<float>(source[1]) * scaleFactor);
+    uint8_t srcG = static_cast<uint8_t>(src[1] * scaleFactor);
-    uint8_t sourceB = static_cast<uint8_t>(static_cast<float>(source[2]) * scaleFactor);
+    uint8_t srcB = static_cast<uint8_t>(src[2] * scaleFactor);
-    destination[0] = sourceR;
+    *dst = ( ((srcR & 0xF8) << 8)
-    destination[1] = sourceG;
+           | ((srcG & 0xF8) << 3)
-    destination[2] = sourceB;
+           | ((srcB & 0xF8) >> 2)
-    destination[3] = source[3];
+           | (src[3] >> 7));
 }
 // FIXME: this routine is lossy and must be removed.
-FORCE_INLINE void packRGBA8ToRGBA8Unmultiply(const uint8_t* __restrict source, uint8_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGBA5551, Unpremultiply>(const uint8_t* __restrict src, uint16_t* __restrict dst)
 {
-    float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f;
+    float scaleFactor = src[3] ? 255.0f / src[3] : 1.0f;
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    uint8_t srcR = static_cast<uint8_t>(src[0] * scaleFactor);
-    uint8_t sourceG = static_cast<uint8_t>(static_cast<float>(source[1]) * scaleFactor);
+    uint8_t srcG = static_cast<uint8_t>(src[1] * scaleFactor);
-    uint8_t sourceB = static_cast<uint8_t>(static_cast<float>(source[2]) * scaleFactor);
+    uint8_t srcB = static_cast<uint8_t>(src[2] * scaleFactor);
-    destination[0] = sourceR;
+    *dst = ( ((srcR & 0xF8) << 8)
-    destination[1] = sourceG;
+           | ((srcG & 0xF8) << 3)
-    destination[2] = sourceB;
+           | ((srcB & 0xF8) >> 2)
-    destination[3] = source[3];
+           | (src[3] >> 7));
 }
-FORCE_INLINE void packRGBA8ToUnsignedShort4444(const uint8_t* __restrict source, uint16_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGB565, NoPremultiplicationOp>(const uint8_t* __restrict src, uint16_t* __restrict dst)
 {
-    *destination = (((source[0] & 0xF0) << 8)
+    *dst = ( ((src[0] & 0xF8) << 8)
-                    | ((source[1] & 0xF0) << 4)
+           | ((src[1] & 0xFC) << 3)
-                    | (source[2] & 0xF0)
+           | ((src[2] & 0xF8) >> 3));
                    | (source[3] >> 4));
 }
-FORCE_INLINE void packRGBA8ToUnsignedShort4444Premultiply(const uint8_t* __restrict source, uint16_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGB565, Premultiply>(const uint8_t* __restrict src, uint16_t* __restrict dst)
 {
-    float scaleFactor = source[3] / 255.0f;
+    float scaleFactor = src[3] / 255.0f;
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    uint8_t srcR = static_cast<uint8_t>(src[0] * scaleFactor);
-    uint8_t sourceG = static_cast<uint8_t>(static_cast<float>(source[1]) * scaleFactor);
+    uint8_t srcG = static_cast<uint8_t>(src[1] * scaleFactor);
-    uint8_t sourceB = static_cast<uint8_t>(static_cast<float>(source[2]) * scaleFactor);
+    uint8_t srcB = static_cast<uint8_t>(src[2] * scaleFactor);
-    *destination = (((sourceR & 0xF0) << 8)
+    *dst = ( ((srcR & 0xF8) << 8)
-                    | ((sourceG & 0xF0) << 4)
+           | ((srcG & 0xFC) << 3)
-                    | (sourceB & 0xF0)
+           | ((srcB & 0xF8) >> 3));
                    | (source[3] >> 4));
 }
 // FIXME: this routine is lossy and must be removed.
-FORCE_INLINE void packRGBA8ToUnsignedShort4444Unmultiply(const uint8_t* __restrict source, uint16_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGB565, Unpremultiply>(const uint8_t* __restrict src, uint16_t* __restrict dst)
 {
-    float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f;
+    float scaleFactor = src[3] ? 255.0f / src[3] : 1.0f;
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    uint8_t srcR = static_cast<uint8_t>(src[0] * scaleFactor);
-    uint8_t sourceG = static_cast<uint8_t>(static_cast<float>(source[1]) * scaleFactor);
+    uint8_t srcG = static_cast<uint8_t>(src[1] * scaleFactor);
-    uint8_t sourceB = static_cast<uint8_t>(static_cast<float>(source[2]) * scaleFactor);
+    uint8_t srcB = static_cast<uint8_t>(src[2] * scaleFactor);
-    *destination = (((sourceR & 0xF0) << 8)
+    *dst = ( ((srcR & 0xF8) << 8)
-                    | ((sourceG & 0xF0) << 4)
+           | ((srcG & 0xFC) << 3)
-                    | (sourceB & 0xF0)
+           | ((srcB & 0xF8) >> 3));
                    | (source[3] >> 4));
 }
-FORCE_INLINE void packRGBA8ToUnsignedShort5551(const uint8_t* __restrict source, uint16_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGB32F, NoPremultiplicationOp>(const float* __restrict src, float* __restrict dst)
 {
-    *destination = (((source[0] & 0xF8) << 8)
+    dst[0] = src[0];
-                    | ((source[1] & 0xF8) << 3)
+    dst[1] = src[1];
-                    | ((source[2] & 0xF8) >> 2)
+    dst[2] = src[2];
                    | (source[3] >> 7));
 }
-FORCE_INLINE void packRGBA8ToUnsignedShort5551Premultiply(const uint8_t* __restrict source, uint16_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGB32F, Premultiply>(const float* __restrict src, float* __restrict dst)
 {
-    float scaleFactor = source[3] / 255.0f;
+    float scaleFactor = src[3];
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    dst[0] = src[0] * scaleFactor;
-    uint8_t sourceG = static_cast<uint8_t>(static_cast<float>(source[1]) * scaleFactor);
+    dst[1] = src[1] * scaleFactor;
-    uint8_t sourceB = static_cast<uint8_t>(static_cast<float>(source[2]) * scaleFactor);
+    dst[2] = src[2] * scaleFactor;
    *destination = (((sourceR & 0xF8) << 8)
                    | ((sourceG & 0xF8) << 3)
                    | ((sourceB & 0xF8) >> 2)
                    | (source[3] >> 7));
 }
-// FIXME: this routine is lossy and must be removed.
+template<> FORCE_INLINE void
-FORCE_INLINE void packRGBA8ToUnsignedShort5551Unmultiply(const uint8_t* __restrict source, uint16_t* __restrict destination)
+pack<RGBA32F, NoPremultiplicationOp>(const float* __restrict src, float* __restrict dst)
 {
-    float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f;
+    dst[0] = src[0];
-    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
+    dst[1] = src[1];
-    uint8_t sourceG = static_cast<uint8_t>(static_cast<float>(source[1]) * scaleFactor);
+    dst[2] = src[2];
-    uint8_t sourceB = static_cast<uint8_t>(static_cast<float>(source[2]) * scaleFactor);
+    dst[3] = src[3];
    *destination = (((sourceR & 0xF8) << 8)
                    | ((sourceG & 0xF8) << 3)
                    | ((sourceB & 0xF8) >> 2)
                    | (source[3] >> 7));
 }
-FORCE_INLINE void packRGBA8ToUnsignedShort565(const uint8_t* __restrict source, uint16_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<RGBA32F, Premultiply>(const float* __restrict src, float* __restrict dst)
 {
-    *destination = (((source[0] & 0xF8) << 8)
+    float scaleFactor = src[3];
-                    | ((source[1] & 0xFC) << 3)
+    dst[0] = src[0] * scaleFactor;
-                    | ((source[2] & 0xF8) >> 3));
+    dst[1] = src[1] * scaleFactor;
    dst[2] = src[2] * scaleFactor;
    dst[3] = src[3];
 }
-FORCE_INLINE void packRGBA8ToUnsignedShort565Premultiply(const uint8_t* __restrict source, uint16_t* __restrict destination)
+template<> FORCE_INLINE void
 pack<A32F, NoPremultiplicationOp>(const float* __restrict src, float* __restrict dst)
 {
-    float scaleFactor = source[3] / 255.0f;
+    dst[0] = src[3];
    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
    uint8_t sourceG = static_cast<uint8_t>(static_cast<float>(source[1]) * scaleFactor);
    uint8_t sourceB = static_cast<uint8_t>(static_cast<float>(source[2]) * scaleFactor);
    *destination = (((sourceR & 0xF8) << 8)
                    | ((sourceG & 0xFC) << 3)
                    | ((sourceB & 0xF8) >> 3));
 }
-// FIXME: this routine is lossy and must be removed.
+template<> FORCE_INLINE void
-FORCE_INLINE void packRGBA8ToUnsignedShort565Unmultiply(const uint8_t* __restrict source, uint16_t* __restrict destination)
+pack<A32F, Premultiply>(const float* __restrict src, float* __restrict dst)
 {
-    float scaleFactor = source[3] ? 255.0f / source[3] : 1.0f;
+    dst[0] = src[3];
    uint8_t sourceR = static_cast<uint8_t>(static_cast<float>(source[0]) * scaleFactor);
    uint8_t sourceG = static_cast<uint8_t>(static_cast<float>(source[1]) * scaleFactor);
    uint8_t sourceB = static_cast<uint8_t>(static_cast<float>(source[2]) * scaleFactor);
    *destination = (((sourceR & 0xF8) << 8)
                    | ((sourceG & 0xFC) << 3)
                    | ((sourceB & 0xF8) >> 3));
 }
-FORCE_INLINE void packRGBA32FToRGB32F(const float* __restrict source, float* __restrict destination)
+template<> FORCE_INLINE void
 pack<R32F, NoPremultiplicationOp>(const float* __restrict src, float* __restrict dst)
 {
-    destination[0] = source[0];
+    dst[0] = src[0];
    destination[1] = source[1];
    destination[2] = source[2];
 }
-FORCE_INLINE void packRGBA32FToRGB32FPremultiply(const float* __restrict source, float* __restrict destination)
+template<> FORCE_INLINE void
 pack<R32F, Premultiply>(const float* __restrict src, float* __restrict dst)
 {
-    float scaleFactor = source[3];
+    float scaleFactor = src[3];
-    destination[0] = source[0] * scaleFactor;
+    dst[0] = src[0] * scaleFactor;
    destination[1] = source[1] * scaleFactor;
    destination[2] = source[2] * scaleFactor;
 }
-FORCE_INLINE void packRGBA32FToRGBA32FPremultiply(const float* __restrict source, float* __restrict destination)
+template<> FORCE_INLINE void
 pack<RA32F, NoPremultiplicationOp>(const float* __restrict src, float* __restrict dst)
 {
-    float scaleFactor = source[3];
+    dst[0] = src[0];
-    destination[0] = source[0] * scaleFactor;
+    dst[1] = src[3];
    destination[1] = source[1] * scaleFactor;
    destination[2] = source[2] * scaleFactor;
    destination[3] = source[3];
 }
-FORCE_INLINE void packRGBA32FToA32F(const float* __restrict source, float* __restrict destination)
+template<> FORCE_INLINE void
 pack<RA32F, Premultiply>(const float* __restrict src, float* __restrict dst)
 {
-    destination[0] = source[3];
+    float scaleFactor = src[3];
-}
+    dst[0] = src[0] * scaleFactor;
-
+    dst[1] = scaleFactor;
 // Same body as packRGBA32FToA32F: only the alpha channel is stored.
 // Kept as its own entry point to avoid special-casing.
 FORCE_INLINE void packRGBA32FToA32FPremultiply(const float* __restrict source, float* __restrict destination)
 {
    *destination = source[3];
 }
 // Packs an RGBA32F texel into a single-channel R32F texel by copying red.
 FORCE_INLINE void packRGBA32FToR32F(const float* __restrict source, float* __restrict destination)
 {
    *destination = *source;
 }
 // Packs an RGBA32F texel into a single-channel R32F texel, storing red
 // multiplied by alpha.
 FORCE_INLINE void packRGBA32FToR32FPremultiply(const float* __restrict source, float* __restrict destination)
 {
    const float red = source[0];
    const float alpha = source[3];
    *destination = red * alpha;
 }
 // Packs an RGBA32F texel into a two-channel RA32F texel: red first,
 // then alpha, both copied unchanged.
 FORCE_INLINE void packRGBA32FToRA32F(const float* __restrict source, float* __restrict destination)
 {
    const float red = source[0];
    const float alpha = source[3];
    destination[0] = red;
    destination[1] = alpha;
 }
 // Packs an RGBA32F texel into a two-channel RA32F texel: red multiplied
 // by alpha first, then alpha itself unchanged.
 FORCE_INLINE void packRGBA32FToRA32FPremultiply(const float* __restrict source, float* __restrict destination)
 {
    const float alpha = source[3];
    const float premultipliedRed = source[0] * alpha;
    destination[0] = premultipliedRed;
    destination[1] = alpha;
 }
 /****** END CODE SHARED WITH WEBKIT ******/
 // Generic fallback for source/destination component type pairs that have
 // no dedicated specialization. It performs no conversion and reports the
 // missing implementation through NS_ABORT_IF_FALSE.
 template<typename SrcType, typename DstType>
 FORCE_INLINE void
 convertType(const SrcType* __restrict src, DstType* __restrict dst)
 {
    NS_ABORT_IF_FALSE(false, "Unimplemented texture format conversion");
 }
 // Byte-to-byte conversion: the four channels are copied unchanged.
 template<> FORCE_INLINE void
 convertType<uint8_t, uint8_t>(const uint8_t* __restrict src, uint8_t* __restrict dst)
 {
    for (int channel = 0; channel < 4; ++channel) {
        dst[channel] = src[channel];
    }
 }
 // Float-to-float conversion: the four channels are copied unchanged.
 template<> FORCE_INLINE void
 convertType<float, float>(const float* __restrict src, float* __restrict dst)
 {
    for (int channel = 0; channel < 4; ++channel) {
        dst[channel] = src[channel];
    }
 }
 // Byte-to-float conversion: each of the four channels is multiplied by
 // 1/255, so a byte value of 255 lands at (approximately) 1.0.
 template<> FORCE_INLINE void
 convertType<uint8_t, float>(const uint8_t* __restrict src, float* __restrict dst)
 {
    // Multiply by the reciprocal rather than dividing per channel.
    const float byteToUnit = 1.f / 255.0f;
    for (int channel = 0; channel < 4; ++channel) {
        dst[channel] = src[channel] * byteToUnit;
    }
 }
 #undef FORCE_INLINE
 } // end namespace WebGLTexelConversions
 } // end namespace mozilla