b=451621; push new pixman with arm fast-paths; r=me

This commit is contained in:
Vladimir Vukicevic 2008-09-17 14:15:01 -07:00
parent a84761b588
commit 6e6bf3a103
12 changed files with 645 additions and 28 deletions

View File

@ -644,3 +644,5 @@ MOZ_V1_STRING_ABI = @MOZ_V1_STRING_ABI@
MOZ_EMBEDDING_LEVEL_DEFAULT = @MOZ_EMBEDDING_LEVEL_DEFAULT@
MOZ_EMBEDDING_LEVEL_BASIC = @MOZ_EMBEDDING_LEVEL_BASIC@
MOZ_EMBEDDING_LEVEL_MINIMAL = @MOZ_EMBEDDING_LEVEL_MINIMAL@
HAVE_ARM_SIMD= @HAVE_ARM_SIMD@

View File

@ -2991,6 +2991,17 @@ dnl NB - later gcc versions require -mmmx for this header to be successfully
dnl included (or another option which implies it, such as -march=pentium-mmx)
AC_CHECK_HEADERS(mmintrin.h)
dnl Probe whether the compiler/assembler accepts ARMv6 SIMD ("media")
dnl instructions; uqadd8 is ARMv6+.  On success, define HAVE_ARM_SIMD
dnl for config.h and AC_SUBST it so the makefiles can enable the
dnl pixman ARM fast paths (see gfx/cairo/libpixman).
AC_MSG_CHECKING(for ARM SIMD support)
AC_TRY_COMPILE([],
[asm("uqadd8 r1, r1, r2");],
result="yes", result="no")
AC_MSG_RESULT("$result")
if test "$result" = "yes"; then
    AC_DEFINE(HAVE_ARM_SIMD)
    HAVE_ARM_SIMD=1
fi
AC_SUBST(HAVE_ARM_SIMD)
dnl Check whether the compiler supports the new-style C++ standard
dnl library headers (i.e. <new>) or needs the old "new.h"
AC_LANG_CPLUSPLUS

View File

@ -8,7 +8,7 @@ http://www.cairographics.org/.
VERSIONS:
cairo (1.7.4-136-g5ea2555)
pixman (pixman-0.11.8-17-gf9d3f37)
pixman (pixman-0.11.10-8-g7180230)
***** NOTE FOR VISUAL C++ 6.0 *****

View File

@ -81,6 +81,11 @@ MMX_CFLAGS+=--param inline-unit-growth=10000 --param large-function-growth=10000
endif
endif
endif
# Enable the ARMv6 SIMD fast paths only when targeting ARM and when
# configure detected compiler support (HAVE_ARM_SIMD from autoconf).
ifeq (arm,$(findstring arm,$(OS_TEST)))
ifdef HAVE_ARM_SIMD
USE_ARM=1
endif
endif
endif
@ -120,6 +125,11 @@ CSRCS += pixman-vmx.c
DEFINES += -DUSE_VMX
endif
ifdef USE_ARM
# pixman-arm.c holds the ARMv6 SIMD compositing routines.
CSRCS += pixman-arm.c
# BUG FIX: was "DEFINE += -DUSE_ARM", which appended to an unused
# variable, so USE_ARM was never defined at compile time and the ARM
# fast paths were silently compiled out.  Matches the DEFINES usage
# for USE_VMX above.
DEFINES += -DUSE_ARM
endif
EXPORTS = pixman.h pixman-version.h
LOCAL_INCLUDES += -I$(srcdir) -I$(srcdir)/../../cairo/src

View File

@ -0,0 +1,409 @@
/*
* Copyright © 2008 Mozilla Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Mozilla Corporation makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Jeff Muizelaar (jeff@infidigm.net)
*
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "pixman-arm.h"
/*
 * PIXMAN_OP_ADD fast path for a8 (alpha-only, one byte per pixel)
 * source and destination: dst = saturate(dst + src).
 *
 * Uses the ARMv6 media instruction "uqadd8" (four parallel unsigned
 * saturating byte adds), so the middle loop handles 4 pixels per
 * iteration once dst is word-aligned.  op/pMask/xMask/yMask are
 * unused: this path is ADD with a NULL mask.
 */
void
fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
                                pixman_image_t * pSrc,
                                pixman_image_t * pMask,
                                pixman_image_t * pDst,
                                int16_t      xSrc,
                                int16_t      ySrc,
                                int16_t      xMask,
                                int16_t      yMask,
                                int16_t      xDst,
                                int16_t      yDst,
                                uint16_t     width,
                                uint16_t     height)
{
    uint8_t     *dstLine, *dst;
    uint8_t     *srcLine, *src;
    int         dstStride, srcStride;
    uint16_t    w;
    uint8_t     s, d;

    /* Start pointers and per-row strides (in uint8_t units) for the
     * requested (x, y) origins. */
    fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);

    while (height--)
    {
        dst = dstLine;
        dstLine += dstStride;
        src = srcLine;
        srcLine += srcStride;
        w = width;

        /* Head: one byte at a time until dst reaches 4-byte alignment. */
        while (w && (unsigned long)dst & 3)
        {
            s = *src;
            d = *dst;

            /* NOTE(review): the template names %2 but only two explicit
             * operands are listed; GCC appends the hidden matching input
             * of "+r"(d) after the explicit inputs, so %2 resolves to d's
             * input value and this computes d = sat(s + d).  Confirm this
             * holds on all supported GCC versions. */
            asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));

            *dst = d;
            dst++;
            src++;
            w--;
        }

        /* Body: 4 pixels at a time via a word-wide saturating add.
         * NOTE(review): only dst was aligned above; the 32-bit load of
         * src assumes src shares dst's alignment (or that unaligned
         * word loads are permitted on the target) — verify.  The
         * uint8_t* -> uint32_t* casts also technically violate strict
         * aliasing. */
        while (w >= 4)
        {
            asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst));
            dst += 4;
            src += 4;
            w -= 4;
        }

        /* Tail: remaining 0-3 pixels, same single-byte path as the head. */
        while (w)
        {
            s = *src;
            d = *dst;

            /* Same hidden-matching-operand pattern as in the head loop. */
            asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));

            *dst = d;
            dst++;
            src++;
            w--;
        }
    }
}
/*
 * PIXMAN_OP_OVER fast path for an a8r8g8b8 (or a8b8g8r8) source onto a
 * matching 8888 destination, no mask:
 *
 *     dst = src + dst * (255 - src.alpha) / 255   (per component)
 *
 * implemented with the ARMv6 media instructions uxtb16 / mla /
 * uxtab16 / uqadd8, one 32-bit pixel per loop iteration.
 * op/pMask/xMask/yMask are unused.
 */
void
fbCompositeSrc_8888x8888arm (pixman_op_t op,
                             pixman_image_t * pSrc,
                             pixman_image_t * pMask,
                             pixman_image_t * pDst,
                             int16_t      xSrc,
                             int16_t      ySrc,
                             int16_t      xMask,
                             int16_t      yMask,
                             int16_t      xDst,
                             int16_t      yDst,
                             uint16_t     width,
                             uint16_t     height)
{
    uint32_t    *dstLine, *dst;
    uint32_t    *srcLine, *src;
    int         dstStride, srcStride;
    uint16_t    w;

    /* Constants fed to the asm below:
     *   component_half       -- 0x0080 rounding bias in each 16-bit lane
     *   upper_component_mask -- selects the high byte of each 16-bit lane
     *   alpha_mask           -- 0xff, used to form (255 - alpha)       */
    uint32_t component_half = 0x800080;
    uint32_t upper_component_mask = 0xff00ff00;
    uint32_t alpha_mask = 0xff;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);

    while (height--)
    {
        dst = dstLine;
        dstLine += dstStride;
        src = srcLine;
        srcLine += srcStride;
        w = width;

        /* Define inner_branch to enable the per-pixel zero-alpha skip
         * guarded by #ifdef below (currently disabled). */
//#define inner_branch
        asm volatile (
                        "cmp %[w], #0\n\t"
                        "beq 2f\n\t"
                        "1:\n\t"
                        /* load the next source pixel (post-increments %[src]) */
                        "ldr r5, [%[src]], #4\n\t"
#ifdef inner_branch
                        /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
                         * The 0x0 case also allows us to avoid doing an unnecessary data
                         * write which is more valuable so we only check for that.
                         * NOTE(review): "blt" is a signed compare, so any pixel with
                         * alpha >= 0x80 makes r5 negative and would wrongly take the
                         * skip branch — verify before ever enabling inner_branch. */
                        "cmp r5, #0x1000000\n\t"
                        "blt 3f\n\t"
                        /* r8 = 255 - source alpha */
                        "sub r8, %[alpha_mask], r5, lsr #24\n\t"
                        "ldr r4, [%[dest]] \n\t"
#else
                        /* load the destination pixel */
                        "ldr r4, [%[dest]] \n\t"
                        /* r8 = 255 - source alpha */
                        "sub r8, %[alpha_mask], r5, lsr #24\n\t"
#endif
                        /* split dst into even bytes (r6) and odd bytes (r7),
                         * each zero-extended into 16-bit lanes */
                        "uxtb16 r6, r4\n\t"
                        "uxtb16 r7, r4, ror #8\n\t"
                        /* per lane: x*(255-alpha) + 0x80, then fold in the high
                         * byte (x += x >> 8) — together an exact divide-by-255
                         * with rounding */
                        "mla r6, r6, r8, %[component_half]\n\t"
                        "mla r7, r7, r8, %[component_half]\n\t"
                        "uxtab16 r6, r6, r6, ror #8\n\t"
                        "uxtab16 r7, r7, r7, ror #8\n\t"
                        /* recombine the 0xff00ff00 bytes of r6 and r7 */
                        "and r7, %[upper_component_mask]\n\t"
                        "uxtab16 r6, r7, r6, ror #8\n\t"
                        /* dst' = src + faded dst, saturating per byte */
                        "uqadd8 r5, r6, r5\n\t"
#ifdef inner_branch
                        "3:\n\t"
#endif
                        "str r5, [%[dest]], #4\n\t"
                        /* decrement counter and loop */
                        "subs %[w], %[w], #1\n\t"
                        "bne 1b\n\t"
                        "2:\n\t"
                        : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
                        : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
                          [alpha_mask] "r" (alpha_mask)
                        : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
                        );
    }
}
/*
 * PIXMAN_OP_OVER fast path for an a8r8g8b8 source modulated by a
 * *solid* a8 mask (registered with NEED_SOLID_MASK), onto an 8888
 * destination:
 *
 *     src' = src * mask.alpha / 255
 *     dst  = src' + dst * (255 - src'.alpha) / 255
 *
 * The solid mask alpha is fetched once before the loops; each pixel
 * then needs two multiply/fold rounds (uxtb16 / mla / uxtab16).
 * op/xMask/yMask are unused beyond resolving the solid mask.
 */
void
fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
                               pixman_image_t * pSrc,
                               pixman_image_t * pMask,
                               pixman_image_t * pDst,
                               int16_t      xSrc,
                               int16_t      ySrc,
                               int16_t      xMask,
                               int16_t      yMask,
                               int16_t      xDst,
                               int16_t      yDst,
                               uint16_t     width,
                               uint16_t     height)
{
    uint32_t    *dstLine, *dst;
    uint32_t    *srcLine, *src;
    uint32_t    mask;
    int         dstStride, srcStride;
    uint16_t    w;

    /* 0x0080 rounding bias per 16-bit lane, and 0xff for (255 - alpha). */
    uint32_t component_half = 0x800080;
    uint32_t alpha_mask = 0xff;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);

    /* Solid mask: keep only its alpha byte. */
    fbComposeGetSolid (pMask, mask, pDst->bits.format);
    mask = (mask) >> 24;

    while (height--)
    {
        dst = dstLine;
        dstLine += dstStride;
        src = srcLine;
        srcLine += srcStride;
        w = width;

        /* Define inner_branch to enable the per-pixel zero-alpha skip
         * guarded by #ifdef below (currently disabled). */
//#define inner_branch
        asm volatile (
                        "cmp %[w], #0\n\t"
                        "beq 2f\n\t"
                        "1:\n\t"
                        /* load the next source pixel (post-increments %[src]) */
                        "ldr r5, [%[src]], #4\n\t"
#ifdef inner_branch
                        /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
                         * The 0x0 case also allows us to avoid doing an unnecessary data
                         * write which is more valuable so we only check for that.
                         * NOTE(review): "blt" is a signed compare; alpha >= 0x80 makes
                         * r5 negative and would wrongly take the skip — verify before
                         * enabling inner_branch. */
                        "cmp r5, #0x1000000\n\t"
                        "blt 3f\n\t"
#endif
                        "ldr r4, [%[dest]] \n\t"
                        /* split src into even/odd byte lanes */
                        "uxtb16 r6, r5\n\t"
                        "uxtb16 r7, r5, ror #8\n\t"
                        /* multiply src by the solid mask alpha, +0x80 bias, then
                         * fold in the high byte and extract: divide by 255 with
                         * rounding */
                        "mla r6, r6, %[mask_alpha], %[component_half]\n\t"
                        "mla r7, r7, %[mask_alpha], %[component_half]\n\t"
                        "uxtab16 r6, r6, r6, ror #8\n\t"
                        "uxtab16 r7, r7, r7, ror #8\n\t"
                        "uxtb16 r6, r6, ror #8\n\t"
                        "uxtb16 r7, r7, ror #8\n\t"
                        /* recombine lanes into the masked source pixel (r5) */
                        "orr r5, r6, r7, lsl #8\n\t"
                        /* split dst into even/odd byte lanes */
                        "uxtb16 r6, r4\n\t"
                        "uxtb16 r7, r4, ror #8\n\t"
                        /* r8 = 255 - masked source alpha */
                        "sub r8, %[alpha_mask], r5, lsr #24\n\t"
                        /* dst * (255 - alpha) / 255, rounded, per lane */
                        "mla r6, r6, r8, %[component_half]\n\t"
                        "mla r7, r7, r8, %[component_half]\n\t"
                        "uxtab16 r6, r6, r6, ror #8\n\t"
                        "uxtab16 r7, r7, r7, ror #8\n\t"
                        "uxtb16 r6, r6, ror #8\n\t"
                        "uxtb16 r7, r7, ror #8\n\t"
                        /* recombine lanes */
                        "orr r6, r6, r7, lsl #8\n\t"
                        /* dst' = masked src + faded dst, saturating per byte */
                        "uqadd8 r5, r6, r5\n\t"
#ifdef inner_branch
                        "3:\n\t"
#endif
                        "str r5, [%[dest]], #4\n\t"
                        /* decrement counter and loop */
                        "subs %[w], %[w], #1\n\t"
                        "bne 1b\n\t"
                        "2:\n\t"
                        : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
                        : [component_half] "r" (component_half), [mask_alpha] "r" (mask),
                          [alpha_mask] "r" (alpha_mask)
                        /* NOTE(review): r9 is clobber-listed but never referenced
                         * in the template — appears superfluous; confirm and drop. */
                        : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
                        );
    }
}
/*
 * PIXMAN_OP_OVER fast path for a solid source modulated by a per-pixel
 * a8 mask, onto an 8888 destination:
 *
 *     src' = solid_src * mask / 255
 *     dst  = src' + dst * (255 - src'.alpha) / 255
 *
 * The solid source is split once into src_hi/src_lo 16-bit-lane halves
 * so the inner loop only multiplies by the per-pixel mask byte.
 * Returns early (no-op) when the solid source is fully transparent
 * black (src == 0).  op/xSrc/ySrc are unused beyond resolving the
 * solid source.
 */
void
fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
                                  pixman_image_t * pSrc,
                                  pixman_image_t * pMask,
                                  pixman_image_t * pDst,
                                  int16_t      xSrc,
                                  int16_t      ySrc,
                                  int16_t      xMask,
                                  int16_t      yMask,
                                  int16_t      xDst,
                                  int16_t      yDst,
                                  uint16_t     width,
                                  uint16_t     height)
{
    uint32_t    src, srca;
    uint32_t    *dstLine, *dst;
    uint8_t     *maskLine, *mask;
    int         dstStride, maskStride;
    uint16_t    w;

    fbComposeGetSolid(pSrc, src, pDst->bits.format);

    /* NOTE(review): srca is computed but never used in this function —
     * candidate for removal. */
    srca = src >> 24;
    if (src == 0)
        return;

    /* Pre-split the solid source into odd (src_hi) and even (src_lo)
     * byte lanes, each zero-extended to 16 bits, ready for mla. */
    uint32_t component_mask = 0xff00ff;
    uint32_t component_half = 0x800080;

    uint32_t src_hi = (src >> 8) & component_mask;
    uint32_t src_lo = src & component_mask;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);

    while (height--)
    {
        dst = dstLine;
        dstLine += dstStride;
        mask = maskLine;
        maskLine += maskStride;
        w = width;

        /* Define inner_branch to enable the per-pixel zero-mask skip
         * guarded by #ifdef below (currently disabled). */
//#define inner_branch
        asm volatile (
                        "cmp %[w], #0\n\t"
                        "beq 2f\n\t"
                        "1:\n\t"
                        /* load the next mask byte (post-increments %[mask]) */
                        "ldrb r5, [%[mask]], #1\n\t"
#ifdef inner_branch
                        /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
                         * The 0x0 case also allows us to avoid doing an unnecessary data
                         * write which is more valuable so we only check for that:
                         * a zero mask byte leaves the destination untouched. */
                        "cmp r5, #0x0\n\t"
                        "beq 3f\n\t"
#endif
                        "ldr r4, [%[dest]] \n\t"
                        /* solid src lanes * mask byte (r5), +0x80 bias, then fold
                         * in the high byte and extract: divide by 255 with rounding */
                        "mla r6, %[src_lo], r5, %[component_half]\n\t"
                        "mla r7, %[src_hi], r5, %[component_half]\n\t"
                        "uxtab16 r6, r6, r6, ror #8\n\t"
                        "uxtab16 r7, r7, r7, ror #8\n\t"
                        "uxtb16 r6, r6, ror #8\n\t"
                        "uxtb16 r7, r7, ror #8\n\t"
                        /* recombine lanes into the masked source pixel (r5) */
                        "orr r5, r6, r7, lsl #8\n\t"
                        /* split dst into even/odd byte lanes */
                        "uxtb16 r6, r4\n\t"
                        "uxtb16 r7, r4, ror #8\n\t"
                        /* r8 = 255 - masked source alpha; we could simplify this
                         * to 'sub' if we were willing to give up a register for
                         * an alpha_mask constant */
                        "mvn r8, r5\n\t"
                        "mov r8, r8, lsr #24\n\t"
                        /* dst * (255 - alpha) / 255, rounded, per lane */
                        "mla r6, r6, r8, %[component_half]\n\t"
                        "mla r7, r7, r8, %[component_half]\n\t"
                        "uxtab16 r6, r6, r6, ror #8\n\t"
                        "uxtab16 r7, r7, r7, ror #8\n\t"
                        "uxtb16 r6, r6, ror #8\n\t"
                        "uxtb16 r7, r7, ror #8\n\t"
                        /* recombine lanes */
                        "orr r6, r6, r7, lsl #8\n\t"
                        /* dst' = masked src + faded dst, saturating per byte */
                        "uqadd8 r5, r6, r5\n\t"
#ifdef inner_branch
                        "3:\n\t"
#endif
                        "str r5, [%[dest]], #4\n\t"
                        /* decrement counter and loop */
                        "subs %[w], %[w], #1\n\t"
                        "bne 1b\n\t"
                        "2:\n\t"
                        /* NOTE(review): %[src] is declared as an in-out operand
                         * but never referenced by the template — appears
                         * superfluous; confirm and drop. */
                        : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask)
                        : [component_half] "r" (component_half),
                          [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
                        : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
                        );
    }
}

View File

@ -0,0 +1,94 @@
/*
* Copyright © 2008 Mozilla Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Mozilla Corporation makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Jeff Muizelaar (jeff@infidigm.net)
*
*/
#include "pixman-private.h"
#ifdef USE_ARM
/* USE_ARM is only defined when the build targets ARM with SIMD support,
 * so this is a compile-time constant rather than a runtime CPU-feature
 * probe (unlike the MMX/SSE2 have-functions). */
static inline pixman_bool_t pixman_have_arm(void) { return TRUE; }
#else
#define pixman_have_arm() FALSE
#endif

#ifdef USE_ARM

/* PIXMAN_OP_ADD, a8 + a8 -> a8, saturating per byte (pixman-arm.c). */
void
fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
                                pixman_image_t * pSrc,
                                pixman_image_t * pMask,
                                pixman_image_t * pDst,
                                int16_t      xSrc,
                                int16_t      ySrc,
                                int16_t      xMask,
                                int16_t      yMask,
                                int16_t      xDst,
                                int16_t      yDst,
                                uint16_t     width,
                                uint16_t     height);

/* PIXMAN_OP_OVER, a8r8g8b8/a8b8g8r8 source over an 8888 destination,
 * no mask. */
void
fbCompositeSrc_8888x8888arm (pixman_op_t op,
                             pixman_image_t * pSrc,
                             pixman_image_t * pMask,
                             pixman_image_t * pDst,
                             int16_t      xSrc,
                             int16_t      ySrc,
                             int16_t      xMask,
                             int16_t      yMask,
                             int16_t      xDst,
                             int16_t      yDst,
                             uint16_t     width,
                             uint16_t     height);

/* PIXMAN_OP_OVER, 8888 source modulated by a solid a8 mask
 * (registered with NEED_SOLID_MASK). */
void
fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
                               pixman_image_t * pSrc,
                               pixman_image_t * pMask,
                               pixman_image_t * pDst,
                               int16_t      xSrc,
                               int16_t      ySrc,
                               int16_t      xMask,
                               int16_t      yMask,
                               int16_t      xDst,
                               int16_t      yDst,
                               uint16_t     width,
                               uint16_t     height);

/* PIXMAN_OP_OVER, solid source modulated by a per-pixel a8 mask. */
void
fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
                                  pixman_image_t * pSrc,
                                  pixman_image_t * pMask,
                                  pixman_image_t * pDst,
                                  int16_t      xSrc,
                                  int16_t      ySrc,
                                  int16_t      xMask,
                                  int16_t      yMask,
                                  int16_t      xDst,
                                  int16_t      yDst,
                                  uint16_t     width,
                                  uint16_t     height);

#endif /* USE_ARM */

View File

@ -818,6 +818,10 @@ pixman_image_is_opaque(pixman_image_t *image)
if (image->common.transform)
return FALSE;
/* Gradients do not necessarily cover the entire compositing area */
if (image->type == LINEAR || image->type == CONICAL || image->type == RADIAL)
return FALSE;
}
return TRUE;

View File

@ -34,6 +34,7 @@
#include "pixman-mmx.h"
#include "pixman-vmx.h"
#include "pixman-sse2.h"
#include "pixman-arm.h"
#include "pixman-combine32.h"
#ifdef __GNUC__
@ -758,6 +759,46 @@ fbCompositeSrc_8888x0565 (pixman_op_t op,
}
}
/*
 * SRC operator fast path: copy x8r8g8b8/a8r8g8b8 pixels to r5g6b5 (and
 * the bgr equivalents), converting each 32-bit pixel to 16-bit 565 via
 * cvt8888to0565.  op and the mask arguments are unused: SRC with a
 * NULL mask is a plain format-converting copy.
 */
void
fbCompositeSrc_x888x0565 (pixman_op_t op,
                          pixman_image_t * pSrc,
                          pixman_image_t * pMask,
                          pixman_image_t * pDst,
                          int16_t      xSrc,
                          int16_t      ySrc,
                          int16_t      xMask,
                          int16_t      yMask,
                          int16_t      xDst,
                          int16_t      yDst,
                          uint16_t     width,
                          uint16_t     height)
{
    uint32_t    *srcRow;
    uint16_t    *dstRow;
    int         srcStride, dstStride;

    /* Row start pointers and per-row strides (in element units). */
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcRow, 1);
    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstRow, 1);

    for (; height; height--, srcRow += srcStride, dstRow += dstStride)
    {
        uint32_t *sp = srcRow;
        uint16_t *dp = dstRow;
        uint16_t  n;

        /* Convert one pixel at a time across the row. */
        for (n = width; n; n--)
            WRITE (pDst, dp++, cvt8888to0565 (READ (pSrc, sp++)));
    }
}
void
fbCompositeSrcAdd_8000x8000 (pixman_op_t op,
pixman_image_t * pSrc,
@ -1479,6 +1520,26 @@ static const FastPathInfo vmx_fast_paths[] =
};
#endif
#ifdef USE_ARM
/* Dispatch table mapping (op, src, mask, dst) format combinations to
 * the ARMv6 SIMD routines in pixman-arm.c.  Scanned in order by
 * get_fast_path() and terminated by the PIXMAN_OP_NONE sentinel;
 * NEED_SOLID_MASK entries require the a8 mask to be a solid fill. */
static const FastPathInfo arm_fast_paths[] =
{
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,  PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888arm,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,  PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888arm,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,  PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888arm,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,  PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888arm,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,    PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888arm,    NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,    PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888arm,    NEED_SOLID_MASK },
    { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_null,  PIXMAN_a8,       fbCompositeSrcAdd_8000x8000arm,   0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,    PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888arm, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,    PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888arm, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,    PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888arm, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,    PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888arm, 0 },

    { PIXMAN_OP_NONE },
};
#endif
static const FastPathInfo c_fast_paths[] =
{
@ -1547,6 +1608,10 @@ static const FastPathInfo c_fast_paths[] =
{ PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrcSrc_nxn, 0 },
{ PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrcSrc_nxn, 0 },
#endif
{ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565, 0 },
{ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565, 0 },
{ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565, 0 },
{ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565, 0 },
{ PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcIn_8x8, 0 },
{ PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSolidMaskIn_nx8x8, 0 },
{ PIXMAN_OP_NONE },
@ -1829,6 +1894,12 @@ pixman_image_composite (pixman_op_t op,
if (!info && pixman_have_vmx())
info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
#endif
#ifdef USE_ARM
if (!info && pixman_have_arm())
info = get_fast_path (arm_fast_paths, op, pSrc, pMask, pDst, pixbuf);
#endif
if (!info)
info = get_fast_path (c_fast_paths, op, pSrc, pMask, pDst, pixbuf);

View File

@ -73,6 +73,9 @@ static __m128i MaskRed;
static __m128i MaskGreen;
static __m128i MaskBlue;
static __m128i Mask565FixRB;
static __m128i Mask565FixG;
/* -------------------------------------------------------------------------------------------------
* SSE2 Inlines
*/
@ -89,26 +92,37 @@ unpack_128_2x128 (__m128i data, __m128i* dataLo, __m128i* dataHi)
*dataHi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
}
static inline void
unpack565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3)
static inline __m128i
unpack565to8888 (__m128i lo)
{
__m128i lo, hi;
__m128i r, g, b;
lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
__m128i r, g, b, rb, t;
r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), MaskRed);
g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), MaskGreen);
b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), MaskBlue);
lo = _mm_or_si128 (_mm_or_si128 (r, g), b);
rb = _mm_or_si128 (r, b);
t = _mm_and_si128 (rb, Mask565FixRB);
t = _mm_srli_epi32 (t, 5);
rb = _mm_or_si128 (rb, t);
r = _mm_and_si128 (_mm_slli_epi32 (hi, 8), MaskRed);
g = _mm_and_si128 (_mm_slli_epi32 (hi, 5), MaskGreen);
b = _mm_and_si128 (_mm_slli_epi32 (hi, 3), MaskBlue);
t = _mm_and_si128 (g, Mask565FixG);
t = _mm_srli_epi32 (t, 6);
g = _mm_or_si128 (g, t);
hi = _mm_or_si128 (_mm_or_si128 (r, g), b);
return _mm_or_si128 (rb, g);
}
static inline void
unpack565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3)
{
__m128i lo, hi;
lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
lo = unpack565to8888 (lo);
hi = unpack565to8888 (hi);
unpack_128_2x128 (lo, data0, data1);
unpack_128_2x128 (hi, data2, data3);
@ -244,9 +258,11 @@ invertColors_2x128 (__m128i dataLo, __m128i dataHi, __m128i* invLo, __m128i* inv
static inline void
over_2x128 (__m128i* srcLo, __m128i* srcHi, __m128i* alphaLo, __m128i* alphaHi, __m128i* dstLo, __m128i* dstHi)
{
negate_2x128 (*alphaLo, *alphaHi, alphaLo, alphaHi);
__m128i t1, t2;
pixMultiply_2x128 (dstLo, dstHi, alphaLo, alphaHi, dstLo, dstHi);
negate_2x128 (*alphaLo, *alphaHi, &t1, &t2);
pixMultiply_2x128 (dstLo, dstHi, &t1, &t2, dstLo, dstHi);
*dstLo = _mm_adds_epu8 (*srcLo, *dstLo);
*dstHi = _mm_adds_epu8 (*srcHi, *dstHi);
@ -2295,7 +2311,8 @@ fbComposeSetupSSE2(void)
MaskRed = createMask_2x32_128 (0x00f80000, 0x00f80000);
MaskGreen = createMask_2x32_128 (0x0000fc00, 0x0000fc00);
MaskBlue = createMask_2x32_128 (0x000000f8, 0x000000f8);
Mask565FixRB = createMask_2x32_128 (0x00e000e0, 0x00e000e0);
Mask565FixG = createMask_2x32_128 (0x0000c000, 0x0000c000);
Mask0080 = createMask_16_128 (0x0080);
Mask00ff = createMask_16_128 (0x00ff);
Mask0101 = createMask_16_128 (0x0101);
@ -2482,6 +2499,7 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t op,
while (w && (unsigned long)dst & 15)
{
d = *dst;
*dst++ = pack565_32_16 (pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmmSrc),
_mm_movepi64_pi64 (xmmAlpha),
expand565_16_1x64 (d))));
@ -2496,15 +2514,14 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t op,
/* fill cache line with next memory */
cachePrefetchNext ((__m128i*)dst);
xmmDst = load128Aligned ((__m128i*)dst);
xmmDst = load128Aligned ((__m128i*)dst);
unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst0, &xmmDst1);
over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst2, &xmmDst3);
xmmDst = pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
save128Aligned ((__m128i*)dst, xmmDst);
dst += 8;

View File

@ -602,7 +602,6 @@ ACCESS(fbFetchTransformed)(bits_image_t * pict, int x, int y, int width,
if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
{
fbFetchTransformed_Nearest_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
}
else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
{

View File

@ -32,10 +32,10 @@
#endif
#define PIXMAN_VERSION_MAJOR 0
#define PIXMAN_VERSION_MINOR 11
#define PIXMAN_VERSION_MICRO 9
#define PIXMAN_VERSION_MINOR 12
#define PIXMAN_VERSION_MICRO 0
#define PIXMAN_VERSION_STRING "0.11.9"
#define PIXMAN_VERSION_STRING "0.12.0"
#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
((major) * 10000) \

View File

@ -74,7 +74,7 @@ SOFTWARE.
/*
* Standard integers
*/
#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi)
#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun)
# include <inttypes.h>
#elif defined (_MSC_VER)
typedef __int8 int8_t;