From 6e6bf3a10395e654be19ac524f2029055439d156 Mon Sep 17 00:00:00 2001 From: Vladimir Vukicevic Date: Wed, 17 Sep 2008 14:15:01 -0700 Subject: [PATCH] b=451621; push new pixman with arm fast-paths; r=me --- config/autoconf.mk.in | 2 + configure.in | 11 + gfx/cairo/README | 2 +- gfx/cairo/libpixman/src/Makefile.in | 10 + gfx/cairo/libpixman/src/pixman-arm.c | 409 +++++++++++++++++++ gfx/cairo/libpixman/src/pixman-arm.h | 94 +++++ gfx/cairo/libpixman/src/pixman-image.c | 4 + gfx/cairo/libpixman/src/pixman-pict.c | 71 ++++ gfx/cairo/libpixman/src/pixman-sse2.c | 59 ++- gfx/cairo/libpixman/src/pixman-transformed.c | 3 +- gfx/cairo/libpixman/src/pixman-version.h | 6 +- gfx/cairo/libpixman/src/pixman.h | 2 +- 12 files changed, 645 insertions(+), 28 deletions(-) create mode 100644 gfx/cairo/libpixman/src/pixman-arm.c create mode 100644 gfx/cairo/libpixman/src/pixman-arm.h diff --git a/config/autoconf.mk.in b/config/autoconf.mk.in index 4b769d086d5..af214633e4c 100644 --- a/config/autoconf.mk.in +++ b/config/autoconf.mk.in @@ -644,3 +644,5 @@ MOZ_V1_STRING_ABI = @MOZ_V1_STRING_ABI@ MOZ_EMBEDDING_LEVEL_DEFAULT = @MOZ_EMBEDDING_LEVEL_DEFAULT@ MOZ_EMBEDDING_LEVEL_BASIC = @MOZ_EMBEDDING_LEVEL_BASIC@ MOZ_EMBEDDING_LEVEL_MINIMAL = @MOZ_EMBEDDING_LEVEL_MINIMAL@ + +HAVE_ARM_SIMD= @HAVE_ARM_SIMD@ diff --git a/configure.in b/configure.in index e4b5022ec31..e336ff5bbdb 100644 --- a/configure.in +++ b/configure.in @@ -2991,6 +2991,17 @@ dnl NB - later gcc versions require -mmmx for this header to be successfully dnl included (or another option which implies it, such as -march=pentium-mmx) AC_CHECK_HEADERS(mmintrin.h) +AC_MSG_CHECKING(for ARM SIMD support) +AC_TRY_COMPILE([], + [asm("uqadd8 r1, r1, r2");], + result="yes", result="no") +AC_MSG_RESULT("$result") +if test "$result" = "yes"; then + AC_DEFINE(HAVE_ARM_SIMD) + HAVE_ARM_SIMD=1 +fi +AC_SUBST(HAVE_ARM_SIMD) + dnl Check whether the compiler supports the new-style C++ standard dnl library headers (i.e. 
<new>) or needs the old "new.h" AC_LANG_CPLUSPLUS diff --git a/gfx/cairo/README b/gfx/cairo/README index aecb12b147c..47fbcb589b8 100644 --- a/gfx/cairo/README +++ b/gfx/cairo/README @@ -8,7 +8,7 @@ http://www.cairographics.org/. VERSIONS: cairo (1.7.4-136-g5ea2555) - pixman (pixman-0.11.8-17-gf9d3f37) + pixman (pixman-0.11.10-8-g7180230) ***** NOTE FOR VISUAL C++ 6.0 ***** diff --git a/gfx/cairo/libpixman/src/Makefile.in b/gfx/cairo/libpixman/src/Makefile.in index 91d3e87ecc4..2c5e6b3090d 100644 --- a/gfx/cairo/libpixman/src/Makefile.in +++ b/gfx/cairo/libpixman/src/Makefile.in @@ -81,6 +81,11 @@ MMX_CFLAGS+=--param inline-unit-growth=10000 --param large-function-growth=10000 endif endif endif +ifeq (arm,$(findstring arm,$(OS_TEST))) +ifdef HAVE_ARM_SIMD +USE_ARM=1 +endif +endif endif @@ -120,6 +125,11 @@ CSRCS += pixman-vmx.c DEFINES += -DUSE_VMX endif +ifdef USE_ARM +CSRCS += pixman-arm.c +DEFINES += -DUSE_ARM +endif + EXPORTS = pixman.h pixman-version.h LOCAL_INCLUDES += -I$(srcdir) -I$(srcdir)/../../cairo/src diff --git a/gfx/cairo/libpixman/src/pixman-arm.c b/gfx/cairo/libpixman/src/pixman-arm.c new file mode 100644 index 00000000000..77c37ac5bf0 --- /dev/null +++ b/gfx/cairo/libpixman/src/pixman-arm.c @@ -0,0 +1,409 @@ +/* + * Copyright © 2008 Mozilla Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. 
+ * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-arm.h" + +void +fbCompositeSrcAdd_8000x8000arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height) +{ + uint8_t *dstLine, *dst; + uint8_t *srcLine, *src; + int dstStride, srcStride; + uint16_t w; + uint8_t s, d; + + fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); + fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + + while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3))) /* go byte-wise until BOTH pointers are word aligned; the 32-bit loop below reads src as uint32_t too */ + { + s = *src; + d = *dst; + asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s)); + *dst = d; + + dst++; + src++; + w--; + } + + while (w >= 4) + { + asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst)); + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + s = *src; + d = *dst; + asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s)); + *dst = d; + + dst++; + src++; + w--; + } + } + +} + +void +fbCompositeSrc_8888x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height) +{ + 
uint32_t *dstLine, *dst; + uint32_t *srcLine, *src; + int dstStride, srcStride; + uint16_t w; + uint32_t component_half = 0x800080; + uint32_t upper_component_mask = 0xff00ff00; + uint32_t alpha_mask = 0xff; + + fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + +//#define inner_branch + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unnecessary data + * write which is more valuable so we only check for that */ + "cmp r5, #0x1000000\n\t" + "blt 3f\n\t" + + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" + + "ldr r4, [%[dest]] \n\t" + +#else + "ldr r4, [%[dest]] \n\t" + + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" +#endif + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* multiply by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + /* recombine the 0xff00ff00 bytes of r6 and r7 */ + "and r7, %[upper_component_mask]\n\t" + "uxtab16 r6, r7, r6, ror #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* decrement counter and jump to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory" + ); + } +} + +void +fbCompositeSrc_8888x8x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t 
* pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height) +{ + uint32_t *dstLine, *dst; + uint32_t *srcLine, *src; + uint32_t mask; + int dstStride, srcStride; + uint16_t w; + uint32_t component_half = 0x800080; + uint32_t alpha_mask = 0xff; + + fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + + fbComposeGetSolid (pMask, mask, pDst->bits.format); + mask = (mask) >> 24; + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + +//#define inner_branch + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unnecessary data + * write which is more valuable so we only check for that */ + "cmp r5, #0x1000000\n\t" + "blt 3f\n\t" + +#endif + "ldr r4, [%[dest]] \n\t" + + "uxtb16 r6, r5\n\t" + "uxtb16 r7, r5, ror #8\n\t" + + /* multiply src by the mask alpha then by 257 and divide by 65536 */ + "mla r6, r6, %[mask_alpha], %[component_half]\n\t" + "mla r7, r7, %[mask_alpha], %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" + + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" + 
+ "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [mask_alpha] "r" (mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" + ); + } +} + +void +fbCompositeSolidMask_nx8x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height) +{ + uint32_t src, srca; + uint32_t *dstLine, *dst; + uint8_t *maskLine, *mask; + int dstStride, maskStride; + uint16_t w; + + fbComposeGetSolid(pSrc, src, pDst->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + uint32_t component_mask = 0xff00ff; + uint32_t component_half = 0x800080; + + uint32_t src_hi = (src >> 8) & component_mask; + uint32_t src_lo = src & component_mask; + + fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + mask = maskLine; + maskLine += maskStride; + w = width; + +//#define inner_branch + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load mask */ + "ldrb r5, [%[mask]], #1\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. 
+ * The 0x0 case also allows us to avoid doing an unnecessary data + * write which is more valuable so we only check for that */ + /* r5 holds a single mask byte here, so it is compared + * against zero directly */ + "cmp r5, #0x0\n\t" + "beq 3f\n\t" + +#endif + "ldr r4, [%[dest]] \n\t" + + /* multiply src by the mask (r5) then by 257 and divide by 65536 */ + "mla r6, %[src_lo], r5, %[component_half]\n\t" + "mla r7, %[src_hi], r5, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" + + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* we could simplify this to use 'sub' if we were + * willing to give up a register for alpha_mask */ + "mvn r8, r5\n\t" + "mov r8, r8, lsr #24\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* decrement counter and jump to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) + : [component_half] "r" (component_half), + [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory" + ); + } +} diff --git a/gfx/cairo/libpixman/src/pixman-arm.h b/gfx/cairo/libpixman/src/pixman-arm.h new file mode 100644 index 00000000000..258054a10cb --- /dev/null +++ b/gfx/cairo/libpixman/src/pixman-arm.h @@ -0,0 +1,94 @@ +/* + * Copyright © 2008 Mozilla Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is 
hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. 
+ * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ + +#include "pixman-private.h" + +#ifdef USE_ARM + +static inline pixman_bool_t pixman_have_arm(void) { return TRUE; } + +#else +#define pixman_have_arm() FALSE +#endif + +#ifdef USE_ARM + +void +fbCompositeSrcAdd_8000x8000arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height); +void +fbCompositeSrc_8888x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height); + +void +fbCompositeSrc_8888x8x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height); +void +fbCompositeSolidMask_nx8x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height); + + +#endif /* USE_ARM */ diff --git a/gfx/cairo/libpixman/src/pixman-image.c b/gfx/cairo/libpixman/src/pixman-image.c index 487a6728ca7..e80c479357b 100644 --- a/gfx/cairo/libpixman/src/pixman-image.c +++ b/gfx/cairo/libpixman/src/pixman-image.c @@ -818,6 +818,10 @@ pixman_image_is_opaque(pixman_image_t *image) if (image->common.transform) return FALSE; + + /* Gradients do not necessarily cover the entire compositing area */ + if (image->type == LINEAR || image->type == CONICAL || image->type == RADIAL) + return FALSE; } return TRUE; diff --git a/gfx/cairo/libpixman/src/pixman-pict.c b/gfx/cairo/libpixman/src/pixman-pict.c index 193ea287cb3..f9b574e9d06 100644 --- 
a/gfx/cairo/libpixman/src/pixman-pict.c +++ b/gfx/cairo/libpixman/src/pixman-pict.c @@ -34,6 +34,7 @@ #include "pixman-mmx.h" #include "pixman-vmx.h" #include "pixman-sse2.h" +#include "pixman-arm.h" #include "pixman-combine32.h" #ifdef __GNUC__ @@ -758,6 +759,46 @@ fbCompositeSrc_8888x0565 (pixman_op_t op, } } + +void +fbCompositeSrc_x888x0565 (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height) +{ + uint16_t *dstLine, *dst; + uint32_t *srcLine, *src, s; + int dstStride, srcStride; + uint16_t w; + + fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + + while (w--) + { + s = READ(pSrc, src++); + WRITE(pDst, dst, cvt8888to0565(s)); + dst++; + } + } +} + void fbCompositeSrcAdd_8000x8000 (pixman_op_t op, pixman_image_t * pSrc, @@ -1479,6 +1520,26 @@ static const FastPathInfo vmx_fast_paths[] = }; #endif +#ifdef USE_ARM +static const FastPathInfo arm_fast_paths[] = +{ + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888arm, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888arm, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888arm, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888arm, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888arm, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888arm, NEED_SOLID_MASK }, + + { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000arm, 0 }, + + { 
PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888arm, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888arm, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888arm, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888arm, 0 }, + + { PIXMAN_OP_NONE }, +}; +#endif static const FastPathInfo c_fast_paths[] = { @@ -1547,6 +1608,10 @@ static const FastPathInfo c_fast_paths[] = { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrcSrc_nxn, 0 }, { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrcSrc_nxn, 0 }, #endif + { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565, 0 }, { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcIn_8x8, 0 }, { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSolidMaskIn_nx8x8, 0 }, { PIXMAN_OP_NONE }, @@ -1829,6 +1894,12 @@ pixman_image_composite (pixman_op_t op, if (!info && pixman_have_vmx()) info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf); #endif + +#ifdef USE_ARM + if (!info && pixman_have_arm()) + info = get_fast_path (arm_fast_paths, op, pSrc, pMask, pDst, pixbuf); +#endif + if (!info) info = get_fast_path (c_fast_paths, op, pSrc, pMask, pDst, pixbuf); diff --git a/gfx/cairo/libpixman/src/pixman-sse2.c b/gfx/cairo/libpixman/src/pixman-sse2.c index 8a3e4b5967b..0f36436b62b 100644 --- a/gfx/cairo/libpixman/src/pixman-sse2.c +++ b/gfx/cairo/libpixman/src/pixman-sse2.c @@ -73,6 +73,9 @@ static __m128i MaskRed; static __m128i MaskGreen; static __m128i MaskBlue; +static 
__m128i Mask565FixRB; +static __m128i Mask565FixG; + /* ------------------------------------------------------------------------------------------------- * SSE2 Inlines */ @@ -89,26 +92,37 @@ unpack_128_2x128 (__m128i data, __m128i* dataLo, __m128i* dataHi) *dataHi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ()); } -static inline void -unpack565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3) +static inline __m128i +unpack565to8888 (__m128i lo) { - __m128i lo, hi; - __m128i r, g, b; - - lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ()); - hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ()); - + __m128i r, g, b, rb, t; + r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), MaskRed); g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), MaskGreen); b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), MaskBlue); - lo = _mm_or_si128 (_mm_or_si128 (r, g), b); + rb = _mm_or_si128 (r, b); + t = _mm_and_si128 (rb, Mask565FixRB); + t = _mm_srli_epi32 (t, 5); + rb = _mm_or_si128 (rb, t); - r = _mm_and_si128 (_mm_slli_epi32 (hi, 8), MaskRed); - g = _mm_and_si128 (_mm_slli_epi32 (hi, 5), MaskGreen); - b = _mm_and_si128 (_mm_slli_epi32 (hi, 3), MaskBlue); + t = _mm_and_si128 (g, Mask565FixG); + t = _mm_srli_epi32 (t, 6); + g = _mm_or_si128 (g, t); + + return _mm_or_si128 (rb, g); +} - hi = _mm_or_si128 (_mm_or_si128 (r, g), b); +static inline void +unpack565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3) +{ + __m128i lo, hi; + + lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ()); + hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ()); + + lo = unpack565to8888 (lo); + hi = unpack565to8888 (hi); unpack_128_2x128 (lo, data0, data1); unpack_128_2x128 (hi, data2, data3); @@ -244,9 +258,11 @@ invertColors_2x128 (__m128i dataLo, __m128i dataHi, __m128i* invLo, __m128i* inv static inline void over_2x128 (__m128i* srcLo, __m128i* srcHi, __m128i* alphaLo, __m128i* alphaHi, __m128i* dstLo, __m128i* dstHi) { - negate_2x128 
(*alphaLo, *alphaHi, alphaLo, alphaHi); + __m128i t1, t2; - pixMultiply_2x128 (dstLo, dstHi, alphaLo, alphaHi, dstLo, dstHi); + negate_2x128 (*alphaLo, *alphaHi, &t1, &t2); + + pixMultiply_2x128 (dstLo, dstHi, &t1, &t2, dstLo, dstHi); *dstLo = _mm_adds_epu8 (*srcLo, *dstLo); *dstHi = _mm_adds_epu8 (*srcHi, *dstHi); @@ -2295,7 +2311,8 @@ fbComposeSetupSSE2(void) MaskRed = createMask_2x32_128 (0x00f80000, 0x00f80000); MaskGreen = createMask_2x32_128 (0x0000fc00, 0x0000fc00); MaskBlue = createMask_2x32_128 (0x000000f8, 0x000000f8); - + Mask565FixRB = createMask_2x32_128 (0x00e000e0, 0x00e000e0); + Mask565FixG = createMask_2x32_128 (0x0000c000, 0x0000c000); Mask0080 = createMask_16_128 (0x0080); Mask00ff = createMask_16_128 (0x00ff); Mask0101 = createMask_16_128 (0x0101); @@ -2482,6 +2499,7 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t op, while (w && (unsigned long)dst & 15) { d = *dst; + *dst++ = pack565_32_16 (pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmmSrc), _mm_movepi64_pi64 (xmmAlpha), expand565_16_1x64 (d)))); @@ -2496,15 +2514,14 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t op, /* fill cache line with next memory */ cachePrefetchNext ((__m128i*)dst); - xmmDst = load128Aligned ((__m128i*)dst); - - unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); - + xmmDst = load128Aligned ((__m128i*)dst); + + unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); + over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst0, &xmmDst1); over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst2, &xmmDst3); xmmDst = pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); - save128Aligned ((__m128i*)dst, xmmDst); dst += 8; diff --git a/gfx/cairo/libpixman/src/pixman-transformed.c b/gfx/cairo/libpixman/src/pixman-transformed.c index 8dba26c07df..5ad92ae9fe4 100644 --- a/gfx/cairo/libpixman/src/pixman-transformed.c +++ b/gfx/cairo/libpixman/src/pixman-transformed.c @@ -593,7 +593,7 @@ ACCESS(fbFetchTransformed)(bits_image_t * pict, int x, 
int y, int width, /* This allows filtering code to pretend that pixels are located at integer coordinates */ adjust (&v, &unit, -(pixman_fixed_1 / 2)); - + if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST) { /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ @@ -602,7 +602,6 @@ ACCESS(fbFetchTransformed)(bits_image_t * pict, int x, int y, int width, if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) { fbFetchTransformed_Nearest_Normal(pict, width, buffer, mask, maskBits, affine, v, unit); - } else if (pict->common.repeat == PIXMAN_REPEAT_PAD) { diff --git a/gfx/cairo/libpixman/src/pixman-version.h b/gfx/cairo/libpixman/src/pixman-version.h index 8b8dcf9eb16..67d31dd3123 100644 --- a/gfx/cairo/libpixman/src/pixman-version.h +++ b/gfx/cairo/libpixman/src/pixman-version.h @@ -32,10 +32,10 @@ #endif #define PIXMAN_VERSION_MAJOR 0 -#define PIXMAN_VERSION_MINOR 11 -#define PIXMAN_VERSION_MICRO 9 +#define PIXMAN_VERSION_MINOR 12 +#define PIXMAN_VERSION_MICRO 0 -#define PIXMAN_VERSION_STRING "0.11.9" +#define PIXMAN_VERSION_STRING "0.12.0" #define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ ((major) * 10000) \ diff --git a/gfx/cairo/libpixman/src/pixman.h b/gfx/cairo/libpixman/src/pixman.h index 977c0a66467..36d91a900fb 100644 --- a/gfx/cairo/libpixman/src/pixman.h +++ b/gfx/cairo/libpixman/src/pixman.h @@ -74,7 +74,7 @@ SOFTWARE. /* * Standard integers */ -#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) +#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) # include <inttypes.h> #elif defined (_MSC_VER) typedef __int8 int8_t;