b=451621; push new pixman with arm fast-paths; r=me

This commit is contained in:
Vladimir Vukicevic 2008-09-17 14:15:01 -07:00
parent a84761b588
commit 6e6bf3a103
12 changed files with 645 additions and 28 deletions

View File

@ -644,3 +644,5 @@ MOZ_V1_STRING_ABI = @MOZ_V1_STRING_ABI@
MOZ_EMBEDDING_LEVEL_DEFAULT = @MOZ_EMBEDDING_LEVEL_DEFAULT@
MOZ_EMBEDDING_LEVEL_BASIC = @MOZ_EMBEDDING_LEVEL_BASIC@
MOZ_EMBEDDING_LEVEL_MINIMAL = @MOZ_EMBEDDING_LEVEL_MINIMAL@
HAVE_ARM_SIMD= @HAVE_ARM_SIMD@

View File

@ -2991,6 +2991,17 @@ dnl NB - later gcc versions require -mmmx for this header to be successfully
dnl included (or another option which implies it, such as -march=pentium-mmx)
AC_CHECK_HEADERS(mmintrin.h)
dnl Probe whether the compiler/assembler accepts ARMv6 SIMD ("media")
dnl instructions; uqadd8 is ARMv6+.  On success, define HAVE_ARM_SIMD
dnl for config.h and AC_SUBST it so the makefiles can enable the
dnl pixman ARM fast paths (see gfx/cairo/libpixman).
AC_MSG_CHECKING(for ARM SIMD support)
AC_TRY_COMPILE([],
[asm("uqadd8 r1, r1, r2");],
result="yes", result="no")
AC_MSG_RESULT("$result")
if test "$result" = "yes"; then
    AC_DEFINE(HAVE_ARM_SIMD)
    HAVE_ARM_SIMD=1
fi
AC_SUBST(HAVE_ARM_SIMD)
dnl Check whether the compiler supports the new-style C++ standard
dnl library headers (i.e. <new>) or needs the old "new.h"
AC_LANG_CPLUSPLUS

View File

@ -8,7 +8,7 @@ http://www.cairographics.org/.
VERSIONS:
cairo (1.7.4-136-g5ea2555)
pixman (pixman-0.11.8-17-gf9d3f37)
pixman (pixman-0.11.10-8-g7180230)
***** NOTE FOR VISUAL C++ 6.0 *****

View File

@ -81,6 +81,11 @@ MMX_CFLAGS+=--param inline-unit-growth=10000 --param large-function-growth=10000
endif
endif
endif
# Enable the ARMv6 SIMD fast paths only when targeting ARM and when
# configure detected compiler support (HAVE_ARM_SIMD from autoconf).
ifeq (arm,$(findstring arm,$(OS_TEST)))
ifdef HAVE_ARM_SIMD
USE_ARM=1
endif
endif
endif
@ -120,6 +125,11 @@ CSRCS += pixman-vmx.c
DEFINES += -DUSE_VMX
endif
ifdef USE_ARM
# pixman-arm.c holds the ARMv6 SIMD compositing routines.
CSRCS += pixman-arm.c
# BUG FIX: was "DEFINE += -DUSE_ARM", which appended to an unused
# variable, so USE_ARM was never defined at compile time and the ARM
# fast paths were silently compiled out.  Matches the DEFINES usage
# for USE_VMX above.
DEFINES += -DUSE_ARM
endif
EXPORTS = pixman.h pixman-version.h
LOCAL_INCLUDES += -I$(srcdir) -I$(srcdir)/../../cairo/src

View File

@ -0,0 +1,409 @@
/*
* Copyright © 2008 Mozilla Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Mozilla Corporation makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Jeff Muizelaar (jeff@infidigm.net)
*
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "pixman-arm.h"
/*
 * PIXMAN_OP_ADD fast path for a8 (alpha-only, one byte per pixel)
 * source and destination: dst = saturate(dst + src).
 *
 * Uses the ARMv6 media instruction "uqadd8" (four parallel unsigned
 * saturating byte adds), so the middle loop handles 4 pixels per
 * iteration once dst is word-aligned.  op/pMask/xMask/yMask are
 * unused: this path is ADD with a NULL mask.
 */
void
fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
                                pixman_image_t * pSrc,
                                pixman_image_t * pMask,
                                pixman_image_t * pDst,
                                int16_t      xSrc,
                                int16_t      ySrc,
                                int16_t      xMask,
                                int16_t      yMask,
                                int16_t      xDst,
                                int16_t      yDst,
                                uint16_t     width,
                                uint16_t     height)
{
    uint8_t     *dstLine, *dst;
    uint8_t     *srcLine, *src;
    int         dstStride, srcStride;
    uint16_t    w;
    uint8_t     s, d;

    /* Start pointers and per-row strides (in uint8_t units) for the
     * requested (x, y) origins. */
    fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);

    while (height--)
    {
        dst = dstLine;
        dstLine += dstStride;
        src = srcLine;
        srcLine += srcStride;
        w = width;

        /* Head: one byte at a time until dst reaches 4-byte alignment. */
        while (w && (unsigned long)dst & 3)
        {
            s = *src;
            d = *dst;

            /* NOTE(review): the template names %2 but only two explicit
             * operands are listed; GCC appends the hidden matching input
             * of "+r"(d) after the explicit inputs, so %2 resolves to d's
             * input value and this computes d = sat(s + d).  Confirm this
             * holds on all supported GCC versions. */
            asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));

            *dst = d;
            dst++;
            src++;
            w--;
        }

        /* Body: 4 pixels at a time via a word-wide saturating add.
         * NOTE(review): only dst was aligned above; the 32-bit load of
         * src assumes src shares dst's alignment (or that unaligned
         * word loads are permitted on the target) — verify.  The
         * uint8_t* -> uint32_t* casts also technically violate strict
         * aliasing. */
        while (w >= 4)
        {
            asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst));
            dst += 4;
            src += 4;
            w -= 4;
        }

        /* Tail: remaining 0-3 pixels, same single-byte path as the head. */
        while (w)
        {
            s = *src;
            d = *dst;

            /* Same hidden-matching-operand pattern as in the head loop. */
            asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));

            *dst = d;
            dst++;
            src++;
            w--;
        }
    }
}
/*
 * PIXMAN_OP_OVER fast path for an a8r8g8b8 (or a8b8g8r8) source onto a
 * matching 8888 destination, no mask:
 *
 *     dst = src + dst * (255 - src.alpha) / 255   (per component)
 *
 * implemented with the ARMv6 media instructions uxtb16 / mla /
 * uxtab16 / uqadd8, one 32-bit pixel per loop iteration.
 * op/pMask/xMask/yMask are unused.
 */
void
fbCompositeSrc_8888x8888arm (pixman_op_t op,
                             pixman_image_t * pSrc,
                             pixman_image_t * pMask,
                             pixman_image_t * pDst,
                             int16_t      xSrc,
                             int16_t      ySrc,
                             int16_t      xMask,
                             int16_t      yMask,
                             int16_t      xDst,
                             int16_t      yDst,
                             uint16_t     width,
                             uint16_t     height)
{
    uint32_t    *dstLine, *dst;
    uint32_t    *srcLine, *src;
    int         dstStride, srcStride;
    uint16_t    w;

    /* Constants fed to the asm below:
     *   component_half       -- 0x0080 rounding bias in each 16-bit lane
     *   upper_component_mask -- selects the high byte of each 16-bit lane
     *   alpha_mask           -- 0xff, used to form (255 - alpha)       */
    uint32_t component_half = 0x800080;
    uint32_t upper_component_mask = 0xff00ff00;
    uint32_t alpha_mask = 0xff;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);

    while (height--)
    {
        dst = dstLine;
        dstLine += dstStride;
        src = srcLine;
        srcLine += srcStride;
        w = width;

        /* Define inner_branch to enable the per-pixel zero-alpha skip
         * guarded by #ifdef below (currently disabled). */
//#define inner_branch
        asm volatile (
                        "cmp %[w], #0\n\t"
                        "beq 2f\n\t"
                        "1:\n\t"
                        /* load the next source pixel (post-increments %[src]) */
                        "ldr r5, [%[src]], #4\n\t"
#ifdef inner_branch
                        /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
                         * The 0x0 case also allows us to avoid doing an unnecessary data
                         * write which is more valuable so we only check for that.
                         * NOTE(review): "blt" is a signed compare, so any pixel with
                         * alpha >= 0x80 makes r5 negative and would wrongly take the
                         * skip branch — verify before ever enabling inner_branch. */
                        "cmp r5, #0x1000000\n\t"
                        "blt 3f\n\t"
                        /* r8 = 255 - source alpha */
                        "sub r8, %[alpha_mask], r5, lsr #24\n\t"
                        "ldr r4, [%[dest]] \n\t"
#else
                        /* load the destination pixel */
                        "ldr r4, [%[dest]] \n\t"
                        /* r8 = 255 - source alpha */
                        "sub r8, %[alpha_mask], r5, lsr #24\n\t"
#endif
                        /* split dst into even bytes (r6) and odd bytes (r7),
                         * each zero-extended into 16-bit lanes */
                        "uxtb16 r6, r4\n\t"
                        "uxtb16 r7, r4, ror #8\n\t"
                        /* per lane: x*(255-alpha) + 0x80, then fold in the high
                         * byte (x += x >> 8) — together an exact divide-by-255
                         * with rounding */
                        "mla r6, r6, r8, %[component_half]\n\t"
                        "mla r7, r7, r8, %[component_half]\n\t"
                        "uxtab16 r6, r6, r6, ror #8\n\t"
                        "uxtab16 r7, r7, r7, ror #8\n\t"
                        /* recombine the 0xff00ff00 bytes of r6 and r7 */
                        "and r7, %[upper_component_mask]\n\t"
                        "uxtab16 r6, r7, r6, ror #8\n\t"
                        /* dst' = src + faded dst, saturating per byte */
                        "uqadd8 r5, r6, r5\n\t"
#ifdef inner_branch
                        "3:\n\t"
#endif
                        "str r5, [%[dest]], #4\n\t"
                        /* decrement counter and loop */
                        "subs %[w], %[w], #1\n\t"
                        "bne 1b\n\t"
                        "2:\n\t"
                        : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
                        : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
                          [alpha_mask] "r" (alpha_mask)
                        : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
                        );
    }
}
/*
 * PIXMAN_OP_OVER fast path for an a8r8g8b8 source modulated by a
 * *solid* a8 mask (registered with NEED_SOLID_MASK), onto an 8888
 * destination:
 *
 *     src' = src * mask.alpha / 255
 *     dst  = src' + dst * (255 - src'.alpha) / 255
 *
 * The solid mask alpha is fetched once before the loops; each pixel
 * then needs two multiply/fold rounds (uxtb16 / mla / uxtab16).
 * op/xMask/yMask are unused beyond resolving the solid mask.
 */
void
fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
                               pixman_image_t * pSrc,
                               pixman_image_t * pMask,
                               pixman_image_t * pDst,
                               int16_t      xSrc,
                               int16_t      ySrc,
                               int16_t      xMask,
                               int16_t      yMask,
                               int16_t      xDst,
                               int16_t      yDst,
                               uint16_t     width,
                               uint16_t     height)
{
    uint32_t    *dstLine, *dst;
    uint32_t    *srcLine, *src;
    uint32_t    mask;
    int         dstStride, srcStride;
    uint16_t    w;

    /* 0x0080 rounding bias per 16-bit lane, and 0xff for (255 - alpha). */
    uint32_t component_half = 0x800080;
    uint32_t alpha_mask = 0xff;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);

    /* Solid mask: keep only its alpha byte. */
    fbComposeGetSolid (pMask, mask, pDst->bits.format);
    mask = (mask) >> 24;

    while (height--)
    {
        dst = dstLine;
        dstLine += dstStride;
        src = srcLine;
        srcLine += srcStride;
        w = width;

        /* Define inner_branch to enable the per-pixel zero-alpha skip
         * guarded by #ifdef below (currently disabled). */
//#define inner_branch
        asm volatile (
                        "cmp %[w], #0\n\t"
                        "beq 2f\n\t"
                        "1:\n\t"
                        /* load the next source pixel (post-increments %[src]) */
                        "ldr r5, [%[src]], #4\n\t"
#ifdef inner_branch
                        /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
                         * The 0x0 case also allows us to avoid doing an unnecessary data
                         * write which is more valuable so we only check for that.
                         * NOTE(review): "blt" is a signed compare; alpha >= 0x80 makes
                         * r5 negative and would wrongly take the skip — verify before
                         * enabling inner_branch. */
                        "cmp r5, #0x1000000\n\t"
                        "blt 3f\n\t"
#endif
                        "ldr r4, [%[dest]] \n\t"
                        /* split src into even/odd byte lanes */
                        "uxtb16 r6, r5\n\t"
                        "uxtb16 r7, r5, ror #8\n\t"
                        /* multiply src by the solid mask alpha, +0x80 bias, then
                         * fold in the high byte and extract: divide by 255 with
                         * rounding */
                        "mla r6, r6, %[mask_alpha], %[component_half]\n\t"
                        "mla r7, r7, %[mask_alpha], %[component_half]\n\t"
                        "uxtab16 r6, r6, r6, ror #8\n\t"
                        "uxtab16 r7, r7, r7, ror #8\n\t"
                        "uxtb16 r6, r6, ror #8\n\t"
                        "uxtb16 r7, r7, ror #8\n\t"
                        /* recombine lanes into the masked source pixel (r5) */
                        "orr r5, r6, r7, lsl #8\n\t"
                        /* split dst into even/odd byte lanes */
                        "uxtb16 r6, r4\n\t"
                        "uxtb16 r7, r4, ror #8\n\t"
                        /* r8 = 255 - masked source alpha */
                        "sub r8, %[alpha_mask], r5, lsr #24\n\t"
                        /* dst * (255 - alpha) / 255, rounded, per lane */
                        "mla r6, r6, r8, %[component_half]\n\t"
                        "mla r7, r7, r8, %[component_half]\n\t"
                        "uxtab16 r6, r6, r6, ror #8\n\t"
                        "uxtab16 r7, r7, r7, ror #8\n\t"
                        "uxtb16 r6, r6, ror #8\n\t"
                        "uxtb16 r7, r7, ror #8\n\t"
                        /* recombine lanes */
                        "orr r6, r6, r7, lsl #8\n\t"
                        /* dst' = masked src + faded dst, saturating per byte */
                        "uqadd8 r5, r6, r5\n\t"
#ifdef inner_branch
                        "3:\n\t"
#endif
                        "str r5, [%[dest]], #4\n\t"
                        /* decrement counter and loop */
                        "subs %[w], %[w], #1\n\t"
                        "bne 1b\n\t"
                        "2:\n\t"
                        : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
                        : [component_half] "r" (component_half), [mask_alpha] "r" (mask),
                          [alpha_mask] "r" (alpha_mask)
                        /* NOTE(review): r9 is clobber-listed but never referenced
                         * in the template — appears superfluous; confirm and drop. */
                        : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
                        );
    }
}
/*
 * PIXMAN_OP_OVER fast path for a solid source modulated by a per-pixel
 * a8 mask, onto an 8888 destination:
 *
 *     src' = solid_src * mask / 255
 *     dst  = src' + dst * (255 - src'.alpha) / 255
 *
 * The solid source is split once into src_hi/src_lo 16-bit-lane halves
 * so the inner loop only multiplies by the per-pixel mask byte.
 * Returns early (no-op) when the solid source is fully transparent
 * black (src == 0).  op/xSrc/ySrc are unused beyond resolving the
 * solid source.
 */
void
fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
                                  pixman_image_t * pSrc,
                                  pixman_image_t * pMask,
                                  pixman_image_t * pDst,
                                  int16_t      xSrc,
                                  int16_t      ySrc,
                                  int16_t      xMask,
                                  int16_t      yMask,
                                  int16_t      xDst,
                                  int16_t      yDst,
                                  uint16_t     width,
                                  uint16_t     height)
{
    uint32_t    src, srca;
    uint32_t    *dstLine, *dst;
    uint8_t     *maskLine, *mask;
    int         dstStride, maskStride;
    uint16_t    w;

    fbComposeGetSolid(pSrc, src, pDst->bits.format);

    /* NOTE(review): srca is computed but never used in this function —
     * candidate for removal. */
    srca = src >> 24;
    if (src == 0)
        return;

    /* Pre-split the solid source into odd (src_hi) and even (src_lo)
     * byte lanes, each zero-extended to 16 bits, ready for mla. */
    uint32_t component_mask = 0xff00ff;
    uint32_t component_half = 0x800080;

    uint32_t src_hi = (src >> 8) & component_mask;
    uint32_t src_lo = src & component_mask;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);

    while (height--)
    {
        dst = dstLine;
        dstLine += dstStride;
        mask = maskLine;
        maskLine += maskStride;
        w = width;

        /* Define inner_branch to enable the per-pixel zero-mask skip
         * guarded by #ifdef below (currently disabled). */
//#define inner_branch
        asm volatile (
                        "cmp %[w], #0\n\t"
                        "beq 2f\n\t"
                        "1:\n\t"
                        /* load the next mask byte (post-increments %[mask]) */
                        "ldrb r5, [%[mask]], #1\n\t"
#ifdef inner_branch
                        /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
                         * The 0x0 case also allows us to avoid doing an unnecessary data
                         * write which is more valuable so we only check for that:
                         * a zero mask byte leaves the destination untouched. */
                        "cmp r5, #0x0\n\t"
                        "beq 3f\n\t"
#endif
                        "ldr r4, [%[dest]] \n\t"
                        /* solid src lanes * mask byte (r5), +0x80 bias, then fold
                         * in the high byte and extract: divide by 255 with rounding */
                        "mla r6, %[src_lo], r5, %[component_half]\n\t"
                        "mla r7, %[src_hi], r5, %[component_half]\n\t"
                        "uxtab16 r6, r6, r6, ror #8\n\t"
                        "uxtab16 r7, r7, r7, ror #8\n\t"
                        "uxtb16 r6, r6, ror #8\n\t"
                        "uxtb16 r7, r7, ror #8\n\t"
                        /* recombine lanes into the masked source pixel (r5) */
                        "orr r5, r6, r7, lsl #8\n\t"
                        /* split dst into even/odd byte lanes */
                        "uxtb16 r6, r4\n\t"
                        "uxtb16 r7, r4, ror #8\n\t"
                        /* r8 = 255 - masked source alpha; we could simplify this
                         * to 'sub' if we were willing to give up a register for
                         * an alpha_mask constant */
                        "mvn r8, r5\n\t"
                        "mov r8, r8, lsr #24\n\t"
                        /* dst * (255 - alpha) / 255, rounded, per lane */
                        "mla r6, r6, r8, %[component_half]\n\t"
                        "mla r7, r7, r8, %[component_half]\n\t"
                        "uxtab16 r6, r6, r6, ror #8\n\t"
                        "uxtab16 r7, r7, r7, ror #8\n\t"
                        "uxtb16 r6, r6, ror #8\n\t"
                        "uxtb16 r7, r7, ror #8\n\t"
                        /* recombine lanes */
                        "orr r6, r6, r7, lsl #8\n\t"
                        /* dst' = masked src + faded dst, saturating per byte */
                        "uqadd8 r5, r6, r5\n\t"
#ifdef inner_branch
                        "3:\n\t"
#endif
                        "str r5, [%[dest]], #4\n\t"
                        /* decrement counter and loop */
                        "subs %[w], %[w], #1\n\t"
                        "bne 1b\n\t"
                        "2:\n\t"
                        /* NOTE(review): %[src] is declared as an in-out operand
                         * but never referenced by the template — appears
                         * superfluous; confirm and drop. */
                        : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask)
                        : [component_half] "r" (component_half),
                          [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
                        : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
                        );
    }
}

View File

@ -0,0 +1,94 @@
/*
* Copyright © 2008 Mozilla Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Mozilla Corporation makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Jeff Muizelaar (jeff@infidigm.net)
*
*/
#include "pixman-private.h"
#ifdef USE_ARM
/* USE_ARM is only defined when the build targets ARM with SIMD support,
 * so this is a compile-time constant rather than a runtime CPU-feature
 * probe (unlike the MMX/SSE2 have-functions). */
static inline pixman_bool_t pixman_have_arm(void) { return TRUE; }
#else
#define pixman_have_arm() FALSE
#endif

#ifdef USE_ARM

/* PIXMAN_OP_ADD, a8 + a8 -> a8, saturating per byte (pixman-arm.c). */
void
fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
                                pixman_image_t * pSrc,
                                pixman_image_t * pMask,
                                pixman_image_t * pDst,
                                int16_t      xSrc,
                                int16_t      ySrc,
                                int16_t      xMask,
                                int16_t      yMask,
                                int16_t      xDst,
                                int16_t      yDst,
                                uint16_t     width,
                                uint16_t     height);

/* PIXMAN_OP_OVER, a8r8g8b8/a8b8g8r8 source over an 8888 destination,
 * no mask. */
void
fbCompositeSrc_8888x8888arm (pixman_op_t op,
                             pixman_image_t * pSrc,
                             pixman_image_t * pMask,
                             pixman_image_t * pDst,
                             int16_t      xSrc,
                             int16_t      ySrc,
                             int16_t      xMask,
                             int16_t      yMask,
                             int16_t      xDst,
                             int16_t      yDst,
                             uint16_t     width,
                             uint16_t     height);

/* PIXMAN_OP_OVER, 8888 source modulated by a solid a8 mask
 * (registered with NEED_SOLID_MASK). */
void
fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
                               pixman_image_t * pSrc,
                               pixman_image_t * pMask,
                               pixman_image_t * pDst,
                               int16_t      xSrc,
                               int16_t      ySrc,
                               int16_t      xMask,
                               int16_t      yMask,
                               int16_t      xDst,
                               int16_t      yDst,
                               uint16_t     width,
                               uint16_t     height);

/* PIXMAN_OP_OVER, solid source modulated by a per-pixel a8 mask. */
void
fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
                                  pixman_image_t * pSrc,
                                  pixman_image_t * pMask,
                                  pixman_image_t * pDst,
                                  int16_t      xSrc,
                                  int16_t      ySrc,
                                  int16_t      xMask,
                                  int16_t      yMask,
                                  int16_t      xDst,
                                  int16_t      yDst,
                                  uint16_t     width,
                                  uint16_t     height);

#endif /* USE_ARM */

View File

@ -818,6 +818,10 @@ pixman_image_is_opaque(pixman_image_t *image)
if (image->common.transform)
return FALSE;
/* Gradients do not necessarily cover the entire compositing area */
if (image->type == LINEAR || image->type == CONICAL || image->type == RADIAL)
return FALSE;
}
return TRUE;

View File

@ -34,6 +34,7 @@
#include "pixman-mmx.h"
#include "pixman-vmx.h"
#include "pixman-sse2.h"
#include "pixman-arm.h"
#include "pixman-combine32.h"
#ifdef __GNUC__
@ -758,6 +759,46 @@ fbCompositeSrc_8888x0565 (pixman_op_t op,
}
}
/*
 * SRC operator fast path: copy x8r8g8b8/a8r8g8b8 pixels to r5g6b5 (and
 * the bgr equivalents), converting each 32-bit pixel to 16-bit 565 via
 * cvt8888to0565.  op and the mask arguments are unused: SRC with a
 * NULL mask is a plain format-converting copy.
 */
void
fbCompositeSrc_x888x0565 (pixman_op_t op,
                          pixman_image_t * pSrc,
                          pixman_image_t * pMask,
                          pixman_image_t * pDst,
                          int16_t      xSrc,
                          int16_t      ySrc,
                          int16_t      xMask,
                          int16_t      yMask,
                          int16_t      xDst,
                          int16_t      yDst,
                          uint16_t     width,
                          uint16_t     height)
{
    uint32_t    *srcRow;
    uint16_t    *dstRow;
    int         srcStride, dstStride;

    /* Row start pointers and per-row strides (in element units). */
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcRow, 1);
    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstRow, 1);

    for (; height; height--, srcRow += srcStride, dstRow += dstStride)
    {
        uint32_t *sp = srcRow;
        uint16_t *dp = dstRow;
        uint16_t  n;

        /* Convert one pixel at a time across the row. */
        for (n = width; n; n--)
            WRITE (pDst, dp++, cvt8888to0565 (READ (pSrc, sp++)));
    }
}
void
fbCompositeSrcAdd_8000x8000 (pixman_op_t op,
pixman_image_t * pSrc,
@ -1479,6 +1520,26 @@ static const FastPathInfo vmx_fast_paths[] =
};
#endif
#ifdef USE_ARM
/* Dispatch table mapping (op, src, mask, dst) format combinations to
 * the ARMv6 SIMD routines in pixman-arm.c.  Scanned in order by
 * get_fast_path() and terminated by the PIXMAN_OP_NONE sentinel;
 * NEED_SOLID_MASK entries require the a8 mask to be a solid fill. */
static const FastPathInfo arm_fast_paths[] =
{
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,  PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888arm,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,  PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888arm,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,  PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888arm,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,  PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888arm,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,    PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888arm,    NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,    PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888arm,    NEED_SOLID_MASK },
    { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_null,  PIXMAN_a8,       fbCompositeSrcAdd_8000x8000arm,   0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,    PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888arm, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,    PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888arm, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,    PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888arm, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,    PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888arm, 0 },

    { PIXMAN_OP_NONE },
};
#endif
static const FastPathInfo c_fast_paths[] =
{
@ -1547,6 +1608,10 @@ static const FastPathInfo c_fast_paths[] =
{ PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrcSrc_nxn, 0 },
{ PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrcSrc_nxn, 0 },
#endif
{ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565, 0 },
{ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565, 0 },
{ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565, 0 },
{ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565, 0 },
{ PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcIn_8x8, 0 },
{ PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSolidMaskIn_nx8x8, 0 },
{ PIXMAN_OP_NONE },
@ -1829,6 +1894,12 @@ pixman_image_composite (pixman_op_t op,
if (!info && pixman_have_vmx())
info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
#endif
#ifdef USE_ARM
if (!info && pixman_have_arm())
info = get_fast_path (arm_fast_paths, op, pSrc, pMask, pDst, pixbuf);
#endif
if (!info)
info = get_fast_path (c_fast_paths, op, pSrc, pMask, pDst, pixbuf);

View File

@ -73,6 +73,9 @@ static __m128i MaskRed;
static __m128i MaskGreen;
static __m128i MaskBlue;
static __m128i Mask565FixRB;
static __m128i Mask565FixG;
/* -------------------------------------------------------------------------------------------------
* SSE2 Inlines
*/
@ -89,26 +92,37 @@ unpack_128_2x128 (__m128i data, __m128i* dataLo, __m128i* dataHi)
*dataHi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
}
static inline void
unpack565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3)
static inline __m128i
unpack565to8888 (__m128i lo)
{
__m128i lo, hi;
__m128i r, g, b;
lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
__m128i r, g, b, rb, t;
r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), MaskRed);
g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), MaskGreen);
b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), MaskBlue);
lo = _mm_or_si128 (_mm_or_si128 (r, g), b);
rb = _mm_or_si128 (r, b);
t = _mm_and_si128 (rb, Mask565FixRB);
t = _mm_srli_epi32 (t, 5);
rb = _mm_or_si128 (rb, t);
r = _mm_and_si128 (_mm_slli_epi32 (hi, 8), MaskRed);
g = _mm_and_si128 (_mm_slli_epi32 (hi, 5), MaskGreen);
b = _mm_and_si128 (_mm_slli_epi32 (hi, 3), MaskBlue);
t = _mm_and_si128 (g, Mask565FixG);
t = _mm_srli_epi32 (t, 6);
g = _mm_or_si128 (g, t);
hi = _mm_or_si128 (_mm_or_si128 (r, g), b);
return _mm_or_si128 (rb, g);
}
static inline void
unpack565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3)
{
__m128i lo, hi;
lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
lo = unpack565to8888 (lo);
hi = unpack565to8888 (hi);
unpack_128_2x128 (lo, data0, data1);
unpack_128_2x128 (hi, data2, data3);
@ -244,9 +258,11 @@ invertColors_2x128 (__m128i dataLo, __m128i dataHi, __m128i* invLo, __m128i* inv
static inline void
over_2x128 (__m128i* srcLo, __m128i* srcHi, __m128i* alphaLo, __m128i* alphaHi, __m128i* dstLo, __m128i* dstHi)
{
negate_2x128 (*alphaLo, *alphaHi, alphaLo, alphaHi);
__m128i t1, t2;
pixMultiply_2x128 (dstLo, dstHi, alphaLo, alphaHi, dstLo, dstHi);
negate_2x128 (*alphaLo, *alphaHi, &t1, &t2);
pixMultiply_2x128 (dstLo, dstHi, &t1, &t2, dstLo, dstHi);
*dstLo = _mm_adds_epu8 (*srcLo, *dstLo);
*dstHi = _mm_adds_epu8 (*srcHi, *dstHi);
@ -2295,7 +2311,8 @@ fbComposeSetupSSE2(void)
MaskRed = createMask_2x32_128 (0x00f80000, 0x00f80000);
MaskGreen = createMask_2x32_128 (0x0000fc00, 0x0000fc00);
MaskBlue = createMask_2x32_128 (0x000000f8, 0x000000f8);
Mask565FixRB = createMask_2x32_128 (0x00e000e0, 0x00e000e0);
Mask565FixG = createMask_2x32_128 (0x0000c000, 0x0000c000);
Mask0080 = createMask_16_128 (0x0080);
Mask00ff = createMask_16_128 (0x00ff);
Mask0101 = createMask_16_128 (0x0101);
@ -2482,6 +2499,7 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t op,
while (w && (unsigned long)dst & 15)
{
d = *dst;
*dst++ = pack565_32_16 (pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmmSrc),
_mm_movepi64_pi64 (xmmAlpha),
expand565_16_1x64 (d))));
@ -2496,15 +2514,14 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t op,
/* fill cache line with next memory */
cachePrefetchNext ((__m128i*)dst);
xmmDst = load128Aligned ((__m128i*)dst);
xmmDst = load128Aligned ((__m128i*)dst);
unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst0, &xmmDst1);
over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst2, &xmmDst3);
xmmDst = pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
save128Aligned ((__m128i*)dst, xmmDst);
dst += 8;

View File

@ -602,7 +602,6 @@ ACCESS(fbFetchTransformed)(bits_image_t * pict, int x, int y, int width,
if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
{
fbFetchTransformed_Nearest_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
}
else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
{

View File

@ -32,10 +32,10 @@
#endif
#define PIXMAN_VERSION_MAJOR 0
#define PIXMAN_VERSION_MINOR 11
#define PIXMAN_VERSION_MICRO 9
#define PIXMAN_VERSION_MINOR 12
#define PIXMAN_VERSION_MICRO 0
#define PIXMAN_VERSION_STRING "0.11.9"
#define PIXMAN_VERSION_STRING "0.12.0"
#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
((major) * 10000) \

View File

@ -74,7 +74,7 @@ SOFTWARE.
/*
* Standard integers
*/
#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi)
#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun)
# include <inttypes.h>
#elif defined (_MSC_VER)
typedef __int8 int8_t;