Bug 406580 - "Faster copying of RGB pixel data" (Where Alpha=0xFF, keep RGB values contiguous in pixel copying) [p=swsnyder@insightbb.com (Steve Snyder) r+a1.9=stuart]

This commit is contained in:
reed@reedloden.com 2007-12-21 02:26:31 -08:00
parent 826dc57112
commit 100f2a76b4
4 changed files with 93 additions and 4 deletions

View File

@ -44,6 +44,33 @@
#include "gfxTypes.h"
/**
 * Byte-order reversal helpers.
 *
 *   GFX_BYTESWAP16(x)  swaps the two low-order bytes of x.
 *   GFX_BYTESWAP32(x)  reverses the four bytes of a 32-bit value.
 *
 * On 32-bit x86 builds the compiler's byte-swap intrinsics are used
 * (MSVC _byteswap_ushort/_byteswap_ulong, GNU bswap_16/bswap_32) and
 * _GFX_USE_INTRIN_BYTESWAP_ is defined to record that choice. Every other
 * configuration falls back to a sequence of shift/or operations.
 * 64-bit swapping is also offered by those intrinsics but is not used here.
 *
 * The fallback macros expand their argument more than once, so the argument
 * must be free of side effects; pass unsigned values so the shifts cannot
 * overflow a signed int.
 */
#if defined(_WIN32) && (_MSC_VER >= 1300) && defined(_M_IX86)
# include <stdlib.h>
# pragma intrinsic(_byteswap_ushort,_byteswap_ulong)
# define GFX_BYTESWAP16(x) _byteswap_ushort(x)
# define GFX_BYTESWAP32(x) _byteswap_ulong(x)
# define _GFX_USE_INTRIN_BYTESWAP_
#elif defined(__GNUC__) && (__GNUC__ >= 2) && defined(__i386__) && !defined(XP_OS2)
# include <byteswap.h>
# define GFX_BYTESWAP16(x) bswap_16(x)
# define GFX_BYTESWAP32(x) bswap_32(x)
# define _GFX_USE_INTRIN_BYTESWAP_
#else
# define GFX_BYTESWAP16(x) ( (((x) & 0xff) << 8) | (((x) >> 8) & 0xff) )
/* (x) is parenthesized before ">> 16" so that a compound argument such as
 * "a | b" is shifted as a whole rather than regrouped as "a | (b >> 16)". */
# define GFX_BYTESWAP32(x) ( (GFX_BYTESWAP16((x) & 0xffff) << 16) | GFX_BYTESWAP16((x) >> 16) )
#endif
// Avoid tortured construction of 32-bit ARGB pixel from 3 individual bytes
// of memory plus constant 0xFF. RGB bytes are already contiguous!
// Equivalent to: GFX_PACKED_PIXEL(0xff,r,g,b)
//
// pbptr points at the R byte of an R,G,B triple. The macro performs one
// 32-bit load, so it also reads the byte that follows B (its value is
// discarded). Callers must make sure that fourth byte is readable.
// NOTE(review): the load is done through a PRUint32* cast of a byte pointer,
// so it may be unaligned; confirm every target tolerates that.
//
// The loaded word holds R in its lowest byte on little-endian hosts, so it is
// byte-swapped before the stray byte is shifted out. On big-endian hosts the
// word already has R in its highest byte and must NOT be swapped.
// IS_BIG_ENDIAN is presumably supplied by the NSPR CPU configuration header
// (TODO confirm); the __BYTE_ORDER__ test covers GCC-compatible compilers.
//
// The whole expansion is parenthesized so the macro can be used inside larger
// expressions, and the alpha constant is unsigned so shifting it into bit 31
// is well defined.
#if defined(IS_BIG_ENDIAN) || \
    (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
     (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
# define GFX_0XFF_PPIXEL_FROM_BPTR(pbptr) \
    ( (*((PRUint32 *)(pbptr)) >> 8) | ((PRUint32)0xFF << 24) )
#else
# define GFX_0XFF_PPIXEL_FROM_BPTR(pbptr) \
    ( (GFX_BYTESWAP32(*((PRUint32 *)(pbptr))) >> 8) | ((PRUint32)0xFF << 24) )
#endif
/**
* Fast approximate division by 255. It has the property that
* for all 0 <= n <= 255*255, FAST_DIVIDE_BY_255(n) == n/255.

View File

@ -671,9 +671,33 @@ static void ConvertColormap(PRUint32 *aColormap, PRUint32 aColors)
PRUint32 *to = aColormap + aColors;
// Convert color entries to Cairo format
for (PRUint32 c = aColors; c > 0; c--) {
// set up for loops below
if (!aColors) return;
PRUint32 c = aColors;
// copy 1st pixel as bytes to avoid reading past end of buffer
*--to = GFX_PACKED_PIXEL(0xFF, from[-3], from[-2], from[-1]);
from -= 3; c--;
// bulk copy of pixels.
while (c >= 4) {
PRUint32 p0, p1, p2, p3; // to avoid back-to-back register stalls
from -= 12;
to -= 4;
c -= 4;
p0 = GFX_0XFF_PPIXEL_FROM_BPTR(from+9);
p1 = GFX_0XFF_PPIXEL_FROM_BPTR(from+6);
p2 = GFX_0XFF_PPIXEL_FROM_BPTR(from+3);
p3 = GFX_0XFF_PPIXEL_FROM_BPTR(from+0);
to[3] = p0; to[2] = p1;
to[1] = p2; to[0] = p3;
}
// copy remaining pixel(s)
while (c--) {
from -= 3;
*--to = GFX_PACKED_PIXEL(0xFF, from[0], from[1], from[2]);
*--to = GFX_0XFF_PPIXEL_FROM_BPTR(from);
}
}

View File

@ -714,7 +714,26 @@ nsJPEGDecoder::OutputScanlines()
}
}
for (PRUint32 i=mInfo.output_width; i>0; --i) {
// counter for while() loops below
PRUint32 idx = mInfo.output_width;
// bulk copy of pixels.
while (idx > 4) { // >4 to avoid last 3 bytes in buffer
PRUint32 p0, p1, p2, p3; // to avoid back-to-back register stalls
p0 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+0);
p1 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+3);
p2 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+6);
p3 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+9);
imageRow[0] = p0; imageRow[1] = p1;
imageRow[2] = p2; imageRow[3] = p3;
idx -= 4;
sampleRow += 12;
imageRow += 4;
}
// copy remaining pixel(s)
while (idx--) {
// 32-bit read of final pixel will exceed buffer, so read bytes
*imageRow++ = GFX_PACKED_PIXEL(0xFF, sampleRow[0], sampleRow[1], sampleRow[2]);
sampleRow += 3;
}

View File

@ -732,7 +732,26 @@ row_callback(png_structp png_ptr, png_bytep new_row,
case gfxIFormats::RGB:
case gfxIFormats::BGR:
{
for (PRUint32 x=iwidth; x>0; --x) {
// counter for while() loops below
PRUint32 idx = iwidth;
// bulk copy of pixels.
while (idx > 4) { // >4 to avoid last 3 bytes in buffer
PRUint32 p0, p1, p2, p3; // to avoid back-to-back register stalls
p0 = GFX_0XFF_PPIXEL_FROM_BPTR(line+0);
p1 = GFX_0XFF_PPIXEL_FROM_BPTR(line+3);
p2 = GFX_0XFF_PPIXEL_FROM_BPTR(line+6);
p3 = GFX_0XFF_PPIXEL_FROM_BPTR(line+9);
cptr32[0] = p0; cptr32[1] = p1;
cptr32[2] = p2; cptr32[3] = p3;
idx -= 4;
line += 12;
cptr32 += 4;
}
// copy remaining pixel(s)
while (idx--) {
// 32-bit read of final pixel will exceed buffer, so read bytes
*cptr32++ = GFX_PACKED_PIXEL(0xFF, line[0], line[1], line[2]);
line += 3;
}