Bug 413143: trunk is broken (SIGBUS) on SPARC since 20071221. r/sr=pavlov, a=beltzner. Unaligned 32-bit memory reads cause a SIGBUS on SPARC. It turns out that fixing this also gets us a small performance boost on other platforms, because we're now mostly doing aligned 32-bit reads.

This commit is contained in:
jag@tty.nl 2008-01-29 22:22:23 -08:00
parent 197ed48d96
commit 7bbff4292c
4 changed files with 85 additions and 48 deletions

View File

@ -44,7 +44,32 @@
#include "gfxTypes.h"
/* Yields the i-th 32-bit word (as an lvalue) of the buffer starting at pbptr.
 * The pointer is simply reinterpreted as PRUint32*; no alignment check is made. */
#define GFX_UINT32_FROM_BPTR(pbptr,i) (*(((PRUint32*)(pbptr)) + (i)))
/*
 * GFX_NTOHL(x): convert a 32-bit value from network (big-endian) byte order
 * to host byte order.
 *
 * GFX_HAVE_CHEAP_NTOHL is defined alongside it for every implementation
 * except the generic rotate-and-mask fallback.
 */
#ifdef IS_BIG_ENDIAN
  /* Host order already is network order: nothing to do. */
  #define GFX_NTOHL(x) (x)
  #define GFX_HAVE_CHEAP_NTOHL
#elif defined(_WIN32)
  #if !(_MSC_VER >= 1300) // pre-VC7 compilers and MinGW
    // A reasonably fast generic little-endian implementation:
    // rotating by 8 both ways and masking swaps all four bytes.
    #define GFX_NTOHL(x) \
      ( (PR_ROTATE_LEFT32((x),8) & 0x00FF00FF) | \
        (PR_ROTATE_RIGHT32((x),8) & 0xFF00FF00) )
  #else
    /* MSVC 7.0 or newer: use the byte-swap intrinsic. */
    #include <stdlib.h>
    #pragma intrinsic(_byteswap_ulong)
    #define GFX_NTOHL(x) _byteswap_ulong(x)
    #define GFX_HAVE_CHEAP_NTOHL
  #endif
#else
  /* Everything else: rely on the platform's ntohl. */
  #include "prio.h" // for ntohl
  #define GFX_NTOHL(x) ntohl(x)
  #define GFX_HAVE_CHEAP_NTOHL
#endif
/**
* GFX_0XFF_PPIXEL_FROM_BPTR(x)
*
* Avoid tortured construction of 32-bit ARGB pixel from 3 individual bytes
* of memory plus constant 0xFF. RGB bytes are already contiguous!
* Equivalent to: GFX_PACKED_PIXEL(0xff,r,g,b)
@ -52,28 +77,42 @@
* Attempt to use fast byte-swapping instruction(s), e.g. bswap on x86, in
* preference to a sequence of shift/or operations.
*/
#if defined(_WIN32)
#if defined(IS_BIG_ENDIAN)
#define GFX_0XFF_PPIXEL_FROM_BPTR(pbptr) \
( (*((PRUint32 *)(pbptr)) >> 8) | (0xFF << 24) )
#elif (_MSC_VER >= 1300) // also excludes MinGW
#include <stdlib.h>
#pragma intrinsic(_byteswap_ulong)
#define GFX_0XFF_PPIXEL_FROM_BPTR(pbptr) \
( (_byteswap_ulong(*((PRUint32 *)(pbptr))) >> 8) | (0xFF << 24) )
#else
// A reasonably fast generic implementation.
#define GFX_BYTESWAP24FF(x) \
( ((((x) << 16) | ((x) >> 16)) | 0xFF00FF00) & ((x) | 0xFFFF00FF) )
#define GFX_0XFF_PPIXEL_FROM_BPTR(pbptr) \
( GFX_BYTESWAP24FF(*((PRUint32 *)(pbptr))) )
#endif
#if defined(GFX_HAVE_CHEAP_NTOHL)
#define GFX_0XFF_PPIXEL_FROM_UINT32(x) \
( (GFX_NTOHL(x) >> 8) | (0xFF << 24) )
#else
#include "prio.h" // for ntohl
#define GFX_0XFF_PPIXEL_FROM_BPTR(pbptr) \
( (ntohl(*((PRUint32 *)(pbptr))) >> 8) | (0xFF << 24) )
// A reasonably fast generic little-endian implementation.
#define GFX_0XFF_PPIXEL_FROM_UINT32(x) \
( (PR_ROTATE_LEFT32((x),16) | 0xFF00FF00) & ((x) | 0xFFFF00FF) )
#endif
#define GFX_0XFF_PPIXEL_FROM_BPTR(x) \
( GFX_0XFF_PPIXEL_FROM_UINT32(GFX_UINT32_FROM_BPTR((x),0)) )
/**
 * GFX_BLOCK_RGB_TO_FRGB(from,to)
 *   sizeof(*from) == sizeof(char)
 *   sizeof(*to)   == sizeof(PRUint32)
 *
 * Copy 4 pixels at a time, reading blocks of 12 bytes (RGB x4)
 * and writing blocks of 16 bytes (FRGB x4), where F is the constant 0xFF.
 *
 * The 12 source bytes are fetched as three 32-bit words with
 * GFX_UINT32_FROM_BPTR and passed through GFX_NTOHL, giving the words
 * R0G0B0R1, G1B1R2G2 and B2R3G3B3 (first byte in memory most significant).
 * Each output pixel is then assembled with shifts; any stray byte that a
 * shift leaves in the top 8 bits is hidden by OR-ing in 0xFF000000.
 *
 * Usage notes:
 *  - `from` is read with 32-bit loads, so the caller is expected to pass a
 *    pointer that is suitably aligned for PRUint32 access.
 *  - Both arguments are expanded more than once (`from` three times, `to`
 *    four times); do not pass expressions with side effects.
 *  - The macro declares the locals m0..m2, rgbr, gbrg, brgb and p0..p3 in its
 *    own scope; arguments must not refer to variables with those names.
 *  - Arguments may be arbitrary pointer expressions (e.g. `dst + 4`); they
 *    are parenthesized wherever they are expanded.
 */
#define GFX_BLOCK_RGB_TO_FRGB(from,to) \
  PR_BEGIN_MACRO \
    PRUint32 m0 = GFX_UINT32_FROM_BPTR((from),0), \
             m1 = GFX_UINT32_FROM_BPTR((from),1), \
             m2 = GFX_UINT32_FROM_BPTR((from),2), \
             rgbr = GFX_NTOHL(m0), \
             gbrg = GFX_NTOHL(m1), \
             brgb = GFX_NTOHL(m2), \
             p0, p1, p2, p3; \
    p0 = 0xFF000000 | ((rgbr) >> 8); \
    p1 = 0xFF000000 | ((rgbr) << 16) | ((gbrg) >> 16); \
    p2 = 0xFF000000 | ((gbrg) << 8) | ((brgb) >> 24); \
    p3 = 0xFF000000 | (brgb); \
    (to)[0] = p0; (to)[1] = p1; (to)[2] = p2; (to)[3] = p3; \
  PR_END_MACRO
/**
* Fast approximate division by 255. It has the property that
* for all 0 <= n <= 255*255, FAST_DIVIDE_BY_255(n) == n/255.

View File

@ -689,28 +689,26 @@ static void ConvertColormap(PRUint32 *aColormap, PRUint32 aColors)
if (!aColors) return;
PRUint32 c = aColors;
// copy 1st pixel as bytes to avoid reading past end of buffer
*--to = GFX_PACKED_PIXEL(0xFF, from[-3], from[-2], from[-1]);
from -= 3; c--;
// copy as bytes until source pointer is 32-bit-aligned
// NB: can't use 32-bit reads, they might read off the end of the buffer
while ((NS_PTR_TO_UINT32(from) & 0x3) && c--) {
from -= 3;
*--to = GFX_PACKED_PIXEL(0xFF, from[0], from[1], from[2]);
}
// bulk copy of pixels.
while (c >= 4) {
PRUint32 p0, p1, p2, p3; // to avoid back-to-back register stalls
from -= 12;
to -= 4;
c -= 4;
p0 = GFX_0XFF_PPIXEL_FROM_BPTR(from+9);
p1 = GFX_0XFF_PPIXEL_FROM_BPTR(from+6);
p2 = GFX_0XFF_PPIXEL_FROM_BPTR(from+3);
p3 = GFX_0XFF_PPIXEL_FROM_BPTR(from+0);
to[3] = p0; to[2] = p1;
to[1] = p2; to[0] = p3;
GFX_BLOCK_RGB_TO_FRGB(from,to);
}
// copy remaining pixel(s)
// NB: can't use 32-bit reads, they might read off the end of the buffer
while (c--) {
from -= 3;
*--to = GFX_0XFF_PPIXEL_FROM_BPTR(from);
*--to = GFX_PACKED_PIXEL(0xFF, from[0], from[1], from[2]);
}
}

View File

@ -730,15 +730,15 @@ nsJPEGDecoder::OutputScanlines()
// counter for while() loops below
PRUint32 idx = mInfo.output_width;
// bulk copy of pixels.
while (idx > 4) { // >4 to avoid last 3 bytes in buffer
PRUint32 p0, p1, p2, p3; // to avoid back-to-back register stalls
p0 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+0);
p1 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+3);
p2 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+6);
p3 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+9);
imageRow[0] = p0; imageRow[1] = p1;
imageRow[2] = p2; imageRow[3] = p3;
// copy as bytes until source pointer is 32-bit-aligned
while ((NS_PTR_TO_UINT32(sampleRow) & 0x3) && idx--) {
*imageRow++ = GFX_PACKED_PIXEL(0xFF, sampleRow[0], sampleRow[1], sampleRow[2]);
sampleRow += 3;
}
// copy pixels in blocks of 4
while (idx >= 4) {
GFX_BLOCK_RGB_TO_FRGB(sampleRow, imageRow);
idx -= 4;
sampleRow += 12;
imageRow += 4;

View File

@ -735,15 +735,15 @@ row_callback(png_structp png_ptr, png_bytep new_row,
// counter for while() loops below
PRUint32 idx = iwidth;
// bulk copy of pixels.
while (idx > 4) { // >4 to avoid last 3 bytes in buffer
PRUint32 p0, p1, p2, p3; // to avoid back-to-back register stalls
p0 = GFX_0XFF_PPIXEL_FROM_BPTR(line+0);
p1 = GFX_0XFF_PPIXEL_FROM_BPTR(line+3);
p2 = GFX_0XFF_PPIXEL_FROM_BPTR(line+6);
p3 = GFX_0XFF_PPIXEL_FROM_BPTR(line+9);
cptr32[0] = p0; cptr32[1] = p1;
cptr32[2] = p2; cptr32[3] = p3;
// copy as bytes until source pointer is 32-bit-aligned
while ((NS_PTR_TO_UINT32(line) & 0x3) && idx--) {
*cptr32++ = GFX_PACKED_PIXEL(0xFF, line[0], line[1], line[2]);
line += 3;
}
// copy pixels in blocks of 4
while (idx >= 4) {
GFX_BLOCK_RGB_TO_FRGB(line, cptr32);
idx -= 4;
line += 12;
cptr32 += 4;