Bug 875929 - Generalize the code for materializing double constants. In particular, this new code handles 0x7ff8000000000000, a common NaN. To support this, implement js_bitscan_ctz64 for 32-bit targets. r=evilpies

This commit is contained in:
Dan Gohman 2013-06-04 21:39:13 -07:00
parent 309b7477e9
commit a41c2e0184
2 changed files with 43 additions and 41 deletions

View File

@ -210,27 +210,43 @@ __BitScanReverse32(unsigned int val)
unsigned char _BitScanForward64(unsigned long * Index, unsigned __int64 Mask);
unsigned char _BitScanReverse64(unsigned long * Index, unsigned __int64 Mask);
# pragma intrinsic(_BitScanForward64,_BitScanReverse64)
#endif
/*
 * Index of the lowest set bit of |val| (count of trailing zero bits).
 *
 * On 64-bit MSVC targets this is the _BitScanForward64 intrinsic; its return
 * value is ignored, so the result is only meaningful for a nonzero |val|.
 * On other targets the value is split into two 32-bit words and the 32-bit
 * scan is applied to the lowest word that has a bit set.
 */
__forceinline static int
__BitScanForward64(unsigned __int64 val)
{
#if defined(_M_AMD64) || defined(_M_X64)
    unsigned long bitIndex;
    _BitScanForward64(&bitIndex, val);
    return (int)bitIndex;
#else
    uint32_t lowWord = (uint32_t)val;
    uint32_t highWord = (uint32_t)(val >> 32);
    if (lowWord != 0) {
        return js_bitscan_ctz32(lowWord);
    }
    /* All 32 low bits are clear, so they all count as trailing zeros. */
    return 32 + js_bitscan_ctz32(highWord);
#endif
}
/*
 * Number of leading zero bits in |val|.
 *
 * On 64-bit MSVC targets the _BitScanReverse64 intrinsic yields the index of
 * the highest set bit, which is subtracted from 63 to give the count; the
 * intrinsic's return value is ignored, so the result is only meaningful for a
 * nonzero |val|. On other targets the value is split into two 32-bit words
 * and the 32-bit scan is applied to the highest word that has a bit set.
 */
__forceinline static int
__BitScanReverse64(unsigned __int64 val)
{
#if defined(_M_AMD64) || defined(_M_X64)
    unsigned long topBit;
    _BitScanReverse64(&topBit, val);
    return (int)(63 - topBit);
#else
    uint32_t lowWord = (uint32_t)val;
    uint32_t highWord = (uint32_t)(val >> 32);
    if (highWord != 0) {
        return js_bitscan_clz32(highWord);
    }
    /* All 32 high bits are clear, so they all count as leading zeros. */
    return 32 + js_bitscan_clz32(lowWord);
#endif
}
# define js_bitscan_ctz64(val) __BitScanForward64(val)
# define js_bitscan_clz64(val) __BitScanReverse64(val)
# define JS_HAS_BUILTIN_BITSCAN64
#endif
#elif MOZ_IS_GCC
#if MOZ_GCC_VERSION_AT_LEAST(3, 4, 0)
@ -246,14 +262,16 @@ __BitScanReverse64(unsigned __int64 val)
#endif
#if defined(USE_BUILTIN_CTZ)
JS_STATIC_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));
# define js_bitscan_ctz32(val) __builtin_ctz(val)
# define js_bitscan_clz32(val) __builtin_clz(val)
# define JS_HAS_BUILTIN_BITSCAN32
# if (JS_BYTES_PER_WORD == 8)
# define js_bitscan_ctz64(val) __builtin_ctzll(val)
# define js_bitscan_clz64(val) __builtin_clzll(val)
# define JS_HAS_BUILTIN_BITSCAN64
# endif
JS_STATIC_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t));
# define js_bitscan_ctz64(val) __builtin_ctzll(val)
# define js_bitscan_clz64(val) __builtin_clzll(val)
# define JS_HAS_BUILTIN_BITSCAN64
# undef USE_BUILTIN_CTZ

View File

@ -383,43 +383,27 @@ class MacroAssemblerX86Shared : public Assembler
// Attempts to place the double whose raw 64-bit pattern is |u| into |dest|
// using only xorpd, or pcmpeqw followed by immediate shifts. Returns true
// when such a sequence was emitted and false when |u| fits no handled pattern.
// NOTE(review): this text comes from a diff hunk rendered without +/- markers,
// so lines of the removed switch-based version and of the added bit-scan
// version appear interleaved below (braces do not balance as shown) --
// confirm against the real file before relying on the exact statement order.
bool maybeInlineDouble(uint64_t u, const FloatRegister &dest) {
// This implements parts of "13.4 Generating constants" of
// "2. Optimizing subroutines in assembly language" by Agner Fog.
switch (u) {
case 0x0000000000000000ULL: // 0.0
// "2. Optimizing subroutines in assembly language" by Agner Fog,
// generalized to handle any case that can use a pcmpeqw and
// up to two shifts.
if (u == 0) {
xorpd(dest, dest);
break;
case 0x8000000000000000ULL: // -0.0
pcmpeqw(dest, dest);
psllq(Imm32(63), dest);
break;
case 0x3fe0000000000000ULL: // 0.5
pcmpeqw(dest, dest);
psllq(Imm32(55), dest);
psrlq(Imm32(2), dest);
break;
case 0x3ff0000000000000ULL: // 1.0
pcmpeqw(dest, dest);
psllq(Imm32(54), dest);
psrlq(Imm32(2), dest);
break;
case 0x3ff8000000000000ULL: // 1.5
pcmpeqw(dest, dest);
psllq(Imm32(53), dest);
psrlq(Imm32(2), dest);
break;
case 0x4000000000000000ULL: // 2.0
pcmpeqw(dest, dest);
psllq(Imm32(63), dest);
psrlq(Imm32(1), dest);
break;
case 0xc000000000000000ULL: // -2.0
pcmpeqw(dest, dest);
psllq(Imm32(62), dest);
break;
default:
return false;
return true;
}
return true;
// tz / lz: counts of zero bits below the lowest and above the highest set
// bit of u.
int tz = js_bitscan_ctz64(u);
int lz = js_bitscan_clz64(u);
// The right-hand side is an all-ones word with its low tz bits and high lz
// bits cleared, so the test holds exactly when the set bits of u form one
// contiguous run.
if (u == (~uint64_t(0) << (lz + tz) >> lz)) {
// Comparing dest with itself sets every bit; the shifts below then trim
// the run to match u.
pcmpeqw(dest, dest);
// With tz == 0 the right shift alone clears the top lz bits, so the left
// shift is skipped.
if (tz != 0)
psllq(Imm32(lz + tz), dest);
if (lz != 0)
psrlq(Imm32(lz), dest);
return true;
}
return false;
}
void emitSet(Assembler::Condition cond, const Register &dest,