mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Optimize DoubleToECMAInt32 for ARM. [Bug 551837][r=vlad]
This commit is contained in:
parent
fe3e820003
commit
1b4d13bc74
121
js/src/jsnum.h
121
js/src/jsnum.h
@ -442,6 +442,127 @@ js_DoubleToECMAInt32(jsdouble d)
|
||||
}
|
||||
|
||||
return int32(du.d);
|
||||
#elif defined (__arm__) && defined (__GNUC__)
|
||||
int32_t i;
|
||||
uint32_t tmp0;
|
||||
uint32_t tmp1;
|
||||
uint32_t tmp2;
|
||||
asm (
|
||||
// We use a pure integer solution here. In the 'softfp' ABI, the argument
|
||||
// will start in r0 and r1, and VFP can't do all of the necessary ECMA
|
||||
// conversions by itself so some integer code will be required anyway. A
|
||||
// hybrid solution is faster on A9, but this pure integer solution is
|
||||
// notably faster for A8.
|
||||
|
||||
// %0 is the result register, and may alias either of the %[QR]4 registers.
|
||||
// %Q4 holds the lower part of the mantissa.
|
||||
// %R4 holds the sign, exponent, and the upper part of the mantissa.
|
||||
// %1, %2 and %3 are used as temporary values.
|
||||
|
||||
// Extract the exponent.
|
||||
" mov %1, %R4, LSR #20\n"
|
||||
" bic %1, %1, #(1 << 11)\n" // Clear the sign.
|
||||
|
||||
// Set the implicit top bit of the mantissa. This clobbers a bit of the
|
||||
// exponent, but we have already extracted that.
|
||||
" orr %R4, %R4, #(1 << 20)\n"
|
||||
|
||||
// Special Cases
|
||||
// We should return zero in the following special cases:
|
||||
// - Exponent is 0x000 - 1023: +/-0 or subnormal.
|
||||
// - Exponent is 0x7ff - 1023: +/-INFINITY or NaN
|
||||
// - This case is implicitly handled by the standard code path anyway,
|
||||
// as shifting the mantissa up by the exponent will result in '0'.
|
||||
//
|
||||
// The result is composed of the mantissa, prepended with '1' and
|
||||
// bit-shifted left by the (decoded) exponent. Note that because %R4[20]
|
||||
// is the bit with value '1', %R4 is effectively already shifted (left) by
|
||||
// 20 bits, and %Q4 is already shifted by 52 bits.
|
||||
|
||||
// Adjust the exponent to remove the encoding offset. If the decoded
|
||||
// exponent is negative, quickly bail out with '0' as such values round to
|
||||
// zero anyway. This also catches +/-0 and subnormals.
|
||||
" sub %1, %1, #0xff\n"
|
||||
" subs %1, %1, #0x300\n"
|
||||
" bmi 8f\n"
|
||||
|
||||
// %1 = (decoded) exponent >= 0
|
||||
// %R4 = upper mantissa and sign
|
||||
|
||||
// ---- Lower Mantissa ----
|
||||
" subs %3, %1, #52\n" // Calculate exp-52
|
||||
" bmi 1f\n"
|
||||
|
||||
// Shift %Q4 left by exp-52.
|
||||
// Ensure that we don't overflow ARM's 8-bit shift operand range.
|
||||
// We need to handle anything up to an 11-bit value here as we know that
|
||||
// 52 <= exp <= 1024 (0x400). Any shift beyond 31 bits results in zero
|
||||
// anyway, so as long as we don't touch the bottom 5 bits, we can use
|
||||
// a logical OR to push long shifts into the 32 <= (exp&0xff) <= 255 range.
|
||||
" bic %2, %3, #0xff\n"
|
||||
" orr %3, %3, %2, LSR #3\n"
|
||||
// We can now perform a straight shift, avoiding the need for any
|
||||
// conditional instructions or extra branches.
|
||||
" mov %Q4, %Q4, LSL %3\n"
|
||||
" b 2f\n"
|
||||
"1:\n" // Shift r0 right by 52-exp.
|
||||
// We know that 0 <= exp < 52, and we can shift up to 255 bits so 52-exp
|
||||
// will always be a valid shift and we can skip the range check for this case.
|
||||
" rsb %3, %1, #52\n"
|
||||
" mov %Q4, %Q4, LSR %3\n"
|
||||
|
||||
// %1 = (decoded) exponent
|
||||
// %R4 = upper mantissa and sign
|
||||
// %Q4 = partially-converted integer
|
||||
|
||||
"2:\n"
|
||||
// ---- Upper Mantissa ----
|
||||
// This is much the same as the lower mantissa, with a few different
|
||||
// boundary checks and some masking to hide the exponent & sign bit in the
|
||||
// upper word.
|
||||
// Note that the upper mantissa is pre-shifted by 20 in %R4, but we shift
|
||||
// it left more to remove the sign and exponent so it is effectively
|
||||
// pre-shifted by 31 bits.
|
||||
" subs %3, %1, #31\n" // Calculate exp-31
|
||||
" mov %1, %R4, LSL #11\n" // Re-use %1 as a temporary register.
|
||||
" bmi 3f\n"
|
||||
|
||||
// Shift %R4 left by exp-31.
|
||||
// Avoid overflowing the 8-bit shift range, as before.
|
||||
" bic %2, %3, #0xff\n"
|
||||
" orr %3, %3, %2, LSR #3\n"
|
||||
// Perform the shift.
|
||||
" mov %2, %1, LSL %3\n"
|
||||
" b 4f\n"
|
||||
"3:\n" // Shift r1 right by 31-exp.
|
||||
// We know that 0 <= exp < 31, and we can shift up to 255 bits so 31-exp
|
||||
// will always be a valid shift and we can skip the range check for this case.
|
||||
" rsb %3, %3, #0\n" // Calculate 31-exp from -(exp-31)
|
||||
" mov %2, %1, LSR %3\n" // Thumb-2 can't do "LSR %3" in "orr".
|
||||
|
||||
// %Q4 = partially-converted integer (lower)
|
||||
// %R4 = upper mantissa and sign
|
||||
// %2 = partially-converted integer (upper)
|
||||
|
||||
"4:\n"
|
||||
// Combine the converted parts.
|
||||
" orr %Q4, %Q4, %2\n"
|
||||
// Negate the result if we have to, and move it to %0 in the process. To
|
||||
// avoid conditionals, we can do this by inverting on %R4[31], then adding
|
||||
// %R4[31]>>31.
|
||||
" eor %Q4, %Q4, %R4, ASR #31\n"
|
||||
" add %0, %Q4, %R4, LSR #31\n"
|
||||
" b 9f\n"
|
||||
"8:\n"
|
||||
// +/-INFINITY, +/-0, subnormals, NaNs, and anything else out-of-range that
|
||||
// will result in a conversion of '0'.
|
||||
" mov %0, #0\n"
|
||||
"9:\n"
|
||||
: "=r" (i), "=&r" (tmp0), "=&r" (tmp1), "=&r" (tmp2)
|
||||
: "r" (d)
|
||||
: "cc"
|
||||
);
|
||||
return i;
|
||||
#else
|
||||
int32 i;
|
||||
jsdouble two32, two31;
|
||||
|
Loading…
Reference in New Issue
Block a user