gecko/gfx/ycbcr/bug572034_mac_64bit.patch

145 lines
3.9 KiB
Diff

diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -250,18 +250,18 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768]
// AMD64 ABI uses register paremters.
void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
const uint8* u_buf, // rsi
const uint8* v_buf, // rdx
uint8* rgb_buf, // rcx
int width) { // r8
asm(
- "jmp convertend\n"
-"convertloop:"
+ "jmp 1f\n"
+"0:"
"movzb (%1),%%r10\n"
"add $0x1,%1\n"
"movzb (%2),%%r11\n"
"add $0x1,%2\n"
"movq 2048(%5,%%r10,8),%%xmm0\n"
"movzb (%0),%%r10\n"
"movq 4096(%5,%%r11,8),%%xmm1\n"
"movzb 0x1(%0),%%r11\n"
@@ -271,36 +271,36 @@ void FastConvertYUVToRGB32Row(const uint
"movq (%5,%%r11,8),%%xmm3\n"
"paddsw %%xmm0,%%xmm2\n"
"paddsw %%xmm0,%%xmm3\n"
"shufps $0x44,%%xmm3,%%xmm2\n"
"psraw $0x6,%%xmm2\n"
"packuswb %%xmm2,%%xmm2\n"
"movq %%xmm2,0x0(%3)\n"
"add $0x8,%3\n"
-"convertend:"
+"1:"
"sub $0x2,%4\n"
- "jns convertloop\n"
+ "jns 0b\n"
-"convertnext:"
+"2:"
"add $0x1,%4\n"
- "js convertdone\n"
+ "js 3f\n"
"movzb (%1),%%r10\n"
"movq 2048(%5,%%r10,8),%%xmm0\n"
"movzb (%2),%%r10\n"
"movq 4096(%5,%%r10,8),%%xmm1\n"
"paddsw %%xmm1,%%xmm0\n"
"movzb (%0),%%r10\n"
"movq (%5,%%r10,8),%%xmm1\n"
"paddsw %%xmm0,%%xmm1\n"
"psraw $0x6,%%xmm1\n"
"packuswb %%xmm1,%%xmm1\n"
"movd %%xmm1,0x0(%3)\n"
-"convertdone:"
+"3:"
:
: "r"(y_buf), // %0
"r"(u_buf), // %1
"r"(v_buf), // %2
"r"(rgb_buf), // %3
"r"(width), // %4
"r" (kCoefficientsRgbY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
@@ -309,28 +309,35 @@ void FastConvertYUVToRGB32Row(const uint
#else
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
+
+// It's necessary to specify the correct section for the following code,
+// otherwise it will be placed in whatever the current section is as this unit
+// is compiled. Because GCC remembers the last section it emitted, we must
+// also revert to the previous section state at the end of the asm block.
asm(
+ ".section .text\n"
".global FastConvertYUVToRGB32Row\n"
+ ".type FastConvertYUVToRGB32Row, @function\n"
"FastConvertYUVToRGB32Row:\n"
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
- "jmp convertend\n"
+ "jmp 1f\n"
-"convertloop:"
+"0:"
"movzbl (%edi),%eax\n"
"add $0x1,%edi\n"
"movzbl (%esi),%ebx\n"
"add $0x1,%esi\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
"movzbl 0x1(%edx),%ebx\n"
@@ -339,34 +346,35 @@ void FastConvertYUVToRGB32Row(const uint
"movq kCoefficientsRgbY(,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
-"convertend:"
+"1:"
"sub $0x2,%ecx\n"
- "jns convertloop\n"
+ "jns 0b\n"
"and $0x1,%ecx\n"
- "je convertdone\n"
+ "je 2f\n"
"movzbl (%edi),%eax\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%esi),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
-"convertdone:"
+"2:"
"popa\n"
"ret\n"
+ ".previous\n"
);
#endif
#endif // ARCH_CPU_ARM_FAMILY
} // extern "C"