diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp index eec578d..de91f79 100644 --- a/gfx/ycbcr/yuv_convert.cpp +++ b/gfx/ycbcr/yuv_convert.cpp @@ -81,133 +81,5 @@ void ConvertYCbCrToRGB32(const uint8* y_buf, EMMS(); } -// Scale a frame of YUV to 32 bit ARGB. -void ScaleYCbCrToRGB32(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int height, - int scaled_width, - int scaled_height, - int y_pitch, - int uv_pitch, - int rgb_pitch, - YUVType yuv_type, - Rotate view_rotate) { - unsigned int y_shift = yuv_type; - bool has_mmx = supports_mmx(); - // Diagram showing origin and direction of source sampling. - // ->0 4<- - // 7 3 - // - // 6 5 - // ->1 2<- - // Rotations that start at right side of image. - if ((view_rotate == ROTATE_180) || - (view_rotate == ROTATE_270) || - (view_rotate == MIRROR_ROTATE_0) || - (view_rotate == MIRROR_ROTATE_90)) { - y_buf += width - 1; - u_buf += width / 2 - 1; - v_buf += width / 2 - 1; - width = -width; - } - // Rotations that start at bottom of image. - if ((view_rotate == ROTATE_90) || - (view_rotate == ROTATE_180) || - (view_rotate == MIRROR_ROTATE_90) || - (view_rotate == MIRROR_ROTATE_180)) { - y_buf += (height - 1) * y_pitch; - u_buf += ((height >> y_shift) - 1) * uv_pitch; - v_buf += ((height >> y_shift) - 1) * uv_pitch; - height = -height; - } - - // Handle zero sized destination. - if (scaled_width == 0 || scaled_height == 0) - return; - int scaled_dx = width * 16 / scaled_width; - int scaled_dy = height * 16 / scaled_height; - - int scaled_dx_uv = scaled_dx; - - if ((view_rotate == ROTATE_90) || - (view_rotate == ROTATE_270)) { - int tmp = scaled_height; - scaled_height = scaled_width; - scaled_width = tmp; - tmp = height; - height = width; - width = tmp; - int original_dx = scaled_dx; - int original_dy = scaled_dy; - scaled_dx = ((original_dy >> 4) * y_pitch) << 4; - scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4; - scaled_dy = original_dx; - if (view_rotate == ROTATE_90) { - y_pitch = -1; - uv_pitch = -1; - height = -height; - } else { - y_pitch = 1; - uv_pitch = 1; - } - } - - for (int y = 0; y < scaled_height; ++y) { - uint8* dest_pixel = rgb_buf + y * rgb_pitch; - int scaled_y = (y * height / scaled_height); - const uint8* y_ptr = y_buf + scaled_y * y_pitch; - const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch; - const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch; - -#if defined(_MSC_VER) - if (scaled_width == (width * 2)) { - DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, scaled_width); - } else if ((scaled_dx & 15) == 0) { // Scaling by integer scale factor. - if (scaled_dx_uv == scaled_dx) { // Not rotated. - if (scaled_dx == 16) { // Not scaled - if (has_mmx) - FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, scaled_width); - else - FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr, - dest_pixel, scaled_width); - } else { // Simple scale down. ie half - ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, scaled_width, scaled_dx >> 4); - } - } else { - RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, scaled_width, - scaled_dx >> 4, scaled_dx_uv >> 4); - } -#else - if (scaled_dx == 16) { // Not scaled - if (has_mmx) - FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, scaled_width); - else - FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr, - dest_pixel, scaled_width); -#endif - } else { - if (has_mmx) - ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, scaled_width, scaled_dx); - else - ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr, - dest_pixel, scaled_width, scaled_dx); - - } - } - - // MMX used for FastConvertYUVToRGB32Row requires emms instruction. - if (has_mmx) - EMMS(); -} - } // namespace gfx } // namespace mozilla diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h index 7962af7..c9bf7e0 100644 --- a/gfx/ycbcr/yuv_convert.h +++ b/gfx/ycbcr/yuv_convert.h @@ -18,19 +18,6 @@ enum YUVType { YV12 = 1 // YV12 is half width and half height chroma channels. }; -// Mirror means flip the image horizontally, as in looking in a mirror. -// Rotate happens after mirroring. -enum Rotate { - ROTATE_0, // Rotation off. - ROTATE_90, // Rotate clockwise. - ROTATE_180, // Rotate upside down. - ROTATE_270, // Rotate counter clockwise. - MIRROR_ROTATE_0, // Mirror horizontally. - MIRROR_ROTATE_90, // Mirror then Rotate clockwise. - MIRROR_ROTATE_180, // Mirror vertically. - MIRROR_ROTATE_270 // Transpose. -}; - // Convert a frame of YUV to 32 bit ARGB. // Pass in YV16/YV12 depending on source format void ConvertYCbCrToRGB32(const uint8* yplane, @@ -48,22 +35,6 @@ void ConvertYCbCrToRGB32(const uint8* yplane, int rgbstride, YUVType yuv_type); -// Scale a frame of YUV to 32 bit ARGB. -// Supports rotation and mirroring. -void ScaleYCbCrToRGB32(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int frame_width, - int frame_height, - int scaled_width, - int scaled_height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type, - Rotate view_rotate); - } // namespace gfx } // namespace mozilla diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h index c43f713..2a82972 100644 --- a/gfx/ycbcr/yuv_row.h +++ b/gfx/ycbcr/yuv_row.h @@ -28,53 +28,6 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf, int width); -// Can do 1x, half size or any scale down by an integer amount. -// Step can be negative (mirroring, rotate 180). -// This is the third fastest of the scalers. -void ConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int step); - -// Rotate is like Convert, but applies different step to Y versus U and V. -// This allows rotation by 90 or 270, by stepping by stride. -// This is the forth fastest of the scalers. -void RotateConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int ystep, - int uvstep); - -// Doubler does 4 pixels at a time. Each pixel is replicated. -// This is the fastest of the scalers. -void DoubleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); - -// Handles arbitrary scaling up or down. -// Mirroring is supported, but not 90 or 270 degree rotation. -// Chroma is under sampled every 2 pixels for performance. -// This is the slowest of the scalers. -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int scaled_dx); - -void ScaleYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int scaled_dx); - } // extern "C" // x64 uses MMX2 (SSE) so emms is not required. diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp index a81416c..d3bdab4 100644 --- a/gfx/ycbcr/yuv_row_c.cpp +++ b/gfx/ycbcr/yuv_row_c.cpp @@ -172,25 +172,5 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf, } } -// 28.4 fixed point is used. A shift by 4 isolates the integer. -// A shift by 5 is used to further subsample the chrominence channels. -// & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits, -// for 1/4 pixel accurate interpolation. -void ScaleYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int scaled_dx) { - int scaled_x = 0; - for (int x = 0; x < width; ++x) { - uint8 u = u_buf[scaled_x >> 5]; - uint8 v = v_buf[scaled_x >> 5]; - uint8 y0 = y_buf[scaled_x >> 4]; - YuvPixel(y0, u, v, rgb_buf); - rgb_buf += 4; - scaled_x += scaled_dx; - } -} } // extern "C" diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp index 5fb2bc4..ce5ee89 100644 --- a/gfx/ycbcr/yuv_row_linux.cpp +++ b/gfx/ycbcr/yuv_row_linux.cpp @@ -21,14 +21,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width); } -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int scaled_dx) { - ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx); -} #else #define RGBY(i) { \ @@ -315,75 +307,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi ); } -void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi - const uint8* u_buf, // rsi - const uint8* v_buf, // rdx - uint8* rgb_buf, // rcx - int width, // r8 - int scaled_dx) { // r9 - asm( - "xor %%r11,%%r11\n" - "sub $0x2,%4\n" - "js scalenext\n" - -"scaleloop:" - "mov %%r11,%%r10\n" - "sar $0x5,%%r10\n" - "movzb (%1,%%r10,1),%%rax\n" - "movq 2048(%5,%%rax,8),%%xmm0\n" - "movzb (%2,%%r10,1),%%rax\n" - "movq 4096(%5,%%rax,8),%%xmm1\n" - "lea (%%r11,%6),%%r10\n" - "sar $0x4,%%r11\n" - "movzb (%0,%%r11,1),%%rax\n" - "paddsw %%xmm1,%%xmm0\n" - "movq (%5,%%rax,8),%%xmm1\n" - "lea (%%r10,%6),%%r11\n" - "sar $0x4,%%r10\n" - "movzb (%0,%%r10,1),%%rax\n" - "movq (%5,%%rax,8),%%xmm2\n" - "paddsw %%xmm0,%%xmm1\n" - "paddsw %%xmm0,%%xmm2\n" - "shufps $0x44,%%xmm2,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movq %%xmm1,0x0(%3)\n" - "add $0x8,%3\n" - "sub $0x2,%4\n" - "jns scaleloop\n" - -"scalenext:" - "add $0x1,%4\n" - "js scaledone\n" - - "mov %%r11,%%r10\n" - "sar $0x5,%%r10\n" - "movzb (%1,%%r10,1),%%rax\n" - "movq 2048(%5,%%rax,8),%%xmm0\n" - "movzb (%2,%%r10,1),%%rax\n" - "movq 4096(%5,%%rax,8),%%xmm1\n" - "paddsw %%xmm1,%%xmm0\n" - "sar $0x4,%%r11\n" - "movzb (%0,%%r11,1),%%rax\n" - "movq (%5,%%rax,8),%%xmm1\n" - "paddsw %%xmm0,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movd %%xmm1,0x0(%3)\n" - -"scaledone:" - : - : "r"(y_buf), // %0 - "r"(u_buf), // %1 - "r"(v_buf), // %2 - "r"(rgb_buf), // %3 - "r"(width), // %4 - "r" (kCoefficientsRgbY), // %5 - "r"(static_cast(scaled_dx)) // %6 - : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2" -); -} - #else void FastConvertYUVToRGB32Row(const uint8* y_buf, @@ -443,81 +366,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, "ret\n" ); - -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int scaled_dx); - - asm( - ".global ScaleYUVToRGB32Row\n" -"ScaleYUVToRGB32Row:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x34(%esp),%ecx\n" - "xor %ebx,%ebx\n" - "jmp scaleend\n" - -"scaleloop:" - "mov %ebx,%eax\n" - "sar $0x5,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x5,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x4,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x4,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" -"scaleend:" - "sub $0x2,%ecx\n" - "jns scaleloop\n" - - "and $0x1,%ecx\n" - "je scaledone\n" - - "mov %ebx,%eax\n" - "sar $0x5,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x5,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x4,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" - -"scaledone:" - "popa\n" - "ret\n" -); - #endif #endif // ARCH_CPU_ARM_FAMILY } // extern "C" diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp index a7e8243..3515ada 100644 --- a/gfx/ycbcr/yuv_row_mac.cpp +++ b/gfx/ycbcr/yuv_row_mac.cpp @@ -18,14 +18,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width); } -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int scaled_dx) { - ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx); -} #else #define RGBY(i) { \ @@ -323,91 +315,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, &kCoefficientsRgbY[0][0]); } -extern void MacScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int scaled_dx, - int16 *kCoefficientsRgbY); - - __asm__( -"_MacScaleYUVToRGB32Row:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x3c(%esp),%ecx\n" - "xor %ebx,%ebx\n" - "jmp Lscaleend\n" - -"Lscaleloop:" - "mov %ebx,%eax\n" - "sar $0x5,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x5,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw 4096(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x4,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x4,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq 0(%ecx,%eax,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" -"Lscaleend:" - "sub $0x2,0x34(%esp)\n" - "jns Lscaleloop\n" - - "and $0x1,0x34(%esp)\n" - "je Lscaledone\n" - - "mov %ebx,%eax\n" - "sar $0x5,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x5,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw 4096(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x4,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" - -"Lscaledone:" - "popa\n" - "ret\n" -); - - -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int scaled_dx) { - - MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, - &kCoefficientsRgbY[0][0]); -} #endif // ARCH_CPU_PPC } // extern "C" diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp index a77a16f..f994931 100644 --- a/gfx/ycbcr/yuv_row_win.cpp +++ b/gfx/ycbcr/yuv_row_win.cpp @@ -297,273 +297,5 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, } } -__declspec(naked) -void ConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int step) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - mov ebx, [esp + 32 + 24] // step - jmp wend - - wloop : - movzx eax, byte ptr [edi] - add edi, ebx - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - add esi, ebx - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - add edx, ebx - movq mm1, [kCoefficientsRgbY + 8 * eax] - movzx eax, byte ptr [edx] - add edx, ebx - movq mm2, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - wend : - sub ecx, 2 - jns wloop - - and ecx, 1 // odd number of pixels? - jz wdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - wdone : - - popad - ret - } -} - -__declspec(naked) -void RotateConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int ystep, - int uvstep) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp wend - - wloop : - movzx eax, byte ptr [edi] - mov ebx, [esp + 32 + 28] // uvstep - add edi, ebx - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - add esi, ebx - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - mov ebx, [esp + 32 + 24] // ystep - add edx, ebx - movq mm1, [kCoefficientsRgbY + 8 * eax] - movzx eax, byte ptr [edx] - add edx, ebx - movq mm2, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - wend : - sub ecx, 2 - jns wloop - - and ecx, 1 // odd number of pixels? - jz wdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - wdone : - - popad - ret - } -} - -__declspec(naked) -void DoubleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp wend - - wloop : - movzx eax, byte ptr [edi] - add edi, 1 - movzx ebx, byte ptr [esi] - add esi, 1 - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [edx] - paddsw mm0, [kCoefficientsRgbV + 8 * ebx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - punpckldq mm1, mm1 - movntq [ebp], mm1 - - movzx ebx, byte ptr [edx + 1] - add edx, 2 - paddsw mm0, [kCoefficientsRgbY + 8 * ebx] - psraw mm0, 6 - packuswb mm0, mm0 - punpckldq mm0, mm0 - movntq [ebp+8], mm0 - add ebp, 16 - wend : - sub ecx, 4 - jns wloop - - add ecx, 4 - jz wdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - jmp wend1 - - wloop1 : - movd [ebp], mm1 - add ebp, 4 - wend1 : - sub ecx, 1 - jns wloop1 - wdone : - popad - ret - } -} - -// This version does general purpose scaling by any amount, up or down. -// The only thing it can not do it rotation by 90 or 270. -// For performance the chroma is under sampled, reducing cost of a 3x -// 1080p scale from 8.4 ms to 5.4 ms. -__declspec(naked) -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int dx) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - xor ebx, ebx // x - jmp scaleend - - scaleloop : - mov eax, ebx - sar eax, 5 - movzx eax, byte ptr [edi + eax] - movq mm0, [kCoefficientsRgbU + 8 * eax] - mov eax, ebx - sar eax, 5 - movzx eax, byte ptr [esi + eax] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - mov eax, ebx - add ebx, [esp + 32 + 24] // x += dx - sar eax, 4 - movzx eax, byte ptr [edx + eax] - movq mm1, [kCoefficientsRgbY + 8 * eax] - mov eax, ebx - add ebx, [esp + 32 + 24] // x += dx - sar eax, 4 - movzx eax, byte ptr [edx + eax] - movq mm2, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - scaleend : - sub ecx, 2 - jns scaleloop - - and ecx, 1 // odd number of pixels? - jz scaledone - - mov eax, ebx - sar eax, 5 - movzx eax, byte ptr [edi + eax] - movq mm0, [kCoefficientsRgbU + 8 * eax] - mov eax, ebx - sar eax, 5 - movzx eax, byte ptr [esi + eax] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - mov eax, ebx - sar eax, 4 - movzx eax, byte ptr [edx + eax] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - - scaledone : - popad - ret - } -} } // extern "C"