gecko/gfx/ycbcr/remove_scale.patch

840 lines
25 KiB
Diff

diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
index eec578d..de91f79 100644
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -81,133 +81,5 @@ void ConvertYCbCrToRGB32(const uint8* y_buf,
EMMS();
}
-// Scale a frame of YUV to 32 bit ARGB.
-void ScaleYCbCrToRGB32(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int height,
- int scaled_width,
- int scaled_height,
- int y_pitch,
- int uv_pitch,
- int rgb_pitch,
- YUVType yuv_type,
- Rotate view_rotate) {
- unsigned int y_shift = yuv_type;
- bool has_mmx = supports_mmx();
- // Diagram showing origin and direction of source sampling.
- // ->0 4<-
- // 7 3
- //
- // 6 5
- // ->1 2<-
- // Rotations that start at right side of image.
- if ((view_rotate == ROTATE_180) ||
- (view_rotate == ROTATE_270) ||
- (view_rotate == MIRROR_ROTATE_0) ||
- (view_rotate == MIRROR_ROTATE_90)) {
- y_buf += width - 1;
- u_buf += width / 2 - 1;
- v_buf += width / 2 - 1;
- width = -width;
- }
- // Rotations that start at bottom of image.
- if ((view_rotate == ROTATE_90) ||
- (view_rotate == ROTATE_180) ||
- (view_rotate == MIRROR_ROTATE_90) ||
- (view_rotate == MIRROR_ROTATE_180)) {
- y_buf += (height - 1) * y_pitch;
- u_buf += ((height >> y_shift) - 1) * uv_pitch;
- v_buf += ((height >> y_shift) - 1) * uv_pitch;
- height = -height;
- }
-
- // Handle zero sized destination.
- if (scaled_width == 0 || scaled_height == 0)
- return;
- int scaled_dx = width * 16 / scaled_width;
- int scaled_dy = height * 16 / scaled_height;
-
- int scaled_dx_uv = scaled_dx;
-
- if ((view_rotate == ROTATE_90) ||
- (view_rotate == ROTATE_270)) {
- int tmp = scaled_height;
- scaled_height = scaled_width;
- scaled_width = tmp;
- tmp = height;
- height = width;
- width = tmp;
- int original_dx = scaled_dx;
- int original_dy = scaled_dy;
- scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
- scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
- scaled_dy = original_dx;
- if (view_rotate == ROTATE_90) {
- y_pitch = -1;
- uv_pitch = -1;
- height = -height;
- } else {
- y_pitch = 1;
- uv_pitch = 1;
- }
- }
-
- for (int y = 0; y < scaled_height; ++y) {
- uint8* dest_pixel = rgb_buf + y * rgb_pitch;
- int scaled_y = (y * height / scaled_height);
- const uint8* y_ptr = y_buf + scaled_y * y_pitch;
- const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
- const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
-
-#if defined(_MSC_VER)
- if (scaled_width == (width * 2)) {
- DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width);
- } else if ((scaled_dx & 15) == 0) { // Scaling by integer scale factor.
- if (scaled_dx_uv == scaled_dx) { // Not rotated.
- if (scaled_dx == 16) { // Not scaled
- if (has_mmx)
- FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width);
- else
- FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width);
- } else { // Simple scale down. ie half
- ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width, scaled_dx >> 4);
- }
- } else {
- RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width,
- scaled_dx >> 4, scaled_dx_uv >> 4);
- }
-#else
- if (scaled_dx == 16) { // Not scaled
- if (has_mmx)
- FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width);
- else
- FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width);
-#endif
- } else {
- if (has_mmx)
- ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width, scaled_dx);
- else
- ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width, scaled_dx);
-
- }
- }
-
- // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
- if (has_mmx)
- EMMS();
-}
-
} // namespace gfx
} // namespace mozilla
diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
index 7962af7..c9bf7e0 100644
--- a/gfx/ycbcr/yuv_convert.h
+++ b/gfx/ycbcr/yuv_convert.h
@@ -18,19 +18,6 @@ enum YUVType {
YV12 = 1 // YV12 is half width and half height chroma channels.
};
-// Mirror means flip the image horizontally, as in looking in a mirror.
-// Rotate happens after mirroring.
-enum Rotate {
- ROTATE_0, // Rotation off.
- ROTATE_90, // Rotate clockwise.
- ROTATE_180, // Rotate upside down.
- ROTATE_270, // Rotate counter clockwise.
- MIRROR_ROTATE_0, // Mirror horizontally.
- MIRROR_ROTATE_90, // Mirror then Rotate clockwise.
- MIRROR_ROTATE_180, // Mirror vertically.
- MIRROR_ROTATE_270 // Transpose.
-};
-
// Convert a frame of YUV to 32 bit ARGB.
// Pass in YV16/YV12 depending on source format
void ConvertYCbCrToRGB32(const uint8* yplane,
@@ -48,22 +35,6 @@ void ConvertYCbCrToRGB32(const uint8* yplane,
int rgbstride,
YUVType yuv_type);
-// Scale a frame of YUV to 32 bit ARGB.
-// Supports rotation and mirroring.
-void ScaleYCbCrToRGB32(const uint8* yplane,
- const uint8* uplane,
- const uint8* vplane,
- uint8* rgbframe,
- int frame_width,
- int frame_height,
- int scaled_width,
- int scaled_height,
- int ystride,
- int uvstride,
- int rgbstride,
- YUVType yuv_type,
- Rotate view_rotate);
-
} // namespace gfx
} // namespace mozilla
diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
index c43f713..2a82972 100644
--- a/gfx/ycbcr/yuv_row.h
+++ b/gfx/ycbcr/yuv_row.h
@@ -28,53 +28,6 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
int width);
-// Can do 1x, half size or any scale down by an integer amount.
-// Step can be negative (mirroring, rotate 180).
-// This is the third fastest of the scalers.
-void ConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int step);
-
-// Rotate is like Convert, but applies different step to Y versus U and V.
-// This allows rotation by 90 or 270, by stepping by stride.
-// This is the forth fastest of the scalers.
-void RotateConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int ystep,
- int uvstep);
-
-// Doubler does 4 pixels at a time. Each pixel is replicated.
-// This is the fastest of the scalers.
-void DoubleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width);
-
-// Handles arbitrary scaling up or down.
-// Mirroring is supported, but not 90 or 270 degree rotation.
-// Chroma is under sampled every 2 pixels for performance.
-// This is the slowest of the scalers.
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx);
-
-void ScaleYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx);
-
} // extern "C"
// x64 uses MMX2 (SSE) so emms is not required.
diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
index a81416c..d3bdab4 100644
--- a/gfx/ycbcr/yuv_row_c.cpp
+++ b/gfx/ycbcr/yuv_row_c.cpp
@@ -172,25 +172,5 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
}
}
-// 28.4 fixed point is used. A shift by 4 isolates the integer.
-// A shift by 5 is used to further subsample the chrominence channels.
-// & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits,
-// for 1/4 pixel accurate interpolation.
-void ScaleYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx) {
- int scaled_x = 0;
- for (int x = 0; x < width; ++x) {
- uint8 u = u_buf[scaled_x >> 5];
- uint8 v = v_buf[scaled_x >> 5];
- uint8 y0 = y_buf[scaled_x >> 4];
- YuvPixel(y0, u, v, rgb_buf);
- rgb_buf += 4;
- scaled_x += scaled_dx;
- }
-}
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
index 5fb2bc4..ce5ee89 100644
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -21,14 +21,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
}
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx) {
- ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx);
-}
#else
#define RGBY(i) { \
@@ -315,75 +307,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
);
}
-void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
- const uint8* u_buf, // rsi
- const uint8* v_buf, // rdx
- uint8* rgb_buf, // rcx
- int width, // r8
- int scaled_dx) { // r9
- asm(
- "xor %%r11,%%r11\n"
- "sub $0x2,%4\n"
- "js scalenext\n"
-
-"scaleloop:"
- "mov %%r11,%%r10\n"
- "sar $0x5,%%r10\n"
- "movzb (%1,%%r10,1),%%rax\n"
- "movq 2048(%5,%%rax,8),%%xmm0\n"
- "movzb (%2,%%r10,1),%%rax\n"
- "movq 4096(%5,%%rax,8),%%xmm1\n"
- "lea (%%r11,%6),%%r10\n"
- "sar $0x4,%%r11\n"
- "movzb (%0,%%r11,1),%%rax\n"
- "paddsw %%xmm1,%%xmm0\n"
- "movq (%5,%%rax,8),%%xmm1\n"
- "lea (%%r10,%6),%%r11\n"
- "sar $0x4,%%r10\n"
- "movzb (%0,%%r10,1),%%rax\n"
- "movq (%5,%%rax,8),%%xmm2\n"
- "paddsw %%xmm0,%%xmm1\n"
- "paddsw %%xmm0,%%xmm2\n"
- "shufps $0x44,%%xmm2,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movq %%xmm1,0x0(%3)\n"
- "add $0x8,%3\n"
- "sub $0x2,%4\n"
- "jns scaleloop\n"
-
-"scalenext:"
- "add $0x1,%4\n"
- "js scaledone\n"
-
- "mov %%r11,%%r10\n"
- "sar $0x5,%%r10\n"
- "movzb (%1,%%r10,1),%%rax\n"
- "movq 2048(%5,%%rax,8),%%xmm0\n"
- "movzb (%2,%%r10,1),%%rax\n"
- "movq 4096(%5,%%rax,8),%%xmm1\n"
- "paddsw %%xmm1,%%xmm0\n"
- "sar $0x4,%%r11\n"
- "movzb (%0,%%r11,1),%%rax\n"
- "movq (%5,%%rax,8),%%xmm1\n"
- "paddsw %%xmm0,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movd %%xmm1,0x0(%3)\n"
-
-"scaledone:"
- :
- : "r"(y_buf), // %0
- "r"(u_buf), // %1
- "r"(v_buf), // %2
- "r"(rgb_buf), // %3
- "r"(width), // %4
- "r" (kCoefficientsRgbY), // %5
- "r"(static_cast<long>(scaled_dx)) // %6
- : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
-);
-}
-
#else
void FastConvertYUVToRGB32Row(const uint8* y_buf,
@@ -443,81 +366,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
"ret\n"
);
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx);
-
- asm(
- ".global ScaleYUVToRGB32Row\n"
-"ScaleYUVToRGB32Row:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x34(%esp),%ecx\n"
- "xor %ebx,%ebx\n"
- "jmp scaleend\n"
-
-"scaleloop:"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-"scaleend:"
- "sub $0x2,%ecx\n"
- "jns scaleloop\n"
-
- "and $0x1,%ecx\n"
- "je scaledone\n"
-
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
-
-"scaledone:"
- "popa\n"
- "ret\n"
-);
-
#endif
#endif // ARCH_CPU_ARM_FAMILY
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
index a7e8243..3515ada 100644
--- a/gfx/ycbcr/yuv_row_mac.cpp
+++ b/gfx/ycbcr/yuv_row_mac.cpp
@@ -18,14 +18,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
}
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx) {
- ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx);
-}
#else
#define RGBY(i) { \
@@ -323,91 +315,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
&kCoefficientsRgbY[0][0]);
}
-extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx,
- int16 *kCoefficientsRgbY);
-
- __asm__(
-"_MacScaleYUVToRGB32Row:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x3c(%esp),%ecx\n"
- "xor %ebx,%ebx\n"
- "jmp Lscaleend\n"
-
-"Lscaleloop:"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw 4096(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq 0(%ecx,%eax,8),%mm1\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq 0(%ecx,%eax,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-"Lscaleend:"
- "sub $0x2,0x34(%esp)\n"
- "jns Lscaleloop\n"
-
- "and $0x1,0x34(%esp)\n"
- "je Lscaledone\n"
-
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw 4096(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq 0(%ecx,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
-
-"Lscaledone:"
- "popa\n"
- "ret\n"
-);
-
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx) {
-
- MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
- &kCoefficientsRgbY[0][0]);
-}
#endif // ARCH_CPU_PPC
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
index a77a16f..f994931 100644
--- a/gfx/ycbcr/yuv_row_win.cpp
+++ b/gfx/ycbcr/yuv_row_win.cpp
@@ -297,273 +297,5 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
}
}
-__declspec(naked)
-void ConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int step) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- mov ebx, [esp + 32 + 24] // step
- jmp wend
-
- wloop :
- movzx eax, byte ptr [edi]
- add edi, ebx
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- add esi, ebx
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- add edx, ebx
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- movzx eax, byte ptr [edx]
- add edx, ebx
- movq mm2, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 6
- psraw mm2, 6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 8
- wend :
- sub ecx, 2
- jns wloop
-
- and ecx, 1 // odd number of pixels?
- jz wdone
-
- movzx eax, byte ptr [edi]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
- wdone :
-
- popad
- ret
- }
-}
-
-__declspec(naked)
-void RotateConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int ystep,
- int uvstep) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- jmp wend
-
- wloop :
- movzx eax, byte ptr [edi]
- mov ebx, [esp + 32 + 28] // uvstep
- add edi, ebx
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- add esi, ebx
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- mov ebx, [esp + 32 + 24] // ystep
- add edx, ebx
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- movzx eax, byte ptr [edx]
- add edx, ebx
- movq mm2, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 6
- psraw mm2, 6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 8
- wend :
- sub ecx, 2
- jns wloop
-
- and ecx, 1 // odd number of pixels?
- jz wdone
-
- movzx eax, byte ptr [edi]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
- wdone :
-
- popad
- ret
- }
-}
-
-__declspec(naked)
-void DoubleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- jmp wend
-
- wloop :
- movzx eax, byte ptr [edi]
- add edi, 1
- movzx ebx, byte ptr [esi]
- add esi, 1
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [edx]
- paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- punpckldq mm1, mm1
- movntq [ebp], mm1
-
- movzx ebx, byte ptr [edx + 1]
- add edx, 2
- paddsw mm0, [kCoefficientsRgbY + 8 * ebx]
- psraw mm0, 6
- packuswb mm0, mm0
- punpckldq mm0, mm0
- movntq [ebp+8], mm0
- add ebp, 16
- wend :
- sub ecx, 4
- jns wloop
-
- add ecx, 4
- jz wdone
-
- movzx eax, byte ptr [edi]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- jmp wend1
-
- wloop1 :
- movd [ebp], mm1
- add ebp, 4
- wend1 :
- sub ecx, 1
- jns wloop1
- wdone :
- popad
- ret
- }
-}
-
-// This version does general purpose scaling by any amount, up or down.
-// The only thing it can not do it rotation by 90 or 270.
-// For performance the chroma is under sampled, reducing cost of a 3x
-// 1080p scale from 8.4 ms to 5.4 ms.
-__declspec(naked)
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int dx) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- xor ebx, ebx // x
- jmp scaleend
-
- scaleloop :
- mov eax, ebx
- sar eax, 5
- movzx eax, byte ptr [edi + eax]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- mov eax, ebx
- sar eax, 5
- movzx eax, byte ptr [esi + eax]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- mov eax, ebx
- add ebx, [esp + 32 + 24] // x += dx
- sar eax, 4
- movzx eax, byte ptr [edx + eax]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- mov eax, ebx
- add ebx, [esp + 32 + 24] // x += dx
- sar eax, 4
- movzx eax, byte ptr [edx + eax]
- movq mm2, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 6
- psraw mm2, 6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 8
- scaleend :
- sub ecx, 2
- jns scaleloop
-
- and ecx, 1 // odd number of pixels?
- jz scaledone
-
- mov eax, ebx
- sar eax, 5
- movzx eax, byte ptr [edi + eax]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- mov eax, ebx
- sar eax, 5
- movzx eax, byte ptr [esi + eax]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- mov eax, ebx
- sar eax, 4
- movzx eax, byte ptr [edx + eax]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
-
- scaledone :
- popad
- ret
- }
-}
} // extern "C"