diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp index b22e778..cdbb040 100644 --- a/gfx/ycbcr/yuv_convert.cpp +++ b/gfx/ycbcr/yuv_convert.cpp @@ -6,16 +6,17 @@ // http://www.fourcc.org/yuv.php // The actual conversion is best described here // http://en.wikipedia.org/wiki/YUV // An article on optimizing YUV conversion using tables instead of multiplies // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf // // YV12 is a full plane of Y and a half height, half width chroma planes // YV16 is a full plane of Y and a full height, half width chroma planes +// YV24 is a full plane of Y and a full height, full width chroma planes // // ARGB pixel format is output, which on little endian is stored as BGRA. // The alpha is set to 255, allowing the application to use RGBA or RGB32. #include "yuv_convert.h" // Header for low level row functions. #include "yuv_row.h" @@ -33,50 +34,55 @@ void ConvertYCbCrToRGB32(const uint8* y_buf, int pic_x, int pic_y, int pic_width, int pic_height, int y_pitch, int uv_pitch, int rgb_pitch, YUVType yuv_type) { - unsigned int y_shift = yuv_type; - bool has_mmx = supports_mmx(); - bool odd_pic_x = pic_x % 2 != 0; + unsigned int y_shift = yuv_type == YV12 ? 1 : 0; + unsigned int x_shift = yuv_type == YV24 ? 0 : 1; + // There is no optimized YV24 MMX routine so we check for this and + // fall back to the C code. + bool has_mmx = supports_mmx() && yuv_type != YV24; + bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0; int x_width = odd_pic_x ? 
pic_width - 1 : pic_width; for (int y = pic_y; y < pic_height + pic_y; ++y) { uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch; const uint8* y_ptr = y_buf + y * y_pitch + pic_x; - const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1); - const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1); + const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift); + const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift); if (odd_pic_x) { // Handle the single odd pixel manually and use the // fast routines for the remaining. FastConvertYUVToRGB32Row_C(y_ptr++, u_ptr++, v_ptr++, rgb_row, - 1); + 1, + x_shift); rgb_row += 4; } if (has_mmx) FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, rgb_row, x_width); else FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr, rgb_row, - x_width); + x_width, + x_shift); } // MMX used for FastConvertYUVToRGB32Row requires emms instruction. if (has_mmx) EMMS(); } } // namespace gfx diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h index e624168..c0b678d 100644 --- a/gfx/ycbcr/yuv_convert.h +++ b/gfx/ycbcr/yuv_convert.h @@ -10,18 +10,19 @@ namespace mozilla { namespace gfx { // Type of YUV surface. -// The value of these enums matter as they are used to shift vertical indices. +// ConvertYCbCrToRGB32 compares against these to derive its chroma shifts. enum YUVType { - YV16 = 0, // YV16 is half width and full height chroma channels. - YV12 = 1 // YV12 is half width and half height chroma channels. + YV12 = 0, // YV12 is half width and half height chroma channels. + YV16 = 1, // YV16 is half width and full height chroma channels. + YV24 = 2 // YV24 is full width and full height chroma channels. }; // Convert a frame of YUV to 32 bit ARGB. 
-// Pass in YV16/YV12 depending on source format +// Pass in YV12/YV16/YV24 depending on source format NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane, const uint8* uplane, const uint8* vplane, uint8* rgbframe, diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h index 2a82972..d776dac 100644 --- a/gfx/ycbcr/yuv_row.h +++ b/gfx/ycbcr/yuv_row.h @@ -20,17 +20,18 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width); void FastConvertYUVToRGB32Row_C(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width); + int width, + unsigned int x_shift); } // extern "C" // x64 uses MMX2 (SSE) so emms is not required. #if !defined(ARCH_CPU_X86_64) && !defined(ARCH_CPU_PPC) #if defined(_MSC_VER) #define EMMS() __asm emms diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp index d3bdab4..36d9bda 100644 --- a/gfx/ycbcr/yuv_row_c.cpp +++ b/gfx/ycbcr/yuv_row_c.cpp @@ -153,24 +153,29 @@ static inline void YuvPixel(uint8 y, (clip(C298a + cr) << 16) | (0xff000000); } void FastConvertYUVToRGB32Row_C(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) { + int width, + unsigned int x_shift) { for (int x = 0; x < width; x += 2) { - uint8 u = u_buf[x >> 1]; - uint8 v = v_buf[x >> 1]; + uint8 u = u_buf[x >> x_shift]; + uint8 v = v_buf[x >> x_shift]; uint8 y0 = y_buf[x]; YuvPixel(y0, u, v, rgb_buf); if ((x + 1) < width) { uint8 y1 = y_buf[x + 1]; + if (x_shift == 0) { + u = u_buf[x + 1]; + v = v_buf[x + 1]; + } YuvPixel(y1, u, v, rgb_buf + 4); } rgb_buf += 8; // Advance 2 pixels. 
} } } // extern "C" diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp index ce5ee89..455dd7b 100644 --- a/gfx/ycbcr/yuv_row_linux.cpp +++ b/gfx/ycbcr/yuv_row_linux.cpp @@ -13,17 +13,17 @@ extern "C" { #if defined(ARCH_CPU_ARM_FAMILY) // ARM implementation uses C fallback void FastConvertYUVToRGB32Row(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width); + FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); } #else #define RGBY(i) { \ static_cast(1.164 * 64 * (i - 16) + 0.5), \ static_cast(1.164 * 64 * (i - 16) + 0.5), \ static_cast(1.164 * 64 * (i - 16) + 0.5), \ diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp index 34ecdc1..2a679cc 100644 --- a/gfx/ycbcr/yuv_row_mac.cpp +++ b/gfx/ycbcr/yuv_row_mac.cpp @@ -13,17 +13,17 @@ extern "C" { // option at all. #if defined(ARCH_CPU_PPC) || defined(ARCH_CPU_64_BITS) // PPC implementation uses C fallback void FastConvertYUVToRGB32Row(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width); + FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); } #else #define RGBY(i) { \ static_cast(1.164 * 64 * (i - 16) + 0.5), \ static_cast(1.164 * 64 * (i - 16) + 0.5), \ static_cast(1.164 * 64 * (i - 16) + 0.5), \ diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp index d2b82c4..708ef14 100644 --- a/gfx/ycbcr/yuv_row_win.cpp +++ b/gfx/ycbcr/yuv_row_win.cpp @@ -9,17 +9,17 @@ extern "C" { // needs to be fixed for 64 bit builds. 
#if defined(ARCH_CPU_64_BITS) // PPC implementation uses C fallback void FastConvertYUVToRGB32Row(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width); + FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); } #else #define RGBY(i) { \ static_cast(1.164 * 64 * (i - 16) + 0.5), \ static_cast(1.164 * 64 * (i - 16) + 0.5), \