Bug 616778 - Part 2: Update NPOTB files in gfx/ycbcr. r=kinetik, a=NPOTB

--HG-- extra : rebase_source : 69e306bbb1aba08d2224a83b14086d2efef8f955
2024-09-13 09:24:08 -07:00 · 2011-01-12 19:08:17 -08:00 · 2011-01-12 19:08:17 -08:00 · 690116173e
commit 690116173e
parent db12052004
4 changed files with 925 additions and 636 deletions
--- a/gfx/ycbcr/README
+++ b/gfx/ycbcr/README
@ -2,16 +2,19 @@ This color conversion code is from the Chromium open source project available he

 http://code.google.com/chromium/

-The code comes from svn revision 638400 on 2010-10-26.
+The code comes from svn revision 63840 on 2010-10-26.

 The code was copied from a Chromium svn checkout using the 'update.sh' script which then applies patches for our build and to add dynamic CPU detection.

-convert.patch: Change Chromium code to build using Mozilla build system.
-               Add runtime CPU detection for MMX
-               Move default C implementation to work on all platforms.
-               Change Chromium code to allow a picture region.
-               The YUV conversion will convert within this 
-               picture region only.
-               Add YCbCr 4:4:4 support
+convert.patch contains the following changes:

-arm.patch: Add YCbCr to rgb16_565 conversion support, bug 616469
+  * Change Chromium code to build using Mozilla build system.
+  * Add runtime CPU detection for MMX
+  * Move default C implementation to work on all platforms.
+  * Change Chromium code to allow a picture region.
+  * The YUV conversion will convert within this picture region only.
+  * Add YCbCr 4:4:4 support
+  * Bug 616469 - Add YCbCr to rgb16_565 conversion support.
+  * Bug 619178 - Update CPU detection in yuv_convert to new SSE.h interface.
+  * Bug 616778 - Split yuv_convert FilterRows vectorized code into separate files so it can
+    be properly guarded with cpuid() calls.
--- a/gfx/ycbcr/arm.patch
+++ b/gfx/ycbcr/arm.patch
@ -1,347 +0,0 @@
-diff --git a/gfx/ycbcr/Makefile.in b/gfx/ycbcr/Makefile.in
--- a/gfx/ycbcr/Makefile.in
-+++ b/gfx/ycbcr/Makefile.in
-@@ -40,16 +40,21 @@ CPPSRCS += yuv_row_posix.cpp \
- else
- CPPSRCS += yuv_row_other.cpp \
-            $(NULL)
- endif # Darwin
- endif # SunOS
- endif # linux
- endif # windows
- 
-+ifeq (arm,$(findstring arm,$(OS_TEST)))
-+CPPSRCS += yuv_convert_arm.cpp \
-+           $(NULL)
-+endif
-+
- EXTRA_DSO_LDOPTS += \
-         $(LIBS_DIR) \
-         $(EXTRA_DSO_LIBS) \
-         $(XPCOM_LIBS) \
-         $(NSPR_LIBS) \
-         $(NULL)
- 
- include $(topsrcdir)/config/rules.mk
-diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
--- a/gfx/ycbcr/yuv_convert.cpp
-+++ b/gfx/ycbcr/yuv_convert.cpp
-@@ -19,25 +19,56 @@
- #include "yuv_convert.h"
- 
- // Header for low level row functions.
- #include "yuv_row.h"
- #define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
- #define MOZILLA_SSE_INCLUDE_HEADER_FOR_MMX
- #include "mozilla/SSE.h"
- 
-+#ifdef HAVE_YCBCR_TO_RGB565
-+void __attribute((noinline)) yv12_to_rgb565_neon(uint16 *dst, const uint8 *y, const uint8 *u, const uint8 *v, int n, int oddflag);
-+#endif
-+
- namespace mozilla {
- 
- namespace gfx {
-  
- // 16.16 fixed point arithmetic
- const int kFractionBits = 16;
- const int kFractionMax = 1 << kFractionBits;
- const int kFractionMask = ((1 << kFractionBits) - 1);
- 
-+
-+// Convert a frame of YUV to 16 bit RGB565.
-+NS_GFX_(void) ConvertYCbCrToRGB565(const uint8* y_buf,
-+                                  const uint8* u_buf,
-+                                  const uint8* v_buf,
-+                                  uint8* rgb_buf,
-+                                  int pic_x,
-+                                  int pic_y,
-+                                  int pic_width,
-+                                  int pic_height,
-+                                  int y_pitch,
-+                                  int uv_pitch,
-+                                  int rgb_pitch,
-+                                  YUVType yuv_type)
-+{
-+#ifdef HAVE_YCBCR_TO_RGB565
-+  for (int i = 0; i < pic_height; i++) {
-+    yv12_to_rgb565_neon((uint16*)rgb_buf + pic_width * i,
-+                         y_buf + y_pitch * i,
-+                         u_buf + uv_pitch * (i / 2),
-+                         v_buf + uv_pitch * (i / 2),
-+                         pic_width,
-+                         0);
-+  }
-+#endif
-+}
-+
- // Convert a frame of YUV to 32 bit ARGB.
- NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
-                                   const uint8* u_buf,
-                                   const uint8* v_buf,
-                                   uint8* rgb_buf,
-                                   int pic_x,
-                                   int pic_y,
-                                   int pic_width,
-diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
--- a/gfx/ycbcr/yuv_convert.h
-+++ b/gfx/ycbcr/yuv_convert.h
-@@ -2,16 +2,20 @@
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
- 
- #ifndef MEDIA_BASE_YUV_CONVERT_H_
- #define MEDIA_BASE_YUV_CONVERT_H_
- 
- #include "chromium_types.h"
- #include "gfxCore.h"
-+
-+#ifdef __arm__
-+#define HAVE_YCBCR_TO_RGB565 1
-+#endif
-  
- namespace mozilla {
- 
- namespace gfx {
-  
- // Type of YUV surface.
- // The value of these enums matter as they are used to shift vertical indices.
- enum YUVType {
-@@ -36,16 +40,31 @@ enum Rotate {
- // Filter affects how scaling looks.
- enum ScaleFilter {
-   FILTER_NONE = 0,        // No filter (point sampled).
-   FILTER_BILINEAR_H = 1,  // Bilinear horizontal filter.
-   FILTER_BILINEAR_V = 2,  // Bilinear vertical filter.
-   FILTER_BILINEAR = 3     // Bilinear filter.
- };
- 
-+// Convert a frame of YUV to 16 bit RGB565.
-+// Pass in YV12 formats
-+NS_GFX_(void) ConvertYCbCrToRGB565(const uint8* yplane,
-+                                  const uint8* uplane,
-+                                  const uint8* vplane,
-+                                  uint8* rgbframe,
-+                                  int pic_x,
-+                                  int pic_y,
-+                                  int pic_width,
-+                                  int pic_height,
-+                                  int ystride,
-+                                  int uvstride,
-+                                  int rgbstride,
-+                                  YUVType yuv_type);
-+
- // Convert a frame of YUV to 32 bit ARGB.
- // Pass in YV16/YV12 depending on source format
- NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
-                                   const uint8* uplane,
-                                   const uint8* vplane,
-                                   uint8* rgbframe,
-                                   int pic_x,
-                                   int pic_y,
-diff --git a/gfx/ycbcr/yuv_convert_arm.cpp b/gfx/ycbcr/yuv_convert_arm.cpp
-new file mode 100644
--- /dev/null
-+++ b/gfx/ycbcr/yuv_convert_arm.cpp
-@@ -0,0 +1,201 @@
-+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-+// Use of this source code is governed by a BSD-style license that can be
-+// found in the LICENSE file.
-+
-+// contributor Siarhei Siamashka <siarhei.siamashka@gmail.com>
-+
-+#include "yuv_convert.h"
-+
-+void __attribute((noinline)) yv12_to_rgb565_neon(uint16 *dst, const uint8 *y, const uint8 *u, const uint8 *v, int n, int oddflag)
-+{
-+    static __attribute__((aligned(16))) uint16 acc_r[8] = {
-+        22840, 22840, 22840, 22840, 22840, 22840, 22840, 22840,
-+    };
-+    static __attribute__((aligned(16))) uint16 acc_g[8] = {
-+        17312, 17312, 17312, 17312, 17312, 17312, 17312, 17312,
-+    };
-+    static __attribute__((aligned(16))) uint16 acc_b[8] = {
-+        28832, 28832, 28832, 28832, 28832, 28832, 28832, 28832,
-+    };
-+    /*
-+     * Registers:
-+     * q0, q1 : d0, d1, d2, d3  - are used for initial loading of YUV data
-+     * q2     : d4, d5          - are used for storing converted RGB data
-+     * q3     : d6, d7          - are used for temporary storage
-+     *
-+     * q4-q7 - reserved
-+     *
-+     * q8, q9 : d16, d17, d18, d19  - are used for expanded Y data
-+     * q10    : d20, d21
-+     * q11    : d22, d23
-+     * q12    : d24, d25
-+     * q13    : d26, d27
-+     * q13, q14, q15            - various constants (#16, #149, #204, #50, #104, #154)
-+     */
-+    asm volatile (
-+".fpu neon\n"
-+".macro convert_macroblock size\n"
-+/* load up to 16 source pixels */
-+	".if \\size == 16\n"
-+	    "pld [%[y], #64]\n"
-+	    "pld [%[u], #64]\n"
-+	    "pld [%[v], #64]\n"
-+	    "vld1.8 {d1}, [%[y]]!\n"
-+	    "vld1.8 {d3}, [%[y]]!\n"
-+	    "vld1.8 {d0}, [%[u]]!\n"
-+	    "vld1.8 {d2}, [%[v]]!\n"
-+	".elseif \\size == 8\n"
-+	    "vld1.8 {d1}, [%[y]]!\n"
-+	    "vld1.8 {d0[0]}, [%[u]]!\n"
-+	    "vld1.8 {d0[1]}, [%[u]]!\n"
-+	    "vld1.8 {d0[2]}, [%[u]]!\n"
-+	    "vld1.8 {d0[3]}, [%[u]]!\n"
-+	    "vld1.8 {d2[0]}, [%[v]]!\n"
-+	    "vld1.8 {d2[1]}, [%[v]]!\n"
-+	    "vld1.8 {d2[2]}, [%[v]]!\n"
-+	    "vld1.8 {d2[3]}, [%[v]]!\n"
-+	".elseif \\size == 4\n"
-+	    "vld1.8 {d1[0]}, [%[y]]!\n"
-+	    "vld1.8 {d1[1]}, [%[y]]!\n"
-+	    "vld1.8 {d1[2]}, [%[y]]!\n"
-+	    "vld1.8 {d1[3]}, [%[y]]!\n"
-+	    "vld1.8 {d0[0]}, [%[u]]!\n"
-+	    "vld1.8 {d0[1]}, [%[u]]!\n"
-+	    "vld1.8 {d2[0]}, [%[v]]!\n"
-+	    "vld1.8 {d2[1]}, [%[v]]!\n"
-+	".elseif \\size == 2\n"
-+	    "vld1.8 {d1[0]}, [%[y]]!\n"
-+	    "vld1.8 {d1[1]}, [%[y]]!\n"
-+	    "vld1.8 {d0[0]}, [%[u]]!\n"
-+	    "vld1.8 {d2[0]}, [%[v]]!\n"
-+	".elseif \\size == 1\n"
-+	    "vld1.8 {d1[0]}, [%[y]]!\n"
-+	    "vld1.8 {d0[0]}, [%[u]]!\n"
-+	    "vld1.8 {d2[0]}, [%[v]]!\n"
-+	".else\n"
-+	    ".error \"unsupported macroblock size\"\n"
-+	".endif\n"
-+
-+        /* d1 - Y data (first 8 bytes) */
-+        /* d3 - Y data (next 8 bytes) */
-+        /* d0 - U data, d2 - V data */
-+
-+	/* split even and odd Y color components */
-+	"vuzp.8      d1, d3\n"                       /* d1 - evenY, d3 - oddY */
-+	/* clip upper and lower boundaries */
-+	"vqadd.u8    q0, q0, q4\n"
-+	"vqadd.u8    q1, q1, q4\n"
-+	"vqsub.u8    q0, q0, q5\n"
-+	"vqsub.u8    q1, q1, q5\n"
-+
-+	"vshr.u8     d4, d2, #1\n"                   /* d4 = V >> 1 */
-+
-+	"vmull.u8    q8, d1, d27\n"                  /* q8 = evenY * 149 */
-+	"vmull.u8    q9, d3, d27\n"                  /* q9 = oddY * 149 */
-+
-+	"vld1.16     {d20, d21}, [%[acc_r], :128]\n" /* q10 - initialize accumulator for red */
-+	"vsubw.u8    q10, q10, d4\n"                 /* red acc -= (V >> 1) */
-+	"vmlsl.u8    q10, d2, d28\n"                 /* red acc -= V * 204 */
-+	"vld1.16     {d22, d23}, [%[acc_g], :128]\n" /* q11 - initialize accumulator for green */
-+	"vmlsl.u8    q11, d2, d30\n"                 /* green acc -= V * 104 */
-+	"vmlsl.u8    q11, d0, d29\n"                 /* green acc -= U * 50 */
-+	"vld1.16     {d24, d25}, [%[acc_b], :128]\n" /* q12 - initialize accumulator for blue */
-+	"vmlsl.u8    q12, d0, d30\n"                 /* blue acc -= U * 104 */
-+	"vmlsl.u8    q12, d0, d31\n"                 /* blue acc -= U * 154 */
-+
-+	"vhsub.s16   q3, q8, q10\n"                  /* calculate even red components */
-+	"vhsub.s16   q10, q9, q10\n"                 /* calculate odd red components */
-+	"vqshrun.s16 d0, q3, #6\n"                   /* right shift, narrow and saturate even red components */
-+	"vqshrun.s16 d3, q10, #6\n"                  /* right shift, narrow and saturate odd red components */
-+
-+	"vhadd.s16   q3, q8, q11\n"                  /* calculate even green components */
-+	"vhadd.s16   q11, q9, q11\n"                 /* calculate odd green components */
-+	"vqshrun.s16 d1, q3, #6\n"                   /* right shift, narrow and saturate even green components */
-+	"vqshrun.s16 d4, q11, #6\n"                  /* right shift, narrow and saturate odd green components */
-+
-+	"vhsub.s16   q3, q8, q12\n"                  /* calculate even blue components */
-+	"vhsub.s16   q12, q9, q12\n"                 /* calculate odd blue components */
-+	"vqshrun.s16 d2, q3, #6\n"                   /* right shift, narrow and saturate even blue components */
-+	"vqshrun.s16 d5, q12, #6\n"                  /* right shift, narrow and saturate odd blue components */
-+
-+	"vzip.8      d0, d3\n"                       /* join even and odd red components */
-+	"vzip.8      d1, d4\n"                       /* join even and odd green components */
-+	"vzip.8      d2, d5\n"                       /* join even and odd blue components */
-+
-+	"vshll.u8    q3, d0, #8\n\t"
-+	"vshll.u8    q8, d1, #8\n\t"
-+	"vshll.u8    q9, d2, #8\n\t"
-+	"vsri.u16    q3, q8, #5\t\n"
-+	"vsri.u16    q3, q9, #11\t\n"
-+	/* store pixel data to memory */
-+	".if \\size == 16\n"
-+	"    vst1.16 {d6, d7}, [%[dst]]!\n"
-+	"    vshll.u8    q3, d3, #8\n\t"
-+	"    vshll.u8    q8, d4, #8\n\t"
-+	"    vshll.u8    q9, d5, #8\n\t"
-+	"    vsri.u16    q3, q8, #5\t\n"
-+	"    vsri.u16    q3, q9, #11\t\n"
-+	"    vst1.16 {d6, d7}, [%[dst]]!\n"
-+	".elseif \\size == 8\n"
-+	"    vst1.16 {d6, d7}, [%[dst]]!\n"
-+	".elseif \\size == 4\n"
-+	"    vst1.16 {d6}, [%[dst]]!\n"
-+	".elseif \\size == 2\n"
-+	"    vst1.16 {d6[0]}, [%[dst]]!\n"
-+	"    vst1.16 {d6[1]}, [%[dst]]!\n"
-+	".elseif \\size == 1\n"
-+	"    vst1.16 {d6[0]}, [%[dst]]!\n"
-+	".endif\n"
-+	".endm\n"
-+
-+	"vmov.u8     d8, #15\n" /* add this to U/V to saturate upper boundary */
-+	"vmov.u8     d9, #20\n" /* add this to Y to saturate upper boundary */
-+	"vmov.u8     d10, #31\n" /* sub this from U/V to saturate lower boundary */
-+	"vmov.u8     d11, #36\n" /* sub this from Y to saturate lower boundary */
-+
-+	"vmov.u8     d26, #16\n"
-+	"vmov.u8     d27, #149\n"
-+	"vmov.u8     d28, #204\n"
-+	"vmov.u8     d29, #50\n"
-+	"vmov.u8     d30, #104\n"
-+	"vmov.u8     d31, #154\n"
-+
-+	"cmp         %[oddflag], #0\n"
-+	"beq         1f\n"
-+	"convert_macroblock 1\n"
-+	"sub         %[n], %[n], #1\n"
-+    "1:\n"
-+	"subs        %[n], %[n], #16\n"
-+	"blt         2f\n"
-+    "1:\n"
-+	"convert_macroblock 16\n"
-+	"subs        %[n], %[n], #16\n"
-+	"bge         1b\n"
-+    "2:\n"
-+	"tst         %[n], #8\n"
-+	"beq         3f\n"
-+	"convert_macroblock 8\n"
-+    "3:\n"
-+	"tst         %[n], #4\n"
-+	"beq         4f\n"
-+	"convert_macroblock 4\n"
-+    "4:\n"
-+	"tst         %[n], #2\n"
-+	"beq         5f\n"
-+	"convert_macroblock 2\n"
-+    "5:\n"
-+	"tst         %[n], #1\n"
-+	"beq         6f\n"
-+	"convert_macroblock 1\n"
-+    "6:\n"
-+	".purgem convert_macroblock\n"
-+	: [y] "+&r" (y), [u] "+&r" (u), [v] "+&r" (v), [dst] "+&r" (dst), [n] "+&r" (n)
-+	: [acc_r] "r" (&acc_r[0]), [acc_g] "r" (&acc_g[0]), [acc_b] "r" (&acc_b[0]),
-+	  [oddflag] "r" (oddflag)
-+	: "cc", "memory",
-+	  "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
-+	  "d8",  "d9",  "d10", "d11", /* "d12", "d13", "d14", "d15", */
-+	  "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
-+	  "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"
-+    );
-+}
--- a/gfx/ycbcr/convert.patch
+++ b/gfx/ycbcr/convert.patch
--- a/gfx/ycbcr/update.sh
+++ b/gfx/ycbcr/update.sh
@ -7,4 +7,3 @@ cp $1/media/base/yuv_row_posix.cc yuv_row_posix.cpp
 cp $1/media/base/yuv_row_win.cc yuv_row_win.cpp
 cp $1/media/base/yuv_row_posix.cc yuv_row_c.cpp
 patch -p3 <convert.patch
-patch -p3 <arm.patch