mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 616778 - Part 2: Update NPOTB files in gfx/ycbcr. r=kinetik, a=NPOTB
--HG-- extra : rebase_source : 69e306bbb1aba08d2224a83b14086d2efef8f955
This commit is contained in:
parent
db12052004
commit
690116173e
@ -2,16 +2,19 @@ This color conversion code is from the Chromium open source project available he
|
||||
|
||||
http://code.google.com/chromium/
|
||||
|
||||
The code comes from svn revision 63840 on 2010-10-26.
|
||||
The code comes from svn revision 63840 on 2010-10-26.
|
||||
|
||||
The code was copied from a Chromium svn checkout using the 'update.sh' script which then applies patches for our build and to add dynamic CPU detection.
|
||||
|
||||
convert.patch: Change Chromium code to build using Mozilla build system.
|
||||
Add runtime CPU detection for MMX
|
||||
Move default C implementation to work on all platforms.
|
||||
Change Chromium code to allow a picture region.
|
||||
The YUV conversion will convert within this
|
||||
picture region only.
|
||||
Add YCbCr 4:4:4 support
|
||||
convert.patch contains the following changes:
|
||||
|
||||
arm.patch: Add YCbCr to rgb16_565 conversion support, bug 616469
|
||||
* Change Chromium code to build using Mozilla build system.
|
||||
* Add runtime CPU detection for MMX
|
||||
* Move default C implementation to work on all platforms.
|
||||
* Change Chromium code to allow a picture region.
|
||||
* The YUV conversion will convert within this picture region only.
|
||||
* Add YCbCr 4:4:4 support
|
||||
* Bug 616469 - Add YCbCr to rgb16_565 conversion support.
|
||||
* Bug 619178 - Update CPU detection in yuv_convert to new SSE.h interface.
|
||||
* Bug 616778 - Split yuv_convert FilterRows vectorized code into separate files so it can
|
||||
be properly guarded with cpuid() calls.
|
||||
|
@ -1,347 +0,0 @@
|
||||
diff --git a/gfx/ycbcr/Makefile.in b/gfx/ycbcr/Makefile.in
|
||||
--- a/gfx/ycbcr/Makefile.in
|
||||
+++ b/gfx/ycbcr/Makefile.in
|
||||
@@ -40,16 +40,21 @@ CPPSRCS += yuv_row_posix.cpp \
|
||||
else
|
||||
CPPSRCS += yuv_row_other.cpp \
|
||||
$(NULL)
|
||||
endif # Darwin
|
||||
endif # SunOS
|
||||
endif # linux
|
||||
endif # windows
|
||||
|
||||
+ifeq (arm,$(findstring arm,$(OS_TEST)))
|
||||
+CPPSRCS += yuv_convert_arm.cpp \
|
||||
+ $(NULL)
|
||||
+endif
|
||||
+
|
||||
EXTRA_DSO_LDOPTS += \
|
||||
$(LIBS_DIR) \
|
||||
$(EXTRA_DSO_LIBS) \
|
||||
$(XPCOM_LIBS) \
|
||||
$(NSPR_LIBS) \
|
||||
$(NULL)
|
||||
|
||||
include $(topsrcdir)/config/rules.mk
|
||||
diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
|
||||
--- a/gfx/ycbcr/yuv_convert.cpp
|
||||
+++ b/gfx/ycbcr/yuv_convert.cpp
|
||||
@@ -19,25 +19,56 @@
|
||||
#include "yuv_convert.h"
|
||||
|
||||
// Header for low level row functions.
|
||||
#include "yuv_row.h"
|
||||
#define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
|
||||
#define MOZILLA_SSE_INCLUDE_HEADER_FOR_MMX
|
||||
#include "mozilla/SSE.h"
|
||||
|
||||
+#ifdef HAVE_YCBCR_TO_RGB565
|
||||
+void __attribute((noinline)) yv12_to_rgb565_neon(uint16 *dst, const uint8 *y, const uint8 *u, const uint8 *v, int n, int oddflag);
|
||||
+#endif
|
||||
+
|
||||
namespace mozilla {
|
||||
|
||||
namespace gfx {
|
||||
|
||||
// 16.16 fixed point arithmetic
|
||||
const int kFractionBits = 16;
|
||||
const int kFractionMax = 1 << kFractionBits;
|
||||
const int kFractionMask = ((1 << kFractionBits) - 1);
|
||||
|
||||
+
|
||||
+// Convert a frame of YUV to 16 bit RGB565.
|
||||
+NS_GFX_(void) ConvertYCbCrToRGB565(const uint8* y_buf,
|
||||
+ const uint8* u_buf,
|
||||
+ const uint8* v_buf,
|
||||
+ uint8* rgb_buf,
|
||||
+ int pic_x,
|
||||
+ int pic_y,
|
||||
+ int pic_width,
|
||||
+ int pic_height,
|
||||
+ int y_pitch,
|
||||
+ int uv_pitch,
|
||||
+ int rgb_pitch,
|
||||
+ YUVType yuv_type)
|
||||
+{
|
||||
+#ifdef HAVE_YCBCR_TO_RGB565
|
||||
+ for (int i = 0; i < pic_height; i++) {
|
||||
+ yv12_to_rgb565_neon((uint16*)rgb_buf + pic_width * i,
|
||||
+ y_buf + y_pitch * i,
|
||||
+ u_buf + uv_pitch * (i / 2),
|
||||
+ v_buf + uv_pitch * (i / 2),
|
||||
+ pic_width,
|
||||
+ 0);
|
||||
+ }
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
// Convert a frame of YUV to 32 bit ARGB.
|
||||
NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int pic_x,
|
||||
int pic_y,
|
||||
int pic_width,
|
||||
diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
|
||||
--- a/gfx/ycbcr/yuv_convert.h
|
||||
+++ b/gfx/ycbcr/yuv_convert.h
|
||||
@@ -2,16 +2,20 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef MEDIA_BASE_YUV_CONVERT_H_
|
||||
#define MEDIA_BASE_YUV_CONVERT_H_
|
||||
|
||||
#include "chromium_types.h"
|
||||
#include "gfxCore.h"
|
||||
+
|
||||
+#ifdef __arm__
|
||||
+#define HAVE_YCBCR_TO_RGB565 1
|
||||
+#endif
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
namespace gfx {
|
||||
|
||||
// Type of YUV surface.
|
||||
// The value of these enums matter as they are used to shift vertical indices.
|
||||
enum YUVType {
|
||||
@@ -36,16 +40,31 @@ enum Rotate {
|
||||
// Filter affects how scaling looks.
|
||||
enum ScaleFilter {
|
||||
FILTER_NONE = 0, // No filter (point sampled).
|
||||
FILTER_BILINEAR_H = 1, // Bilinear horizontal filter.
|
||||
FILTER_BILINEAR_V = 2, // Bilinear vertical filter.
|
||||
FILTER_BILINEAR = 3 // Bilinear filter.
|
||||
};
|
||||
|
||||
+// Convert a frame of YUV to 16 bit RGB565.
|
||||
+// Pass in YV12 formats
|
||||
+NS_GFX_(void) ConvertYCbCrToRGB565(const uint8* yplane,
|
||||
+ const uint8* uplane,
|
||||
+ const uint8* vplane,
|
||||
+ uint8* rgbframe,
|
||||
+ int pic_x,
|
||||
+ int pic_y,
|
||||
+ int pic_width,
|
||||
+ int pic_height,
|
||||
+ int ystride,
|
||||
+ int uvstride,
|
||||
+ int rgbstride,
|
||||
+ YUVType yuv_type);
|
||||
+
|
||||
// Convert a frame of YUV to 32 bit ARGB.
|
||||
// Pass in YV16/YV12 depending on source format
|
||||
NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
|
||||
const uint8* uplane,
|
||||
const uint8* vplane,
|
||||
uint8* rgbframe,
|
||||
int pic_x,
|
||||
int pic_y,
|
||||
diff --git a/gfx/ycbcr/yuv_convert_arm.cpp b/gfx/ycbcr/yuv_convert_arm.cpp
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/gfx/ycbcr/yuv_convert_arm.cpp
|
||||
@@ -0,0 +1,201 @@
|
||||
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
|
||||
+// Use of this source code is governed by a BSD-style license that can be
|
||||
+// found in the LICENSE file.
|
||||
+
|
||||
+// contributor Siarhei Siamashka <siarhei.siamashka@gmail.com>
|
||||
+
|
||||
+#include "yuv_convert.h"
|
||||
+
|
||||
+void __attribute((noinline)) yv12_to_rgb565_neon(uint16 *dst, const uint8 *y, const uint8 *u, const uint8 *v, int n, int oddflag)
|
||||
+{
|
||||
+ static __attribute__((aligned(16))) uint16 acc_r[8] = {
|
||||
+ 22840, 22840, 22840, 22840, 22840, 22840, 22840, 22840,
|
||||
+ };
|
||||
+ static __attribute__((aligned(16))) uint16 acc_g[8] = {
|
||||
+ 17312, 17312, 17312, 17312, 17312, 17312, 17312, 17312,
|
||||
+ };
|
||||
+ static __attribute__((aligned(16))) uint16 acc_b[8] = {
|
||||
+ 28832, 28832, 28832, 28832, 28832, 28832, 28832, 28832,
|
||||
+ };
|
||||
+ /*
|
||||
+ * Registers:
|
||||
+ * q0, q1 : d0, d1, d2, d3 - are used for initial loading of YUV data
|
||||
+ * q2 : d4, d5 - are used for storing converted RGB data
|
||||
+ * q3 : d6, d7 - are used for temporary storage
|
||||
+ *
|
||||
+ * q4-q7 - reserved
|
||||
+ *
|
||||
+ * q8, q9 : d16, d17, d18, d19 - are used for expanded Y data
|
||||
+ * q10 : d20, d21
|
||||
+ * q11 : d22, d23
|
||||
+ * q12 : d24, d25
|
||||
+ * q13 : d26, d27
|
||||
+ * q13, q14, q15 - various constants (#16, #149, #204, #50, #104, #154)
|
||||
+ */
|
||||
+ asm volatile (
|
||||
+".fpu neon\n"
|
||||
+".macro convert_macroblock size\n"
|
||||
+/* load up to 16 source pixels */
|
||||
+ ".if \\size == 16\n"
|
||||
+ "pld [%[y], #64]\n"
|
||||
+ "pld [%[u], #64]\n"
|
||||
+ "pld [%[v], #64]\n"
|
||||
+ "vld1.8 {d1}, [%[y]]!\n"
|
||||
+ "vld1.8 {d3}, [%[y]]!\n"
|
||||
+ "vld1.8 {d0}, [%[u]]!\n"
|
||||
+ "vld1.8 {d2}, [%[v]]!\n"
|
||||
+ ".elseif \\size == 8\n"
|
||||
+ "vld1.8 {d1}, [%[y]]!\n"
|
||||
+ "vld1.8 {d0[0]}, [%[u]]!\n"
|
||||
+ "vld1.8 {d0[1]}, [%[u]]!\n"
|
||||
+ "vld1.8 {d0[2]}, [%[u]]!\n"
|
||||
+ "vld1.8 {d0[3]}, [%[u]]!\n"
|
||||
+ "vld1.8 {d2[0]}, [%[v]]!\n"
|
||||
+ "vld1.8 {d2[1]}, [%[v]]!\n"
|
||||
+ "vld1.8 {d2[2]}, [%[v]]!\n"
|
||||
+ "vld1.8 {d2[3]}, [%[v]]!\n"
|
||||
+ ".elseif \\size == 4\n"
|
||||
+ "vld1.8 {d1[0]}, [%[y]]!\n"
|
||||
+ "vld1.8 {d1[1]}, [%[y]]!\n"
|
||||
+ "vld1.8 {d1[2]}, [%[y]]!\n"
|
||||
+ "vld1.8 {d1[3]}, [%[y]]!\n"
|
||||
+ "vld1.8 {d0[0]}, [%[u]]!\n"
|
||||
+ "vld1.8 {d0[1]}, [%[u]]!\n"
|
||||
+ "vld1.8 {d2[0]}, [%[v]]!\n"
|
||||
+ "vld1.8 {d2[1]}, [%[v]]!\n"
|
||||
+ ".elseif \\size == 2\n"
|
||||
+ "vld1.8 {d1[0]}, [%[y]]!\n"
|
||||
+ "vld1.8 {d1[1]}, [%[y]]!\n"
|
||||
+ "vld1.8 {d0[0]}, [%[u]]!\n"
|
||||
+ "vld1.8 {d2[0]}, [%[v]]!\n"
|
||||
+ ".elseif \\size == 1\n"
|
||||
+ "vld1.8 {d1[0]}, [%[y]]!\n"
|
||||
+ "vld1.8 {d0[0]}, [%[u]]!\n"
|
||||
+ "vld1.8 {d2[0]}, [%[v]]!\n"
|
||||
+ ".else\n"
|
||||
+ ".error \"unsupported macroblock size\"\n"
|
||||
+ ".endif\n"
|
||||
+
|
||||
+ /* d1 - Y data (first 8 bytes) */
|
||||
+ /* d3 - Y data (next 8 bytes) */
|
||||
+ /* d0 - U data, d2 - V data */
|
||||
+
|
||||
+ /* split even and odd Y color components */
|
||||
+ "vuzp.8 d1, d3\n" /* d1 - evenY, d3 - oddY */
|
||||
+ /* clip upper and lower boundaries */
|
||||
+ "vqadd.u8 q0, q0, q4\n"
|
||||
+ "vqadd.u8 q1, q1, q4\n"
|
||||
+ "vqsub.u8 q0, q0, q5\n"
|
||||
+ "vqsub.u8 q1, q1, q5\n"
|
||||
+
|
||||
+ "vshr.u8 d4, d2, #1\n" /* d4 = V >> 1 */
|
||||
+
|
||||
+ "vmull.u8 q8, d1, d27\n" /* q8 = evenY * 149 */
|
||||
+ "vmull.u8 q9, d3, d27\n" /* q9 = oddY * 149 */
|
||||
+
|
||||
+ "vld1.16 {d20, d21}, [%[acc_r], :128]\n" /* q10 - initialize accumulator for red */
|
||||
+ "vsubw.u8 q10, q10, d4\n" /* red acc -= (V >> 1) */
|
||||
+ "vmlsl.u8 q10, d2, d28\n" /* red acc -= V * 204 */
|
||||
+ "vld1.16 {d22, d23}, [%[acc_g], :128]\n" /* q11 - initialize accumulator for green */
|
||||
+ "vmlsl.u8 q11, d2, d30\n" /* green acc -= V * 104 */
|
||||
+ "vmlsl.u8 q11, d0, d29\n" /* green acc -= U * 50 */
|
||||
+ "vld1.16 {d24, d25}, [%[acc_b], :128]\n" /* q12 - initialize accumulator for blue */
|
||||
+ "vmlsl.u8 q12, d0, d30\n" /* blue acc -= U * 104 */
|
||||
+ "vmlsl.u8 q12, d0, d31\n" /* blue acc -= U * 154 */
|
||||
+
|
||||
+ "vhsub.s16 q3, q8, q10\n" /* calculate even red components */
|
||||
+ "vhsub.s16 q10, q9, q10\n" /* calculate odd red components */
|
||||
+ "vqshrun.s16 d0, q3, #6\n" /* right shift, narrow and saturate even red components */
|
||||
+ "vqshrun.s16 d3, q10, #6\n" /* right shift, narrow and saturate odd red components */
|
||||
+
|
||||
+ "vhadd.s16 q3, q8, q11\n" /* calculate even green components */
|
||||
+ "vhadd.s16 q11, q9, q11\n" /* calculate odd green components */
|
||||
+ "vqshrun.s16 d1, q3, #6\n" /* right shift, narrow and saturate even green components */
|
||||
+ "vqshrun.s16 d4, q11, #6\n" /* right shift, narrow and saturate odd green components */
|
||||
+
|
||||
+ "vhsub.s16 q3, q8, q12\n" /* calculate even blue components */
|
||||
+ "vhsub.s16 q12, q9, q12\n" /* calculate odd blue components */
|
||||
+ "vqshrun.s16 d2, q3, #6\n" /* right shift, narrow and saturate even blue components */
|
||||
+ "vqshrun.s16 d5, q12, #6\n" /* right shift, narrow and saturate odd blue components */
|
||||
+
|
||||
+ "vzip.8 d0, d3\n" /* join even and odd red components */
|
||||
+ "vzip.8 d1, d4\n" /* join even and odd green components */
|
||||
+ "vzip.8 d2, d5\n" /* join even and odd blue components */
|
||||
+
|
||||
+ "vshll.u8 q3, d0, #8\n\t"
|
||||
+ "vshll.u8 q8, d1, #8\n\t"
|
||||
+ "vshll.u8 q9, d2, #8\n\t"
|
||||
+ "vsri.u16 q3, q8, #5\t\n"
|
||||
+ "vsri.u16 q3, q9, #11\t\n"
|
||||
+ /* store pixel data to memory */
|
||||
+ ".if \\size == 16\n"
|
||||
+ " vst1.16 {d6, d7}, [%[dst]]!\n"
|
||||
+ " vshll.u8 q3, d3, #8\n\t"
|
||||
+ " vshll.u8 q8, d4, #8\n\t"
|
||||
+ " vshll.u8 q9, d5, #8\n\t"
|
||||
+ " vsri.u16 q3, q8, #5\t\n"
|
||||
+ " vsri.u16 q3, q9, #11\t\n"
|
||||
+ " vst1.16 {d6, d7}, [%[dst]]!\n"
|
||||
+ ".elseif \\size == 8\n"
|
||||
+ " vst1.16 {d6, d7}, [%[dst]]!\n"
|
||||
+ ".elseif \\size == 4\n"
|
||||
+ " vst1.16 {d6}, [%[dst]]!\n"
|
||||
+ ".elseif \\size == 2\n"
|
||||
+ " vst1.16 {d6[0]}, [%[dst]]!\n"
|
||||
+ " vst1.16 {d6[1]}, [%[dst]]!\n"
|
||||
+ ".elseif \\size == 1\n"
|
||||
+ " vst1.16 {d6[0]}, [%[dst]]!\n"
|
||||
+ ".endif\n"
|
||||
+ ".endm\n"
|
||||
+
|
||||
+ "vmov.u8 d8, #15\n" /* add this to U/V to saturate upper boundary */
|
||||
+ "vmov.u8 d9, #20\n" /* add this to Y to saturate upper boundary */
|
||||
+ "vmov.u8 d10, #31\n" /* sub this from U/V to saturate lower boundary */
|
||||
+ "vmov.u8 d11, #36\n" /* sub this from Y to saturate lower boundary */
|
||||
+
|
||||
+ "vmov.u8 d26, #16\n"
|
||||
+ "vmov.u8 d27, #149\n"
|
||||
+ "vmov.u8 d28, #204\n"
|
||||
+ "vmov.u8 d29, #50\n"
|
||||
+ "vmov.u8 d30, #104\n"
|
||||
+ "vmov.u8 d31, #154\n"
|
||||
+
|
||||
+ "cmp %[oddflag], #0\n"
|
||||
+ "beq 1f\n"
|
||||
+ "convert_macroblock 1\n"
|
||||
+ "sub %[n], %[n], #1\n"
|
||||
+ "1:\n"
|
||||
+ "subs %[n], %[n], #16\n"
|
||||
+ "blt 2f\n"
|
||||
+ "1:\n"
|
||||
+ "convert_macroblock 16\n"
|
||||
+ "subs %[n], %[n], #16\n"
|
||||
+ "bge 1b\n"
|
||||
+ "2:\n"
|
||||
+ "tst %[n], #8\n"
|
||||
+ "beq 3f\n"
|
||||
+ "convert_macroblock 8\n"
|
||||
+ "3:\n"
|
||||
+ "tst %[n], #4\n"
|
||||
+ "beq 4f\n"
|
||||
+ "convert_macroblock 4\n"
|
||||
+ "4:\n"
|
||||
+ "tst %[n], #2\n"
|
||||
+ "beq 5f\n"
|
||||
+ "convert_macroblock 2\n"
|
||||
+ "5:\n"
|
||||
+ "tst %[n], #1\n"
|
||||
+ "beq 6f\n"
|
||||
+ "convert_macroblock 1\n"
|
||||
+ "6:\n"
|
||||
+ ".purgem convert_macroblock\n"
|
||||
+ : [y] "+&r" (y), [u] "+&r" (u), [v] "+&r" (v), [dst] "+&r" (dst), [n] "+&r" (n)
|
||||
+ : [acc_r] "r" (&acc_r[0]), [acc_g] "r" (&acc_g[0]), [acc_b] "r" (&acc_b[0]),
|
||||
+ [oddflag] "r" (oddflag)
|
||||
+ : "cc", "memory",
|
||||
+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
|
||||
+ "d8", "d9", "d10", "d11", /* "d12", "d13", "d14", "d15", */
|
||||
+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
|
||||
+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"
|
||||
+ );
|
||||
+}
|
File diff suppressed because it is too large
Load Diff
@ -7,4 +7,3 @@ cp $1/media/base/yuv_row_posix.cc yuv_row_posix.cpp
|
||||
cp $1/media/base/yuv_row_win.cc yuv_row_win.cpp
|
||||
cp $1/media/base/yuv_row_posix.cc yuv_row_c.cpp
|
||||
patch -p3 <convert.patch
|
||||
patch -p3 <arm.patch
|
||||
|
Loading…
Reference in New Issue
Block a user