mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
5b18a38cd7
--HG-- rename : media/webrtc/trunk/third_party/libyuv/AUTHORS => media/libyuv/AUTHORS rename : media/webrtc/trunk/third_party/libyuv/Android.mk => media/libyuv/Android.mk rename : media/webrtc/trunk/third_party/libyuv/DEPS => media/libyuv/DEPS rename : media/webrtc/trunk/third_party/libyuv/LICENSE => media/libyuv/LICENSE rename : media/webrtc/trunk/third_party/libyuv/LICENSE_THIRD_PARTY => media/libyuv/LICENSE_THIRD_PARTY rename : media/webrtc/trunk/third_party/libyuv/OWNERS => media/libyuv/OWNERS rename : media/webrtc/trunk/third_party/libyuv/PATENTS => media/libyuv/PATENTS rename : media/webrtc/trunk/third_party/libyuv/README.chromium => media/libyuv/README.chromium rename : media/webrtc/trunk/third_party/libyuv/all.gyp => media/libyuv/all.gyp rename : media/webrtc/trunk/third_party/libyuv/codereview.settings => media/libyuv/codereview.settings rename : media/webrtc/trunk/third_party/libyuv/include/libyuv.h => media/libyuv/include/libyuv.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/basic_types.h => media/libyuv/include/libyuv/basic_types.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/compare.h => media/libyuv/include/libyuv/compare.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/convert.h => media/libyuv/include/libyuv/convert.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/convert_argb.h => media/libyuv/include/libyuv/convert_argb.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/convert_from.h => media/libyuv/include/libyuv/convert_from.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/convert_from_argb.h => media/libyuv/include/libyuv/convert_from_argb.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/cpu_id.h => media/libyuv/include/libyuv/cpu_id.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/format_conversion.h => media/libyuv/include/libyuv/format_conversion.h rename : 
media/webrtc/trunk/third_party/libyuv/include/libyuv/mjpeg_decoder.h => media/libyuv/include/libyuv/mjpeg_decoder.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/planar_functions.h => media/libyuv/include/libyuv/planar_functions.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/rotate.h => media/libyuv/include/libyuv/rotate.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/rotate_argb.h => media/libyuv/include/libyuv/rotate_argb.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/row.h => media/libyuv/include/libyuv/row.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/scale.h => media/libyuv/include/libyuv/scale.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/scale_argb.h => media/libyuv/include/libyuv/scale_argb.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/scale_row.h => media/libyuv/include/libyuv/scale_row.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/version.h => media/libyuv/include/libyuv/version.h rename : media/webrtc/trunk/third_party/libyuv/include/libyuv/video_common.h => media/libyuv/include/libyuv/video_common.h rename : media/webrtc/trunk/third_party/libyuv/libyuv.gyp => media/libyuv/libyuv.gyp rename : media/webrtc/trunk/third_party/libyuv/libyuv.gypi => media/libyuv/libyuv.gypi rename : media/webrtc/trunk/third_party/libyuv/libyuv_nacl.gyp => media/libyuv/libyuv_nacl.gyp rename : media/webrtc/trunk/third_party/libyuv/libyuv_test.gyp => media/libyuv/libyuv_test.gyp rename : media/webrtc/trunk/third_party/libyuv/linux.mk => media/libyuv/linux.mk rename : media/webrtc/trunk/third_party/libyuv/public.mk => media/libyuv/public.mk rename : media/webrtc/trunk/third_party/libyuv/source/compare.cc => media/libyuv/source/compare.cc rename : media/webrtc/trunk/third_party/libyuv/source/compare_common.cc => media/libyuv/source/compare_common.cc rename : media/webrtc/trunk/third_party/libyuv/source/compare_neon.cc => 
media/libyuv/source/compare_neon.cc rename : media/webrtc/trunk/third_party/libyuv/source/compare_posix.cc => media/libyuv/source/compare_posix.cc rename : media/webrtc/trunk/third_party/libyuv/source/compare_win.cc => media/libyuv/source/compare_win.cc rename : media/webrtc/trunk/third_party/libyuv/source/convert.cc => media/libyuv/source/convert.cc rename : media/webrtc/trunk/third_party/libyuv/source/convert_argb.cc => media/libyuv/source/convert_argb.cc rename : media/webrtc/trunk/third_party/libyuv/source/convert_from.cc => media/libyuv/source/convert_from.cc rename : media/webrtc/trunk/third_party/libyuv/source/convert_from_argb.cc => media/libyuv/source/convert_from_argb.cc rename : media/webrtc/trunk/third_party/libyuv/source/convert_jpeg.cc => media/libyuv/source/convert_jpeg.cc rename : media/webrtc/trunk/third_party/libyuv/source/convert_to_argb.cc => media/libyuv/source/convert_to_argb.cc rename : media/webrtc/trunk/third_party/libyuv/source/convert_to_i420.cc => media/libyuv/source/convert_to_i420.cc rename : media/webrtc/trunk/third_party/libyuv/source/cpu_id.cc => media/libyuv/source/cpu_id.cc rename : media/webrtc/trunk/third_party/libyuv/source/format_conversion.cc => media/libyuv/source/format_conversion.cc rename : media/webrtc/trunk/third_party/libyuv/source/mjpeg_decoder.cc => media/libyuv/source/mjpeg_decoder.cc rename : media/webrtc/trunk/third_party/libyuv/source/mjpeg_validate.cc => media/libyuv/source/mjpeg_validate.cc rename : media/webrtc/trunk/third_party/libyuv/source/planar_functions.cc => media/libyuv/source/planar_functions.cc rename : media/webrtc/trunk/third_party/libyuv/source/rotate.cc => media/libyuv/source/rotate.cc rename : media/webrtc/trunk/third_party/libyuv/source/rotate_argb.cc => media/libyuv/source/rotate_argb.cc rename : media/webrtc/trunk/third_party/libyuv/source/rotate_mips.cc => media/libyuv/source/rotate_mips.cc rename : media/webrtc/trunk/third_party/libyuv/source/rotate_neon.cc => 
media/libyuv/source/rotate_neon.cc rename : media/webrtc/trunk/third_party/libyuv/source/row_any.cc => media/libyuv/source/row_any.cc rename : media/webrtc/trunk/third_party/libyuv/source/row_common.cc => media/libyuv/source/row_common.cc rename : media/webrtc/trunk/third_party/libyuv/source/row_mips.cc => media/libyuv/source/row_mips.cc rename : media/webrtc/trunk/third_party/libyuv/source/row_neon.cc => media/libyuv/source/row_neon.cc rename : media/webrtc/trunk/third_party/libyuv/source/row_posix.cc => media/libyuv/source/row_posix.cc rename : media/webrtc/trunk/third_party/libyuv/source/row_win.cc => media/libyuv/source/row_win.cc rename : media/webrtc/trunk/third_party/libyuv/source/row_x86.asm => media/libyuv/source/row_x86.asm rename : media/webrtc/trunk/third_party/libyuv/source/scale.cc => media/libyuv/source/scale.cc rename : media/webrtc/trunk/third_party/libyuv/source/scale_argb.cc => media/libyuv/source/scale_argb.cc rename : media/webrtc/trunk/third_party/libyuv/source/scale_common.cc => media/libyuv/source/scale_common.cc rename : media/webrtc/trunk/third_party/libyuv/source/scale_mips.cc => media/libyuv/source/scale_mips.cc rename : media/webrtc/trunk/third_party/libyuv/source/scale_neon.cc => media/libyuv/source/scale_neon.cc rename : media/webrtc/trunk/third_party/libyuv/source/scale_posix.cc => media/libyuv/source/scale_posix.cc rename : media/webrtc/trunk/third_party/libyuv/source/scale_win.cc => media/libyuv/source/scale_win.cc rename : media/webrtc/trunk/third_party/libyuv/source/video_common.cc => media/libyuv/source/video_common.cc rename : media/webrtc/trunk/third_party/libyuv/source/x86inc.asm => media/libyuv/source/x86inc.asm rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/libyuv_tests.bat => media/libyuv/tools/valgrind-libyuv/libyuv_tests.bat rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/libyuv_tests.py => media/libyuv/tools/valgrind-libyuv/libyuv_tests.py rename : 
media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/libyuv_tests.sh => media/libyuv/tools/valgrind-libyuv/libyuv_tests.sh rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/memcheck/OWNERS => media/libyuv/tools/valgrind-libyuv/memcheck/OWNERS rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/memcheck/PRESUBMIT.py => media/libyuv/tools/valgrind-libyuv/memcheck/PRESUBMIT.py rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/memcheck/suppressions.txt => media/libyuv/tools/valgrind-libyuv/memcheck/suppressions.txt rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/memcheck/suppressions_mac.txt => media/libyuv/tools/valgrind-libyuv/memcheck/suppressions_mac.txt rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/memcheck/suppressions_win32.txt => media/libyuv/tools/valgrind-libyuv/memcheck/suppressions_win32.txt rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/tsan/OWNERS => media/libyuv/tools/valgrind-libyuv/tsan/OWNERS rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/tsan/PRESUBMIT.py => media/libyuv/tools/valgrind-libyuv/tsan/PRESUBMIT.py rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/tsan/suppressions.txt => media/libyuv/tools/valgrind-libyuv/tsan/suppressions.txt rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/tsan/suppressions_mac.txt => media/libyuv/tools/valgrind-libyuv/tsan/suppressions_mac.txt rename : media/webrtc/trunk/third_party/libyuv/tools/valgrind-libyuv/tsan/suppressions_win32.txt => media/libyuv/tools/valgrind-libyuv/tsan/suppressions_win32.txt rename : media/webrtc/trunk/third_party/libyuv/unit_test/basictypes_test.cc => media/libyuv/unit_test/basictypes_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/compare_test.cc => media/libyuv/unit_test/compare_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/convert_test.cc => 
media/libyuv/unit_test/convert_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/cpu_test.cc => media/libyuv/unit_test/cpu_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/math_test.cc => media/libyuv/unit_test/math_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/planar_test.cc => media/libyuv/unit_test/planar_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/rotate_argb_test.cc => media/libyuv/unit_test/rotate_argb_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/rotate_test.cc => media/libyuv/unit_test/rotate_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/scale_argb_test.cc => media/libyuv/unit_test/scale_argb_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/scale_test.cc => media/libyuv/unit_test/scale_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/testdata/arm_v7.txt => media/libyuv/unit_test/testdata/arm_v7.txt rename : media/webrtc/trunk/third_party/libyuv/unit_test/testdata/tegra3.txt => media/libyuv/unit_test/testdata/tegra3.txt rename : media/webrtc/trunk/third_party/libyuv/unit_test/unit_test.cc => media/libyuv/unit_test/unit_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/unit_test.h => media/libyuv/unit_test/unit_test.h rename : media/webrtc/trunk/third_party/libyuv/unit_test/version_test.cc => media/libyuv/unit_test/version_test.cc rename : media/webrtc/trunk/third_party/libyuv/unit_test/video_common_test.cc => media/libyuv/unit_test/video_common_test.cc rename : media/webrtc/trunk/third_party/libyuv/util/Makefile => media/libyuv/util/Makefile rename : media/webrtc/trunk/third_party/libyuv/util/compare.cc => media/libyuv/util/compare.cc rename : media/webrtc/trunk/third_party/libyuv/util/convert.cc => media/libyuv/util/convert.cc rename : media/webrtc/trunk/third_party/libyuv/util/cpuid.c => media/libyuv/util/cpuid.c rename : media/webrtc/trunk/third_party/libyuv/util/psnr.cc => media/libyuv/util/psnr.cc 
rename : media/webrtc/trunk/third_party/libyuv/util/psnr.h => media/libyuv/util/psnr.h rename : media/webrtc/trunk/third_party/libyuv/util/psnr_main.cc => media/libyuv/util/psnr_main.cc rename : media/webrtc/trunk/third_party/libyuv/util/ssim.cc => media/libyuv/util/ssim.cc rename : media/webrtc/trunk/third_party/libyuv/util/ssim.h => media/libyuv/util/ssim.h rename : media/webrtc/trunk/third_party/libyuv/winarm.mk => media/libyuv/winarm.mk
487 lines
23 KiB
C++
487 lines
23 KiB
C++
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
|
|
|
|
#include "libyuv/row.h"
|
|
|
|
#include "libyuv/basic_types.h"
|
|
|
|
#ifdef __cplusplus
|
|
namespace libyuv {
|
|
extern "C" {
|
|
#endif
|
|
|
|
#if !defined(LIBYUV_DISABLE_MIPS) && \
|
|
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
|
|
|
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
|
|
uint8* dst, int dst_stride,
|
|
int width) {
|
|
__asm__ __volatile__ (
|
|
".set push \n"
|
|
".set noreorder \n"
|
|
"sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
|
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
|
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
|
"addu $t3, $t2, %[src_stride] \n"
|
|
"addu $t5, $t4, %[src_stride] \n"
|
|
"addu $t6, $t2, $t4 \n"
|
|
"andi $t0, %[dst], 0x3 \n"
|
|
"andi $t1, %[dst_stride], 0x3 \n"
|
|
"or $t0, $t0, $t1 \n"
|
|
"bnez $t0, 11f \n"
|
|
" subu $t7, $t9, %[src_stride] \n"
|
|
//dst + dst_stride word aligned
|
|
"1: \n"
|
|
"lbu $t0, 0(%[src]) \n"
|
|
"lbux $t1, %[src_stride](%[src]) \n"
|
|
"lbux $t8, $t2(%[src]) \n"
|
|
"lbux $t9, $t3(%[src]) \n"
|
|
"sll $t1, $t1, 16 \n"
|
|
"sll $t9, $t9, 16 \n"
|
|
"or $t0, $t0, $t1 \n"
|
|
"or $t8, $t8, $t9 \n"
|
|
"precr.qb.ph $s0, $t8, $t0 \n"
|
|
"lbux $t0, $t4(%[src]) \n"
|
|
"lbux $t1, $t5(%[src]) \n"
|
|
"lbux $t8, $t6(%[src]) \n"
|
|
"lbux $t9, $t7(%[src]) \n"
|
|
"sll $t1, $t1, 16 \n"
|
|
"sll $t9, $t9, 16 \n"
|
|
"or $t0, $t0, $t1 \n"
|
|
"or $t8, $t8, $t9 \n"
|
|
"precr.qb.ph $s1, $t8, $t0 \n"
|
|
"sw $s0, 0(%[dst]) \n"
|
|
"addiu %[width], -1 \n"
|
|
"addiu %[src], 1 \n"
|
|
"sw $s1, 4(%[dst]) \n"
|
|
"bnez %[width], 1b \n"
|
|
" addu %[dst], %[dst], %[dst_stride] \n"
|
|
"b 2f \n"
|
|
//dst + dst_stride unaligned
|
|
"11: \n"
|
|
"lbu $t0, 0(%[src]) \n"
|
|
"lbux $t1, %[src_stride](%[src]) \n"
|
|
"lbux $t8, $t2(%[src]) \n"
|
|
"lbux $t9, $t3(%[src]) \n"
|
|
"sll $t1, $t1, 16 \n"
|
|
"sll $t9, $t9, 16 \n"
|
|
"or $t0, $t0, $t1 \n"
|
|
"or $t8, $t8, $t9 \n"
|
|
"precr.qb.ph $s0, $t8, $t0 \n"
|
|
"lbux $t0, $t4(%[src]) \n"
|
|
"lbux $t1, $t5(%[src]) \n"
|
|
"lbux $t8, $t6(%[src]) \n"
|
|
"lbux $t9, $t7(%[src]) \n"
|
|
"sll $t1, $t1, 16 \n"
|
|
"sll $t9, $t9, 16 \n"
|
|
"or $t0, $t0, $t1 \n"
|
|
"or $t8, $t8, $t9 \n"
|
|
"precr.qb.ph $s1, $t8, $t0 \n"
|
|
"swr $s0, 0(%[dst]) \n"
|
|
"swl $s0, 3(%[dst]) \n"
|
|
"addiu %[width], -1 \n"
|
|
"addiu %[src], 1 \n"
|
|
"swr $s1, 4(%[dst]) \n"
|
|
"swl $s1, 7(%[dst]) \n"
|
|
"bnez %[width], 11b \n"
|
|
"addu %[dst], %[dst], %[dst_stride] \n"
|
|
"2: \n"
|
|
".set pop \n"
|
|
:[src] "+r" (src),
|
|
[dst] "+r" (dst),
|
|
[width] "+r" (width)
|
|
:[src_stride] "r" (src_stride),
|
|
[dst_stride] "r" (dst_stride)
|
|
: "t0", "t1", "t2", "t3", "t4", "t5",
|
|
"t6", "t7", "t8", "t9",
|
|
"s0", "s1"
|
|
);
|
|
}
|
|
|
|
void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
|
|
uint8* dst, int dst_stride,
|
|
int width) {
|
|
__asm__ __volatile__ (
|
|
".set noat \n"
|
|
".set push \n"
|
|
".set noreorder \n"
|
|
"beqz %[width], 2f \n"
|
|
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
|
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
|
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
|
"addu $t3, $t2, %[src_stride] \n"
|
|
"addu $t5, $t4, %[src_stride] \n"
|
|
"addu $t6, $t2, $t4 \n"
|
|
|
|
"srl $AT, %[width], 0x2 \n"
|
|
"andi $t0, %[dst], 0x3 \n"
|
|
"andi $t1, %[dst_stride], 0x3 \n"
|
|
"or $t0, $t0, $t1 \n"
|
|
"bnez $t0, 11f \n"
|
|
" subu $t7, $t9, %[src_stride] \n"
|
|
//dst + dst_stride word aligned
|
|
"1: \n"
|
|
"lw $t0, 0(%[src]) \n"
|
|
"lwx $t1, %[src_stride](%[src]) \n"
|
|
"lwx $t8, $t2(%[src]) \n"
|
|
"lwx $t9, $t3(%[src]) \n"
|
|
|
|
// t0 = | 30 | 20 | 10 | 00 |
|
|
// t1 = | 31 | 21 | 11 | 01 |
|
|
// t8 = | 32 | 22 | 12 | 02 |
|
|
// t9 = | 33 | 23 | 13 | 03 |
|
|
|
|
"precr.qb.ph $s0, $t1, $t0 \n"
|
|
"precr.qb.ph $s1, $t9, $t8 \n"
|
|
"precrq.qb.ph $s2, $t1, $t0 \n"
|
|
"precrq.qb.ph $s3, $t9, $t8 \n"
|
|
|
|
// s0 = | 21 | 01 | 20 | 00 |
|
|
// s1 = | 23 | 03 | 22 | 02 |
|
|
// s2 = | 31 | 11 | 30 | 10 |
|
|
// s3 = | 33 | 13 | 32 | 12 |
|
|
|
|
"precr.qb.ph $s4, $s1, $s0 \n"
|
|
"precrq.qb.ph $s5, $s1, $s0 \n"
|
|
"precr.qb.ph $s6, $s3, $s2 \n"
|
|
"precrq.qb.ph $s7, $s3, $s2 \n"
|
|
|
|
// s4 = | 03 | 02 | 01 | 00 |
|
|
// s5 = | 23 | 22 | 21 | 20 |
|
|
// s6 = | 13 | 12 | 11 | 10 |
|
|
// s7 = | 33 | 32 | 31 | 30 |
|
|
|
|
"lwx $t0, $t4(%[src]) \n"
|
|
"lwx $t1, $t5(%[src]) \n"
|
|
"lwx $t8, $t6(%[src]) \n"
|
|
"lwx $t9, $t7(%[src]) \n"
|
|
|
|
// t0 = | 34 | 24 | 14 | 04 |
|
|
// t1 = | 35 | 25 | 15 | 05 |
|
|
// t8 = | 36 | 26 | 16 | 06 |
|
|
// t9 = | 37 | 27 | 17 | 07 |
|
|
|
|
"precr.qb.ph $s0, $t1, $t0 \n"
|
|
"precr.qb.ph $s1, $t9, $t8 \n"
|
|
"precrq.qb.ph $s2, $t1, $t0 \n"
|
|
"precrq.qb.ph $s3, $t9, $t8 \n"
|
|
|
|
// s0 = | 25 | 05 | 24 | 04 |
|
|
// s1 = | 27 | 07 | 26 | 06 |
|
|
// s2 = | 35 | 15 | 34 | 14 |
|
|
// s3 = | 37 | 17 | 36 | 16 |
|
|
|
|
"precr.qb.ph $t0, $s1, $s0 \n"
|
|
"precrq.qb.ph $t1, $s1, $s0 \n"
|
|
"precr.qb.ph $t8, $s3, $s2 \n"
|
|
"precrq.qb.ph $t9, $s3, $s2 \n"
|
|
|
|
// t0 = | 07 | 06 | 05 | 04 |
|
|
// t1 = | 27 | 26 | 25 | 24 |
|
|
// t8 = | 17 | 16 | 15 | 14 |
|
|
// t9 = | 37 | 36 | 35 | 34 |
|
|
|
|
"addu $s0, %[dst], %[dst_stride] \n"
|
|
"addu $s1, $s0, %[dst_stride] \n"
|
|
"addu $s2, $s1, %[dst_stride] \n"
|
|
|
|
"sw $s4, 0(%[dst]) \n"
|
|
"sw $t0, 4(%[dst]) \n"
|
|
"sw $s6, 0($s0) \n"
|
|
"sw $t8, 4($s0) \n"
|
|
"sw $s5, 0($s1) \n"
|
|
"sw $t1, 4($s1) \n"
|
|
"sw $s7, 0($s2) \n"
|
|
"sw $t9, 4($s2) \n"
|
|
|
|
"addiu $AT, -1 \n"
|
|
"addiu %[src], 4 \n"
|
|
|
|
"bnez $AT, 1b \n"
|
|
" addu %[dst], $s2, %[dst_stride] \n"
|
|
"b 2f \n"
|
|
//dst + dst_stride unaligned
|
|
"11: \n"
|
|
"lw $t0, 0(%[src]) \n"
|
|
"lwx $t1, %[src_stride](%[src]) \n"
|
|
"lwx $t8, $t2(%[src]) \n"
|
|
"lwx $t9, $t3(%[src]) \n"
|
|
|
|
// t0 = | 30 | 20 | 10 | 00 |
|
|
// t1 = | 31 | 21 | 11 | 01 |
|
|
// t8 = | 32 | 22 | 12 | 02 |
|
|
// t9 = | 33 | 23 | 13 | 03 |
|
|
|
|
"precr.qb.ph $s0, $t1, $t0 \n"
|
|
"precr.qb.ph $s1, $t9, $t8 \n"
|
|
"precrq.qb.ph $s2, $t1, $t0 \n"
|
|
"precrq.qb.ph $s3, $t9, $t8 \n"
|
|
|
|
// s0 = | 21 | 01 | 20 | 00 |
|
|
// s1 = | 23 | 03 | 22 | 02 |
|
|
// s2 = | 31 | 11 | 30 | 10 |
|
|
// s3 = | 33 | 13 | 32 | 12 |
|
|
|
|
"precr.qb.ph $s4, $s1, $s0 \n"
|
|
"precrq.qb.ph $s5, $s1, $s0 \n"
|
|
"precr.qb.ph $s6, $s3, $s2 \n"
|
|
"precrq.qb.ph $s7, $s3, $s2 \n"
|
|
|
|
// s4 = | 03 | 02 | 01 | 00 |
|
|
// s5 = | 23 | 22 | 21 | 20 |
|
|
// s6 = | 13 | 12 | 11 | 10 |
|
|
// s7 = | 33 | 32 | 31 | 30 |
|
|
|
|
"lwx $t0, $t4(%[src]) \n"
|
|
"lwx $t1, $t5(%[src]) \n"
|
|
"lwx $t8, $t6(%[src]) \n"
|
|
"lwx $t9, $t7(%[src]) \n"
|
|
|
|
// t0 = | 34 | 24 | 14 | 04 |
|
|
// t1 = | 35 | 25 | 15 | 05 |
|
|
// t8 = | 36 | 26 | 16 | 06 |
|
|
// t9 = | 37 | 27 | 17 | 07 |
|
|
|
|
"precr.qb.ph $s0, $t1, $t0 \n"
|
|
"precr.qb.ph $s1, $t9, $t8 \n"
|
|
"precrq.qb.ph $s2, $t1, $t0 \n"
|
|
"precrq.qb.ph $s3, $t9, $t8 \n"
|
|
|
|
// s0 = | 25 | 05 | 24 | 04 |
|
|
// s1 = | 27 | 07 | 26 | 06 |
|
|
// s2 = | 35 | 15 | 34 | 14 |
|
|
// s3 = | 37 | 17 | 36 | 16 |
|
|
|
|
"precr.qb.ph $t0, $s1, $s0 \n"
|
|
"precrq.qb.ph $t1, $s1, $s0 \n"
|
|
"precr.qb.ph $t8, $s3, $s2 \n"
|
|
"precrq.qb.ph $t9, $s3, $s2 \n"
|
|
|
|
// t0 = | 07 | 06 | 05 | 04 |
|
|
// t1 = | 27 | 26 | 25 | 24 |
|
|
// t8 = | 17 | 16 | 15 | 14 |
|
|
// t9 = | 37 | 36 | 35 | 34 |
|
|
|
|
"addu $s0, %[dst], %[dst_stride] \n"
|
|
"addu $s1, $s0, %[dst_stride] \n"
|
|
"addu $s2, $s1, %[dst_stride] \n"
|
|
|
|
"swr $s4, 0(%[dst]) \n"
|
|
"swl $s4, 3(%[dst]) \n"
|
|
"swr $t0, 4(%[dst]) \n"
|
|
"swl $t0, 7(%[dst]) \n"
|
|
"swr $s6, 0($s0) \n"
|
|
"swl $s6, 3($s0) \n"
|
|
"swr $t8, 4($s0) \n"
|
|
"swl $t8, 7($s0) \n"
|
|
"swr $s5, 0($s1) \n"
|
|
"swl $s5, 3($s1) \n"
|
|
"swr $t1, 4($s1) \n"
|
|
"swl $t1, 7($s1) \n"
|
|
"swr $s7, 0($s2) \n"
|
|
"swl $s7, 3($s2) \n"
|
|
"swr $t9, 4($s2) \n"
|
|
"swl $t9, 7($s2) \n"
|
|
|
|
"addiu $AT, -1 \n"
|
|
"addiu %[src], 4 \n"
|
|
|
|
"bnez $AT, 11b \n"
|
|
" addu %[dst], $s2, %[dst_stride] \n"
|
|
"2: \n"
|
|
".set pop \n"
|
|
".set at \n"
|
|
:[src] "+r" (src),
|
|
[dst] "+r" (dst),
|
|
[width] "+r" (width)
|
|
:[src_stride] "r" (src_stride),
|
|
[dst_stride] "r" (dst_stride)
|
|
: "t0", "t1", "t2", "t3", "t4", "t5",
|
|
"t6", "t7", "t8", "t9",
|
|
"s0", "s1", "s2", "s3", "s4",
|
|
"s5", "s6", "s7"
|
|
);
|
|
}
|
|
|
|
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
|
|
uint8* dst_a, int dst_stride_a,
|
|
uint8* dst_b, int dst_stride_b,
|
|
int width) {
|
|
__asm__ __volatile__ (
|
|
".set push \n"
|
|
".set noreorder \n"
|
|
"beqz %[width], 2f \n"
|
|
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
|
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
|
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
|
"addu $t3, $t2, %[src_stride] \n"
|
|
"addu $t5, $t4, %[src_stride] \n"
|
|
"addu $t6, $t2, $t4 \n"
|
|
"subu $t7, $t9, %[src_stride] \n"
|
|
"srl $t1, %[width], 1 \n"
|
|
|
|
// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b
|
|
"andi $t0, %[dst_a], 0x3 \n"
|
|
"andi $t8, %[dst_b], 0x3 \n"
|
|
"or $t0, $t0, $t8 \n"
|
|
"andi $t8, %[dst_stride_a], 0x3 \n"
|
|
"andi $s5, %[dst_stride_b], 0x3 \n"
|
|
"or $t8, $t8, $s5 \n"
|
|
"or $t0, $t0, $t8 \n"
|
|
"bnez $t0, 11f \n"
|
|
" nop \n"
|
|
// dst + dst_stride word aligned (both, a & b dst addresses)
|
|
"1: \n"
|
|
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
|
|
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
|
|
"addu $s5, %[dst_a], %[dst_stride_a] \n"
|
|
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
|
|
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
|
|
"addu $s6, %[dst_b], %[dst_stride_b] \n"
|
|
|
|
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
|
|
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
|
|
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
|
|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
|
|
|
|
"sll $t0, $t0, 16 \n"
|
|
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
|
|
"sll $t9, $t9, 16 \n"
|
|
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
|
|
|
|
"sw $s3, 0($s5) \n"
|
|
"sw $s4, 0($s6) \n"
|
|
|
|
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
|
|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
|
|
|
|
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
|
|
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
|
|
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
|
|
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
|
|
"sw $s3, 0(%[dst_a]) \n"
|
|
"sw $s4, 0(%[dst_b]) \n"
|
|
|
|
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
|
|
"precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
|
|
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
|
|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
|
|
|
|
"sll $t0, $t0, 16 \n"
|
|
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
|
|
"sll $t9, $t9, 16 \n"
|
|
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
|
|
"sw $s3, 4($s5) \n"
|
|
"sw $s4, 4($s6) \n"
|
|
|
|
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
|
|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
|
|
|
|
"addiu %[src], 4 \n"
|
|
"addiu $t1, -1 \n"
|
|
"sll $t0, %[dst_stride_a], 1 \n"
|
|
"sll $t8, %[dst_stride_b], 1 \n"
|
|
"sw $s3, 4(%[dst_a]) \n"
|
|
"sw $s4, 4(%[dst_b]) \n"
|
|
"addu %[dst_a], %[dst_a], $t0 \n"
|
|
"bnez $t1, 1b \n"
|
|
" addu %[dst_b], %[dst_b], $t8 \n"
|
|
"b 2f \n"
|
|
" nop \n"
|
|
|
|
// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
|
|
"11: \n"
|
|
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
|
|
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
|
|
"addu $s5, %[dst_a], %[dst_stride_a] \n"
|
|
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
|
|
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
|
|
"addu $s6, %[dst_b], %[dst_stride_b] \n"
|
|
|
|
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
|
|
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
|
|
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
|
|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
|
|
|
|
"sll $t0, $t0, 16 \n"
|
|
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
|
|
"sll $t9, $t9, 16 \n"
|
|
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
|
|
|
|
"swr $s3, 0($s5) \n"
|
|
"swl $s3, 3($s5) \n"
|
|
"swr $s4, 0($s6) \n"
|
|
"swl $s4, 3($s6) \n"
|
|
|
|
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
|
|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
|
|
|
|
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
|
|
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
|
|
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
|
|
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
|
|
"swr $s3, 0(%[dst_a]) \n"
|
|
"swl $s3, 3(%[dst_a]) \n"
|
|
"swr $s4, 0(%[dst_b]) \n"
|
|
"swl $s4, 3(%[dst_b]) \n"
|
|
|
|
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
|
|
"precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
|
|
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
|
|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
|
|
|
|
"sll $t0, $t0, 16 \n"
|
|
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
|
|
"sll $t9, $t9, 16 \n"
|
|
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
|
|
|
|
"swr $s3, 4($s5) \n"
|
|
"swl $s3, 7($s5) \n"
|
|
"swr $s4, 4($s6) \n"
|
|
"swl $s4, 7($s6) \n"
|
|
|
|
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
|
|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
|
|
|
|
"addiu %[src], 4 \n"
|
|
"addiu $t1, -1 \n"
|
|
"sll $t0, %[dst_stride_a], 1 \n"
|
|
"sll $t8, %[dst_stride_b], 1 \n"
|
|
"swr $s3, 4(%[dst_a]) \n"
|
|
"swl $s3, 7(%[dst_a]) \n"
|
|
"swr $s4, 4(%[dst_b]) \n"
|
|
"swl $s4, 7(%[dst_b]) \n"
|
|
"addu %[dst_a], %[dst_a], $t0 \n"
|
|
"bnez $t1, 11b \n"
|
|
" addu %[dst_b], %[dst_b], $t8 \n"
|
|
|
|
"2: \n"
|
|
".set pop \n"
|
|
: [src] "+r" (src),
|
|
[dst_a] "+r" (dst_a),
|
|
[dst_b] "+r" (dst_b),
|
|
[width] "+r" (width),
|
|
[src_stride] "+r" (src_stride)
|
|
: [dst_stride_a] "r" (dst_stride_a),
|
|
[dst_stride_b] "r" (dst_stride_b)
|
|
: "t0", "t1", "t2", "t3", "t4", "t5",
|
|
"t6", "t7", "t8", "t9",
|
|
"s0", "s1", "s2", "s3",
|
|
"s4", "s5", "s6"
|
|
);
|
|
}
|
|
|
|
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
|
|
|
#ifdef __cplusplus
|
|
} // extern "C"
|
|
} // namespace libyuv
|
|
#endif
|