mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
1f162027b6
This updates our in-tree copy of libvpx to match the 1.2.0 git tag. All but one of the patches we were carrying are in this upstream version. Our update.sh script should copy the new files needed but will not remove the old ones for you. Runtime CPU detection was rewritten upstream. We now generate per-platform headers for this and include the correct one from a vpx_rtcd.h wrapper, as we were already doing for vpx_config.h. This revision includes improved assembly optimizations and should be faster on all platforms. Includes work by Jan Gerber and Ralph Giles. --HG-- rename : media/libvpx/vp8/common/arm/neon/save_neon_reg.asm => media/libvpx/vp8/common/arm/neon/save_reg_neon.asm rename : media/libvpx/vp8/common/arm/armv6/vp8_mse16x16_armv6.asm => media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm rename : media/libvpx/vp8/common/arm/neon/vp8_mse16x16_neon.asm => media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm rename : media/libvpx/vp8/encoder/x86/x86_csystemdependent.c => media/libvpx/vp8/encoder/x86/vp8_enc_stubs_mmx.c rename : media/libvpx/vpx_config_arm-linux-gcc.c => media/libvpx/vpx_config_armv7-android-gcc.c rename : media/libvpx/vpx_config_arm-linux-gcc.h => media/libvpx/vpx_config_armv7-android-gcc.h
122 lines
3.8 KiB
NASM
122 lines
3.8 KiB
NASM
;
|
|
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
|
;
|
|
; Use of this source code is governed by a BSD-style license
|
|
; that can be found in the LICENSE file in the root of the source
|
|
; tree. An additional intellectual property rights grant can be found
|
|
; in the file PATENTS. All contributing project authors may
|
|
; be found in the AUTHORS file in the root of the source tree.
|
|
;
|
|
|
|
|
|
%include "vpx_ports/x86_abi_support.asm"
|
|
|
|
;-----------------------------------------------------------------------
;void vp8_short_inv_walsh4x4_sse2(short *input, short *output)
;
; Runs two add/subtract butterfly passes over a 4x4 block of 16-bit
; values, with a 4x4 word transpose between the passes. Every result
; of the second pass gets 3 added and is arithmetically shifted right
; by 3 before being stored.
;
; In:    arg(0) = input.  32 bytes are loaded with movdqa from +0 and
;                 +16, so the pointer must be 16-byte aligned.
;        arg(1) = output. Sixteen words are written; result k (k = 0..15,
;                 row-major within the 4x4 block) lands at byte offset
;                 32*k from this pointer.
; Out:   nothing is returned in a register.
; Uses:  rax, rcx, rdx, xmm0-xmm5, flags.
;
; Lane comments list the most significant lanes first. In the
; two-digit form "RC", R is the row and C the column of the 4x4 block.
;-----------------------------------------------------------------------
global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE
sym(vp8_short_inv_walsh4x4_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    ; end prolog

    mov         rcx, arg(0)             ; rcx = input
    mov         rdx, arg(1)             ; rdx = output
    mov         eax, 30003h             ; two words of 3 (rounding bias); only eax is read below

    ; ---- first pass: butterflies between rows -----------------------
    movdqa      xmm0, [rcx + 0]         ;ip[4] ip[0]
    movdqa      xmm1, [rcx + 16]        ;ip[12] ip[8]

    pshufd      xmm2, xmm1, 4eh         ;ip[8] ip[12]   (64-bit halves swapped)
    movdqa      xmm3, xmm0              ;ip[4] ip[0]

    paddw       xmm0, xmm2              ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
    psubw       xmm3, xmm2              ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1

    movdqa      xmm4, xmm0
    punpcklqdq  xmm0, xmm3              ;d1 a1
    punpckhqdq  xmm4, xmm3              ;c1 b1

    movdqa      xmm1, xmm4              ;c1 b1
    paddw       xmm4, xmm0              ;d1+c1 a1+b1 aka op[4] op[0]
    psubw       xmm0, xmm1              ;d1-c1 a1-b1 aka op[12] op[8]

    ; ---- transpose the 4x4 block of words ---------------------------
    ; xmm4 = 13 12 11 10 03 02 01 00
    ; xmm0 = 33 32 31 30 23 22 21 20
    movdqa      xmm3, xmm4              ; 13 12 11 10 03 02 01 00
    punpcklwd   xmm4, xmm0              ; 23 03 22 02 21 01 20 00
    punpckhwd   xmm3, xmm0              ; 33 13 32 12 31 11 30 10
    movdqa      xmm1, xmm4              ; 23 03 22 02 21 01 20 00
    punpcklwd   xmm4, xmm3              ; 31 21 11 01 30 20 10 00  = col[1] col[0]
    punpckhwd   xmm1, xmm3              ; 33 23 13 03 32 22 12 02  = col[3] col[2]

    ; ---- second pass: butterflies between columns -------------------
    ; Each 64-bit half now carries one column, with word r belonging
    ; to row r.
    movd        xmm0, eax               ; low dword = 0003 0003
    pshufd      xmm2, xmm1, 4eh         ;col[2] col[3]  (64-bit halves swapped)
    movdqa      xmm3, xmm4              ;col[1] col[0]

    pshufd      xmm0, xmm0, 0           ;03 03 03 03 03 03 03 03

    paddw       xmm4, xmm2              ;col[1]+col[2] col[0]+col[3] aka b1 a1
    psubw       xmm3, xmm2              ;col[1]-col[2] col[0]-col[3] aka c1 d1

    movdqa      xmm5, xmm4
    punpcklqdq  xmm4, xmm3              ;d1 a1
    punpckhqdq  xmm5, xmm3              ;c1 b1

    movdqa      xmm1, xmm5              ;c1 b1
    paddw       xmm5, xmm4              ;d1+c1 a1+b1 aka results 4r+1, 4r+0
    psubw       xmm4, xmm1              ;d1-c1 a1-b1 aka results 4r+3, 4r+2

    paddw       xmm5, xmm0              ; + 3
    paddw       xmm4, xmm0
    psraw       xmm5, 3                 ; >> 3 (arithmetic)
    psraw       xmm4, 3

    ; ---- scatter the 16 results, one word every 32 bytes ------------
    ; Two words are pulled out of each vector at a time through
    ; eax/ecx; psrldq then moves the next dword into position.
    ; Word w of xmm5 is result 4*(w mod 4) + (w / 4); word w of xmm4
    ; is that index plus 2.
    movd        eax, xmm5               ; results 4, 0
    movd        ecx, xmm4               ; results 6, 2
    psrldq      xmm5, 4
    psrldq      xmm4, 4
    mov         word ptr[rdx+32*0], ax
    mov         word ptr[rdx+32*2], cx
    shr         eax, 16
    shr         ecx, 16
    mov         word ptr[rdx+32*4], ax
    mov         word ptr[rdx+32*6], cx
    movd        eax, xmm5               ; results 12, 8
    movd        ecx, xmm4               ; results 14, 10
    psrldq      xmm5, 4
    psrldq      xmm4, 4
    mov         word ptr[rdx+32*8], ax
    mov         word ptr[rdx+32*10], cx
    shr         eax, 16
    shr         ecx, 16
    mov         word ptr[rdx+32*12], ax
    mov         word ptr[rdx+32*14], cx

    movd        eax, xmm5               ; results 5, 1
    movd        ecx, xmm4               ; results 7, 3
    psrldq      xmm5, 4
    psrldq      xmm4, 4
    mov         word ptr[rdx+32*1], ax
    mov         word ptr[rdx+32*3], cx
    shr         eax, 16
    shr         ecx, 16
    mov         word ptr[rdx+32*5], ax
    mov         word ptr[rdx+32*7], cx
    movd        eax, xmm5               ; results 13, 9
    movd        ecx, xmm4               ; results 15, 11
    mov         word ptr[rdx+32*9], ax
    mov         word ptr[rdx+32*11], cx
    shr         eax, 16
    shr         ecx, 16
    mov         word ptr[rdx+32*13], ax
    mov         word ptr[rdx+32*15], cx

    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
|