mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
1f162027b6
This updates our in-tree copy of libvpx to match the 1.2.0 git tag. All but one of the patches we were carrying are in this upstream version. Our update.sh script should copy the new files needed but will not remove the old ones for you. Runtime cpu detection was rewritten upstream. We now generate per-platform headers for this and include the correct one from a vpx_rtcd.h wrapper like we were already doing for vpx_config.h This revision includes improved assembly optimizations and should be faster on all platforms. Includes work by Jan Gerber and Ralph Giles. --HG-- rename : media/libvpx/vp8/common/arm/neon/save_neon_reg.asm => media/libvpx/vp8/common/arm/neon/save_reg_neon.asm rename : media/libvpx/vp8/common/arm/armv6/vp8_mse16x16_armv6.asm => media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm rename : media/libvpx/vp8/common/arm/neon/vp8_mse16x16_neon.asm => media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm rename : media/libvpx/vp8/encoder/x86/x86_csystemdependent.c => media/libvpx/vp8/encoder/x86/vp8_enc_stubs_mmx.c rename : media/libvpx/vpx_config_arm-linux-gcc.c => media/libvpx/vpx_config_armv7-android-gcc.c rename : media/libvpx/vpx_config_arm-linux-gcc.h => media/libvpx/vpx_config_armv7-android-gcc.h
275 lines
7.1 KiB
NASM
275 lines
7.1 KiB
NASM
;
|
|
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
|
;
|
|
; Use of this source code is governed by a BSD-style license
|
|
; that can be found in the LICENSE file in the root of the source
|
|
; tree. An additional intellectual property rights grant can be found
|
|
; in the file PATENTS. All contributing project authors may
|
|
; be found in the AUTHORS file in the root of the source tree.
|
|
;
|
|
|
|
|
|
%include "vpx_ports/x86_abi_support.asm"
|
|
|
|
|
|
;void copy_mem8x8_mmx(
|
|
; unsigned char *src,
|
|
; int src_stride,
|
|
; unsigned char *dst,
|
|
; int dst_stride
|
|
; )
|
|
global sym(vp8_copy_mem8x8_mmx) PRIVATE
|
|
sym(vp8_copy_mem8x8_mmx):
|
|
push rbp
|
|
mov rbp, rsp
|
|
SHADOW_ARGS_TO_STACK 4
|
|
push rsi
|
|
push rdi
|
|
; end prolog
|
|
|
|
mov rsi, arg(0) ;src;
|
|
movq mm0, [rsi]
|
|
|
|
movsxd rax, dword ptr arg(1) ;src_stride;
|
|
mov rdi, arg(2) ;dst;
|
|
|
|
movq mm1, [rsi+rax]
|
|
movq mm2, [rsi+rax*2]
|
|
|
|
movsxd rcx, dword ptr arg(3) ;dst_stride
|
|
lea rsi, [rsi+rax*2]
|
|
|
|
movq [rdi], mm0
|
|
add rsi, rax
|
|
|
|
movq [rdi+rcx], mm1
|
|
movq [rdi+rcx*2], mm2
|
|
|
|
|
|
lea rdi, [rdi+rcx*2]
|
|
movq mm3, [rsi]
|
|
|
|
add rdi, rcx
|
|
movq mm4, [rsi+rax]
|
|
|
|
movq mm5, [rsi+rax*2]
|
|
movq [rdi], mm3
|
|
|
|
lea rsi, [rsi+rax*2]
|
|
movq [rdi+rcx], mm4
|
|
|
|
movq [rdi+rcx*2], mm5
|
|
lea rdi, [rdi+rcx*2]
|
|
|
|
movq mm0, [rsi+rax]
|
|
movq mm1, [rsi+rax*2]
|
|
|
|
movq [rdi+rcx], mm0
|
|
movq [rdi+rcx*2],mm1
|
|
|
|
; begin epilog
|
|
pop rdi
|
|
pop rsi
|
|
UNSHADOW_ARGS
|
|
pop rbp
|
|
ret
|
|
|
|
|
|
;void copy_mem8x4_mmx(
|
|
; unsigned char *src,
|
|
; int src_stride,
|
|
; unsigned char *dst,
|
|
; int dst_stride
|
|
; )
|
|
global sym(vp8_copy_mem8x4_mmx) PRIVATE
|
|
sym(vp8_copy_mem8x4_mmx):
|
|
push rbp
|
|
mov rbp, rsp
|
|
SHADOW_ARGS_TO_STACK 4
|
|
push rsi
|
|
push rdi
|
|
; end prolog
|
|
|
|
mov rsi, arg(0) ;src;
|
|
movq mm0, [rsi]
|
|
|
|
movsxd rax, dword ptr arg(1) ;src_stride;
|
|
mov rdi, arg(2) ;dst;
|
|
|
|
movq mm1, [rsi+rax]
|
|
movq mm2, [rsi+rax*2]
|
|
|
|
movsxd rcx, dword ptr arg(3) ;dst_stride
|
|
lea rsi, [rsi+rax*2]
|
|
|
|
movq [rdi], mm0
|
|
movq [rdi+rcx], mm1
|
|
|
|
movq [rdi+rcx*2], mm2
|
|
lea rdi, [rdi+rcx*2]
|
|
|
|
movq mm3, [rsi+rax]
|
|
movq [rdi+rcx], mm3
|
|
|
|
; begin epilog
|
|
pop rdi
|
|
pop rsi
|
|
UNSHADOW_ARGS
|
|
pop rbp
|
|
ret
|
|
|
|
|
|
;void copy_mem16x16_mmx(
|
|
; unsigned char *src,
|
|
; int src_stride,
|
|
; unsigned char *dst,
|
|
; int dst_stride
|
|
; )
|
|
global sym(vp8_copy_mem16x16_mmx) PRIVATE
|
|
sym(vp8_copy_mem16x16_mmx):
|
|
push rbp
|
|
mov rbp, rsp
|
|
SHADOW_ARGS_TO_STACK 4
|
|
push rsi
|
|
push rdi
|
|
; end prolog
|
|
|
|
mov rsi, arg(0) ;src;
|
|
movsxd rax, dword ptr arg(1) ;src_stride;
|
|
|
|
mov rdi, arg(2) ;dst;
|
|
movsxd rcx, dword ptr arg(3) ;dst_stride
|
|
|
|
movq mm0, [rsi]
|
|
movq mm3, [rsi+8];
|
|
|
|
movq mm1, [rsi+rax]
|
|
movq mm4, [rsi+rax+8]
|
|
|
|
movq mm2, [rsi+rax*2]
|
|
movq mm5, [rsi+rax*2+8]
|
|
|
|
lea rsi, [rsi+rax*2]
|
|
add rsi, rax
|
|
|
|
movq [rdi], mm0
|
|
movq [rdi+8], mm3
|
|
|
|
movq [rdi+rcx], mm1
|
|
movq [rdi+rcx+8], mm4
|
|
|
|
movq [rdi+rcx*2], mm2
|
|
movq [rdi+rcx*2+8], mm5
|
|
|
|
lea rdi, [rdi+rcx*2]
|
|
add rdi, rcx
|
|
|
|
movq mm0, [rsi]
|
|
movq mm3, [rsi+8];
|
|
|
|
movq mm1, [rsi+rax]
|
|
movq mm4, [rsi+rax+8]
|
|
|
|
movq mm2, [rsi+rax*2]
|
|
movq mm5, [rsi+rax*2+8]
|
|
|
|
lea rsi, [rsi+rax*2]
|
|
add rsi, rax
|
|
|
|
movq [rdi], mm0
|
|
movq [rdi+8], mm3
|
|
|
|
movq [rdi+rcx], mm1
|
|
movq [rdi+rcx+8], mm4
|
|
|
|
movq [rdi+rcx*2], mm2
|
|
movq [rdi+rcx*2+8], mm5
|
|
|
|
lea rdi, [rdi+rcx*2]
|
|
add rdi, rcx
|
|
|
|
movq mm0, [rsi]
|
|
movq mm3, [rsi+8];
|
|
|
|
movq mm1, [rsi+rax]
|
|
movq mm4, [rsi+rax+8]
|
|
|
|
movq mm2, [rsi+rax*2]
|
|
movq mm5, [rsi+rax*2+8]
|
|
|
|
lea rsi, [rsi+rax*2]
|
|
add rsi, rax
|
|
|
|
movq [rdi], mm0
|
|
movq [rdi+8], mm3
|
|
|
|
movq [rdi+rcx], mm1
|
|
movq [rdi+rcx+8], mm4
|
|
|
|
movq [rdi+rcx*2], mm2
|
|
movq [rdi+rcx*2+8], mm5
|
|
|
|
lea rdi, [rdi+rcx*2]
|
|
add rdi, rcx
|
|
|
|
movq mm0, [rsi]
|
|
movq mm3, [rsi+8];
|
|
|
|
movq mm1, [rsi+rax]
|
|
movq mm4, [rsi+rax+8]
|
|
|
|
movq mm2, [rsi+rax*2]
|
|
movq mm5, [rsi+rax*2+8]
|
|
|
|
lea rsi, [rsi+rax*2]
|
|
add rsi, rax
|
|
|
|
movq [rdi], mm0
|
|
movq [rdi+8], mm3
|
|
|
|
movq [rdi+rcx], mm1
|
|
movq [rdi+rcx+8], mm4
|
|
|
|
movq [rdi+rcx*2], mm2
|
|
movq [rdi+rcx*2+8], mm5
|
|
|
|
lea rdi, [rdi+rcx*2]
|
|
add rdi, rcx
|
|
|
|
movq mm0, [rsi]
|
|
movq mm3, [rsi+8];
|
|
|
|
movq mm1, [rsi+rax]
|
|
movq mm4, [rsi+rax+8]
|
|
|
|
movq mm2, [rsi+rax*2]
|
|
movq mm5, [rsi+rax*2+8]
|
|
|
|
lea rsi, [rsi+rax*2]
|
|
add rsi, rax
|
|
|
|
movq [rdi], mm0
|
|
movq [rdi+8], mm3
|
|
|
|
movq [rdi+rcx], mm1
|
|
movq [rdi+rcx+8], mm4
|
|
|
|
movq [rdi+rcx*2], mm2
|
|
movq [rdi+rcx*2+8], mm5
|
|
|
|
lea rdi, [rdi+rcx*2]
|
|
add rdi, rcx
|
|
|
|
movq mm0, [rsi]
|
|
movq mm3, [rsi+8];
|
|
|
|
movq [rdi], mm0
|
|
movq [rdi+8], mm3
|
|
|
|
; begin epilog
|
|
pop rdi
|
|
pop rsi
|
|
UNSHADOW_ARGS
|
|
pop rbp
|
|
ret
|