gecko/media/libvpx/vp8/common/x86/recon_mmx.asm
Jan Gerber 1f162027b6 Bug 763495 - Update libvpx source to 1.2.0. r=cpearce,glandium
This updates our in-tree copy of libvpx to match the 1.2.0 git
tag. All but one of the patches we were carrying are in this
upstream version. Our update.sh script should copy the new
files needed but will not remove the old ones for you.

Runtime CPU detection was rewritten upstream. We now generate
per-platform headers for this and include the correct one from
a vpx_rtcd.h wrapper, as we were already doing for vpx_config.h.

This revision includes improved assembly optimizations and should
be faster on all platforms.

Includes work by Jan Gerber and Ralph Giles.

--HG--
rename : media/libvpx/vp8/common/arm/neon/save_neon_reg.asm => media/libvpx/vp8/common/arm/neon/save_reg_neon.asm
rename : media/libvpx/vp8/common/arm/armv6/vp8_mse16x16_armv6.asm => media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
rename : media/libvpx/vp8/common/arm/neon/vp8_mse16x16_neon.asm => media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
rename : media/libvpx/vp8/encoder/x86/x86_csystemdependent.c => media/libvpx/vp8/encoder/x86/vp8_enc_stubs_mmx.c
rename : media/libvpx/vpx_config_arm-linux-gcc.c => media/libvpx/vpx_config_armv7-android-gcc.c
rename : media/libvpx/vpx_config_arm-linux-gcc.h => media/libvpx/vpx_config_armv7-android-gcc.h
2013-11-29 06:02:00 -08:00

275 lines
7.1 KiB
NASM

;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;-----------------------------------------------------------------------
; void vp8_copy_mem8x8_mmx(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
; )
;
; Copies an 8x8 block of bytes: eight rows, one 8-byte movq per row.
; Consecutive source rows are src_stride bytes apart, consecutive
; destination rows are dst_stride bytes apart.
;
; Register roles:
;   rsi = source row pointer        rax = src_stride (sign-extended)
;   rdi = destination row pointer   rcx = dst_stride (sign-extended)
;   mm0-mm5 = row data in flight
;
; rsi and rdi are saved and restored; rax, rcx and mm0-mm5 are
; overwritten. This routine does not execute emms.
; NOTE(review): sym(), arg(), SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS and
; PRIVATE come from x86_abi_support.asm - see that file for the ABI
; details they hide.
;
; Every group of rows is fully loaded before any row of that group is
; stored.
;-----------------------------------------------------------------------
global sym(vp8_copy_mem8x8_mmx) PRIVATE
sym(vp8_copy_mem8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; fetch the four arguments
    mov         rsi, arg(0)                 ; rsi = src
    movsxd      rax, dword ptr arg(1)       ; rax = src_stride
    mov         rdi, arg(2)                 ; rdi = dst
    movsxd      rcx, dword ptr arg(3)       ; rcx = dst_stride

    ; rows 0-2
    movq        mm0, [rsi]
    movq        mm1, [rsi+rax]
    movq        mm2, [rsi+rax*2]

    movq        [rdi], mm0
    movq        [rdi+rcx], mm1
    movq        [rdi+rcx*2], mm2

    lea         rsi, [rsi+rax*2]
    add         rsi, rax                    ; rsi -> source row 3
    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx                    ; rdi -> destination row 3

    ; rows 3-5
    movq        mm3, [rsi]
    movq        mm4, [rsi+rax]
    movq        mm5, [rsi+rax*2]

    movq        [rdi], mm3
    movq        [rdi+rcx], mm4
    movq        [rdi+rcx*2], mm5

    lea         rsi, [rsi+rax*2]            ; rsi -> source row 5
    lea         rdi, [rdi+rcx*2]            ; rdi -> destination row 5

    ; rows 6-7, addressed relative to row 5
    movq        mm0, [rsi+rax]
    movq        mm1, [rsi+rax*2]

    movq        [rdi+rcx], mm0
    movq        [rdi+rcx*2], mm1

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
;-----------------------------------------------------------------------
; void vp8_copy_mem8x4_mmx(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
; )
;
; Copies an 8-wide, 4-high block of bytes: four rows, one 8-byte movq
; per row. Source rows are src_stride bytes apart, destination rows
; are dst_stride bytes apart.
;
; Register roles:
;   rsi = source row pointer        rax = src_stride (sign-extended)
;   rdi = destination row pointer   rcx = dst_stride (sign-extended)
;   mm0-mm3 = row data in flight
;
; rsi and rdi are saved and restored; rax, rcx and mm0-mm3 are
; overwritten. This routine does not execute emms.
; NOTE(review): sym(), arg(), SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS and
; PRIVATE come from x86_abi_support.asm - see that file for the ABI
; details they hide.
;-----------------------------------------------------------------------
global sym(vp8_copy_mem8x4_mmx) PRIVATE
sym(vp8_copy_mem8x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; fetch the four arguments
    mov         rsi, arg(0)                 ; rsi = src
    movsxd      rax, dword ptr arg(1)       ; rax = src_stride
    mov         rdi, arg(2)                 ; rdi = dst
    movsxd      rcx, dword ptr arg(3)       ; rcx = dst_stride

    ; rows 0-2: load all three, then store all three
    movq        mm0, [rsi]
    movq        mm1, [rsi+rax]
    movq        mm2, [rsi+rax*2]

    movq        [rdi], mm0
    movq        [rdi+rcx], mm1
    movq        [rdi+rcx*2], mm2

    lea         rsi, [rsi+rax*2]            ; rsi -> source row 2
    lea         rdi, [rdi+rcx*2]            ; rdi -> destination row 2

    ; row 3, addressed as one stride past row 2
    movq        mm3, [rsi+rax]
    movq        [rdi+rcx], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
;-----------------------------------------------------------------------
; void vp8_copy_mem16x16_mmx(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
; )
;
; Copies a 16x16 block of bytes. Each row is moved as two 8-byte movq
; halves (offsets 0 and 8). Source rows are src_stride bytes apart,
; destination rows are dst_stride bytes apart.
;
; Register roles:
;   rsi = source row pointer        rax = src_stride (sign-extended)
;   rdi = destination row pointer   rcx = dst_stride (sign-extended)
;   mm0/mm3, mm1/mm4, mm2/mm5 = low/high halves of three rows
;
; rsi and rdi are saved and restored; rax, rcx and mm0-mm5 are
; overwritten. This routine does not execute emms.
; NOTE(review): sym(), arg(), SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS and
; PRIVATE come from x86_abi_support.asm - see that file for the ABI
; details they hide.
;
; Layout: rows 0-14 are handled three at a time by a block that is
; replicated five times at assembly time (%rep emits straight-line
; code; there is no run-time loop or counter). Row 15 is copied on its
; own afterwards.
;-----------------------------------------------------------------------
global sym(vp8_copy_mem16x16_mmx) PRIVATE
sym(vp8_copy_mem16x16_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ; rsi = src
    movsxd      rax, dword ptr arg(1)       ; rax = src_stride
    mov         rdi, arg(2)                 ; rdi = dst
    movsxd      rcx, dword ptr arg(3)       ; rcx = dst_stride

%rep 5
    ; load three source rows (low half, then high half of each)
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]
    movq        mm1, [rsi+rax]
    movq        mm4, [rsi+rax+8]
    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    lea         rsi, [rsi+rax*2]
    add         rsi, rax                    ; rsi += 3 * src_stride

    ; store the same three rows to the destination
    movq        [rdi], mm0
    movq        [rdi+8], mm3
    movq        [rdi+rcx], mm1
    movq        [rdi+rcx+8], mm4
    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx                    ; rdi += 3 * dst_stride
%endrep

    ; row 15: the single row left after 5 * 3 rows
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]

    movq        [rdi], mm0
    movq        [rdi+8], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret