From b045b2738e913015498207df5a10caa211aab0d3 Mon Sep 17 00:00:00 2001 From: Jan Gerber Date: Thu, 16 Jul 2015 02:38:00 -0700 Subject: [PATCH] Bug 1073319 - Enable AVX2 for libvpx on linux (libvpx). r=rillian --- media/libvpx/vp9_rtcd_x86-linux-gcc.h | 37 ++++++-- media/libvpx/vp9_rtcd_x86_64-linux-gcc.h | 34 ++++++- media/libvpx/vpx_config_x86-linux-gcc.h | 2 +- media/libvpx/vpx_config_x86_64-linux-gcc.h | 2 +- media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h | 74 ++++++++++++--- media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h | 95 ++++++++++++++++---- 6 files changed, 202 insertions(+), 42 deletions(-) diff --git a/media/libvpx/vp9_rtcd_x86-linux-gcc.h b/media/libvpx/vp9_rtcd_x86-linux-gcc.h index 8b7af70d737..8c4aae55bbf 100644 --- a/media/libvpx/vp9_rtcd_x86-linux-gcc.h +++ b/media/libvpx/vp9_rtcd_x86-linux-gcc.h @@ -38,7 +38,8 @@ unsigned int vp9_avg_8x8_sse2(const uint8_t *, int p); RTCD_EXTERN unsigned int (*vp9_avg_8x8)(const uint8_t *, int p); int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); -#define vp9_block_error vp9_block_error_c +int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); @@ -47,6 +48,7 @@ RTCD_EXTERN int64_t (*vp9_block_error_fp)(const int16_t *coeff, const int16_t *d void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -67,11 +69,13 @@ RTCD_EXTERN void (*vp9_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_str void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -213,6 +217,7 @@ RTCD_EXTERN void (*vp9_fdct16x16_1)(const int16_t *input, tran_low_t *output, in void vp9_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride); +void vp9_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fdct32x32)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride); @@ -221,6 +226,7 @@ RTCD_EXTERN void (*vp9_fdct32x32_1)(const int16_t *input, tran_low_t *output, in void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride); +void vp9_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride); @@ -359,6 +365,7 @@ void vp9_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); @@ -436,7 +443,8 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int sourc #define vp9_sub_pixel_avg_variance32x16 vp9_sub_pixel_avg_variance32x16_c unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); -#define vp9_sub_pixel_avg_variance32x32 vp9_sub_pixel_avg_variance32x32_c +unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); #define vp9_sub_pixel_avg_variance32x64 vp9_sub_pixel_avg_variance32x64_c @@ -451,7 +459,8 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int sourc #define vp9_sub_pixel_avg_variance64x32 vp9_sub_pixel_avg_variance64x32_c unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); -#define vp9_sub_pixel_avg_variance64x64 vp9_sub_pixel_avg_variance64x64_c +unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); #define vp9_sub_pixel_avg_variance8x16 vp9_sub_pixel_avg_variance8x16_c @@ -475,7 +484,8 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_st #define vp9_sub_pixel_variance32x16 vp9_sub_pixel_variance32x16_c unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_sub_pixel_variance32x32 vp9_sub_pixel_variance32x32_c +unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); #define vp9_sub_pixel_variance32x64 vp9_sub_pixel_variance32x64_c @@ -490,7 +500,8 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_st #define vp9_sub_pixel_variance64x32 vp9_sub_pixel_variance64x32_c unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_sub_pixel_variance64x64 vp9_sub_pixel_variance64x64_c +unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); #define vp9_sub_pixel_variance8x16 vp9_sub_pixel_variance8x16_c @@ -550,11 +561,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_avg_4x4 = vp9_avg_4x4_sse2; vp9_avg_8x8 = vp9_avg_8x8_c; if (flags & HAS_SSE2) vp9_avg_8x8 = vp9_avg_8x8_sse2; + vp9_block_error = vp9_block_error_c; + if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2; vp9_block_error_fp = vp9_block_error_fp_c; if (flags & HAS_SSE2) vp9_block_error_fp = vp9_block_error_fp_sse2; vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; + if (flags & HAS_AVX2) vp9_convolve8 = vp9_convolve8_avx2; vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; @@ -567,19 +581,23 @@ static void setup_rtcd_internal(void) vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2; vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2; vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; vp9_fdct16x16_1 = vp9_fdct16x16_1_c; if (flags & HAS_SSE2) vp9_fdct16x16_1 = vp9_fdct16x16_1_sse2; vp9_fdct32x32 = vp9_fdct32x32_c; if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2; vp9_fdct32x32_1 = vp9_fdct32x32_1_c; if (flags & HAS_SSE2) vp9_fdct32x32_1 = vp9_fdct32x32_1_sse2; vp9_fdct32x32_rd = vp9_fdct32x32_rd_c; if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2; vp9_fdct4x4 = vp9_fdct4x4_c; if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2; vp9_fdct4x4_1 = vp9_fdct4x4_1_c; @@ -638,6 +656,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_int_pro_row = vp9_int_pro_row_sse2; vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; + if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if (flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx; vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; @@ -666,6 +685,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_quantize_fp = vp9_quantize_fp_sse2; vp9_satd = vp9_satd_c; if (flags & HAS_SSE2) vp9_satd = vp9_satd_sse2; + vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_c; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2; + vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_c; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2; + vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_c; + if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2; + vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_c; + if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2; vp9_temporal_filter_apply = vp9_temporal_filter_apply_c; if (flags & HAS_SSE2) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse2; vp9_vector_var = vp9_vector_var_c; diff --git a/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h b/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h index 34e7f3160e6..8d75771cf86 100644 --- a/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h +++ b/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h @@ -39,7 +39,8 @@ unsigned int vp9_avg_8x8_sse2(const uint8_t *, int p); int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); -#define vp9_block_error vp9_block_error_sse2 +int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); @@ -48,6 +49,7 @@ int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, in void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -68,11 +70,13 @@ RTCD_EXTERN void (*vp9_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_str void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -247,7 +251,8 @@ void vp9_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride); -#define vp9_fdct32x32 vp9_fdct32x32_sse2 +void vp9_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride); +RTCD_EXTERN void (*vp9_fdct32x32)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -255,7 +260,8 @@ void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride); -#define vp9_fdct32x32_rd vp9_fdct32x32_rd_sse2 +void vp9_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride); +RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct4x4_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -402,7 +408,8 @@ void vp9_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); -#define vp9_lpf_horizontal_16 vp9_lpf_horizontal_16_sse2 +void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_4_mmx(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); @@ -493,6 +500,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x16)(const uint8_t *src_p unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -518,6 +526,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x32)(const uint8_t *src_p unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -558,6 +567,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x16)(const uint8_t *src_ptr, unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -583,6 +593,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x32)(const uint8_t *src_ptr, unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -654,8 +665,11 @@ static void setup_rtcd_internal(void) (void)flags; + vp9_block_error = vp9_block_error_sse2; + if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2; vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; + if (flags & HAS_AVX2) vp9_convolve8 = vp9_convolve8_avx2; vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; @@ -664,8 +678,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2; vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2; vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; @@ -696,6 +712,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; + vp9_fdct32x32 = vp9_fdct32x32_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2; + vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2; vp9_fdct8x8 = vp9_fdct8x8_sse2; if (flags & HAS_SSSE3) vp9_fdct8x8 = vp9_fdct8x8_ssse3; vp9_fdct8x8_quant = vp9_fdct8x8_quant_sse2; @@ -717,6 +737,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3; vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; + vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; + if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; vp9_quantize_b = vp9_quantize_b_sse2; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; @@ -735,6 +757,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2; vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; @@ -745,6 +768,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2; vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; @@ -761,6 +785,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2; vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; @@ -771,6 +796,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2; vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; diff --git a/media/libvpx/vpx_config_x86-linux-gcc.h b/media/libvpx/vpx_config_x86-linux-gcc.h index edcf0ee9fc6..ff6b0186b80 100644 --- a/media/libvpx/vpx_config_x86-linux-gcc.h +++ b/media/libvpx/vpx_config_x86-linux-gcc.h @@ -29,7 +29,7 @@ #define HAVE_SSSE3 1 #define HAVE_SSE4_1 1 #define HAVE_AVX 1 -#define HAVE_AVX2 0 +#define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 #define HAVE_PTHREAD_H 1 diff --git a/media/libvpx/vpx_config_x86_64-linux-gcc.h b/media/libvpx/vpx_config_x86_64-linux-gcc.h index dcd1ad82a9f..d04556d4585 100644 --- a/media/libvpx/vpx_config_x86_64-linux-gcc.h +++ b/media/libvpx/vpx_config_x86_64-linux-gcc.h @@ -29,7 +29,7 @@ #define HAVE_SSSE3 1 #define HAVE_SSE4_1 1 #define HAVE_AVX 1 -#define HAVE_AVX2 0 +#define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 #define HAVE_PTHREAD_H 1 diff --git a/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h b/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h index 5cb89257744..d69c28c1a48 100644 --- a/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h +++ b/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h @@ -23,6 +23,7 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); @@ -41,6 +42,7 @@ RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *); unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); @@ -103,34 +105,41 @@ void vpx_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x16 vpx_sad32x16_c +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x16_avg vpx_sad32x16_avg_c +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); #define vpx_sad32x16x4d vpx_sad32x16x4d_c unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x32 vpx_sad32x32_c +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x32_avg vpx_sad32x32_avg_c +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad32x32x3 vpx_sad32x32x3_c void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); -#define vpx_sad32x32x4d vpx_sad32x32x4d_c +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad32x32x8 vpx_sad32x32x8_c unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x64 vpx_sad32x64_c +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x64_avg vpx_sad32x64_avg_c +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); #define vpx_sad32x64x4d vpx_sad32x64x4d_c @@ -166,25 +175,30 @@ void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p #define vpx_sad4x8x8 vpx_sad4x8x8_c unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad64x32 vpx_sad64x32_c +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad64x32_avg vpx_sad64x32_avg_c +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); #define vpx_sad64x32x4d vpx_sad64x32x4d_c unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad64x64 vpx_sad64x64_c +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad64x64_avg vpx_sad64x64_avg_c +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad64x64x3 vpx_sad64x64x3_c void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); -#define vpx_sad64x64x4d vpx_sad64x64x4d_c +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad64x64x8 vpx_sad64x64x8_c @@ -240,6 +254,7 @@ RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const u unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -253,10 +268,12 @@ RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_ unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -274,10 +291,12 @@ RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_s unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -306,6 +325,7 @@ static void setup_rtcd_internal(void) vpx_get16x16var = vpx_get16x16var_c; if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; vpx_get8x8var = vpx_get8x8var_c; if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx; if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2; @@ -315,6 +335,7 @@ static void setup_rtcd_internal(void) vpx_mse16x16 = vpx_mse16x16_c; if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx; if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; vpx_mse16x8 = vpx_mse16x8_c; if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; vpx_mse8x16 = vpx_mse8x16_c; @@ -335,12 +356,36 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; vpx_sad16x8x8 = vpx_sad16x8x8_c; if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad32x16 = vpx_sad32x16_c; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; + vpx_sad32x16_avg = vpx_sad32x16_avg_c; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; + vpx_sad32x32 = vpx_sad32x32_c; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; + vpx_sad32x32_avg = vpx_sad32x32_avg_c; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; + vpx_sad32x32x4d = vpx_sad32x32x4d_c; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; + vpx_sad32x64 = vpx_sad32x64_c; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; + vpx_sad32x64_avg = vpx_sad32x64_avg_c; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; vpx_sad4x4 = vpx_sad4x4_c; if (flags & HAS_MMX) vpx_sad4x4 = vpx_sad4x4_mmx; vpx_sad4x4x3 = vpx_sad4x4x3_c; if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; vpx_sad4x4x8 = vpx_sad4x4x8_c; if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad64x32 = vpx_sad64x32_c; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; + vpx_sad64x32_avg = vpx_sad64x32_avg_c; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; + vpx_sad64x64 = vpx_sad64x64_c; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; + vpx_sad64x64_avg = vpx_sad64x64_avg_c; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; + vpx_sad64x64x4d = vpx_sad64x64x4d_c; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; vpx_sad8x16 = vpx_sad8x16_c; if (flags & HAS_MMX) vpx_sad8x16 = vpx_sad8x16_mmx; vpx_sad8x16x3 = vpx_sad8x16x3_c; @@ -356,6 +401,7 @@ static void setup_rtcd_internal(void) vpx_variance16x16 = vpx_variance16x16_c; if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx; if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; vpx_variance16x32 = vpx_variance16x32_c; if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; vpx_variance16x8 = vpx_variance16x8_c; @@ -363,8 +409,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; vpx_variance32x16 = vpx_variance32x16_c; if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; vpx_variance32x32 = vpx_variance32x32_c; if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; vpx_variance32x64 = vpx_variance32x64_c; if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; vpx_variance4x4 = vpx_variance4x4_c; @@ -374,8 +422,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2; vpx_variance64x32 = vpx_variance64x32_c; if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; vpx_variance64x64 = vpx_variance64x64_c; if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; vpx_variance8x16 = vpx_variance8x16_c; if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx; if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2; diff --git a/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h b/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h index 227fe0d691b..d93c56eb765 100644 --- a/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h +++ b/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h @@ -23,7 +23,8 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vpx_get16x16var vpx_get16x16var_sse2 +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); #define vpx_get4x4sse_cs vpx_get4x4sse_cs_c @@ -41,7 +42,8 @@ unsigned int vpx_get_mb_ss_sse2(const int16_t *); unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vpx_mse16x16 vpx_mse16x16_sse2 +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); @@ -113,11 +115,13 @@ RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x16 vpx_sad32x16_sse2 +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x16_avg vpx_sad32x16_avg_sse2 +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -125,29 +129,34 @@ void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x32 vpx_sad32x32_sse2 +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x32_avg vpx_sad32x32_avg_sse2 +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad32x32x3 vpx_sad32x32x3_c void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); -#define vpx_sad32x32x4d vpx_sad32x32x4d_sse2 +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad32x32x8 vpx_sad32x32x8_c unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x64 vpx_sad32x64_sse2 +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x64_avg vpx_sad32x64_avg_sse2 +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -191,11 +200,13 @@ void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad64x32 vpx_sad64x32_sse2 +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad64x32_avg vpx_sad64x32_avg_sse2 +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -203,18 +214,21 @@ void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad64x64 vpx_sad64x64_sse2 +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad64x64_avg vpx_sad64x64_avg_sse2 +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad64x64x3 vpx_sad64x64x3_c void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); -#define vpx_sad64x64x4d vpx_sad64x64x4d_sse2 +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad64x64x8 vpx_sad64x64x8_c @@ -279,7 +293,8 @@ RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const u unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vpx_variance16x16 vpx_variance16x16_sse2 +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -292,11 +307,13 @@ unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, co unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vpx_variance32x16 vpx_variance32x16_sse2 +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vpx_variance32x32 vpx_variance32x32_sse2 +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -313,11 +330,13 @@ unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, con unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vpx_variance64x32 vpx_variance64x32_sse2 +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vpx_variance64x64 vpx_variance64x64_sse2 +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -343,6 +362,10 @@ static void setup_rtcd_internal(void) (void)flags; + vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; vpx_sad16x16x3 = vpx_sad16x16x3_c; if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; @@ -353,10 +376,34 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; vpx_sad16x8x8 = vpx_sad16x8x8_c; if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad32x16 = vpx_sad32x16_sse2; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; + vpx_sad32x16_avg = vpx_sad32x16_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; + vpx_sad32x32 = vpx_sad32x32_sse2; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; + vpx_sad32x32_avg = vpx_sad32x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; + vpx_sad32x32x4d = vpx_sad32x32x4d_sse2; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; + vpx_sad32x64 = vpx_sad32x64_sse2; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; + vpx_sad32x64_avg = vpx_sad32x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; vpx_sad4x4x3 = vpx_sad4x4x3_c; if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; vpx_sad4x4x8 = vpx_sad4x4x8_c; if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad64x32 = vpx_sad64x32_sse2; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; + vpx_sad64x32_avg = vpx_sad64x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; + vpx_sad64x64 = vpx_sad64x64_sse2; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; + vpx_sad64x64_avg = vpx_sad64x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; + vpx_sad64x64x4d = vpx_sad64x64x4d_sse2; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; vpx_sad8x16x3 = vpx_sad8x16x3_c; if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; vpx_sad8x16x8 = vpx_sad8x16x8_c; @@ -365,6 +412,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; vpx_sad8x8x8 = vpx_sad8x8x8_c; if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; } #endif