# HG changeset patch
# Parent 5a1a0398f8503451582602525c3e7b35def5d0b9
# User Timothy B. Terriberry
Fix variance overflow

Upstream Change-Id: I1bad27ea0720067def6d71a6da5f789508cec265

diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
@@ -139,16 +139,16 @@ loop
     subs    r12, r12, #1
     bne     loop

     ; return stuff
     ldr     r6, [sp, #40]           ; get address of sse
     mul     r0, r8, r8              ; sum * sum
     str     r11, [r6]               ; store sse
-    sub     r0, r11, r0, asr #8     ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8     ; return (sse - ((sum * sum) >> 8))

     ldmfd   sp!, {r4-r12, pc}

     ENDP

     END
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
@@ -164,17 +164,17 @@ loop
     subs    r12, r12, #1
     bne     loop

     ; return stuff
     ldr     r6, [sp, #40]           ; get address of sse
     mul     r0, r8, r8              ; sum * sum
     str     r11, [r6]               ; store sse
-    sub     r0, r11, r0, asr #8     ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8     ; return (sse - ((sum * sum) >> 8))

     ldmfd   sp!, {r4-r12, pc}

     ENDP

 c80808080
     DCD     0x80808080
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
@@ -205,17 +205,17 @@ loop
     smlad   r11, r7, r7, r11        ; dual signed multiply, add and accumulate (2)

     bne     loop

     ; return stuff
     ldr     r6, [sp, #40]           ; get address of sse
     mul     r0, r8, r8              ; sum * sum
     str     r11, [r6]               ; store sse
-    sub     r0, r11, r0, asr #8     ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8     ; return (sse - ((sum * sum) >> 8))

     ldmfd   sp!, {r4-r12, pc}

     ENDP

 c80808080
     DCD     0x80808080
diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
@@ -166,17 +166,17 @@ loop
     subs    r12, r12, #1
     bne     loop

     ; return stuff
     ldr     r6, [sp, #40]           ; get address of sse
     mul     r0, r8, r8              ; sum * sum
     str     r11, [r6]               ; store sse
-    sub     r0, r11, r0, asr #8     ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8     ; return (sse - ((sum * sum) >> 8))

     ldmfd   sp!, {r4-r12, pc}

     ENDP

 c80808080
     DCD     0x80808080
diff --git a/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm b/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
--- a/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
+++ b/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
@@ -72,24 +72,24 @@ variance16x16_neon_loop
     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
     vadd.u64        d1, d2, d3

     ;vmov.32        r0, d0[0]                   ;this instruction costs a lot
     ;vmov.32        r1, d1[0]
     ;mul            r0, r0, r0
     ;str            r1, [r12]
-    ;sub            r0, r1, r0, asr #8
+    ;sub            r0, r1, r0, lsr #8

-    ;sum is in [-255x256, 255x256]. sumxsum is 32-bit. Shift to right should
-    ;have sign-bit exension, which is vshr.s. Have to use s32 to make it right.
+    ; while sum is signed, sum * sum is always positive and must be treated as
+    ; unsigned to avoid propagating the sign bit.
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10

     vmov.32         r0, d0[0]                   ;return
     bx              lr

     ENDP

 ;================================
 ;unsigned int vp8_variance16x8_c(
@@ -140,18 +140,18 @@ variance16x8_neon_loop
     ldr             r12, [sp]                   ;load *sse from stack

     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
     vadd.u64        d1, d2, d3

     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.s32        d10, d10, #7
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #7
+    vsub.u32        d0, d1, d10

     vmov.32         r0, d0[0]                   ;return
     bx              lr

     ENDP

 ;=================================
 ;unsigned int vp8_variance8x16_c(
@@ -195,18 +195,18 @@ variance8x16_neon_loop
     ldr             r12, [sp]                   ;load *sse from stack

     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
     vadd.u64        d1, d2, d3

     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.s32        d10, d10, #7
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #7
+    vsub.u32        d0, d1, d10

     vmov.32         r0, d0[0]                   ;return
     bx              lr

     ENDP

 ;==================================
 ; r0    unsigned char *src_ptr
@@ -260,17 +260,17 @@ variance8x8_neon_loop
     ldr             r12, [sp]                   ;load *sse from stack

     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
     vadd.u64        d1, d2, d3

     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.s32        d10, d10, #6
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #6
+    vsub.u32        d0, d1, d10

     vmov.32         r0, d0[0]                   ;return
     bx              lr

     ENDP

     END
diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -400,18 +400,18 @@ sub_pixel_variance16x16_neon_loop
     vpaddl.s32      q0, q8                      ;accumulate sum
     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
     vadd.u64        d1, d2, d3

     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [r6]               ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10

     add             sp, sp, #528
     vmov.32         r0, d0[0]                   ;return

     pop             {r4-r6,pc}

     ENDP
diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -107,18 +107,18 @@ vp8_filt_fpo16x16s_4_0_loop_neon
     vpaddl.s32      q0, q8                      ;accumulate sum
     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
     vadd.u64        d1, d2, d3

     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [lr]               ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10

     vmov.32         r0, d0[0]                   ;return
     pop             {pc}

     ENDP

 ;================================================
 ;unsigned int vp8_variance_halfpixvar16x16_v_neon
 ;(
@@ -203,18 +203,18 @@ vp8_filt_spo16x16s_0_4_loop_neon
     vpaddl.s32      q0, q8                      ;accumulate sum
     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
     vadd.u64        d1, d2, d3

     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [lr]               ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10

     vmov.32         r0, d0[0]                   ;return
     pop             {pc}

     ENDP

 ;================================================
 ;unsigned int vp8_variance_halfpixvar16x16_hv_neon
 ;(
@@ -322,18 +322,18 @@ vp8_filt16x16s_4_4_loop_neon
     vpaddl.s32      q0, q13                     ;accumulate sum
     vpaddl.u32      q1, q15
     vadd.s64        d0, d0, d1
     vadd.u64        d1, d2, d3

     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [lr]               ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10

     vmov.32         r0, d0[0]                   ;return
     pop             {pc}

     ENDP

 ;==============================
 ; r0    unsigned char  *src_ptr,
 ; r1    int  src_pixels_per_line,
@@ -555,18 +555,18 @@ sub_pixel_variance16x16s_neon_loop
     vpaddl.s32      q0, q8                      ;accumulate sum
     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
     vadd.u64        d1, d2, d3

     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [lr]               ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10

     add             sp, sp, #256
     vmov.32         r0, d0[0]                   ;return

     pop             {r4, pc}

     ENDP

     END
diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -201,18 +201,18 @@ sub_pixel_variance8x8_neon_loop
     vpaddl.s32      q0, q8                      ;accumulate sum
     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
     vadd.u64        d1, d2, d3

     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [lr]               ;store sse
-    vshr.s32        d10, d10, #6
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #6
+    vsub.u32        d0, d1, d10

     vmov.32         r0, d0[0]                   ;return
     pop             {r4-r5, pc}

     ENDP

 ;-----------------
diff --git a/media/libvpx/vp8/encoder/variance_c.c b/media/libvpx/vp8/encoder/variance_c.c
--- a/media/libvpx/vp8/encoder/variance_c.c
+++ b/media/libvpx/vp8/encoder/variance_c.c
@@ -70,82 +70,82 @@ unsigned int vp8_variance16x16_c(
     unsigned int *sse)
 {
     unsigned int var;
     int avg;

     variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
     *sse = var;
-    return (var - ((avg * avg) >> 8));
+    return (var - ((unsigned int)(avg * avg) >> 8));
 }

 unsigned int vp8_variance8x16_c(
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
     int recon_stride,
     unsigned int *sse)
 {
     unsigned int var;
     int avg;

     variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 }

 unsigned int vp8_variance16x8_c(
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
     int recon_stride,
     unsigned int *sse)
 {
     unsigned int var;
     int avg;

     variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 }

 unsigned int vp8_variance8x8_c(
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
     int recon_stride,
     unsigned int *sse)
 {
     unsigned int var;
     int avg;

     variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
     *sse = var;
-    return (var - ((avg * avg) >> 6));
+    return (var - ((unsigned int)(avg * avg) >> 6));
 }

 unsigned int vp8_variance4x4_c(
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
     int recon_stride,
     unsigned int *sse)
 {
     unsigned int var;
     int avg;

     variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
     *sse = var;
-    return (var - ((avg * avg) >> 4));
+    return (var - ((unsigned int)(avg * avg) >> 4));
 }

 unsigned int vp8_mse16x16_c(
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
     int recon_stride,
diff --git a/media/libvpx/vp8/encoder/x86/variance_mmx.c b/media/libvpx/vp8/encoder/x86/variance_mmx.c
--- a/media/libvpx/vp8/encoder/x86/variance_mmx.c
+++ b/media/libvpx/vp8/encoder/x86/variance_mmx.c
@@ -86,34 +86,34 @@ unsigned int vp8_variance4x4_mmx(
     int recon_stride,
     unsigned int *sse)
 {
     unsigned int var;
     int avg;

     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((avg * avg) >> 4));
+    return (var - ((unsigned int)(avg * avg) >> 4));
 }

 unsigned int vp8_variance8x8_mmx(
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
     int recon_stride,
     unsigned int *sse)
 {
     unsigned int var;
     int avg;

     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((avg * avg) >> 6));
+    return (var - ((unsigned int)(avg * avg) >> 6));
 }

 unsigned int vp8_mse16x16_mmx(
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
     int recon_stride,
@@ -148,17 +148,17 @@ unsigned int vp8_variance16x16_mmx(
     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
     vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

     var = sse0 + sse1 + sse2 + sse3;
     avg = sum0 + sum1 + sum2 + sum3;
     *sse = var;
-    return (var - ((avg * avg) >> 8));
+    return (var - ((unsigned int)(avg * avg) >> 8));
 }

 unsigned int vp8_variance16x8_mmx(
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
     int recon_stride,
     unsigned int *sse)
@@ -167,17 +167,17 @@ unsigned int vp8_variance16x8_mmx(
     int sum0, sum1, avg;

     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);

     var = sse0 + sse1;
     avg = sum0 + sum1;
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 }

 unsigned int vp8_variance8x16_mmx(
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
@@ -189,17 +189,17 @@ unsigned int vp8_variance8x16_mmx(
     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;

     var = sse0 + sse1;
     avg = sum0 + sum1;
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 }

 unsigned int vp8_sub_pixel_variance4x4_mmx
 (
     const unsigned char *src_ptr,
     int src_pixels_per_line,
diff --git a/media/libvpx/vp8/encoder/x86/variance_sse2.c b/media/libvpx/vp8/encoder/x86/variance_sse2.c
--- a/media/libvpx/vp8/encoder/x86/variance_sse2.c
+++ b/media/libvpx/vp8/encoder/x86/variance_sse2.c
@@ -143,34 +143,34 @@ unsigned int vp8_variance4x4_wmt(
     int recon_stride,
     unsigned int *sse)
 {
     unsigned int var;
     int avg;

     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((avg * avg) >> 4));
+    return (var - ((unsigned int)(avg * avg) >> 4));
 }

 unsigned int vp8_variance8x8_wmt
 (
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
     int recon_stride,
     unsigned int *sse)
 {
     unsigned int var;
     int avg;

     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((avg * avg) >> 6));
+    return (var - ((unsigned int)(avg * avg) >> 6));
 }

 unsigned int vp8_variance16x16_wmt
 (
     const unsigned char *src_ptr,
     int source_stride,
@@ -215,17 +215,17 @@ unsigned int vp8_variance16x8_wmt
     int sum0, sum1, avg;

     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
     vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);

     var = sse0 + sse1;
     avg = sum0 + sum1;
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 }

 unsigned int vp8_variance8x16_wmt
 (
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
@@ -236,17 +236,17 @@ unsigned int vp8_variance8x16_wmt
     int sum0, sum1, avg;

     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
     vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;

     var = sse0 + sse1;
     avg = sum0 + sum1;
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 }

 unsigned int vp8_sub_pixel_variance4x4_wmt
 (
     const unsigned char *src_ptr,
     int src_pixels_per_line,
     int xoffset,
diff --git a/media/libvpx/vp8/encoder/x86/variance_ssse3.c b/media/libvpx/vp8/encoder/x86/variance_ssse3.c
--- a/media/libvpx/vp8/encoder/x86/variance_ssse3.c
+++ b/media/libvpx/vp8/encoder/x86/variance_ssse3.c
@@ -107,17 +107,17 @@ unsigned int vp8_sub_pixel_variance16x16
         vp8_filter_block2d_bil_var_ssse3(
             src_ptr, src_pixels_per_line,
             dst_ptr, dst_pixels_per_line, 16,
             xoffset, yoffset,
             &xsum0, &xxsum0);
     }

     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
 }

 unsigned int vp8_sub_pixel_variance16x8_ssse3
 (
     const unsigned char *src_ptr,
     int src_pixels_per_line,
     int xoffset,
     int yoffset,
@@ -156,10 +156,10 @@ unsigned int vp8_sub_pixel_variance16x8_
         vp8_filter_block2d_bil_var_ssse3(
             src_ptr, src_pixels_per_line,
             dst_ptr, dst_pixels_per_line, 8,
             xoffset, yoffset,
             &xsum0, &xxsum0);
     }

     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 7));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7));
 }
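
Note (not part of the patch): the arithmetic behind the fix, as a standalone
C sketch. For a 16x16 block the removed NEON comment observes that sum lies
in [-255x256, 255x256], so sum * sum can reach 65280^2 = 4261478400, which
does not fit in a signed 32-bit int. Interpreted as signed, the product wraps
negative, and an arithmetic shift (asr / vshr.s32 / signed >>) propagates the
sign bit, subtracting far too much from sse. Reinterpreting the same bit
pattern as unsigned and shifting logically (lsr / vshr.u32) yields the
intended (sum * sum mod 2^32) >> 8. The values below are the worst case of
every pixel differing by 255. (The signed multiply itself still wraps; ISO C
calls that undefined, but it behaves as two's-complement wraparound on the
targets this code builds for, which is what the cast-after-multiply relies
on.)

#include <stdio.h>

int main(void)
{
    int sum = 255 * 256;                  /* worst-case sum for 16x16: 65280 */
    unsigned int sse = 256u * 255 * 255;  /* matching sse: 16646400 */

    /* Buggy form: 65280 * 65280 wraps to -33488896; the signed shift
       keeps the sign bit, so we subtract -130816, i.e. add 130816. */
    unsigned int bad = sse - ((sum * sum) >> 8);

    /* Fixed form, as in the patch: the same bit pattern shifted
       logically gives 16646400, the true (sum * sum) >> 8. */
    unsigned int good = sse - ((unsigned int)(sum * sum) >> 8);

    printf("bad  = %u\n", bad);   /* prints 16777216 */
    printf("good = %u\n", good);  /* prints 0: a constant-offset block
                                     has zero variance */
    return 0;
}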