Index: lib/dec/x86_vc/mmxfrag.c =================================================================== --- lib/dec/x86_vc/mmxfrag.c (revision 16142) +++ lib/dec/x86_vc/mmxfrag.c (working copy) @@ -27,12 +27,14 @@ void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, const ogg_int16_t *_residue){ + int _save_ebx; /* --------------------------------------------------------------------- This function does the inter reconstruction step with 8 iterations unrolled. The iteration for each instruction is noted by the #id in the comments (in case you want to reconstruct it) --------------------------------------------------------------------- */ _asm{ + mov [_save_ebx], ebx mov edi, [_residue] /* load residue ptr */ mov eax, 0x00800080 /* generate constant */ mov ebx, [_dst_ystride] /* load dst-stride */ @@ -93,6 +95,7 @@ packuswb mm3, mm4 /* #8 pack to byte */ movq [edx + ecx*2], mm1 /* #7 write row */ movq [edx + eax], mm3 /* #8 write row */ + mov ebx, [_save_ebx] } } @@ -100,6 +103,7 @@ void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride, const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){ + int _save_ebx; /* --------------------------------------------------------------------- This function does the inter reconstruction step with two iterations running in parallel to hide some load-latencies and break the dependency @@ -107,6 +111,7 @@ comments (in case you want to reconstruct it) --------------------------------------------------------------------- */ _asm{ + mov [_save_ebx], ebx pxor mm0, mm0 /* generate constant 0 */ mov esi, [_src] mov edi, [_residue] @@ -143,6 +148,7 @@ movq [edx + ebx], mm7 /* #2 write row */ lea edx, [edx+ebx*2] /* dst += stride * 2 */ jne nextchunk + mov ebx, [_save_ebx] } } @@ -150,6 +156,7 @@ void oc_frag_recon_inter2_mmx(unsigned char *_dst, int _dst_ystride, const unsigned char *_src1, int _src1_ystride, const unsigned char *_src2, int _src2_ystride,const ogg_int16_t *_residue){ + int _save_ebx; /* --------------------------------------------------------------------- This function does the inter2 reconstruction step.The building of the average is done with a bit-twiddeling trick to avoid excessive register @@ -166,6 +173,7 @@ using the pavgb instruction let me know and I'll do the 3dnow codepath. --------------------------------------------------------------------- */ _asm{ + mov [_save_ebx], ebx mov eax, 0xfefefefe mov esi, [_src1] mov edi, [_src2] @@ -204,6 +212,7 @@ packuswb mm2, mm3 /* pack and saturate */ movq [edx], mm2 /* write row */ jne nextrow + mov ebx, [_save_ebx] } } Index: lib/dec/x86_vc/mmxloopfilter.c =================================================================== --- lib/dec/x86_vc/mmxloopfilter.c (revision 16142) +++ lib/dec/x86_vc/mmxloopfilter.c (working copy) @@ -38,7 +38,7 @@ _asm { mov eax, [_pix] mov edx, [_ystride] - mov ebx, [_ll] + mov ecx, [_ll] /* _pix -= ystride */ sub eax, edx @@ -104,7 +104,7 @@ /*Free up mm5.*/ packuswb mm4, mm5 /*mm0=L L L L*/ - movq mm0, [ebx] + movq mm0, [ecx] /*if(R_i<-2L||R_i>2L)R_i=0:*/ movq mm5, mm2 pxor mm6, mm6