gecko/media/libtheora/lib/dec/x86_vc/mmxidct.c

1007 lines
24 KiB
C
Raw Normal View History

/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id:
********************************************************************/
/* -------------------------------------------------------------------
MMX based IDCT for the theora codec.
Originally written by Rudolf Marek, based on code from On2's VP3.
Converted to Visual Studio inline assembly by Nils Pipenbrinck.
---------------------------------------------------------------------*/
#if defined(USE_ASM)
#include <ogg/ogg.h>
#include "../dct.h"
#include "../idct.h"
#include "x86int.h"
/*Constants shared by the MMX IDCT routines in this file.
  Every row holds one value replicated into four 16-bit lanes, so a single
   8-byte movq/pmulhw/paddw memory operand fills a whole MMX register.
  The inline assembly addresses rows by hard-coded byte offset (8 bytes per
   row, which requires ogg_uint16_t to be 16 bits wide):
    00H OC_C1S7   08H OC_C2S6   10H OC_C3S5   18H OC_C4S4
    20H OC_C5S3   28H OC_C6S2   30H OC_C7S1   38H the literal 8
  Reordering or resizing rows silently breaks those offsets.*/
#define OC_IDCT_ROW(_c) \
 (ogg_uint16_t)_c,(ogg_uint16_t)_c,(ogg_uint16_t)_c,(ogg_uint16_t)_c
static const __declspec(align(16)) ogg_uint16_t
OC_IDCT_CONSTS[(7+1)*4]={
OC_IDCT_ROW(OC_C1S7),
OC_IDCT_ROW(OC_C2S6),
OC_IDCT_ROW(OC_C3S5),
OC_IDCT_ROW(OC_C4S4),
OC_IDCT_ROW(OC_C5S3),
OC_IDCT_ROW(OC_C6S2),
OC_IDCT_ROW(OC_C7S1),
8, 8, 8, 8
};
#undef OC_IDCT_ROW
/*MMX inverse DCT over the 64 16-bit values at _y, performed in place.
  Written as 32-bit x86 MSVC inline assembly (edx/eax are used as pointers).
  _y: The block to transform; all 128 bytes (_y+00H .. _y+7FH) are both read
       and overwritten.
  The body is four fully unrolled passes:
   1. quadwords 00H..38H: 8-point butterfly on four lanes at once, then two
       4x4 transposes of the 16-bit lanes, stored back to 00H..38H;
   2. the same instruction sequence for quadwords 40H..78H;
   3. quadwords 00H,10H,..,70H (stride 10H): the same butterfly, then each
       result is rounded as (x+8)>>4 and stored back;
   4. the same as pass 3 for quadwords 08H,18H,..,78H.
  Clobbers eax, edx and mm0-mm7.
  emms is NOT executed (it is commented out at the end), so the MMX state is
   left as-is on return; presumably the caller issues emms later - confirm.
  NOTE(review): the "_10" suffix presumably refers to blocks with only the
   first 10 coefficients non-zero, but no shortcut for sparse input is visible
   here: the instruction sequence appears to be the same as oc_idct8x8_mmx in
   this file.
  Confirm whether that is intended.*/
void oc_idct8x8_10_mmx(ogg_int16_t _y[64]){
_asm {
/*edx = _y (transformed in place), eax = base of the constant table.*/
mov edx, [_y]
mov eax, offset OC_IDCT_CONSTS
/*---- Pass 1 of 4: inputs are the eight quadwords at 00H..38H. ----
  Table rows used: 00H=OC_C1S7 08H=OC_C2S6 10H=OC_C3S5 18H=OC_C4S4
   20H=OC_C5S3 28H=OC_C6S2 30H=OC_C7S1.*/
movq mm2, [edx + 30H]
movq mm6, [eax + 10H]
movq mm4, mm2
movq mm7, [edx + 18H]
/*pmulhw keeps the high 16 bits of each signed 16x16 product.
  For the table rows at 00H..20H the multiplicand is added back afterwards
   (e.g. the paddw mm4, mm2 below), giving x+hi16(x*c); products with the
   rows at 28H and 30H are used without the add-back.
  Presumably this compensates for constants above 7FFFH being read as
   negative by the signed multiply - confirm against ../dct.h.*/
pmulhw mm4, mm6
movq mm1, [eax + 20H]
pmulhw mm6, mm7
movq mm5, mm1
pmulhw mm1, mm2
movq mm3, [edx + 10H]
pmulhw mm5, mm7
movq mm0, [eax]
paddw mm4, mm2
paddw mm6, mm7
paddw mm2, mm1
movq mm1, [edx + 38H]
paddw mm7, mm5
movq mm5, mm0
pmulhw mm0, mm3
paddw mm4, mm7
pmulhw mm5, mm1
movq mm7, [eax + 30H]
psubw mm6, mm2
paddw mm0, mm3
pmulhw mm3, mm7
movq mm2, [edx + 20H]
pmulhw mm7, mm1
paddw mm5, mm1
movq mm1, mm2
pmulhw mm2, [eax + 08H]
psubw mm3, mm5
movq mm5, [edx + 28H]
paddw mm0, mm7
movq mm7, mm5
/*Sum/difference idiom used throughout: after psubw a, b the register a
   holds a-b; paddw b, b then paddw b, a turns b into 2*b+(a-b) = a+b,
   so both results are produced without a scratch register.*/
psubw mm0, mm4
pmulhw mm5, [eax + 08H]
paddw mm2, mm1
pmulhw mm1, [eax + 28H]
paddw mm4, mm4
paddw mm4, mm0
psubw mm3, mm6
paddw mm5, mm7
paddw mm6, mm6
pmulhw mm7, [eax + 28H]
paddw mm6, mm3
/*[edx+10H] (here) and [edx+20H] (below) were already consumed as inputs and
   are reused as spill slots; both are reloaded further down.*/
movq [edx + 10H], mm4
psubw mm1, mm5
movq mm4, [eax + 18H]
movq mm5, mm3
pmulhw mm3, mm4
paddw mm7, mm2
movq [edx + 20H], mm6
movq mm2, mm0
movq mm6, [edx]
pmulhw mm0, mm4
paddw mm5, mm3
movq mm3, [edx + 08H]
psubw mm5, mm1
paddw mm2, mm0
psubw mm6, mm3
movq mm0, mm6
pmulhw mm6, mm4
paddw mm3, mm3
paddw mm1, mm1
paddw mm3, mm0
paddw mm1, mm5
pmulhw mm4, mm3
paddw mm6, mm0
psubw mm6, mm2
paddw mm2, mm2
/*Reload the first spilled intermediate.*/
movq mm0, [edx + 10H]
paddw mm2, mm6
paddw mm4, mm3
psubw mm2, mm1
/*Reload the second spilled intermediate.*/
movq mm3, [edx + 20H]
psubw mm4, mm7
paddw mm1, mm1
paddw mm7, mm7
paddw mm1, mm2
paddw mm7, mm4
psubw mm4, mm3
paddw mm3, mm3
psubw mm6, mm5
paddw mm5, mm5
paddw mm3, mm4
paddw mm5, mm6
psubw mm7, mm0
paddw mm0, mm0
/*All eight results are now in mm0-mm7.
  mm1 (here) and mm0 (below) are parked at 10H and 00H to free registers for
   the transposes.*/
movq [edx + 10H], mm1
paddw mm0, mm7
/*First 4x4 transpose of 16-bit lanes: (mm4, mm5, mm6, mm7) are interleaved
   by words (punpck?wd) and then by dwords (punpck?dq); the four transposed
   quadwords are stored to 08H, 18H, 28H and 38H.*/
movq mm1, mm4
punpcklwd mm4, mm5
movq [edx], mm0
punpckhwd mm1, mm5
movq mm0, mm6
punpcklwd mm6, mm7
movq mm5, mm4
punpckldq mm4, mm6
punpckhdq mm5, mm6
movq mm6, mm1
movq [edx + 08H], mm4
punpckhwd mm0, mm7
movq [edx + 18H], mm5
punpckhdq mm6, mm0
/*Second 4x4 transpose: the two parked vectors are reloaded into mm4/mm5 and
   combined with mm2 and mm3; the results are stored to 00H, 10H, 20H and
   30H.
  The 38H and 28H stores of the first transpose are scheduled in between.*/
movq mm4, [edx]
punpckldq mm1, mm0
movq mm5, [edx + 10H]
movq mm0, mm4
movq [edx + 38H], mm6
punpcklwd mm0, mm5
movq [edx + 28H], mm1
punpckhwd mm4, mm5
movq mm5, mm2
punpcklwd mm2, mm3
movq mm1, mm0
punpckldq mm0, mm2
punpckhdq mm1, mm2
movq mm2, mm4
movq [edx], mm0
punpckhwd mm5, mm3
movq [edx + 10H], mm1
punpckhdq mm4, mm5
punpckldq mm2, mm5
movq [edx + 30H], mm4
movq [edx + 20H], mm2
/*---- Pass 2 of 4: the pass 1 instruction sequence again, with every _y
   offset raised by 40H (inputs and outputs are quadwords 40H..78H). ----*/
movq mm2, [edx + 70H]
movq mm6, [eax + 10H]
movq mm4, mm2
movq mm7, [edx + 58H]
pmulhw mm4, mm6
movq mm1, [eax + 20H]
pmulhw mm6, mm7
movq mm5, mm1
pmulhw mm1, mm2
movq mm3, [edx + 50H]
pmulhw mm5, mm7
movq mm0, [eax]
paddw mm4, mm2
paddw mm6, mm7
paddw mm2, mm1
movq mm1, [edx + 78H]
paddw mm7, mm5
movq mm5, mm0
pmulhw mm0, mm3
paddw mm4, mm7
pmulhw mm5, mm1
movq mm7, [eax + 30H]
psubw mm6, mm2
paddw mm0, mm3
pmulhw mm3, mm7
movq mm2, [edx + 60H]
pmulhw mm7, mm1
paddw mm5, mm1
movq mm1, mm2
pmulhw mm2, [eax + 08H]
psubw mm3, mm5
movq mm5, [edx + 68H]
paddw mm0, mm7
movq mm7, mm5
psubw mm0, mm4
pmulhw mm5, [eax + 08H]
paddw mm2, mm1
pmulhw mm1, [eax + 28H]
paddw mm4, mm4
paddw mm4, mm0
psubw mm3, mm6
paddw mm5, mm7
paddw mm6, mm6
pmulhw mm7, [eax + 28H]
paddw mm6, mm3
/*Spill slots for this pass are [edx+50H] and [edx+60H].*/
movq [edx + 50H], mm4
psubw mm1, mm5
movq mm4, [eax + 18H]
movq mm5, mm3
pmulhw mm3, mm4
paddw mm7, mm2
movq [edx + 60H], mm6
movq mm2, mm0
movq mm6, [edx + 40H]
pmulhw mm0, mm4
paddw mm5, mm3
movq mm3, [edx + 48H]
psubw mm5, mm1
paddw mm2, mm0
psubw mm6, mm3
movq mm0, mm6
pmulhw mm6, mm4
paddw mm3, mm3
paddw mm1, mm1
paddw mm3, mm0
paddw mm1, mm5
pmulhw mm4, mm3
paddw mm6, mm0
psubw mm6, mm2
paddw mm2, mm2
movq mm0, [edx + 50H]
paddw mm2, mm6
paddw mm4, mm3
psubw mm2, mm1
movq mm3, [edx + 60H]
psubw mm4, mm7
paddw mm1, mm1
paddw mm7, mm7
paddw mm1, mm2
paddw mm7, mm4
psubw mm4, mm3
paddw mm3, mm3
psubw mm6, mm5
paddw mm5, mm5
paddw mm3, mm4
paddw mm5, mm6
psubw mm7, mm0
paddw mm0, mm0
movq [edx + 50H], mm1
paddw mm0, mm7
/*First 4x4 transpose of this pass: results go to 48H, 58H, 68H and 78H.*/
movq mm1, mm4
punpcklwd mm4, mm5
movq [edx + 40H], mm0
punpckhwd mm1, mm5
movq mm0, mm6
punpcklwd mm6, mm7
movq mm5, mm4
punpckldq mm4, mm6
punpckhdq mm5, mm6
movq mm6, mm1
movq [edx + 48H], mm4
punpckhwd mm0, mm7
movq [edx + 58H], mm5
punpckhdq mm6, mm0
/*Second 4x4 transpose of this pass: results go to 40H, 50H, 60H and 70H.*/
movq mm4, [edx + 40H]
punpckldq mm1, mm0
movq mm5, [edx + 50H]
movq mm0, mm4
movq [edx + 78H], mm6
punpcklwd mm0, mm5
movq [edx + 68H], mm1
punpckhwd mm4, mm5
movq mm5, mm2
punpcklwd mm2, mm3
movq mm1, mm0
punpckldq mm0, mm2
punpckhdq mm1, mm2
movq mm2, mm4
movq [edx + 40H], mm0
punpckhwd mm5, mm3
movq [edx + 50H], mm1
punpckhdq mm4, mm5
punpckldq mm2, mm5
movq [edx + 70H], mm4
movq [edx + 60H], mm2
/*---- Pass 3 of 4: the same butterfly arithmetic, now fed from the
   quadwords at 00H,10H,..,70H (stride 10H = eight 16-bit values).
  There is no transpose in this pass; instead each result is rounded and
   scaled as (x+8)>>4 before being stored back to the same eight
   quadwords. ----*/
movq mm2, [edx + 30H]
movq mm6, [eax + 10H]
movq mm4, mm2
movq mm7, [edx + 50H]
pmulhw mm4, mm6
movq mm1, [eax + 20H]
pmulhw mm6, mm7
movq mm5, mm1
pmulhw mm1, mm2
movq mm3, [edx + 10H]
pmulhw mm5, mm7
movq mm0, [eax]
paddw mm4, mm2
paddw mm6, mm7
paddw mm2, mm1
movq mm1, [edx + 70H]
paddw mm7, mm5
movq mm5, mm0
pmulhw mm0, mm3
paddw mm4, mm7
pmulhw mm5, mm1
movq mm7, [eax + 30H]
psubw mm6, mm2
paddw mm0, mm3
pmulhw mm3, mm7
movq mm2, [edx + 20H]
pmulhw mm7, mm1
paddw mm5, mm1
movq mm1, mm2
pmulhw mm2, [eax + 08H]
psubw mm3, mm5
movq mm5, [edx + 60H]
paddw mm0, mm7
movq mm7, mm5
psubw mm0, mm4
pmulhw mm5, [eax + 08H]
paddw mm2, mm1
pmulhw mm1, [eax + 28H]
paddw mm4, mm4
paddw mm4, mm0
psubw mm3, mm6
paddw mm5, mm7
paddw mm6, mm6
pmulhw mm7, [eax + 28H]
paddw mm6, mm3
/*Spill slots for this pass are [edx+10H] and [edx+20H].*/
movq [edx + 10H], mm4
psubw mm1, mm5
movq mm4, [eax + 18H]
movq mm5, mm3
pmulhw mm3, mm4
paddw mm7, mm2
movq [edx + 20H], mm6
movq mm2, mm0
movq mm6, [edx]
pmulhw mm0, mm4
paddw mm5, mm3
movq mm3, [edx + 40H]
psubw mm5, mm1
paddw mm2, mm0
psubw mm6, mm3
movq mm0, mm6
pmulhw mm6, mm4
paddw mm3, mm3
paddw mm1, mm1
paddw mm3, mm0
paddw mm1, mm5
pmulhw mm4, mm3
paddw mm6, mm0
psubw mm6, mm2
paddw mm2, mm2
movq mm0, [edx + 10H]
paddw mm2, mm6
paddw mm4, mm3
psubw mm2, mm1
/*Rounding: [eax+38H] holds 8 in every lane.
  It is added to the difference before the sum is rebuilt from it, so both
   members of each sum/difference pair carry the +8; psraw 4 then shifts each
   lane right arithmetically by 4.
  The same pattern repeats for the remaining three pairs.*/
paddw mm2, [eax + 38H]
paddw mm1, mm1
paddw mm1, mm2
psraw mm2, 4
psubw mm4, mm7
psraw mm1, 4
/*Fetch the spilled intermediate from 20H before that slot receives its
   final output two instructions later.*/
movq mm3, [edx + 20H]
paddw mm7, mm7
movq [edx + 20H], mm2
paddw mm7, mm4
movq [edx + 10H], mm1
psubw mm4, mm3
paddw mm4, [eax + 38H]
paddw mm3, mm3
paddw mm3, mm4
psraw mm4, 4
psubw mm6, mm5
psraw mm3, 4
paddw mm6, [eax + 38H]
paddw mm5, mm5
paddw mm5, mm6
psraw mm6, 4
movq [edx + 40H], mm4
psraw mm5, 4
movq [edx + 30H], mm3
psubw mm7, mm0
paddw mm7, [eax + 38H]
paddw mm0, mm0
paddw mm0, mm7
psraw mm7, 4
movq [edx + 60H], mm6
psraw mm0, 4
movq [edx + 50H], mm5
movq [edx + 70H], mm7
movq [edx], mm0
/*---- Pass 4 of 4: the pass 3 instruction sequence again, with every _y
   offset raised by 08H (quadwords 08H,18H,..,78H). ----*/
movq mm2, [edx + 38H]
movq mm6, [eax + 10H]
movq mm4, mm2
movq mm7, [edx + 58H]
pmulhw mm4, mm6
movq mm1, [eax + 20H]
pmulhw mm6, mm7
movq mm5, mm1
pmulhw mm1, mm2
movq mm3, [edx + 18H]
pmulhw mm5, mm7
movq mm0, [eax]
paddw mm4, mm2
paddw mm6, mm7
paddw mm2, mm1
movq mm1, [edx + 78H]
paddw mm7, mm5
movq mm5, mm0
pmulhw mm0, mm3
paddw mm4, mm7
pmulhw mm5, mm1
movq mm7, [eax + 30H]
psubw mm6, mm2
paddw mm0, mm3
pmulhw mm3, mm7
movq mm2, [edx + 28H]
pmulhw mm7, mm1
paddw mm5, mm1
movq mm1, mm2
pmulhw mm2, [eax + 08H]
psubw mm3, mm5
movq mm5, [edx + 68H]
paddw mm0, mm7
movq mm7, mm5
psubw mm0, mm4
pmulhw mm5, [eax + 08H]
paddw mm2, mm1
pmulhw mm1, [eax + 28H]
paddw mm4, mm4
paddw mm4, mm0
psubw mm3, mm6
paddw mm5, mm7
paddw mm6, mm6
pmulhw mm7, [eax + 28H]
paddw mm6, mm3
/*Spill slots for this pass are [edx+18H] and [edx+28H].*/
movq [edx + 18H], mm4
psubw mm1, mm5
movq mm4, [eax + 18H]
movq mm5, mm3
pmulhw mm3, mm4
paddw mm7, mm2
movq [edx + 28H], mm6
movq mm2, mm0
movq mm6, [edx + 08H]
pmulhw mm0, mm4
paddw mm5, mm3
movq mm3, [edx + 48H]
psubw mm5, mm1
paddw mm2, mm0
psubw mm6, mm3
movq mm0, mm6
pmulhw mm6, mm4
paddw mm3, mm3
paddw mm1, mm1
paddw mm3, mm0
paddw mm1, mm5
pmulhw mm4, mm3
paddw mm6, mm0
psubw mm6, mm2
paddw mm2, mm2
movq mm0, [edx + 18H]
paddw mm2, mm6
paddw mm4, mm3
psubw mm2, mm1
/*Round and scale each result as (x+8)>>4, as in pass 3.*/
paddw mm2, [eax + 38H]
paddw mm1, mm1
paddw mm1, mm2
psraw mm2, 4
psubw mm4, mm7
psraw mm1, 4
movq mm3, [edx + 28H]
paddw mm7, mm7
movq [edx + 28H], mm2
paddw mm7, mm4
movq [edx + 18H], mm1
psubw mm4, mm3
paddw mm4, [eax + 38H]
paddw mm3, mm3
paddw mm3, mm4
psraw mm4, 4
psubw mm6, mm5
psraw mm3, 4
paddw mm6, [eax + 38H]
paddw mm5, mm5
paddw mm5, mm6
psraw mm6, 4
movq [edx + 48H], mm4
psraw mm5, 4
movq [edx + 38H], mm3
psubw mm7, mm0
paddw mm7, [eax + 38H]
paddw mm0, mm0
paddw mm0, mm7
psraw mm7, 4
movq [edx + 68H], mm6
psraw mm0, 4
movq [edx + 58H], mm5
movq [edx + 78H], mm7
movq [edx + 08H], mm0
/*The emms below is commented out, so this function does not clear the MMX
   state itself.*/
/* emms */
}
}
/*MMX inverse DCT over the 64 16-bit values at _y, performed in place.
  Written as 32-bit x86 MSVC inline assembly (edx/eax are used as pointers).
  _y: The block to transform; all 128 bytes (_y+00H .. _y+7FH) are both read
       and overwritten.
  The body is four fully unrolled passes:
   1. quadwords 00H..38H: 8-point butterfly on four lanes at once, then two
       4x4 transposes of the 16-bit lanes, stored back to 00H..38H;
   2. the same instruction sequence for quadwords 40H..78H;
   3. quadwords 00H,10H,..,70H (stride 10H): the same butterfly, then each
       result is rounded as (x+8)>>4 and stored back;
   4. the same as pass 3 for quadwords 08H,18H,..,78H.
  Clobbers eax, edx and mm0-mm7.
  emms is NOT executed (it is commented out at the end), so the MMX state is
   left as-is on return; presumably the caller issues emms later - confirm.
  NOTE(review): the instruction sequence appears to be the same as
   oc_idct8x8_10_mmx in this file; confirm whether the duplication is
   intended.*/
void oc_idct8x8_mmx(ogg_int16_t _y[64]){
_asm {
/*edx = _y (transformed in place), eax = base of the constant table.*/
mov edx, [_y]
mov eax, offset OC_IDCT_CONSTS
/*---- Pass 1 of 4: inputs are the eight quadwords at 00H..38H. ----
  Table rows used: 00H=OC_C1S7 08H=OC_C2S6 10H=OC_C3S5 18H=OC_C4S4
   20H=OC_C5S3 28H=OC_C6S2 30H=OC_C7S1.*/
movq mm2, [edx + 30H]
movq mm6, [eax + 10H]
movq mm4, mm2
movq mm7, [edx + 18H]
/*pmulhw keeps the high 16 bits of each signed 16x16 product.
  For the table rows at 00H..20H the multiplicand is added back afterwards
   (e.g. the paddw mm4, mm2 below), giving x+hi16(x*c); products with the
   rows at 28H and 30H are used without the add-back.
  Presumably this compensates for constants above 7FFFH being read as
   negative by the signed multiply - confirm against ../dct.h.*/
pmulhw mm4, mm6
movq mm1, [eax + 20H]
pmulhw mm6, mm7
movq mm5, mm1
pmulhw mm1, mm2
movq mm3, [edx + 10H]
pmulhw mm5, mm7
movq mm0, [eax]
paddw mm4, mm2
paddw mm6, mm7
paddw mm2, mm1
movq mm1, [edx + 38H]
paddw mm7, mm5
movq mm5, mm0
pmulhw mm0, mm3
paddw mm4, mm7
pmulhw mm5, mm1
movq mm7, [eax + 30H]
psubw mm6, mm2
paddw mm0, mm3
pmulhw mm3, mm7
movq mm2, [edx + 20H]
pmulhw mm7, mm1
paddw mm5, mm1
movq mm1, mm2
pmulhw mm2, [eax + 08H]
psubw mm3, mm5
movq mm5, [edx + 28H]
paddw mm0, mm7
movq mm7, mm5
/*Sum/difference idiom used throughout: after psubw a, b the register a
   holds a-b; paddw b, b then paddw b, a turns b into 2*b+(a-b) = a+b,
   so both results are produced without a scratch register.*/
psubw mm0, mm4
pmulhw mm5, [eax + 08H]
paddw mm2, mm1
pmulhw mm1, [eax + 28H]
paddw mm4, mm4
paddw mm4, mm0
psubw mm3, mm6
paddw mm5, mm7
paddw mm6, mm6
pmulhw mm7, [eax + 28H]
paddw mm6, mm3
/*[edx+10H] (here) and [edx+20H] (below) were already consumed as inputs and
   are reused as spill slots; both are reloaded further down.*/
movq [edx + 10H], mm4
psubw mm1, mm5
movq mm4, [eax + 18H]
movq mm5, mm3
pmulhw mm3, mm4
paddw mm7, mm2
movq [edx + 20H], mm6
movq mm2, mm0
movq mm6, [edx]
pmulhw mm0, mm4
paddw mm5, mm3
movq mm3, [edx + 08H]
psubw mm5, mm1
paddw mm2, mm0
psubw mm6, mm3
movq mm0, mm6
pmulhw mm6, mm4
paddw mm3, mm3
paddw mm1, mm1
paddw mm3, mm0
paddw mm1, mm5
pmulhw mm4, mm3
paddw mm6, mm0
psubw mm6, mm2
paddw mm2, mm2
/*Reload the first spilled intermediate.*/
movq mm0, [edx + 10H]
paddw mm2, mm6
paddw mm4, mm3
psubw mm2, mm1
/*Reload the second spilled intermediate.*/
movq mm3, [edx + 20H]
psubw mm4, mm7
paddw mm1, mm1
paddw mm7, mm7
paddw mm1, mm2
paddw mm7, mm4
psubw mm4, mm3
paddw mm3, mm3
psubw mm6, mm5
paddw mm5, mm5
paddw mm3, mm4
paddw mm5, mm6
psubw mm7, mm0
paddw mm0, mm0
/*All eight results are now in mm0-mm7.
  mm1 (here) and mm0 (below) are parked at 10H and 00H to free registers for
   the transposes.*/
movq [edx + 10H], mm1
paddw mm0, mm7
/*First 4x4 transpose of 16-bit lanes: (mm4, mm5, mm6, mm7) are interleaved
   by words (punpck?wd) and then by dwords (punpck?dq); the four transposed
   quadwords are stored to 08H, 18H, 28H and 38H.*/
movq mm1, mm4
punpcklwd mm4, mm5
movq [edx], mm0
punpckhwd mm1, mm5
movq mm0, mm6
punpcklwd mm6, mm7
movq mm5, mm4
punpckldq mm4, mm6
punpckhdq mm5, mm6
movq mm6, mm1
movq [edx + 08H], mm4
punpckhwd mm0, mm7
movq [edx + 18H], mm5
punpckhdq mm6, mm0
/*Second 4x4 transpose: the two parked vectors are reloaded into mm4/mm5 and
   combined with mm2 and mm3; the results are stored to 00H, 10H, 20H and
   30H.
  The 38H and 28H stores of the first transpose are scheduled in between.*/
movq mm4, [edx]
punpckldq mm1, mm0
movq mm5, [edx + 10H]
movq mm0, mm4
movq [edx + 38H], mm6
punpcklwd mm0, mm5
movq [edx + 28H], mm1
punpckhwd mm4, mm5
movq mm5, mm2
punpcklwd mm2, mm3
movq mm1, mm0
punpckldq mm0, mm2
punpckhdq mm1, mm2
movq mm2, mm4
movq [edx], mm0
punpckhwd mm5, mm3
movq [edx + 10H], mm1
punpckhdq mm4, mm5
punpckldq mm2, mm5
movq [edx + 30H], mm4
movq [edx + 20H], mm2
/*---- Pass 2 of 4: the pass 1 instruction sequence again, with every _y
   offset raised by 40H (inputs and outputs are quadwords 40H..78H). ----*/
movq mm2, [edx + 70H]
movq mm6, [eax + 10H]
movq mm4, mm2
movq mm7, [edx + 58H]
pmulhw mm4, mm6
movq mm1, [eax + 20H]
pmulhw mm6, mm7
movq mm5, mm1
pmulhw mm1, mm2
movq mm3, [edx + 50H]
pmulhw mm5, mm7
movq mm0, [eax]
paddw mm4, mm2
paddw mm6, mm7
paddw mm2, mm1
movq mm1, [edx + 78H]
paddw mm7, mm5
movq mm5, mm0
pmulhw mm0, mm3
paddw mm4, mm7
pmulhw mm5, mm1
movq mm7, [eax + 30H]
psubw mm6, mm2
paddw mm0, mm3
pmulhw mm3, mm7
movq mm2, [edx + 60H]
pmulhw mm7, mm1
paddw mm5, mm1
movq mm1, mm2
pmulhw mm2, [eax + 08H]
psubw mm3, mm5
movq mm5, [edx + 68H]
paddw mm0, mm7
movq mm7, mm5
psubw mm0, mm4
pmulhw mm5, [eax + 08H]
paddw mm2, mm1
pmulhw mm1, [eax + 28H]
paddw mm4, mm4
paddw mm4, mm0
psubw mm3, mm6
paddw mm5, mm7
paddw mm6, mm6
pmulhw mm7, [eax + 28H]
paddw mm6, mm3
/*Spill slots for this pass are [edx+50H] and [edx+60H].*/
movq [edx + 50H], mm4
psubw mm1, mm5
movq mm4, [eax + 18H]
movq mm5, mm3
pmulhw mm3, mm4
paddw mm7, mm2
movq [edx + 60H], mm6
movq mm2, mm0
movq mm6, [edx + 40H]
pmulhw mm0, mm4
paddw mm5, mm3
movq mm3, [edx + 48H]
psubw mm5, mm1
paddw mm2, mm0
psubw mm6, mm3
movq mm0, mm6
pmulhw mm6, mm4
paddw mm3, mm3
paddw mm1, mm1
paddw mm3, mm0
paddw mm1, mm5
pmulhw mm4, mm3
paddw mm6, mm0
psubw mm6, mm2
paddw mm2, mm2
movq mm0, [edx + 50H]
paddw mm2, mm6
paddw mm4, mm3
psubw mm2, mm1
movq mm3, [edx + 60H]
psubw mm4, mm7
paddw mm1, mm1
paddw mm7, mm7
paddw mm1, mm2
paddw mm7, mm4
psubw mm4, mm3
paddw mm3, mm3
psubw mm6, mm5
paddw mm5, mm5
paddw mm3, mm4
paddw mm5, mm6
psubw mm7, mm0
paddw mm0, mm0
movq [edx + 50H], mm1
paddw mm0, mm7
/*First 4x4 transpose of this pass: results go to 48H, 58H, 68H and 78H.*/
movq mm1, mm4
punpcklwd mm4, mm5
movq [edx + 40H], mm0
punpckhwd mm1, mm5
movq mm0, mm6
punpcklwd mm6, mm7
movq mm5, mm4
punpckldq mm4, mm6
punpckhdq mm5, mm6
movq mm6, mm1
movq [edx + 48H], mm4
punpckhwd mm0, mm7
movq [edx + 58H], mm5
punpckhdq mm6, mm0
/*Second 4x4 transpose of this pass: results go to 40H, 50H, 60H and 70H.*/
movq mm4, [edx + 40H]
punpckldq mm1, mm0
movq mm5, [edx + 50H]
movq mm0, mm4
movq [edx + 78H], mm6
punpcklwd mm0, mm5
movq [edx + 68H], mm1
punpckhwd mm4, mm5
movq mm5, mm2
punpcklwd mm2, mm3
movq mm1, mm0
punpckldq mm0, mm2
punpckhdq mm1, mm2
movq mm2, mm4
movq [edx + 40H], mm0
punpckhwd mm5, mm3
movq [edx + 50H], mm1
punpckhdq mm4, mm5
punpckldq mm2, mm5
movq [edx + 70H], mm4
movq [edx + 60H], mm2
/*---- Pass 3 of 4: the same butterfly arithmetic, now fed from the
   quadwords at 00H,10H,..,70H (stride 10H = eight 16-bit values).
  There is no transpose in this pass; instead each result is rounded and
   scaled as (x+8)>>4 before being stored back to the same eight
   quadwords. ----*/
movq mm2, [edx + 30H]
movq mm6, [eax + 10H]
movq mm4, mm2
movq mm7, [edx + 50H]
pmulhw mm4, mm6
movq mm1, [eax + 20H]
pmulhw mm6, mm7
movq mm5, mm1
pmulhw mm1, mm2
movq mm3, [edx + 10H]
pmulhw mm5, mm7
movq mm0, [eax]
paddw mm4, mm2
paddw mm6, mm7
paddw mm2, mm1
movq mm1, [edx + 70H]
paddw mm7, mm5
movq mm5, mm0
pmulhw mm0, mm3
paddw mm4, mm7
pmulhw mm5, mm1
movq mm7, [eax + 30H]
psubw mm6, mm2
paddw mm0, mm3
pmulhw mm3, mm7
movq mm2, [edx + 20H]
pmulhw mm7, mm1
paddw mm5, mm1
movq mm1, mm2
pmulhw mm2, [eax + 08H]
psubw mm3, mm5
movq mm5, [edx + 60H]
paddw mm0, mm7
movq mm7, mm5
psubw mm0, mm4
pmulhw mm5, [eax + 08H]
paddw mm2, mm1
pmulhw mm1, [eax + 28H]
paddw mm4, mm4
paddw mm4, mm0
psubw mm3, mm6
paddw mm5, mm7
paddw mm6, mm6
pmulhw mm7, [eax + 28H]
paddw mm6, mm3
/*Spill slots for this pass are [edx+10H] and [edx+20H].*/
movq [edx + 10H], mm4
psubw mm1, mm5
movq mm4, [eax + 18H]
movq mm5, mm3
pmulhw mm3, mm4
paddw mm7, mm2
movq [edx + 20H], mm6
movq mm2, mm0
movq mm6, [edx]
pmulhw mm0, mm4
paddw mm5, mm3
movq mm3, [edx + 40H]
psubw mm5, mm1
paddw mm2, mm0
psubw mm6, mm3
movq mm0, mm6
pmulhw mm6, mm4
paddw mm3, mm3
paddw mm1, mm1
paddw mm3, mm0
paddw mm1, mm5
pmulhw mm4, mm3
paddw mm6, mm0
psubw mm6, mm2
paddw mm2, mm2
movq mm0, [edx + 10H]
paddw mm2, mm6
paddw mm4, mm3
psubw mm2, mm1
/*Rounding: [eax+38H] holds 8 in every lane.
  It is added to the difference before the sum is rebuilt from it, so both
   members of each sum/difference pair carry the +8; psraw 4 then shifts each
   lane right arithmetically by 4.
  The same pattern repeats for the remaining three pairs.*/
paddw mm2, [eax + 38H]
paddw mm1, mm1
paddw mm1, mm2
psraw mm2, 4
psubw mm4, mm7
psraw mm1, 4
/*Fetch the spilled intermediate from 20H before that slot receives its
   final output two instructions later.*/
movq mm3, [edx + 20H]
paddw mm7, mm7
movq [edx + 20H], mm2
paddw mm7, mm4
movq [edx + 10H], mm1
psubw mm4, mm3
paddw mm4, [eax + 38H]
paddw mm3, mm3
paddw mm3, mm4
psraw mm4, 4
psubw mm6, mm5
psraw mm3, 4
paddw mm6, [eax + 38H]
paddw mm5, mm5
paddw mm5, mm6
psraw mm6, 4
movq [edx + 40H], mm4
psraw mm5, 4
movq [edx + 30H], mm3
psubw mm7, mm0
paddw mm7, [eax + 38H]
paddw mm0, mm0
paddw mm0, mm7
psraw mm7, 4
movq [edx + 60H], mm6
psraw mm0, 4
movq [edx + 50H], mm5
movq [edx + 70H], mm7
movq [edx], mm0
/*---- Pass 4 of 4: the pass 3 instruction sequence again, with every _y
   offset raised by 08H (quadwords 08H,18H,..,78H). ----*/
movq mm2, [edx + 38H]
movq mm6, [eax + 10H]
movq mm4, mm2
movq mm7, [edx + 58H]
pmulhw mm4, mm6
movq mm1, [eax + 20H]
pmulhw mm6, mm7
movq mm5, mm1
pmulhw mm1, mm2
movq mm3, [edx + 18H]
pmulhw mm5, mm7
movq mm0, [eax]
paddw mm4, mm2
paddw mm6, mm7
paddw mm2, mm1
movq mm1, [edx + 78H]
paddw mm7, mm5
movq mm5, mm0
pmulhw mm0, mm3
paddw mm4, mm7
pmulhw mm5, mm1
movq mm7, [eax + 30H]
psubw mm6, mm2
paddw mm0, mm3
pmulhw mm3, mm7
movq mm2, [edx + 28H]
pmulhw mm7, mm1
paddw mm5, mm1
movq mm1, mm2
pmulhw mm2, [eax + 08H]
psubw mm3, mm5
movq mm5, [edx + 68H]
paddw mm0, mm7
movq mm7, mm5
psubw mm0, mm4
pmulhw mm5, [eax + 08H]
paddw mm2, mm1
pmulhw mm1, [eax + 28H]
paddw mm4, mm4
paddw mm4, mm0
psubw mm3, mm6
paddw mm5, mm7
paddw mm6, mm6
pmulhw mm7, [eax + 28H]
paddw mm6, mm3
/*Spill slots for this pass are [edx+18H] and [edx+28H].*/
movq [edx + 18H], mm4
psubw mm1, mm5
movq mm4, [eax + 18H]
movq mm5, mm3
pmulhw mm3, mm4
paddw mm7, mm2
movq [edx + 28H], mm6
movq mm2, mm0
movq mm6, [edx + 08H]
pmulhw mm0, mm4
paddw mm5, mm3
movq mm3, [edx + 48H]
psubw mm5, mm1
paddw mm2, mm0
psubw mm6, mm3
movq mm0, mm6
pmulhw mm6, mm4
paddw mm3, mm3
paddw mm1, mm1
paddw mm3, mm0
paddw mm1, mm5
pmulhw mm4, mm3
paddw mm6, mm0
psubw mm6, mm2
paddw mm2, mm2
movq mm0, [edx + 18H]
paddw mm2, mm6
paddw mm4, mm3
psubw mm2, mm1
/*Round and scale each result as (x+8)>>4, as in pass 3.*/
paddw mm2, [eax + 38H]
paddw mm1, mm1
paddw mm1, mm2
psraw mm2, 4
psubw mm4, mm7
psraw mm1, 4
movq mm3, [edx + 28H]
paddw mm7, mm7
movq [edx + 28H], mm2
paddw mm7, mm4
movq [edx + 18H], mm1
psubw mm4, mm3
paddw mm4, [eax + 38H]
paddw mm3, mm3
paddw mm3, mm4
psraw mm4, 4
psubw mm6, mm5
psraw mm3, 4
paddw mm6, [eax + 38H]
paddw mm5, mm5
paddw mm5, mm6
psraw mm6, 4
movq [edx + 48H], mm4
psraw mm5, 4
movq [edx + 38H], mm3
psubw mm7, mm0
paddw mm7, [eax + 38H]
paddw mm0, mm0
paddw mm0, mm7
psraw mm7, 4
movq [edx + 68H], mm6
psraw mm0, 4
movq [edx + 58H], mm5
movq [edx + 78H], mm7
movq [edx + 08H], mm0
/*The emms below is commented out, so this function does not clear the MMX
   state itself.*/
/* emms */
}
}
#endif