mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 1158741 - Implement a version of omxSP_FFTInv_CCSToR_F32_Sfs in openmax DL's FFT that is not scaled r=padenot
The new routine actually multiplies by two for consistency with the other FFT routines in use. MozReview-Commit-ID: Hk2Dg3fR2cQ
This commit is contained in:
parent
762a4308cd
commit
552e030729
@ -121,10 +121,7 @@ public:
|
||||
#else
|
||||
#ifdef BUILD_ARM_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
omxSP_FFTInv_CCSToR_F32_Sfs(mOutputBuffer.Elements()->f, aDataOut, mOmxIFFT);
|
||||
// There is no function that computes the inverse FFT without scaling, so
|
||||
// we have to scale back up here. Bug 1158741.
|
||||
AudioBufferInPlaceScale(aDataOut, mFFTSize, mFFTSize);
|
||||
omxSP_FFTInv_CCSToR_F32_Sfs_unscaled(mOutputBuffer.Elements()->f, aDataOut, mOmxIFFT);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
|
9
media/openmax_dl/README.mozilla
Normal file
9
media/openmax_dl/README.mozilla
Normal file
@ -0,0 +1,9 @@
|
||||
Bug 1158741 added an omxSP_FFTInv_CCSToR_F32_Sfs_unscaled function as an
|
||||
optimization which performs the same operation as
|
||||
omxSP_FFTInv_CCSToR_F32_Sfs except it doesn't scale the results by the
|
||||
length of the FFT. For consistency with other FFT routines used, it does
|
||||
multiply the results by two.
|
||||
|
||||
The affected files are:
|
||||
media/openmax_dl/dl/sp/api/omxSP.h
|
||||
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_unscaled_s.S
|
@ -68,6 +68,7 @@ if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['BUILD_ARM_NEON']:
|
||||
'sp/src/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S',
|
||||
'sp/src/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S',
|
||||
'sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S',
|
||||
'sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_unscaled_s.S',
|
||||
'sp/src/omxSP_FFTInv_CCSToR_S32_Sfs_s.S',
|
||||
'sp/src/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S',
|
||||
'sp/src/omxSP_FFTInv_CToC_FC32_Sfs_s.S',
|
||||
|
@ -2598,6 +2598,18 @@ extern OMXResult (*omxSP_FFTInv_CCSToR_F32)(
|
||||
#define omxSP_FFTInv_CCSToR_F32 omxSP_FFTInv_CCSToR_F32_Sfs
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Just like omxSP_FFTInv_CCSToR_F32_Sfs, but does not scale the result.
|
||||
* (Actually, we multiply by two for consistency with other FFT routines in
|
||||
* use.)
|
||||
*/
|
||||
OMXResult omxSP_FFTInv_CCSToR_F32_Sfs_unscaled(
|
||||
const OMX_F32* pSrc,
|
||||
OMX_F32* pDst,
|
||||
const OMXFFTSpec_R_F32* pFFTSpec
|
||||
);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -0,0 +1,284 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Copyright 2016, Mozilla Foundation and contributors
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
@// It is further modified to produce an "unscaled" version, which
|
||||
@// actually multiplies by two for consistency with the other FFT functions
|
||||
@// in use.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute an inverse FFT for a complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total number of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define round r3
|
||||
|
||||
#define pOut1 r2
|
||||
#define size r7
|
||||
#define step r8
|
||||
#define step1 r9
|
||||
#define twStep r10
|
||||
#define pTwiddleTmp r11
|
||||
#define argTwiddle1 r12
|
||||
#define zero r14
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.F32
|
||||
#define dShift D1.F32
|
||||
#define dX1 D1.F32
|
||||
#define dY0 D2.F32
|
||||
#define dY1 D3.F32
|
||||
#define dX0r D0.F32
|
||||
#define dX0i D1.F32
|
||||
#define dX1r D2.F32
|
||||
#define dX1i D3.F32
|
||||
#define dW0r D4.F32
|
||||
#define dW0i D5.F32
|
||||
#define dW1r D6.F32
|
||||
#define dW1i D7.F32
|
||||
#define dT0 D8.F32
|
||||
#define dT1 D9.F32
|
||||
#define dT2 D10.F32
|
||||
#define dT3 D11.F32
|
||||
#define qT0 d12.F32
|
||||
#define qT1 d14.F32
|
||||
#define qT2 d16.F32
|
||||
#define qT3 d18.F32
|
||||
#define dY0r D4.F32
|
||||
#define dY0i D5.F32
|
||||
#define dY1r D6.F32
|
||||
#define dY1i D7.F32
|
||||
#define dzero D20.F32
|
||||
|
||||
#define dY2 D4.F32
|
||||
#define dY3 D5.F32
|
||||
#define dW0 D6.F32
|
||||
#define dW1 D7.F32
|
||||
#define dW0Tmp D10.F32
|
||||
#define dW1Neg D11.F32
|
||||
|
||||
#define sN S0.S32
|
||||
#define fN S1.F32
|
||||
@// two must be the same as dScale[0]!
|
||||
#define dScale D2.F32
|
||||
#define two S4.F32
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
M_ALLOC4 complexFFTSize, 4
|
||||
|
||||
@// Write function header
@//
@// omxSP_FFTInv_CCSToR_F32_Sfs_unscaled
@//   In : r0 = pSrc, r1 = pDst, r2 = pFFTSpec (per the input-register
@//        #defines earlier in this file).
@//   Out: r0 = result, always set to OMX_Sts_NoErr at End.
@//   For N > 1 every path ends at FFTEnd, where the output is multiplied by
@//   two in place; no division by the FFT length is visible in this routine.
@//   NOTE(review): the N = 1 path branches straight to End and therefore
@//   skips the multiplication by two -- confirm that is intended.
@//   NOTE(review): the r11,d15 arguments to M_START are presumably the
@//   highest core/NEON registers to preserve -- confirm against armCOMM_s.h.
|
||||
M_START omxSP_FFTInv_CCSToR_F32_Sfs_unscaled,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the size from structure and take log
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
@// N=1: treat separately. One 32-bit lane is copied from pSrc to pDst and
@// control goes directly to End, bypassing the scaling loop at FFTEnd.
|
||||
CMP N,#1
|
||||
BGT sizeGreaterThanOne
|
||||
VLD1 dX0[0],[pSrc]
|
||||
VST1 dX0[0],[pDst]
|
||||
|
||||
B End
|
||||
|
||||
sizeGreaterThanOne:
|
||||
|
||||
@// Call the preTwiddle Radix2 stage before doing the complex IFFT
|
||||
|
||||
|
||||
BL armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
|
||||
|
||||
|
||||
complexIFFT:
|
||||
|
||||
ASR N,N,#1 @// N/2 point complex IFFT
|
||||
M_STR N, complexFFTSize @ Save halved N on the stack; no load of this slot is visible in this routine
|
||||
ADD pSrc,pOut,N,LSL #3 @// set pSrc as pOut1 (pOut + 8*N bytes, N already halved)
|
||||
|
||||
@// order = 31 - CLZ(N)
CLZ order,N @// N = 2^order
|
||||
RSB order,order,#31
|
||||
MOV subFFTSize,#1
|
||||
@//MOV subFFTNum,N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
|
||||
@// order < 1 here: copy one D register from pSrc to pDst and go scale it.
@// VLD1/VST1/MOV leave the flags from "CMP order,#1" untouched, so the
@// BLT below is always taken on this path.
VLD1 dX0,[pSrc]
|
||||
VST1 dX0,[pDst]
|
||||
MOV pSrc,pDst
|
||||
BLT FFTEnd
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
@// The flags from this CMP drive the conditional MOVs and the BGE/BLLT
@// below; none of the instructions in between updates the flags.
CMP order,#2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
@// Pass the first stage destination in RN5
|
||||
MOVEQ pOut,pDst
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
BGE orderGreaterthan1
|
||||
BLLT armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe @// order = 1
|
||||
B FFTEnd
|
||||
|
||||
|
||||
orderGreaterthan1:
|
||||
@// order is r14 (the link register), which the BL below overwrites, so a
@// copy is kept in r4. r4 was pTwiddle; that value was already copied to
@// argTwiddle above.
MOV tmpOrder,order @// tmpOrder = RN 4
|
||||
BL armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
CMP tmpOrder,#2
|
||||
BLGT armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
|
||||
BL armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
|
||||
orderGreaterthan3:
|
||||
specialScaleCase:
|
||||
|
||||
@// Set input args to fft stages
|
||||
TST order, #2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
@// Pass the first stage destination in RN5
|
||||
MOVEQ pOut,pDst
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though
|
||||
@// the first BL would corrupt the flags. This is because the end of
|
||||
@// the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
|
||||
@// to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
@// Loop entry relies on the flags of the preceding "CMP subFFTNum,#4":
@// EQ selects the last-stage routine, otherwise one more middle radix-4
@// stage is run and subFFTNum is compared again.
@// NOTE(review): this assumes the called stage updates subFFTNum and leaves
@// it in r6 -- confirm against the stage implementations.
unscaledRadix4Loop:
|
||||
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
BL armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
FFTEnd: @// Does only the scaling
|
||||
@ Scale inverse FFT result by 2 for consistency with other FFTs
|
||||
@ "two" is S4, the low lane of D2, which is what dScale[0] names.
VMOV two, 2.0 @ two = dScale[0]
|
||||
|
||||
@// N = subFFTSize ; dataptr = pDst
|
||||
@// Each iteration loads one D register from pSrc, multiplies it by
@// dScale[0], and stores it back with post-increment. SUBS sets the flags
@// tested by the BGT; VMUL and VST1 do not change them.
scaleFFTData:
|
||||
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
|
||||
SUBS subFFTSize,subFFTSize,#1
|
||||
VMUL dX0, dX0, dScale[0]
|
||||
VST1 {dX0},[pSrc]!
|
||||
|
||||
BGT scaleFFTData
|
||||
|
||||
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
.end
|
Loading…
Reference in New Issue
Block a user