Bug 1176300 - Update libsoundtouch to patched r222; r=padenot

2024-09-13 09:24:08 -07:00 · 2015-07-01 20:57:16 -07:00 · 2015-07-01 20:57:16 -07:00 · 03af7e4e5b
commit 03af7e4e5b
parent ac8b78175b
19 changed files with 105 additions and 90 deletions
--- a/media/libsoundtouch/README_MOZILLA
+++ b/media/libsoundtouch/README_MOZILLA
@@ -1,5 +1,5 @@
 These files are from the SoundTouch library (http://www.surina.net/soundtouch/),
-and are extracted from the revision r198 of the svn repository at
+and are extracted from the revision r222 of the svn repository at
 https://soundtouch.svn.sourceforge.net/svnroot/soundtouch/trunk.

 The whole library is not used, only the relevant files are imported in the tree,
--- a/media/libsoundtouch/src/AAFilter.cpp
+++ b/media/libsoundtouch/src/AAFilter.cpp
@@ -12,7 +12,7 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-01-05 15:40:22 -0600 (Sun, 05 Jan 2014) $
+// Last changed  : $Date: 2014-01-05 21:40:22 +0000 (Sun, 05 Jan 2014) $
 // File revision : $Revision: 4 $
 //
 // $Id: AAFilter.cpp 177 2014-01-05 21:40:22Z oparviai $
--- a/media/libsoundtouch/src/AAFilter.h
+++ b/media/libsoundtouch/src/AAFilter.h
@@ -13,7 +13,7 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-01-07 13:41:23 -0600 (Tue, 07 Jan 2014) $
+// Last changed  : $Date: 2014-01-07 19:41:23 +0000 (Tue, 07 Jan 2014) $
 // File revision : $Revision: 4 $
 //
 // $Id: AAFilter.h 187 2014-01-07 19:41:23Z oparviai $
--- a/media/libsoundtouch/src/FIFOSampleBuffer.cpp
+++ b/media/libsoundtouch/src/FIFOSampleBuffer.cpp
@@ -15,7 +15,7 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2012-11-08 12:53:01 -0600 (Thu, 08 Nov 2012) $
+// Last changed  : $Date: 2012-11-08 18:53:01 +0000 (Thu, 08 Nov 2012) $
 // File revision : $Revision: 4 $
 //
 // $Id: FIFOSampleBuffer.cpp 160 2012-11-08 18:53:01Z oparviai $
--- a/media/libsoundtouch/src/FIFOSampleBuffer.h
+++ b/media/libsoundtouch/src/FIFOSampleBuffer.h
@@ -15,7 +15,7 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-01-05 15:40:22 -0600 (Sun, 05 Jan 2014) $
+// Last changed  : $Date: 2014-01-05 21:40:22 +0000 (Sun, 05 Jan 2014) $
 // File revision : $Revision: 4 $
 //
 // $Id: FIFOSampleBuffer.h 177 2014-01-05 21:40:22Z oparviai $
--- a/media/libsoundtouch/src/FIFOSamplePipe.h
+++ b/media/libsoundtouch/src/FIFOSamplePipe.h
@@ -17,7 +17,7 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2012-06-13 14:29:53 -0500 (Wed, 13 Jun 2012) $
+// Last changed  : $Date: 2012-06-13 19:29:53 +0000 (Wed, 13 Jun 2012) $
 // File revision : $Revision: 4 $
 //
 // $Id: FIFOSamplePipe.h 143 2012-06-13 19:29:53Z oparviai $
--- a/media/libsoundtouch/src/FIRFilter.cpp
+++ b/media/libsoundtouch/src/FIRFilter.cpp
@@ -11,10 +11,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2013-06-12 10:24:44 -0500 (Wed, 12 Jun 2013) $
+// Last changed  : $Date: 2015-02-21 21:24:29 +0000 (Sat, 21 Feb 2015) $
 // File revision : $Revision: 4 $
 //
-// $Id: FIRFilter.cpp 171 2013-06-12 15:24:44Z oparviai $
+// $Id: FIRFilter.cpp 202 2015-02-21 21:24:29Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -46,11 +46,6 @@
 #include "FIRFilter.h"
 #include "cpu_detect.h"

-#ifdef _MSC_VER
-#include <malloc.h>
-#define alloca _alloca
-#endif
-
 using namespace soundtouch;

 /*****************************************************************************
@@ -77,8 +72,7 @@ FIRFilter::~FIRFilter()
 // Usual C-version of the filter routine for stereo sound
 uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
 {
-    uint i, j, end;
-    LONG_SAMPLETYPE suml, sumr;
+    int j, end;
 #ifdef SOUNDTOUCH_FLOAT_SAMPLES
    // when using floating point samples, use a scaler instead of a divider
    // because division is much slower operation than multiplying.
@@ -92,9 +86,12 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui

    end = 2 * (numSamples - length);

+    #pragma omp parallel for
    for (j = 0; j < end; j += 2) 
    {
        const SAMPLETYPE *ptr;
+        LONG_SAMPLETYPE suml, sumr;
+        uint i;

        suml = sumr = 0;
        ptr = src + j;
@@ -135,28 +132,31 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
 // Usual C-version of the filter routine for mono sound
 uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
 {
-    uint i, j, end;
-    LONG_SAMPLETYPE sum;
+    int j, end;
 #ifdef SOUNDTOUCH_FLOAT_SAMPLES
    // when using floating point samples, use a scaler instead of a divider
    // because division is much slower operation than multiplying.
    double dScaler = 1.0 / (double)resultDivider;
 #endif

-
    assert(length != 0);

    end = numSamples - length;
+    #pragma omp parallel for
    for (j = 0; j < end; j ++) 
    {
+        const SAMPLETYPE *pSrc = src + j;
+        LONG_SAMPLETYPE sum;
+        uint i;
+
        sum = 0;
        for (i = 0; i < length; i += 4) 
        {
            // loop is unrolled by factor of 4 here for efficiency
-            sum += src[i + 0] * filterCoeffs[i + 0] + 
-                   src[i + 1] * filterCoeffs[i + 1] + 
-                   src[i + 2] * filterCoeffs[i + 2] + 
-                   src[i + 3] * filterCoeffs[i + 3];
+            sum += pSrc[i + 0] * filterCoeffs[i + 0] + 
+                   pSrc[i + 1] * filterCoeffs[i + 1] + 
+                   pSrc[i + 2] * filterCoeffs[i + 2] + 
+                   pSrc[i + 3] * filterCoeffs[i + 3];
        }
 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
        sum >>= resultDivFactor;
@@ -166,16 +166,15 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
        sum *= dScaler;
 #endif // SOUNDTOUCH_INTEGER_SAMPLES
        dest[j] = (SAMPLETYPE)sum;
-        src ++;
    }
    return end;
 }


-uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const
+uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels)
 {
-    uint i, j, end, c;
-    LONG_SAMPLETYPE *sum=(LONG_SAMPLETYPE*)alloca(numChannels*sizeof(*sum));
+    int j, end;
+
 #ifdef SOUNDTOUCH_FLOAT_SAMPLES
    // when using floating point samples, use a scaler instead of a divider
    // because division is much slower operation than multiplying.
@@ -186,17 +185,21 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
    assert(src != NULL);
    assert(dest != NULL);
    assert(filterCoeffs != NULL);
+    assert(numChannels < 16);

    end = numChannels * (numSamples - length);

-    for (c = 0; c < numChannels; c ++)
-    {
-        sum[c] = 0;
-    }
-
+    #pragma omp parallel for
    for (j = 0; j < end; j += numChannels)
    {
        const SAMPLETYPE *ptr;
+        LONG_SAMPLETYPE sums[16];
+        uint c, i;
+
+        for (c = 0; c < numChannels; c ++)
+        {
+            sums[c] = 0;
+        }

        ptr = src + j;

@@ -205,7 +208,7 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
            SAMPLETYPE coef=filterCoeffs[i];
            for (c = 0; c < numChannels; c ++)
            {
-                sum[c] += ptr[0] * coef;
+                sums[c] += ptr[0] * coef;
                ptr ++;
            }
        }
@@ -213,13 +216,11 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
        for (c = 0; c < numChannels; c ++)
        {
 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
-            sum[c] >>= resultDivFactor;
+            sums[c] >>= resultDivFactor;
 #else
-            sum[c] *= dScaler;
+            sums[c] *= dScaler;
 #endif // SOUNDTOUCH_INTEGER_SAMPLES
-            *dest = (SAMPLETYPE)sum[c];
-            dest++;
-            sum[c] = 0;
+            dest[j+c] = (SAMPLETYPE)sums[c];
        }
    }
    return numSamples - length;
@@ -258,7 +259,7 @@ uint FIRFilter::getLength() const
 //
 // Note : The amount of outputted samples is by value of 'filter_length' 
 // smaller than the amount of input samples.
-uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const
+uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) 
 {
    assert(length > 0);
    assert(lengthDiv8 * 8 == length);
--- a/media/libsoundtouch/src/FIRFilter.h
+++ b/media/libsoundtouch/src/FIRFilter.h
@@ -11,10 +11,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2013-06-12 10:24:44 -0500 (Wed, 12 Jun 2013) $
+// Last changed  : $Date: 2015-02-21 21:24:29 +0000 (Sat, 21 Feb 2015) $
 // File revision : $Revision: 4 $
 //
-// $Id: FIRFilter.h 171 2013-06-12 15:24:44Z oparviai $
+// $Id: FIRFilter.h 202 2015-02-21 21:24:29Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -71,7 +71,7 @@ protected:
    virtual uint evaluateFilterMono(SAMPLETYPE *dest, 
                                    const SAMPLETYPE *src, 
                                    uint numSamples) const;
-    virtual uint evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const;
+    virtual uint evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels);

 public:
    FIRFilter();
@@ -91,7 +91,7 @@ public:
    uint evaluate(SAMPLETYPE *dest, 
                  const SAMPLETYPE *src, 
                  uint numSamples, 
-                  uint numChannels) const;
+                  uint numChannels);

    uint getLength() const;

--- a/media/libsoundtouch/src/RateTransposer.cpp
+++ b/media/libsoundtouch/src/RateTransposer.cpp
@@ -10,7 +10,7 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
+// Last changed  : $Date: 2014-04-06 15:57:21 +0000 (Sun, 06 Apr 2014) $
 // File revision : $Revision: 4 $
 //
 // $Id: RateTransposer.cpp 195 2014-04-06 15:57:21Z oparviai $
--- a/media/libsoundtouch/src/RateTransposer.h
+++ b/media/libsoundtouch/src/RateTransposer.h
@@ -14,7 +14,7 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
+// Last changed  : $Date: 2014-04-06 15:57:21 +0000 (Sun, 06 Apr 2014) $
 // File revision : $Revision: 4 $
 //
 // $Id: RateTransposer.h 195 2014-04-06 15:57:21Z oparviai $
--- a/media/libsoundtouch/src/STTypes.h
+++ b/media/libsoundtouch/src/STTypes.h
@@ -8,10 +8,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
+// Last changed  : $Date: 2015-05-18 15:25:07 +0000 (Mon, 18 May 2015) $
 // File revision : $Revision: 3 $
 //
-// $Id: STTypes.h 195 2014-04-06 15:57:21Z oparviai $
+// $Id: STTypes.h 215 2015-05-18 15:25:07Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -56,13 +56,9 @@ typedef unsigned long   ulong;

 #include "soundtouch_config.h"

-#if defined(WIN32) && defined(GKMEDIAS_SHARED_LIBRARY)
-#ifdef BUILDING_SOUNDTOUCH
+#if defined(WIN32)
 #define EXPORT __declspec(dllexport)
 #else
-#define EXPORT __declspec(dllimport)
-#endif
-#else
 #define EXPORT
 #endif

@@ -80,7 +76,7 @@ namespace soundtouch
    /// runtime performance so recommendation is to keep this off.
    // #define USE_MULTICH_ALWAYS

-    #if (defined(__SOFTFP__))
+    #if (defined(__SOFTFP__) && defined(ANDROID))
        // For Android compilation: Force use of Integer samples in case that
        // compilation uses soft-floating point emulation - soft-fp is way too slow
        #undef  SOUNDTOUCH_FLOAT_SAMPLES
@@ -169,7 +165,7 @@ namespace soundtouch
 };

 // define ST_NO_EXCEPTION_HANDLING switch to disable throwing std exceptions:
-#define ST_NO_EXCEPTION_HANDLING    1
+// #define ST_NO_EXCEPTION_HANDLING    1
 #ifdef ST_NO_EXCEPTION_HANDLING
    // Exceptions disabled. Throw asserts instead if enabled.
    #include <assert.h>
@@ -177,6 +173,7 @@ namespace soundtouch
 #else
    // use c++ standard exceptions
    #include <stdexcept>
+    #include <string>
    #define ST_THROW_RT_ERROR(x)    {throw std::runtime_error(x);}
 #endif

--- a/media/libsoundtouch/src/SoundTouch.cpp
+++ b/media/libsoundtouch/src/SoundTouch.cpp
@@ -41,10 +41,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
+// Last changed  : $Date: 2014-10-08 15:26:57 +0000 (Wed, 08 Oct 2014) $
 // File revision : $Revision: 4 $
 //
-// $Id: SoundTouch.cpp 195 2014-04-06 15:57:21Z oparviai $
+// $Id: SoundTouch.cpp 201 2014-10-08 15:26:57Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -80,11 +80,6 @@
 #include "RateTransposer.h"
 #include "cpu_detect.h"

-#ifdef _MSC_VER
-#include <malloc.h>
-#define alloca _alloca
-#endif
-
 using namespace soundtouch;
    
 /// test if two floating point numbers are equal
@@ -353,8 +348,8 @@ void SoundTouch::flush()
    int i;
    int nUnprocessed;
    int nOut;
-    SAMPLETYPE *buff=(SAMPLETYPE*)alloca(64*channels*sizeof(SAMPLETYPE));
-
+    SAMPLETYPE *buff = new SAMPLETYPE[64 * channels];
+    
    // check how many samples still await processing, and scale
    // that by tempo & rate to get expected output sample count
    nUnprocessed = numUnprocessedSamples();
@@ -383,6 +378,8 @@ void SoundTouch::flush()
        }
    }

+    delete[] buff;
+
    // Clear working buffers
    pRateTransposer->clear();
    pTDStretch->clearInput();
--- a/media/libsoundtouch/src/SoundTouch.h
+++ b/media/libsoundtouch/src/SoundTouch.h
@@ -41,10 +41,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
+// Last changed  : $Date: 2015-05-18 15:28:41 +0000 (Mon, 18 May 2015) $
 // File revision : $Revision: 4 $
 //
-// $Id: SoundTouch.h 195 2014-04-06 15:57:21Z oparviai $
+// $Id: SoundTouch.h 216 2015-05-18 15:28:41Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -79,10 +79,10 @@ namespace soundtouch
 {

 /// Soundtouch library version string
-#define SOUNDTOUCH_VERSION          "1.8.0"
+#define SOUNDTOUCH_VERSION          "1.9.0"

 /// SoundTouch library version id
-#define SOUNDTOUCH_VERSION_ID       (10800)
+#define SOUNDTOUCH_VERSION_ID       (10900)

 //
 // Available setting IDs for the 'setSetting' & 'get_setting' functions:
--- a/media/libsoundtouch/src/TDStretch.cpp
+++ b/media/libsoundtouch/src/TDStretch.cpp
@@ -13,10 +13,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
+// Last changed  : $Date: 2015-02-22 15:07:12 +0000 (Sun, 22 Feb 2015) $
 // File revision : $Revision: 1.12 $
 //
-// $Id: TDStretch.cpp 195 2014-04-06 15:57:21Z oparviai $
+// $Id: TDStretch.cpp 205 2015-02-22 15:07:12Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -292,9 +292,9 @@ inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, ui
 int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) 
 {
    int bestOffs;
-    double bestCorr, corr;
-    double norm;
+    double bestCorr;
    int i;
+    double norm;

    bestCorr = FLT_MIN;
    bestOffs = 0;
@@ -302,14 +302,22 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
    // Scans for the best correlation value by testing each possible position
    // over the permitted range.
    bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
+
+    #pragma omp parallel for
    for (i = 1; i < seekLength; i ++) 
    {
-        // Calculates correlation value for the mixing position corresponding
-        // to 'i'. Now call "calcCrossCorrAccumulate" that is otherwise same as
-        // "calcCrossCorr", but saves time by reusing & updating previously stored 
+        double corr;
+        // Calculates correlation value for the mixing position corresponding to 'i'
+#ifdef _OPENMP
+        // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
+        // iterate the loop in sequential order
+        corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
+#else
+        // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
+        // as "calcCrossCorr", but saves time by reusing & updating previously stored 
        // "norm" value
        corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
-
+#endif
        // heuristic rule to slightly favour values close to mid of the range
        double tmp = (double)(2 * i - seekLength) / (double)seekLength;
        corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
@@ -317,8 +325,15 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
        // Checks for the highest correlation value
        if (corr > bestCorr) 
        {
-            bestCorr = corr;
-            bestOffs = i;
+            // For optimal performance, enter critical section only in case that best value found.
+            // in such case repeat 'if' condition as it's possible that parallel execution may have
+            // updated the bestCorr value in the mean time
+            #pragma omp critical
+            if (corr > bestCorr)
+            {
+                bestCorr = corr;
+                bestOffs = i;
+            }
        }
    }
    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
@@ -883,9 +898,10 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)


 /// Calculate cross-correlation
-double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const
+double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) const
 {
    double corr;
+    double norm;
    int i;

    corr = norm = 0;
@@ -907,6 +923,7 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, do
                mixingPos[i + 3] * mixingPos[i + 3];
    }

+    anorm = norm;
    return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
 }

--- a/media/libsoundtouch/src/TDStretch.h
+++ b/media/libsoundtouch/src/TDStretch.h
@@ -13,7 +13,7 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
+// Last changed  : $Date: 2014-04-06 15:57:21 +0000 (Sun, 06 Apr 2014) $
 // File revision : $Revision: 4 $
 //
 // $Id: TDStretch.h 195 2014-04-06 15:57:21Z oparviai $
--- a/media/libsoundtouch/src/cpu_detect.h
+++ b/media/libsoundtouch/src/cpu_detect.h
@@ -12,7 +12,7 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-02-10 10:26:55 -0600 (Sun, 10 Feb 2008) $
+// Last changed  : $Date: 2008-02-10 16:26:55 +0000 (Sun, 10 Feb 2008) $
 // File revision : $Revision: 4 $
 //
 // $Id: cpu_detect.h 11 2008-02-10 16:26:55Z oparviai $
--- a/media/libsoundtouch/src/cpu_detect_x86.cpp
+++ b/media/libsoundtouch/src/cpu_detect_x86.cpp
@@ -11,7 +11,7 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-01-07 12:24:28 -0600 (Tue, 07 Jan 2014) $
+// Last changed  : $Date: 2014-01-07 18:24:28 +0000 (Tue, 07 Jan 2014) $
 // File revision : $Revision: 4 $
 //
 // $Id: cpu_detect_x86.cpp 183 2014-01-07 18:24:28Z oparviai $
--- a/media/libsoundtouch/src/mmx_optimized.cpp
+++ b/media/libsoundtouch/src/mmx_optimized.cpp
@@ -20,10 +20,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-01-07 12:25:40 -0600 (Tue, 07 Jan 2014) $
+// Last changed  : $Date: 2015-02-22 15:10:38 +0000 (Sun, 22 Feb 2015) $
 // File revision : $Revision: 4 $
 //
-// $Id: mmx_optimized.cpp 184 2014-01-07 18:25:40Z oparviai $
+// $Id: mmx_optimized.cpp 206 2015-02-22 15:10:38Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -287,6 +287,7 @@ void TDStretchMMX::overlapStereo(short *output, const short *input) const

 FIRFilterMMX::FIRFilterMMX() : FIRFilter()
 {
+    filterCoeffsAlign = NULL;
    filterCoeffsUnalign = NULL;
 }

--- a/media/libsoundtouch/src/sse_optimized.cpp
+++ b/media/libsoundtouch/src/sse_optimized.cpp
@@ -23,10 +23,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2014-01-07 12:25:40 -0600 (Tue, 07 Jan 2014) $
+// Last changed  : $Date: 2015-02-21 21:24:29 +0000 (Sat, 21 Feb 2015) $
 // File revision : $Revision: 4 $
 //
-// $Id: sse_optimized.cpp 184 2014-01-07 18:25:40Z oparviai $
+// $Id: sse_optimized.cpp 202 2015-02-21 21:24:29Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -71,7 +71,7 @@ using namespace soundtouch;
 #include <math.h>

 // Calculates cross correlation of two buffers
-double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &norm) const
+double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &anorm) const
 {
    int i;
    const float *pVec1;
@@ -141,7 +141,8 @@ double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &n

    // return value = vSum[0] + vSum[1] + vSum[2] + vSum[3]
    float *pvNorm = (float*)&vNorm;
-    norm = (pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
+    float norm = (pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
+    anorm = norm;

    float *pvSum = (float*)&vSum;
    return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]) / sqrt(norm < 1e-9 ? 1.0 : norm);
@@ -258,14 +259,17 @@ uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint n
    assert(((ulongptr)filterCoeffsAlign) % 16 == 0);

    // filter is evaluated for two stereo samples with each iteration, thus use of 'j += 2'
+    #pragma omp parallel for
    for (j = 0; j < count; j += 2)
    {
        const float *pSrc;
+        float *pDest;
        const __m128 *pFil;
        __m128 sum1, sum2;
        uint i;

-        pSrc = (const float*)source;              // source audio data
+        pSrc = (const float*)source + j * 2;      // source audio data
+        pDest = dest + j * 2;                     // destination audio data
        pFil = (const __m128*)filterCoeffsAlign;  // filter coefficients. NOTE: Assumes coefficients 
                                                  // are aligned to 16-byte boundary
        sum1 = sum2 = _mm_setzero_ps();
@@ -298,12 +302,10 @@ uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint n
        // to sum the two hi- and lo-floats of these registers together.

        // post-shuffle & add the filtered values and store to dest.
-        _mm_storeu_ps(dest, _mm_add_ps(
+        _mm_storeu_ps(pDest, _mm_add_ps(
                    _mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(1,0,3,2)),   // s2_1 s2_0 s1_3 s1_2
                    _mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(3,2,1,0))    // s2_3 s2_2 s1_1 s1_0
                    ));
-        source += 4;
-        dest += 4;
    }

    // Ideas for further improvement: