From 0f830b7e7da4bc1819dbd8844c4d6afabe2924af Mon Sep 17 00:00:00 2001 From: Garvan Keeley Date: Tue, 30 Jun 2015 18:40:32 -0400 Subject: [PATCH 01/61] Bug 1167614. Re-enable core location geo provider. r=hannosch --- browser/app/profile/firefox.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/browser/app/profile/firefox.js b/browser/app/profile/firefox.js index 4f0924340c3..cc34d8ffa02 100644 --- a/browser/app/profile/firefox.js +++ b/browser/app/profile/firefox.js @@ -1806,7 +1806,11 @@ pref("geo.wifi.uri", "https://location.services.mozilla.com/v1/geolocate?key=%MO #endif #ifdef XP_MACOSX +#ifdef RELEASE_BUILD pref("geo.provider.use_corelocation", false); +#else +pref("geo.provider.use_corelocation", true); +#endif #endif #ifdef XP_WIN From 8d468e46436aa3aad7705f97f4cb160c069a672b Mon Sep 17 00:00:00 2001 From: Ehsan Akhgari Date: Tue, 16 Jun 2015 21:21:08 -0400 Subject: [PATCH 02/61] Bug 1148935 - Correctly reflect worker and sharedworker RequestContext values; r=smaug --- dom/base/nsScriptLoader.cpp | 4 +- dom/fetch/InternalRequest.cpp | 8 ++- dom/fetch/InternalRequest.h | 7 +-- dom/workers/RuntimeService.cpp | 2 +- dom/workers/ScriptLoader.cpp | 17 ++++-- dom/workers/ScriptLoader.h | 2 + dom/workers/ServiceWorkerManager.cpp | 1 + dom/workers/ServiceWorkerScriptCache.cpp | 5 +- dom/workers/WorkerPrivate.cpp | 4 +- dom/workers/WorkerPrivate.h | 26 ++++++++- .../fetch/context/context_test.js | 16 ++++++ .../serviceworkers/fetch/context/index.html | 56 +++++++++++++++++++ .../fetch/context/parentsharedworker.js | 8 +++ .../fetch/context/parentworker.js | 4 ++ .../fetch/context/sharedworker.js | 5 ++ .../serviceworkers/fetch/context/worker.js | 1 + dom/workers/test/serviceworkers/mochitest.ini | 4 ++ 17 files changed, 154 insertions(+), 16 deletions(-) create mode 100644 dom/workers/test/serviceworkers/fetch/context/parentsharedworker.js create mode 100644 dom/workers/test/serviceworkers/fetch/context/parentworker.js create mode 100644 
dom/workers/test/serviceworkers/fetch/context/sharedworker.js create mode 100644 dom/workers/test/serviceworkers/fetch/context/worker.js diff --git a/dom/base/nsScriptLoader.cpp b/dom/base/nsScriptLoader.cpp index f7aaca2df1d..d2ff823ac27 100644 --- a/dom/base/nsScriptLoader.cpp +++ b/dom/base/nsScriptLoader.cpp @@ -214,7 +214,7 @@ nsScriptLoader::CheckContentPolicy(nsIDocument* aDocument, const nsAString &aType) { int16_t shouldLoad = nsIContentPolicy::ACCEPT; - nsresult rv = NS_CheckContentLoadPolicy(nsIContentPolicy::TYPE_SCRIPT, + nsresult rv = NS_CheckContentLoadPolicy(nsIContentPolicy::TYPE_INTERNAL_SCRIPT, aURI, aDocument->NodePrincipal(), aContext, @@ -289,7 +289,7 @@ nsScriptLoader::StartLoad(nsScriptLoadRequest *aRequest, const nsAString &aType, aRequest->mURI, mDocument, nsILoadInfo::SEC_NORMAL, - nsIContentPolicy::TYPE_SCRIPT, + nsIContentPolicy::TYPE_INTERNAL_SCRIPT, loadGroup, prompter, nsIRequest::LOAD_NORMAL | diff --git a/dom/fetch/InternalRequest.cpp b/dom/fetch/InternalRequest.cpp index 13feb77da01..448d5d4ff07 100644 --- a/dom/fetch/InternalRequest.cpp +++ b/dom/fetch/InternalRequest.cpp @@ -113,9 +113,15 @@ InternalRequest::MapContentPolicyTypeToRequestContext(nsContentPolicyType aConte case nsIContentPolicy::TYPE_OTHER: context = RequestContext::Internal; break; - case nsIContentPolicy::TYPE_SCRIPT: + case nsIContentPolicy::TYPE_INTERNAL_SCRIPT: context = RequestContext::Script; break; + case nsIContentPolicy::TYPE_INTERNAL_WORKER: + context = RequestContext::Worker; + break; + case nsIContentPolicy::TYPE_INTERNAL_SHARED_WORKER: + context = RequestContext::Sharedworker; + break; case nsIContentPolicy::TYPE_IMAGE: context = RequestContext::Image; break; diff --git a/dom/fetch/InternalRequest.h b/dom/fetch/InternalRequest.h index ebc5be53434..92b6cc3bab1 100644 --- a/dom/fetch/InternalRequest.h +++ b/dom/fetch/InternalRequest.h @@ -53,13 +53,13 @@ namespace dom { * ping | TYPE_PING * plugin | TYPE_OBJECT_SUBREQUEST * prefetch | - * script | 
TYPE_SCRIPT - * sharedworker | + * script | TYPE_INTERNAL_SCRIPT + * sharedworker | TYPE_INTERNAL_SHARED_WORKER * subresource | Not supported by Gecko * style | TYPE_STYLESHEET * track | TYPE_INTERNAL_TRACK * video | TYPE_INTERNAL_VIDEO - * worker | + * worker | TYPE_INTERNAL_WORKER * xmlhttprequest | TYPE_XMLHTTPREQUEST * xslt | TYPE_XSLT * @@ -68,7 +68,6 @@ namespace dom { * TODO: Split TYPE_XMLHTTPREQUEST and TYPE_DATAREQUEST for EventSource * TODO: Figure out if TYPE_WEBSOCKET maps to anything useful * TODO: Differentiate between frame and iframe - * TODO: Add content types for different kinds of workers * TODO: Add a content type for prefetch * TODO: Use the content type for manifest when it becomes available * TODO: Add a content type for location diff --git a/dom/workers/RuntimeService.cpp b/dom/workers/RuntimeService.cpp index 1f3f4d4ba13..15e8b5efc9a 100644 --- a/dom/workers/RuntimeService.cpp +++ b/dom/workers/RuntimeService.cpp @@ -2384,7 +2384,7 @@ RuntimeService::CreateSharedWorkerInternal(const GlobalObject& aGlobal, nsresult rv = WorkerPrivate::GetLoadInfo(cx, window, nullptr, aScriptURL, false, WorkerPrivate::OverrideLoadGroup, - &loadInfo); + aType, &loadInfo); NS_ENSURE_SUCCESS(rv, rv); return CreateSharedWorkerFromLoadInfo(cx, &loadInfo, aScriptURL, aName, aType, diff --git a/dom/workers/ScriptLoader.cpp b/dom/workers/ScriptLoader.cpp index 298b76b4cf5..285ed1594f6 100644 --- a/dom/workers/ScriptLoader.cpp +++ b/dom/workers/ScriptLoader.cpp @@ -96,6 +96,7 @@ ChannelFromScriptURL(nsIPrincipal* principal, const nsAString& aScriptURL, bool aIsMainScript, WorkerScriptType aWorkerScriptType, + nsContentPolicyType aContentPolicyType, nsIChannel** aChannel) { AssertIsOnMainThread(); @@ -112,7 +113,7 @@ ChannelFromScriptURL(nsIPrincipal* principal, // If we're part of a document then check the content load policy. 
if (parentDoc) { int16_t shouldLoad = nsIContentPolicy::ACCEPT; - rv = NS_CheckContentLoadPolicy(nsIContentPolicy::TYPE_SCRIPT, uri, + rv = NS_CheckContentLoadPolicy(aContentPolicyType, uri, principal, parentDoc, NS_LITERAL_CSTRING("text/javascript"), nullptr, &shouldLoad, @@ -167,7 +168,7 @@ ChannelFromScriptURL(nsIPrincipal* principal, uri, parentDoc, nsILoadInfo::SEC_NORMAL, - nsIContentPolicy::TYPE_SCRIPT, + aContentPolicyType, loadGroup, nullptr, // aCallbacks flags, @@ -182,7 +183,7 @@ ChannelFromScriptURL(nsIPrincipal* principal, uri, principal, nsILoadInfo::SEC_NORMAL, - nsIContentPolicy::TYPE_SCRIPT, + aContentPolicyType, loadGroup, nullptr, // aCallbacks flags, @@ -840,7 +841,9 @@ private: if (!channel) { rv = ChannelFromScriptURL(principal, baseURI, parentDoc, loadGroup, ios, secMan, loadInfo.mURL, IsMainWorkerScript(), - mWorkerScriptType, getter_AddRefs(channel)); + mWorkerScriptType, + mWorkerPrivate->ContentPolicyType(), + getter_AddRefs(channel)); if (NS_WARN_IF(NS_FAILED(rv))) { return rv; } @@ -1578,6 +1581,8 @@ public: scriptloader::ChannelFromScriptURLMainThread(principal, baseURI, parentDoc, loadGroup, mScriptURL, + // Nested workers are always dedicated. 
+ nsIContentPolicy::TYPE_INTERNAL_WORKER, getter_AddRefs(channel)); if (NS_SUCCEEDED(mResult)) { channel.forget(mChannel); @@ -1789,6 +1794,7 @@ ChannelFromScriptURLMainThread(nsIPrincipal* aPrincipal, nsIDocument* aParentDoc, nsILoadGroup* aLoadGroup, const nsAString& aScriptURL, + nsContentPolicyType aContentPolicyType, nsIChannel** aChannel) { AssertIsOnMainThread(); @@ -1799,7 +1805,8 @@ ChannelFromScriptURLMainThread(nsIPrincipal* aPrincipal, NS_ASSERTION(secMan, "This should never be null!"); return ChannelFromScriptURL(aPrincipal, aBaseURI, aParentDoc, aLoadGroup, - ios, secMan, aScriptURL, true, WorkerScript, aChannel); + ios, secMan, aScriptURL, true, WorkerScript, + aContentPolicyType, aChannel); } nsresult diff --git a/dom/workers/ScriptLoader.h b/dom/workers/ScriptLoader.h index cbf1e2ce4dc..35b0d253d09 100644 --- a/dom/workers/ScriptLoader.h +++ b/dom/workers/ScriptLoader.h @@ -8,6 +8,7 @@ #define mozilla_dom_workers_scriptloader_h__ #include "Workers.h" +#include "nsIContentPolicyBase.h" class nsIPrincipal; class nsIURI; @@ -37,6 +38,7 @@ ChannelFromScriptURLMainThread(nsIPrincipal* aPrincipal, nsIDocument* aParentDoc, nsILoadGroup* aLoadGroup, const nsAString& aScriptURL, + nsContentPolicyType aContentPolicyType, nsIChannel** aChannel); nsresult diff --git a/dom/workers/ServiceWorkerManager.cpp b/dom/workers/ServiceWorkerManager.cpp index 50eaf107609..fa891f5c221 100644 --- a/dom/workers/ServiceWorkerManager.cpp +++ b/dom/workers/ServiceWorkerManager.cpp @@ -2701,6 +2701,7 @@ ServiceWorkerManager::CreateServiceWorkerForWindow(nsPIDOMWindow* aWindow, NS_ConvertUTF8toUTF16(aInfo->ScriptSpec()), false, WorkerPrivate::OverrideLoadGroup, + WorkerTypeService, &loadInfo); if (NS_WARN_IF(NS_FAILED(rv))) { return rv; diff --git a/dom/workers/ServiceWorkerScriptCache.cpp b/dom/workers/ServiceWorkerScriptCache.cpp index b568ec0b8b5..400b38d74f0 100644 --- a/dom/workers/ServiceWorkerScriptCache.cpp +++ b/dom/workers/ServiceWorkerScriptCache.cpp @@ -107,10 
+107,13 @@ public: return rv; } + // Note that because there is no "serviceworker" RequestContext type, we can + // use the external TYPE_SCRIPT content policy types when loading a service + // worker. rv = NS_NewChannel(getter_AddRefs(mChannel), uri, aPrincipal, nsILoadInfo::SEC_NORMAL, - nsIContentPolicy::TYPE_SCRIPT, // FIXME(nsm): TYPE_SERVICEWORKER + nsIContentPolicy::TYPE_SCRIPT, loadGroup); if (NS_WARN_IF(NS_FAILED(rv))) { return rv; diff --git a/dom/workers/WorkerPrivate.cpp b/dom/workers/WorkerPrivate.cpp index b5cd652a612..2bf4b8bc076 100644 --- a/dom/workers/WorkerPrivate.cpp +++ b/dom/workers/WorkerPrivate.cpp @@ -4854,7 +4854,7 @@ WorkerPrivate::Constructor(JSContext* aCx, nsresult rv = GetLoadInfo(aCx, nullptr, parent, aScriptURL, aIsChromeWorker, InheritLoadGroup, - stackLoadInfo.ptr()); + aWorkerType, stackLoadInfo.ptr()); if (NS_FAILED(rv)) { scriptloader::ReportLoadError(aCx, aScriptURL, rv, !parent); aRv.Throw(rv); @@ -4912,6 +4912,7 @@ WorkerPrivate::GetLoadInfo(JSContext* aCx, nsPIDOMWindow* aWindow, WorkerPrivate* aParent, const nsAString& aScriptURL, bool aIsChromeWorker, LoadGroupBehavior aLoadGroupBehavior, + WorkerType aWorkerType, WorkerLoadInfo* aLoadInfo) { using namespace mozilla::dom::workers::scriptloader; @@ -5165,6 +5166,7 @@ WorkerPrivate::GetLoadInfo(JSContext* aCx, nsPIDOMWindow* aWindow, rv = ChannelFromScriptURLMainThread(loadInfo.mPrincipal, loadInfo.mBaseURI, document, loadInfo.mLoadGroup, aScriptURL, + ContentPolicyType(aWorkerType), getter_AddRefs(loadInfo.mChannel)); NS_ENSURE_SUCCESS(rv, rv); diff --git a/dom/workers/WorkerPrivate.h b/dom/workers/WorkerPrivate.h index fe4dc8b29d8..a565944c0f1 100644 --- a/dom/workers/WorkerPrivate.h +++ b/dom/workers/WorkerPrivate.h @@ -9,6 +9,7 @@ #include "Workers.h" +#include "nsIContentPolicy.h" #include "nsIContentSecurityPolicy.h" #include "nsILoadGroup.h" #include "nsIWorkerDebugger.h" @@ -734,6 +735,28 @@ public: return mWorkerType == WorkerTypeService; } + nsContentPolicyType + 
ContentPolicyType() const + { + return ContentPolicyType(mWorkerType); + } + + static nsContentPolicyType + ContentPolicyType(WorkerType aWorkerType) + { + switch (aWorkerType) { + case WorkerTypeDedicated: + return nsIContentPolicy::TYPE_INTERNAL_WORKER; + case WorkerTypeShared: + return nsIContentPolicy::TYPE_INTERNAL_SHARED_WORKER; + case WorkerTypeService: + return nsIContentPolicy::TYPE_SCRIPT; + default: + MOZ_ASSERT_UNREACHABLE("Invalid worker type"); + return nsIContentPolicy::TYPE_INVALID; + } + } + const nsCString& SharedWorkerName() const { @@ -982,7 +1005,8 @@ public: static nsresult GetLoadInfo(JSContext* aCx, nsPIDOMWindow* aWindow, WorkerPrivate* aParent, const nsAString& aScriptURL, bool aIsChromeWorker, - LoadGroupBehavior aLoadGroupBehavior, WorkerLoadInfo* aLoadInfo); + LoadGroupBehavior aLoadGroupBehavior, WorkerType aWorkerType, + WorkerLoadInfo* aLoadInfo); static void OverrideLoadInfoLoadGroup(WorkerLoadInfo& aLoadInfo); diff --git a/dom/workers/test/serviceworkers/fetch/context/context_test.js b/dom/workers/test/serviceworkers/fetch/context/context_test.js index 773124d278c..66d22b7da5b 100644 --- a/dom/workers/test/serviceworkers/fetch/context/context_test.js +++ b/dom/workers/test/serviceworkers/fetch/context/context_test.js @@ -70,6 +70,22 @@ self.addEventListener("fetch", function(event) { } } else if (event.request.url.indexOf("xslt") >= 0) { respondToServiceWorker(event, "xslt"); + } else if (event.request.url.indexOf("myworker") >= 0) { + if (event.request.context == "worker") { + event.respondWith(fetch("worker.js")); + } + } else if (event.request.url.indexOf("myparentworker") >= 0) { + if (event.request.context == "worker") { + event.respondWith(fetch("parentworker.js")); + } + } else if (event.request.url.indexOf("mysharedworker") >= 0) { + if (event.request.context == "sharedworker") { + event.respondWith(fetch("sharedworker.js")); + } + } else if (event.request.url.indexOf("myparentsharedworker") >= 0) { + if 
(event.request.context == "sharedworker") { + event.respondWith(fetch("parentsharedworker.js")); + } } else if (event.request.url.indexOf("cache") >= 0) { var cache; var origContext = event.request.context; diff --git a/dom/workers/test/serviceworkers/fetch/context/index.html b/dom/workers/test/serviceworkers/fetch/context/index.html index c5775ea0e24..4cc1bec0658 100644 --- a/dom/workers/test/serviceworkers/fetch/context/index.html +++ b/dom/workers/test/serviceworkers/fetch/context/index.html @@ -341,6 +341,58 @@ }); } + function testWorker() { + return new Promise(function(resolve, reject) { + var worker = new Worker("myworker"); + worker.onmessage = function(e) { + if (e.data == "ack") { + worker.terminate(); + resolve(); + } + }; + worker.onerror = reject; + }); + } + + function testNestedWorker() { + return new Promise(function(resolve, reject) { + var worker = new Worker("myparentworker"); + worker.onmessage = function(e) { + if (e.data == "ack") { + worker.terminate(); + resolve(); + } + }; + worker.onerror = reject; + }); + } + + function testSharedWorker() { + return new Promise(function(resolve, reject) { + var worker = new SharedWorker("mysharedworker"); + worker.port.start(); + worker.port.onmessage = function(e) { + if (e.data == "ack") { + resolve(); + } + }; + worker.onerror = reject; + }); + } + + function testNestedWorkerInSharedWorker() { + return new Promise(function(resolve, reject) { + var worker = new SharedWorker("myparentsharedworker"); + worker.port.start(); + worker.port.onmessage = function(e) { + if (e.data == "ack") { + resolve(); + } + }; + worker.onerror = reject; + }); + } + function testCache() { return new Promise(function(resolve, reject) { // Issue an XHR that will be intercepted by the SW in order to start off @@ -383,6 +435,10 @@ testTrack(), testXHR(), testXSLT(), + testWorker(), + testNestedWorker(), + testSharedWorker(), + testNestedWorkerInSharedWorker(), // Also, test to see if the type of the request can be persisted in 
the database. testCache(), diff --git a/dom/workers/test/serviceworkers/fetch/context/parentsharedworker.js b/dom/workers/test/serviceworkers/fetch/context/parentsharedworker.js new file mode 100644 index 00000000000..eac8d5e7174 --- /dev/null +++ b/dom/workers/test/serviceworkers/fetch/context/parentsharedworker.js @@ -0,0 +1,8 @@ +onconnect = function(e) { + e.ports[0].start(); + var worker = new Worker("myworker?shared"); + worker.onmessage = function(e2) { + e.ports[0].postMessage(e2.data); + self.close(); + }; +}; diff --git a/dom/workers/test/serviceworkers/fetch/context/parentworker.js b/dom/workers/test/serviceworkers/fetch/context/parentworker.js new file mode 100644 index 00000000000..839fb6640b9 --- /dev/null +++ b/dom/workers/test/serviceworkers/fetch/context/parentworker.js @@ -0,0 +1,4 @@ +var worker = new Worker("myworker"); +worker.onmessage = function(e) { + postMessage(e.data); +}; diff --git a/dom/workers/test/serviceworkers/fetch/context/sharedworker.js b/dom/workers/test/serviceworkers/fetch/context/sharedworker.js new file mode 100644 index 00000000000..94dca58399b --- /dev/null +++ b/dom/workers/test/serviceworkers/fetch/context/sharedworker.js @@ -0,0 +1,5 @@ +onconnect = function(e) { + e.ports[0].start(); + e.ports[0].postMessage("ack"); + self.close(); +}; diff --git a/dom/workers/test/serviceworkers/fetch/context/worker.js b/dom/workers/test/serviceworkers/fetch/context/worker.js new file mode 100644 index 00000000000..e26e5bc6916 --- /dev/null +++ b/dom/workers/test/serviceworkers/fetch/context/worker.js @@ -0,0 +1 @@ +postMessage("ack"); diff --git a/dom/workers/test/serviceworkers/mochitest.ini b/dom/workers/test/serviceworkers/mochitest.ini index c100d48e441..162ee52b5cb 100644 --- a/dom/workers/test/serviceworkers/mochitest.ini +++ b/dom/workers/test/serviceworkers/mochitest.ini @@ -38,6 +38,10 @@ support-files = fetch/context/beacon.sjs fetch/context/csp-violate.sjs fetch/context/ping.html + fetch/context/worker.js + 
fetch/context/parentworker.js + fetch/context/sharedworker.js + fetch/context/parentsharedworker.js fetch/context/xml.xml fetch/https/index.html fetch/https/register.html From f656289759864c4815b63bac6af39e1c0e1d33e3 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 17 Jun 2015 21:09:27 -0700 Subject: [PATCH 03/61] Bug 1175807 - Remove PL_DHashTableEnumerate() uses from nsNSSShutdown. r=honzab. The doPK11Logout() change is straightforward. In contrast, the loop in evaporateAllNSSResources() is *weird*. Nevertheless, this change preserves its behaviour. --- security/manager/ssl/nsNSSShutDown.cpp | 60 +++++++++++--------------- security/manager/ssl/nsNSSShutDown.h | 6 --- 2 files changed, 24 insertions(+), 42 deletions(-) diff --git a/security/manager/ssl/nsNSSShutDown.cpp b/security/manager/ssl/nsNSSShutDown.cpp index 7a21a5cde07..24f12ad0643 100644 --- a/security/manager/ssl/nsNSSShutDown.cpp +++ b/security/manager/ssl/nsNSSShutDown.cpp @@ -133,25 +133,16 @@ nsresult nsNSSShutDownList::doPK11Logout() // This is guaranteed by holding the list lock. 
MutexAutoLock lock(singleton->mListLock); - PL_DHashTableEnumerate(&mPK11LogoutCancelObjects, doPK11LogoutHelper, 0); - - return NS_OK; -} - -PLDHashOperator -nsNSSShutDownList::doPK11LogoutHelper(PLDHashTable *table, - PLDHashEntryHdr *hdr, uint32_t number, void *arg) -{ - ObjectHashEntry *entry = static_cast(hdr); - - nsOnPK11LogoutCancelObject *pklco = - reinterpret_cast(entry->obj); - - if (pklco) { - pklco->logout(); + for (auto iter = mPK11LogoutCancelObjects.Iter(); !iter.Done(); iter.Next()) { + auto entry = static_cast(iter.Get()); + nsOnPK11LogoutCancelObject *pklco = + reinterpret_cast(entry->obj); + if (pklco) { + pklco->logout(); + } } - return PL_DHASH_NEXT; + return NS_OK; } bool nsNSSShutDownList::isUIActive() @@ -180,29 +171,26 @@ nsresult nsNSSShutDownList::evaporateAllNSSResources() } MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("now evaporating NSS resources\n")); - int removedCount; - do { - MutexAutoLock lock(mListLock); - removedCount = PL_DHashTableEnumerate(&mObjects, evaporateAllNSSResourcesHelper, 0); - } while (removedCount > 0); - mActivityState.releaseCurrentThreadActivityRestriction(); - return NS_OK; -} - -PLDHashOperator -nsNSSShutDownList::evaporateAllNSSResourcesHelper(PLDHashTable *table, - PLDHashEntryHdr *hdr, uint32_t number, void *arg) -{ - ObjectHashEntry *entry = static_cast(hdr); - { - MutexAutoUnlock unlock(singleton->mListLock); - entry->obj->shutdown(nsNSSShutDownObject::calledFromList); - } // Never free more than one entry, because other threads might be calling // us and remove themselves while we are iterating over the list, // and the behaviour of changing the list while iterating is undefined. 
- return (PLDHashOperator)(PL_DHASH_STOP | PL_DHASH_REMOVE); + while (true) { + MutexAutoLock lock(mListLock); + auto iter = mObjects.RemovingIter(); + if (iter.Done()) { + break; + } + auto entry = static_cast(iter.Get()); + { + MutexAutoUnlock unlock(singleton->mListLock); + entry->obj->shutdown(nsNSSShutDownObject::calledFromList); + } + iter.Remove(); + } + + mActivityState.releaseCurrentThreadActivityRestriction(); + return NS_OK; } nsNSSShutDownList *nsNSSShutDownList::construct() diff --git a/security/manager/ssl/nsNSSShutDown.h b/security/manager/ssl/nsNSSShutDown.h index f3558ecfa1a..6e954e7f928 100644 --- a/security/manager/ssl/nsNSSShutDown.h +++ b/security/manager/ssl/nsNSSShutDown.h @@ -147,13 +147,7 @@ public: private: nsNSSShutDownList(); - static PLDHashOperator - evaporateAllNSSResourcesHelper(PLDHashTable *table, PLDHashEntryHdr *hdr, - uint32_t number, void *arg); - static PLDHashOperator - doPK11LogoutHelper(PLDHashTable *table, PLDHashEntryHdr *hdr, - uint32_t number, void *arg); protected: mozilla::Mutex mListLock; static nsNSSShutDownList *singleton; From 9984461fd44fce11b784ff69c0ecdc79cf7b8d0a Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 18 Jun 2015 21:06:15 -0700 Subject: [PATCH 04/61] Bug 1176160 (part 1) - Remove simple uses of PL_DHashTableEnumerator() from rdf/. r=bsmedberg. 
--- rdf/base/nsInMemoryDataSource.cpp | 121 +++++++++--------------------- 1 file changed, 36 insertions(+), 85 deletions(-) diff --git a/rdf/base/nsInMemoryDataSource.cpp b/rdf/base/nsInMemoryDataSource.cpp index ef287db95ed..8515a253b36 100644 --- a/rdf/base/nsInMemoryDataSource.cpp +++ b/rdf/base/nsInMemoryDataSource.cpp @@ -81,10 +81,6 @@ using mozilla::LogLevel; class Assertion { public: - static PLDHashOperator - DeletePropertyHashEntry(PLDHashTable* aTable, PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg); - Assertion(nsIRDFResource* aSource, // normal assertion nsIRDFResource* aProperty, nsIRDFNode* aTarget, @@ -194,8 +190,18 @@ Assertion::Assertion(nsIRDFResource* aSource, Assertion::~Assertion() { if (mHashEntry && u.hash.mPropertyHash) { - PL_DHashTableEnumerate(u.hash.mPropertyHash, DeletePropertyHashEntry, - nullptr); + for (auto i = u.hash.mPropertyHash->Iter(); !i.Done(); i.Next()) { + auto entry = static_cast(i.Get()); + Assertion* as = entry->mAssertions; + while (as) { + Assertion* doomed = as; + as = as->mNext; + + // Unlink, and release the datasource's reference. + doomed->mNext = doomed->u.as.mInvNext = nullptr; + doomed->Release(); + } + } delete u.hash.mPropertyHash; u.hash.mPropertyHash = nullptr; } @@ -214,26 +220,6 @@ Assertion::~Assertion() } } -PLDHashOperator -Assertion::DeletePropertyHashEntry(PLDHashTable* aTable, PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg) -{ - Entry* entry = static_cast(aHdr); - - Assertion* as = entry->mAssertions; - while (as) { - Assertion* doomed = as; - as = as->mNext; - - // Unlink, and release the datasource's reference. 
- doomed->mNext = doomed->u.as.mInvNext = nullptr; - doomed->Release(); - } - return PL_DHASH_NEXT; -} - - - //////////////////////////////////////////////////////////////////////// // InMemoryDataSource class InMemoryArcsEnumeratorImpl; @@ -262,14 +248,6 @@ protected: // during mReadCount == 0 uint32_t mReadCount; - static PLDHashOperator - DeleteForwardArcsEntry(PLDHashTable* aTable, PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg); - - static PLDHashOperator - ResourceEnumerator(PLDHashTable* aTable, PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg); - friend class InMemoryArcsEnumeratorImpl; friend class InMemoryAssertionEnumeratorImpl; friend class InMemoryResourceEnumeratorImpl; // b/c it needs to enumerate mForwardArcs @@ -563,10 +541,6 @@ private: Assertion* mAssertion; nsCOMPtr mHashArcs; - static PLDHashOperator - ArcEnumerator(PLDHashTable* aTable, PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg); - virtual ~InMemoryArcsEnumeratorImpl(); public: @@ -582,19 +556,6 @@ public: }; -PLDHashOperator -InMemoryArcsEnumeratorImpl::ArcEnumerator(PLDHashTable* aTable, - PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg) -{ - Entry* entry = static_cast(aHdr); - nsISupportsArray* resources = static_cast(aArg); - - resources->AppendElement(entry->mNode); - return PL_DHASH_NEXT; -} - - InMemoryArcsEnumeratorImpl::InMemoryArcsEnumeratorImpl(InMemoryDataSource* aDataSource, nsIRDFResource* aSource, nsIRDFNode* aTarget) @@ -615,8 +576,13 @@ InMemoryArcsEnumeratorImpl::InMemoryArcsEnumeratorImpl(InMemoryDataSource* aData // its our magical HASH_ENTRY forward hash for assertions nsresult rv = NS_NewISupportsArray(getter_AddRefs(mHashArcs)); if (NS_SUCCEEDED(rv)) { - PL_DHashTableEnumerate(mAssertion->u.hash.mPropertyHash, - ArcEnumerator, mHashArcs.get()); + nsISupportsArray* resources = mHashArcs.get(); + for (auto i = mAssertion->u.hash.mPropertyHash->Iter(); + !i.Done(); + i.Next()) { + auto entry = static_cast(i.Get()); + 
resources->AppendElement(entry->mNode); + } } mAssertion = nullptr; } @@ -792,7 +758,18 @@ InMemoryDataSource::~InMemoryDataSource() // associated with this data source. We only need to do this // for the forward arcs, because the reverse arcs table // indexes the exact same set of resources. - PL_DHashTableEnumerate(&mForwardArcs, DeleteForwardArcsEntry, nullptr); + for (auto iter = mForwardArcs.Iter(); !iter.Done(); iter.Next()) { + auto entry = static_cast(iter.Get()); + Assertion* as = entry->mAssertions; + while (as) { + Assertion* doomed = as; + as = as->mNext; + + // Unlink, and release the datasource's reference. + doomed->mNext = doomed->u.as.mInvNext = nullptr; + doomed->Release(); + } + } } MOZ_LOG(gLog, LogLevel::Debug, @@ -801,24 +778,6 @@ InMemoryDataSource::~InMemoryDataSource() MOZ_COUNT_DTOR(InMemoryDataSource); } -PLDHashOperator -InMemoryDataSource::DeleteForwardArcsEntry(PLDHashTable* aTable, PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg) -{ - Entry* entry = static_cast(aHdr); - - Assertion* as = entry->mAssertions; - while (as) { - Assertion* doomed = as; - as = as->mNext; - - // Unlink, and release the datasource's reference. 
- doomed->mNext = doomed->u.as.mInvNext = nullptr; - doomed->Release(); - } - return PL_DHASH_NEXT; -} - //////////////////////////////////////////////////////////////////////// @@ -1609,16 +1568,6 @@ InMemoryDataSource::ArcLabelsOut(nsIRDFResource* aSource, nsISimpleEnumerator** return NS_OK; } -PLDHashOperator -InMemoryDataSource::ResourceEnumerator(PLDHashTable* aTable, - PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg) -{ - Entry* entry = static_cast(aHdr); - static_cast*>(aArg)->AppendObject(entry->mNode); - return PL_DHASH_NEXT; -} - NS_IMETHODIMP InMemoryDataSource::GetAllResources(nsISimpleEnumerator** aResult) @@ -1626,9 +1575,11 @@ InMemoryDataSource::GetAllResources(nsISimpleEnumerator** aResult) nsCOMArray nodes; nodes.SetCapacity(mForwardArcs.EntryCount()); - // Enumerate all of our entries into an nsCOMArray - PL_DHashTableEnumerate(&mForwardArcs, ResourceEnumerator, &nodes); - + // Get all of our entries into an nsCOMArray + for (auto iter = mForwardArcs.Iter(); !iter.Done(); iter.Next()) { + auto entry = static_cast(iter.Get()); + nodes.AppendObject(entry->mNode); + } return NS_NewArrayEnumerator(aResult, nodes); } From 6796a2861094b75ae21460f807815603f89f5eff Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 18 Jun 2015 21:40:36 -0700 Subject: [PATCH 05/61] Bug 1176160 (part 2) - Remove uses of PL_DHashTableEnumerator() involving VisitorClosure from rdf/. r=bsmedberg. 
--- rdf/base/nsInMemoryDataSource.cpp | 161 ++++++++++++------------------ 1 file changed, 64 insertions(+), 97 deletions(-) diff --git a/rdf/base/nsInMemoryDataSource.cpp b/rdf/base/nsInMemoryDataSource.cpp index 8515a253b36..f5228a98f47 100644 --- a/rdf/base/nsInMemoryDataSource.cpp +++ b/rdf/base/nsInMemoryDataSource.cpp @@ -1898,34 +1898,6 @@ InMemoryDataSource::SweepForwardArcsEntries(PLDHashTable* aTable, //////////////////////////////////////////////////////////////////////// // rdfIDataSource methods -class VisitorClosure -{ -public: - explicit VisitorClosure(rdfITripleVisitor* aVisitor) : - mVisitor(aVisitor), - mRv(NS_OK) - {} - rdfITripleVisitor* mVisitor; - nsresult mRv; -}; - -PLDHashOperator -SubjectEnumerator(PLDHashTable* aTable, PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg) { - Entry* entry = static_cast(aHdr); - VisitorClosure* closure = static_cast(aArg); - - nsresult rv; - nsCOMPtr subject = do_QueryInterface(entry->mNode, &rv); - NS_ENSURE_SUCCESS(rv, PL_DHASH_NEXT); - - closure->mRv = closure->mVisitor->Visit(subject, nullptr, nullptr, true); - if (NS_FAILED(closure->mRv) || closure->mRv == NS_RDF_STOP_VISIT) - return PL_DHASH_STOP; - - return PL_DHASH_NEXT; -} - NS_IMETHODIMP InMemoryDataSource::VisitAllSubjects(rdfITripleVisitor *aVisitor) { @@ -1933,78 +1905,27 @@ InMemoryDataSource::VisitAllSubjects(rdfITripleVisitor *aVisitor) ++mReadCount; // Enumerate all of our entries into an nsISupportsArray. 
- VisitorClosure cls(aVisitor); - PL_DHashTableEnumerate(&mForwardArcs, SubjectEnumerator, &cls); + nsresult rv = NS_OK; + for (auto iter = mForwardArcs.Iter(); !iter.Done(); iter.Next()) { + auto entry = static_cast(iter.Get()); + nsresult rv2; + nsCOMPtr subject = do_QueryInterface(entry->mNode, &rv2); + if (NS_FAILED(rv2)) { + NS_WARNING("QI to nsIRDFNode failed"); + continue; + } + rv = aVisitor->Visit(subject, nullptr, nullptr, true); + if (NS_FAILED(rv) || rv == NS_RDF_STOP_VISIT) { + break; + } + } // Unlock datasource --mReadCount; - return cls.mRv; -} - -class TriplesInnerClosure -{ -public: - TriplesInnerClosure(nsIRDFNode* aSubject, VisitorClosure* aClosure) : - mSubject(aSubject), mOuter(aClosure) {} - nsIRDFNode* mSubject; - VisitorClosure* mOuter; -}; - -PLDHashOperator -TriplesInnerEnumerator(PLDHashTable* aTable, PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg) { - Entry* entry = static_cast(aHdr); - Assertion* assertion = entry->mAssertions; - TriplesInnerClosure* closure = - static_cast(aArg); - while (assertion) { - NS_ASSERTION(!assertion->mHashEntry, "shouldn't have to hashes"); - VisitorClosure* cls = closure->mOuter; - cls->mRv = cls->mVisitor->Visit(closure->mSubject, - assertion->u.as.mProperty, - assertion->u.as.mTarget, - assertion->u.as.mTruthValue); - if (NS_FAILED(cls->mRv) || cls->mRv == NS_RDF_STOP_VISIT) { - return PL_DHASH_STOP; - } - assertion = assertion->mNext; - } - return PL_DHASH_NEXT; + return rv; } -PLDHashOperator -TriplesEnumerator(PLDHashTable* aTable, PLDHashEntryHdr* aHdr, - uint32_t aNumber, void* aArg) { - Entry* entry = static_cast(aHdr); - VisitorClosure* closure = static_cast(aArg); - nsresult rv; - nsCOMPtr subject = do_QueryInterface(entry->mNode, &rv); - NS_ENSURE_SUCCESS(rv, PL_DHASH_NEXT); - - if (entry->mAssertions->mHashEntry) { - TriplesInnerClosure cls(subject, closure); - PL_DHashTableEnumerate(entry->mAssertions->u.hash.mPropertyHash, - TriplesInnerEnumerator, &cls); - if (NS_FAILED(closure->mRv)) 
{ - return PL_DHASH_STOP; - } - return PL_DHASH_NEXT; - } - Assertion* assertion = entry->mAssertions; - while (assertion) { - NS_ASSERTION(!assertion->mHashEntry, "shouldn't have to hashes"); - closure->mRv = closure->mVisitor->Visit(subject, - assertion->u.as.mProperty, - assertion->u.as.mTarget, - assertion->u.as.mTruthValue); - if (NS_FAILED(closure->mRv) || closure->mRv == NS_RDF_STOP_VISIT) { - return PL_DHASH_STOP; - } - assertion = assertion->mNext; - } - return PL_DHASH_NEXT; -} NS_IMETHODIMP InMemoryDataSource::VisitAllTriples(rdfITripleVisitor *aVisitor) { @@ -2012,13 +1933,59 @@ InMemoryDataSource::VisitAllTriples(rdfITripleVisitor *aVisitor) ++mReadCount; // Enumerate all of our entries into an nsISupportsArray. - VisitorClosure cls(aVisitor); - PL_DHashTableEnumerate(&mForwardArcs, TriplesEnumerator, &cls); + nsresult rv = NS_OK; + for (auto iter = mForwardArcs.Iter(); !iter.Done(); iter.Next()) { + auto entry = static_cast(iter.Get()); + nsresult rv2; + nsCOMPtr subject = do_QueryInterface(entry->mNode, &rv2); + if (NS_FAILED(rv2)) { + NS_WARNING("QI to nsIRDFNode failed"); + + } else if (entry->mAssertions->mHashEntry) { + for (auto iter = entry->mAssertions->u.hash.mPropertyHash->Iter(); + !iter.Done(); + iter.Next()) { + auto entry = static_cast(iter.Get()); + Assertion* assertion = entry->mAssertions; + while (assertion) { + NS_ASSERTION(!assertion->mHashEntry, "shouldn't have to hashes"); + rv = aVisitor->Visit(subject, assertion->u.as.mProperty, + assertion->u.as.mTarget, + assertion->u.as.mTruthValue); + if (NS_FAILED(rv)) { + goto end; + } + if (rv == NS_RDF_STOP_VISIT) { + goto inner_end; + } + assertion = assertion->mNext; + } + } + + } else { + Assertion* assertion = entry->mAssertions; + while (assertion) { + NS_ASSERTION(!assertion->mHashEntry, "shouldn't have to hashes"); + rv = aVisitor->Visit(subject, assertion->u.as.mProperty, + assertion->u.as.mTarget, + assertion->u.as.mTruthValue); + if (NS_FAILED(rv) || rv == NS_RDF_STOP_VISIT) { 
+ goto end; + } + assertion = assertion->mNext; + } + } + + inner_end: + (void) 0; + } + + end: // Unlock datasource --mReadCount; - return cls.mRv; + return rv; } //////////////////////////////////////////////////////////////////////// From 1b1cbb5d5094cf8706b014b07232d03b474c2fd1 Mon Sep 17 00:00:00 2001 From: Rex Hung Date: Tue, 9 Jun 2015 15:51:38 -0700 Subject: [PATCH 06/61] bug 1173070 - expose VFY_EndWithSignature from NSS r=keeler --- config/external/nss/nss.def | 1 + 1 file changed, 1 insertion(+) diff --git a/config/external/nss/nss.def b/config/external/nss/nss.def index 222c677a9a4..c0b7fc37e2c 100644 --- a/config/external/nss/nss.def +++ b/config/external/nss/nss.def @@ -678,6 +678,7 @@ VFY_Begin VFY_CreateContext VFY_DestroyContext VFY_End +VFY_EndWithSignature VFY_Update VFY_VerifyData VFY_VerifyDataWithAlgorithmID From f554835a12bb66fa6f1ee735e93ba357a307140a Mon Sep 17 00:00:00 2001 From: Bill McCloskey Date: Tue, 30 Jun 2015 15:55:36 -0700 Subject: [PATCH 07/61] Bug 1166886 - Comment out some code that is supposed to be disabled (r=mconley) --- toolkit/components/addoncompat/Prefetcher.jsm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/toolkit/components/addoncompat/Prefetcher.jsm b/toolkit/components/addoncompat/Prefetcher.jsm index cd3d567ddba..d4f15276393 100644 --- a/toolkit/components/addoncompat/Prefetcher.jsm +++ b/toolkit/components/addoncompat/Prefetcher.jsm @@ -93,10 +93,12 @@ function isPrimitive(v) { function objAddr(obj) { +/* if (!isPrimitive(obj)) { return String(obj) + "[" + Cu.getJSTestingFunctions().objectAddress(obj) + "]"; } return String(obj); +*/ } function log(/*...args*/) From b3bc35de2cfa3c048531a049270bf0fb09f57efb Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Thu, 4 Jun 2015 11:58:10 -0700 Subject: [PATCH 08/61] Bug 1163520: Make IsInternalFunctionObject take its argument by reference, as it must not be nullptr. 
r=shu --- js/src/jsapi-tests/testLookup.cpp | 2 +- js/src/jsfun.cpp | 4 ++-- js/src/jsobjinlines.h | 10 +++++----- js/src/vm/UbiNode.cpp | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/js/src/jsapi-tests/testLookup.cpp b/js/src/jsapi-tests/testLookup.cpp index 09900af7205..178d1cf520e 100644 --- a/js/src/jsapi-tests/testLookup.cpp +++ b/js/src/jsapi-tests/testLookup.cpp @@ -31,7 +31,7 @@ BEGIN_TEST(testLookup_bug522590) CHECK(r.isObject()); JSObject* funobj = &r.toObject(); CHECK(funobj->is()); - CHECK(!js::IsInternalFunctionObject(funobj)); + CHECK(!js::IsInternalFunctionObject(*funobj)); return true; } diff --git a/js/src/jsfun.cpp b/js/src/jsfun.cpp index be5ffdd2cf2..fd2e8c8dd4b 100644 --- a/js/src/jsfun.cpp +++ b/js/src/jsfun.cpp @@ -374,7 +374,7 @@ ResolveInterpretedFunctionPrototype(JSContext* cx, HandleFunction fun, HandleId // Assert that fun is not a compiler-created function object, which // must never leak to script or embedding code and then be mutated. // Also assert that fun is not bound, per the ES5 15.3.4.5 ref above. - MOZ_ASSERT(!IsInternalFunctionObject(fun)); + MOZ_ASSERT(!IsInternalFunctionObject(*fun)); MOZ_ASSERT(!fun->isBoundFunction()); // Make the prototype object an instance of Object with the same parent as @@ -461,7 +461,7 @@ fun_resolve(JSContext* cx, HandleObject obj, HandleId id, bool* resolvedp) bool isLength = JSID_IS_ATOM(id, cx->names().length); if (isLength || JSID_IS_ATOM(id, cx->names().name)) { - MOZ_ASSERT(!IsInternalFunctionObject(obj)); + MOZ_ASSERT(!IsInternalFunctionObject(*obj)); RootedValue v(cx); diff --git a/js/src/jsobjinlines.h b/js/src/jsobjinlines.h index 7a16c5d43a8..7b0b24a2866 100644 --- a/js/src/jsobjinlines.h +++ b/js/src/jsobjinlines.h @@ -593,12 +593,12 @@ ToPrimitive(JSContext* cx, JSType preferredType, MutableHandleValue vp) * or embedding code. 
*/ inline bool -IsInternalFunctionObject(JSObject* funobj) +IsInternalFunctionObject(JSObject& funobj) { - JSFunction* fun = &funobj->as(); - MOZ_ASSERT_IF(fun->isLambda(), - fun->isInterpreted() || fun->isAsmJSNative()); - return fun->isLambda() && fun->isInterpreted() && !fun->environment(); + JSFunction& fun = funobj.as(); + MOZ_ASSERT_IF(fun.isLambda(), + fun.isInterpreted() || fun.isAsmJSNative()); + return fun.isLambda() && fun.isInterpreted() && !fun.environment(); } typedef AutoVectorRooter AutoPropertyDescriptorVector; diff --git a/js/src/vm/UbiNode.cpp b/js/src/vm/UbiNode.cpp index 3af1a76bb37..c8023fbc4b9 100644 --- a/js/src/vm/UbiNode.cpp +++ b/js/src/vm/UbiNode.cpp @@ -85,7 +85,7 @@ Node::exposeToJS() const JSObject& obj = *as(); if (obj.is()) { v.setUndefined(); - } else if (obj.is() && js::IsInternalFunctionObject(&obj)) { + } else if (obj.is() && js::IsInternalFunctionObject(obj)) { v.setUndefined(); } else { v.setObject(obj); From 9a1e5bcb96d1da2f47b221503375c81aa2529877 Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Thu, 4 Jun 2015 14:08:20 -0700 Subject: [PATCH 09/61] Bug 1163520: Don't hand out internal function objects via Debugger.Environment.prototype.callee. r=shu --- .../tests/debug/Environment-callee-04.js | 22 +++++++++++++ .../tests/debug/Environment-getVariable-15.js | 31 +++++++++++++++++++ js/src/vm/Debugger.cpp | 17 +++++++++- 3 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 js/src/jit-test/tests/debug/Environment-callee-04.js create mode 100644 js/src/jit-test/tests/debug/Environment-getVariable-15.js diff --git a/js/src/jit-test/tests/debug/Environment-callee-04.js b/js/src/jit-test/tests/debug/Environment-callee-04.js new file mode 100644 index 00000000000..b2b9534d072 --- /dev/null +++ b/js/src/jit-test/tests/debug/Environment-callee-04.js @@ -0,0 +1,22 @@ +// We shouldn't hand out environment callees when we can only provide the +// internal function object, not the live function object. 
(We should never +// create Debugger.Object instances referring to internal function objects.) + +var g = newGlobal(); +var dbg = new Debugger(g); + +dbg.onDebuggerStatement = function (frame) { + assertEq(frame.older.environment.parent.callee, null); +} + +g.evaluate(` + + function h() { debugger; } + (function () { + return function () { + h(); + return 1; + } + })()(); + + `); diff --git a/js/src/jit-test/tests/debug/Environment-getVariable-15.js b/js/src/jit-test/tests/debug/Environment-getVariable-15.js new file mode 100644 index 00000000000..b6eee90e616 --- /dev/null +++ b/js/src/jit-test/tests/debug/Environment-getVariable-15.js @@ -0,0 +1,31 @@ +// Don't hand out internal function objects via Debugger.Environment.prototype.getVariable. + +// When the real scope chain object holding the binding for 'f' in 'function f() +// { ... }' is optimized out because it's never used, we whip up fake scope +// chain objects for Debugger to use, if it looks. However, the value of the +// variable f will be an internal function object, not a live function object, +// since the latter was not recorded. Internal function objects should not be +// exposed via Debugger. 
+ +var g = newGlobal(); +var dbg = new Debugger(g); + +dbg.onDebuggerStatement = function (frame) { + var g_call_env = frame.older.environment; // g's locals + var g_decl_env = g_call_env.parent; // 'function g' binding + var f_call_env = g_decl_env.parent; // f's locals + var f_decl_env = f_call_env.parent; // 'function f' binding + assertEq(f_decl_env.getVariable('f').optimizedOut, true); +} + +g.evaluate(` + + function h() { debugger; } + (function f() { + return function g() { + h(); + return 1; + } + })()(); + + `); diff --git a/js/src/vm/Debugger.cpp b/js/src/vm/Debugger.cpp index 448f9769ca7..1a404dbcfeb 100644 --- a/js/src/vm/Debugger.cpp +++ b/js/src/vm/Debugger.cpp @@ -806,6 +806,7 @@ Debugger::wrapDebuggeeValue(JSContext* cx, MutableHandleValue vp) RootedObject obj(cx, &vp.toObject()); if (obj->is()) { + MOZ_ASSERT(!IsInternalFunctionObject(*obj)); RootedFunction fun(cx, &obj->as()); if (!EnsureFunctionHasScript(cx, fun)) return false; @@ -7628,7 +7629,11 @@ DebuggerEnv_getCallee(JSContext* cx, unsigned argc, Value* vp) if (callobj.isForEval()) return true; - args.rval().setObject(callobj.callee()); + JSFunction& callee = callobj.callee(); + if (IsInternalFunctionObject(callee)) + return true; + + args.rval().setObject(callee); if (!dbg->wrapDebuggeeValue(cx, args.rval())) return false; return true; @@ -7757,6 +7762,16 @@ DebuggerEnv_getVariable(JSContext* cx, unsigned argc, Value* vp) } } + // When we've faked up scope chain objects for optimized-out scopes, + // declarative environments may contain internal JSFunction objects, which + // we shouldn't expose to the user. 
+ if (v.isObject()) { + RootedObject obj(cx, &v.toObject()); + if (obj->is() && + IsInternalFunctionObject(obj->as())) + v.setMagic(JS_OPTIMIZED_OUT); + } + if (!dbg->wrapDebuggeeValue(cx, &v)) return false; args.rval().set(v); From d6d2fca52849e3ac53d47c2e6badc5d72f6db028 Mon Sep 17 00:00:00 2001 From: Mike Shal Date: Tue, 30 Jun 2015 15:29:02 -0400 Subject: [PATCH 10/61] Bug 1178785 - Add --with-branding to browser l10n mozconfigs; r=rail --- browser/config/mozconfigs/linux32/l10n-mozconfig | 1 + browser/config/mozconfigs/linux64/l10n-mozconfig | 1 + browser/config/mozconfigs/macosx-universal/l10n-mozconfig | 1 + browser/config/mozconfigs/macosx64/l10n-mozconfig | 1 + browser/config/mozconfigs/win32/l10n-mozconfig | 1 + browser/config/mozconfigs/win64/l10n-mozconfig | 1 + 6 files changed, 6 insertions(+) diff --git a/browser/config/mozconfigs/linux32/l10n-mozconfig b/browser/config/mozconfigs/linux32/l10n-mozconfig index b7714d00434..501554f636c 100644 --- a/browser/config/mozconfigs/linux32/l10n-mozconfig +++ b/browser/config/mozconfigs/linux32/l10n-mozconfig @@ -4,6 +4,7 @@ no_sccache=1 ac_add_options --with-l10n-base=../../l10n ac_add_options --enable-update-channel=${MOZ_UPDATE_CHANNEL} ac_add_options --enable-update-packaging +ac_add_options --with-branding=browser/branding/nightly . $topsrcdir/build/unix/mozconfig.linux32 diff --git a/browser/config/mozconfigs/linux64/l10n-mozconfig b/browser/config/mozconfigs/linux64/l10n-mozconfig index fdc00bcd86e..f5d0af19061 100644 --- a/browser/config/mozconfigs/linux64/l10n-mozconfig +++ b/browser/config/mozconfigs/linux64/l10n-mozconfig @@ -4,6 +4,7 @@ no_sccache=1 ac_add_options --with-l10n-base=../../l10n ac_add_options --enable-update-channel=${MOZ_UPDATE_CHANNEL} ac_add_options --enable-update-packaging +ac_add_options --with-branding=browser/branding/nightly . 
$topsrcdir/build/unix/mozconfig.linux diff --git a/browser/config/mozconfigs/macosx-universal/l10n-mozconfig b/browser/config/mozconfigs/macosx-universal/l10n-mozconfig index bae4c14c928..542e3a07cf1 100644 --- a/browser/config/mozconfigs/macosx-universal/l10n-mozconfig +++ b/browser/config/mozconfigs/macosx-universal/l10n-mozconfig @@ -3,6 +3,7 @@ ac_add_options --with-l10n-base=../../../l10n ac_add_options --enable-update-channel=${MOZ_UPDATE_CHANNEL} ac_add_options --enable-update-packaging +ac_add_options --with-branding=browser/branding/nightly if test "${MOZ_UPDATE_CHANNEL}" = "nightly"; then ac_add_options --with-macbundlename-prefix=Firefox diff --git a/browser/config/mozconfigs/macosx64/l10n-mozconfig b/browser/config/mozconfigs/macosx64/l10n-mozconfig index d1584715587..7c15cb01997 100644 --- a/browser/config/mozconfigs/macosx64/l10n-mozconfig +++ b/browser/config/mozconfigs/macosx64/l10n-mozconfig @@ -3,6 +3,7 @@ ac_add_options --with-l10n-base=../../l10n ac_add_options --enable-update-channel=${MOZ_UPDATE_CHANNEL} ac_add_options --enable-update-packaging +ac_add_options --with-branding=browser/branding/nightly . "$topsrcdir/build/mozconfig.cache" . 
"$topsrcdir/build/mozconfig.common.override" diff --git a/browser/config/mozconfigs/win32/l10n-mozconfig b/browser/config/mozconfigs/win32/l10n-mozconfig index 9c8cea70d59..547f9f17fd5 100644 --- a/browser/config/mozconfigs/win32/l10n-mozconfig +++ b/browser/config/mozconfigs/win32/l10n-mozconfig @@ -4,6 +4,7 @@ ac_add_options --enable-update-channel=${MOZ_UPDATE_CHANNEL} ac_add_options --enable-update-packaging ac_add_options --with-l10n-base=../../l10n ac_add_options --with-windows-version=603 +ac_add_options --with-branding=browser/branding/nightly export MOZILLA_OFFICIAL=1 diff --git a/browser/config/mozconfigs/win64/l10n-mozconfig b/browser/config/mozconfigs/win64/l10n-mozconfig index 196dd5fb912..4f0d088c2f5 100644 --- a/browser/config/mozconfigs/win64/l10n-mozconfig +++ b/browser/config/mozconfigs/win64/l10n-mozconfig @@ -5,6 +5,7 @@ ac_add_options --enable-update-channel=${MOZ_UPDATE_CHANNEL} ac_add_options --enable-update-packaging ac_add_options --with-l10n-base=../../l10n ac_add_options --with-windows-version=603 +ac_add_options --with-branding=browser/branding/nightly export MOZILLA_OFFICIAL=1 From 9b1be8c20de075ddb0042c474b8ca6af22884b84 Mon Sep 17 00:00:00 2001 From: "L. David Baron" Date: Tue, 30 Jun 2015 16:08:19 -0700 Subject: [PATCH 11/61] Bug 980770 - Fully enable (for RELEASE_BUILD) off-main-thread animations on all platforms with off-main-thread compositing. r=birtles --- b2g/app/b2g.js | 1 - modules/libpref/init/all.js | 4 ---- 2 files changed, 5 deletions(-) diff --git a/b2g/app/b2g.js b/b2g/app/b2g.js index 259b86f60a1..c8f72162345 100644 --- a/b2g/app/b2g.js +++ b/b2g/app/b2g.js @@ -297,7 +297,6 @@ pref("ui.dragThresholdY", 25); // Layers Acceleration. We can only have nice things on gonk, because // they're not maintained anywhere else. 
pref("layers.offmainthreadcomposition.enabled", true); -pref("layers.offmainthreadcomposition.async-animations", true); #ifndef MOZ_WIDGET_GONK pref("dom.ipc.tabs.disabled", true); #else diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index ed66f538467..4e18b6ba615 100644 --- a/modules/libpref/init/all.js +++ b/modules/libpref/init/all.js @@ -4227,11 +4227,7 @@ pref("layers.offmainthreadcomposition.testing.enabled", false); pref("layers.offmainthreadcomposition.force-basic", false); // Whether to animate simple opacity and transforms on the compositor -#ifdef RELEASE_BUILD -pref("layers.offmainthreadcomposition.async-animations", false); -#else pref("layers.offmainthreadcomposition.async-animations", true); -#endif // Whether to log information about off main thread animations to stderr pref("layers.offmainthreadcomposition.log-animations", false); From b3824c44a228d420c23661e5b125e27885a2e678 Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Tue, 30 Jun 2015 13:47:01 -0700 Subject: [PATCH 12/61] Bug 1178976: Add a post-write barrier to ObjectWeakMap::add. r=terrence --- js/src/jsweakmap.cpp | 4 ++-- js/src/vm/WeakMapObject.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/js/src/jsweakmap.cpp b/js/src/jsweakmap.cpp index 3b599ebcbbe..3d835ae6982 100644 --- a/js/src/jsweakmap.cpp +++ b/js/src/jsweakmap.cpp @@ -345,8 +345,6 @@ TryPreserveReflector(JSContext* cx, HandleObject obj) static inline void WeakMapPostWriteBarrier(JSRuntime* rt, ObjectValueMap* weakMap, JSObject* key) { - // Strip the barriers from the type before inserting into the store buffer. - // This will automatically ensure that barriers do not fire during GC. 
if (key && IsInsideNursery(key)) rt->gc.storeBuffer.putGeneric(gc::HashKeyRef(weakMap, key)); } @@ -708,6 +706,8 @@ ObjectWeakMap::add(JSContext* cx, JSObject* obj, JSObject* target) ReportOutOfMemory(cx); return false; } + if (IsInsideNursery(obj)) + cx->runtime()->gc.storeBuffer.putGeneric(StoreBufferRef(&map, obj)); return true; } diff --git a/js/src/vm/WeakMapObject.h b/js/src/vm/WeakMapObject.h index 18a8be353f1..016f3d77c79 100644 --- a/js/src/vm/WeakMapObject.h +++ b/js/src/vm/WeakMapObject.h @@ -34,6 +34,7 @@ class ObjectWeakMap { private: ObjectValueMap map; + typedef gc::HashKeyRef StoreBufferRef; public: explicit ObjectWeakMap(JSContext* cx); From 50d0e1c75e911aac25e21155e167bb1a61761201 Mon Sep 17 00:00:00 2001 From: Jan de Mooij Date: Tue, 30 Jun 2015 11:20:56 -0700 Subject: [PATCH 13/61] Bug 1177892 part 1 - Remove BOOLEAN_TO_JSVAL and STRING_TO_JSVAL. r=evilpie --- dom/base/EventSource.cpp | 2 +- dom/base/WebSocket.cpp | 2 +- dom/base/nsDOMDataChannel.cpp | 2 +- dom/base/nsJSEnvironment.cpp | 8 ++++---- dom/bluetooth/BluetoothUtils.cpp | 6 +++--- dom/mobilemessage/MobileMessageCallback.cpp | 2 +- dom/plugins/base/nsJSNPRuntime.cpp | 4 ++-- dom/system/OSFileConstants.cpp | 6 +++--- dom/workers/XMLHttpRequest.cpp | 2 +- js/ipc/JavaScriptShared.cpp | 2 +- js/public/Value.h | 12 ------------ js/src/ctypes/CTypes.cpp | 6 +++--- js/src/gdb/tests/test-Root.cpp | 2 +- js/src/jsapi-tests/testParseJSON.cpp | 8 ++++---- js/src/jsapi.cpp | 2 +- js/xpconnect/src/XPCJSRuntime.cpp | 2 +- js/xpconnect/src/XPCLocale.cpp | 2 +- js/xpconnect/src/XPCShellImpl.cpp | 2 +- js/xpconnect/src/XPCWrappedNativeJSOps.cpp | 2 +- storage/mozStorageStatementJSHelper.cpp | 2 +- storage/mozStorageStatementRow.cpp | 2 +- 21 files changed, 33 insertions(+), 45 deletions(-) diff --git a/dom/base/EventSource.cpp b/dom/base/EventSource.cpp index 66babd66aff..21af38d9f67 100644 --- a/dom/base/EventSource.cpp +++ b/dom/base/EventSource.cpp @@ -1271,7 +1271,7 @@ 
EventSource::DispatchAllMessageEvents() message->mData.Length()); NS_ENSURE_TRUE_VOID(jsString); - jsData = STRING_TO_JSVAL(jsString); + jsData.setString(jsString); } // create an event that uses the MessageEvent interface, diff --git a/dom/base/WebSocket.cpp b/dom/base/WebSocket.cpp index dc5d7b1a59c..1522c6549be 100644 --- a/dom/base/WebSocket.cpp +++ b/dom/base/WebSocket.cpp @@ -1776,7 +1776,7 @@ WebSocket::CreateAndDispatchMessageEvent(JSContext* aCx, jsString = JS_NewUCStringCopyN(aCx, utf16Data.get(), utf16Data.Length()); NS_ENSURE_TRUE(jsString, NS_ERROR_FAILURE); - jsData = STRING_TO_JSVAL(jsString); + jsData.setString(jsString); } // create an event that uses the MessageEvent interface, diff --git a/dom/base/nsDOMDataChannel.cpp b/dom/base/nsDOMDataChannel.cpp index 919dfbd0841..9a6765c715a 100644 --- a/dom/base/nsDOMDataChannel.cpp +++ b/dom/base/nsDOMDataChannel.cpp @@ -399,7 +399,7 @@ nsDOMDataChannel::DoOnMessageAvailable(const nsACString& aData, JSString* jsString = JS_NewUCStringCopyN(cx, utf16data.get(), utf16data.Length()); NS_ENSURE_TRUE(jsString, NS_ERROR_FAILURE); - jsData = STRING_TO_JSVAL(jsString); + jsData.setString(jsString); } nsCOMPtr event; diff --git a/dom/base/nsJSEnvironment.cpp b/dom/base/nsJSEnvironment.cpp index 861bf9b9fd3..459d3354cc1 100644 --- a/dom/base/nsJSEnvironment.cpp +++ b/dom/base/nsJSEnvironment.cpp @@ -931,7 +931,7 @@ nsJSContext::AddSupportsPrimitiveTojsvals(nsISupports *aArg, JS::Value *aArgv) JSString *str = ::JS_NewStringCopyN(cx, data.get(), data.Length()); NS_ENSURE_TRUE(str, NS_ERROR_OUT_OF_MEMORY); - *aArgv = STRING_TO_JSVAL(str); + aArgv->setString(str); break; } @@ -949,7 +949,7 @@ nsJSContext::AddSupportsPrimitiveTojsvals(nsISupports *aArg, JS::Value *aArgv) ::JS_NewUCStringCopyN(cx, data.get(), data.Length()); NS_ENSURE_TRUE(str, NS_ERROR_OUT_OF_MEMORY); - *aArgv = STRING_TO_JSVAL(str); + aArgv->setString(str); break; } case nsISupportsPrimitive::TYPE_PRBOOL : { @@ -960,7 +960,7 @@ 
nsJSContext::AddSupportsPrimitiveTojsvals(nsISupports *aArg, JS::Value *aArgv) p->GetData(&data); - *aArgv = BOOLEAN_TO_JSVAL(data); + aArgv->setBoolean(data); break; } @@ -1011,7 +1011,7 @@ nsJSContext::AddSupportsPrimitiveTojsvals(nsISupports *aArg, JS::Value *aArgv) JSString *str = ::JS_NewStringCopyN(cx, &data, 1); NS_ENSURE_TRUE(str, NS_ERROR_OUT_OF_MEMORY); - *aArgv = STRING_TO_JSVAL(str); + aArgv->setString(str); break; } diff --git a/dom/bluetooth/BluetoothUtils.cpp b/dom/bluetooth/BluetoothUtils.cpp index ddbe635f1c3..92bfc31fbf5 100644 --- a/dom/bluetooth/BluetoothUtils.cpp +++ b/dom/bluetooth/BluetoothUtils.cpp @@ -155,14 +155,14 @@ SetJsObject(JSContext* aContext, v.get_nsString().BeginReading(), v.get_nsString().Length()); NS_ENSURE_TRUE(jsData, false); - val = STRING_TO_JSVAL(jsData); + val.setString(jsData); break; } case BluetoothValue::Tuint32_t: val = INT_TO_JSVAL(v.get_uint32_t()); break; case BluetoothValue::Tbool: - val = BOOLEAN_TO_JSVAL(v.get_bool()); + val.setBoolean(v.get_bool()); break; default: BT_WARNING("SetJsObject: Parameter is not handled"); @@ -197,7 +197,7 @@ BroadcastSystemMessage(const nsAString& aType, JSString* jsData = JS_NewUCStringCopyN(cx, aData.get_nsString().BeginReading(), aData.get_nsString().Length()); - value = STRING_TO_JSVAL(jsData); + value.setString(jsData); } else if (aData.type() == BluetoothValue::TArrayOfBluetoothNamedValue) { JS::Rooted obj(cx, JS_NewPlainObject(cx)); if (!obj) { diff --git a/dom/mobilemessage/MobileMessageCallback.cpp b/dom/mobilemessage/MobileMessageCallback.cpp index 47bd301bfaa..1e8e82ee762 100644 --- a/dom/mobilemessage/MobileMessageCallback.cpp +++ b/dom/mobilemessage/MobileMessageCallback.cpp @@ -290,7 +290,7 @@ MobileMessageCallback::NotifyGetSmscAddress(const nsAString& aSmscAddress) return NotifyError(nsIMobileMessageCallback::INTERNAL_ERROR); } - JS::Rooted val(cx, STRING_TO_JSVAL(smsc)); + JS::Rooted val(cx, JS::StringValue(smsc)); return NotifySuccess(val); } diff --git 
a/dom/plugins/base/nsJSNPRuntime.cpp b/dom/plugins/base/nsJSNPRuntime.cpp index 11c66bcc6f0..c0dd8ca3c00 100644 --- a/dom/plugins/base/nsJSNPRuntime.cpp +++ b/dom/plugins/base/nsJSNPRuntime.cpp @@ -489,7 +489,7 @@ NPVariantToJSVal(NPP npp, JSContext *cx, const NPVariant *variant) case NPVariantType_Null : return JS::NullValue(); case NPVariantType_Bool : - return BOOLEAN_TO_JSVAL(NPVARIANT_TO_BOOLEAN(*variant)); + return JS::BooleanValue(NPVARIANT_TO_BOOLEAN(*variant)); case NPVariantType_Int32 : { // Don't use INT_TO_JSVAL directly to prevent bugs when dealing @@ -509,7 +509,7 @@ NPVariantToJSVal(NPP npp, JSContext *cx, const NPVariant *variant) ::JS_NewUCStringCopyN(cx, utf16String.get(), utf16String.Length()); if (str) { - return STRING_TO_JSVAL(str); + return JS::StringValue(str); } break; diff --git a/dom/system/OSFileConstants.cpp b/dom/system/OSFileConstants.cpp index e3c63c52066..8594cfb61ba 100644 --- a/dom/system/OSFileConstants.cpp +++ b/dom/system/OSFileConstants.cpp @@ -824,7 +824,7 @@ bool SetStringProperty(JSContext *cx, JS::Handle aObject, const char } JSString* strValue = JS_NewUCStringCopyZ(cx, aValue.get()); NS_ENSURE_TRUE(strValue, false); - JS::Rooted valValue(cx, STRING_TO_JSVAL(strValue)); + JS::Rooted valValue(cx, JS::StringValue(strValue)); return JS_SetProperty(cx, aObject, aProperty, valValue); } @@ -891,7 +891,7 @@ bool DefineOSFileConstants(JSContext *cx, JS::Handle global) if (!strVersion){ return false; } - JS::Rooted valVersion(cx, STRING_TO_JSVAL(strVersion)); + JS::Rooted valVersion(cx, JS::StringValue(strVersion)); if (!JS_SetProperty(cx, objSys, "Name", valVersion)) { return false; } @@ -907,7 +907,7 @@ bool DefineOSFileConstants(JSContext *cx, JS::Handle global) return false; } - JS::Rooted valVersion(cx, STRING_TO_JSVAL(strVersion)); + JS::Rooted valVersion(cx, JS::StringValue(strVersion)); if (!JS_SetProperty(cx, objSys, "Name", valVersion)) { return false; } diff --git a/dom/workers/XMLHttpRequest.cpp 
b/dom/workers/XMLHttpRequest.cpp index f422b6a2dd7..1975893989b 100644 --- a/dom/workers/XMLHttpRequest.cpp +++ b/dom/workers/XMLHttpRequest.cpp @@ -2457,7 +2457,7 @@ XMLHttpRequest::GetResponse(JSContext* /* unused */, return; } - mStateData.mResponse = STRING_TO_JSVAL(str); + mStateData.mResponse.setString(str); } } diff --git a/js/ipc/JavaScriptShared.cpp b/js/ipc/JavaScriptShared.cpp index 300e375db0e..70d6d7207c5 100644 --- a/js/ipc/JavaScriptShared.cpp +++ b/js/ipc/JavaScriptShared.cpp @@ -362,7 +362,7 @@ JavaScriptShared::fromVariant(JSContext* cx, const JSVariant& from, MutableHandl return true; case JSVariant::Tbool: - to.set(BOOLEAN_TO_JSVAL(from.get_bool())); + to.setBoolean(from.get_bool()); return true; case JSVariant::TnsString: diff --git a/js/public/Value.h b/js/public/Value.h index 6934f1d10e6..a790c4aacf0 100644 --- a/js/public/Value.h +++ b/js/public/Value.h @@ -1971,12 +1971,6 @@ UINT_TO_JSVAL(uint32_t i) : DOUBLE_TO_JSVAL((double)i); } -static inline jsval -STRING_TO_JSVAL(JSString* str) -{ - return IMPL_TO_JSVAL(STRING_TO_JSVAL_IMPL(str)); -} - static inline jsval OBJECT_TO_JSVAL(JSObject* obj) { @@ -1985,12 +1979,6 @@ OBJECT_TO_JSVAL(JSObject* obj) return IMPL_TO_JSVAL(BUILD_JSVAL(JSVAL_TAG_NULL, 0)); } -static inline jsval -BOOLEAN_TO_JSVAL(bool b) -{ - return IMPL_TO_JSVAL(BOOLEAN_TO_JSVAL_IMPL(b)); -} - /* To be GC-safe, privates are tagged as doubles. 
*/ static inline jsval diff --git a/js/src/ctypes/CTypes.cpp b/js/src/ctypes/CTypes.cpp index 9dd4445b4c7..912f4749756 100644 --- a/js/src/ctypes/CTypes.cpp +++ b/js/src/ctypes/CTypes.cpp @@ -3931,7 +3931,7 @@ CType::Create(JSContext* cx, if (ffiType) JS_SetReservedSlot(typeObj, SLOT_FFITYPE, PRIVATE_TO_JSVAL(ffiType)); if (name) - JS_SetReservedSlot(typeObj, SLOT_NAME, STRING_TO_JSVAL(name)); + JS_SetReservedSlot(typeObj, SLOT_NAME, StringValue(name)); JS_SetReservedSlot(typeObj, SLOT_SIZE, size); JS_SetReservedSlot(typeObj, SLOT_ALIGN, align); @@ -4291,7 +4291,7 @@ CType::GetName(JSContext* cx, HandleObject obj) JSString* name = BuildTypeName(cx, obj); if (!name) return nullptr; - JS_SetReservedSlot(obj, SLOT_NAME, STRING_TO_JSVAL(name)); + JS_SetReservedSlot(obj, SLOT_NAME, StringValue(name)); return name; } @@ -7086,7 +7086,7 @@ CData::Create(JSContext* cx, JS_SetReservedSlot(dataObj, SLOT_REFERENT, OBJECT_TO_JSVAL(refObj)); // Set our ownership flag. - JS_SetReservedSlot(dataObj, SLOT_OWNS, BOOLEAN_TO_JSVAL(ownResult)); + JS_SetReservedSlot(dataObj, SLOT_OWNS, BooleanValue(ownResult)); // attach the buffer. since it might not be 2-byte aligned, we need to // allocate an aligned space for it and store it there. 
:( diff --git a/js/src/gdb/tests/test-Root.cpp b/js/src/gdb/tests/test-Root.cpp index d877418587d..e067a0bea0a 100644 --- a/js/src/gdb/tests/test-Root.cpp +++ b/js/src/gdb/tests/test-Root.cpp @@ -28,7 +28,7 @@ FRAGMENT(Root, handle) { } FRAGMENT(Root, HeapSlot) { - JS::Rooted plinth(cx, STRING_TO_JSVAL(JS_NewStringCopyZ(cx, "plinth"))); + JS::Rooted plinth(cx, JS::StringValue(JS_NewStringCopyZ(cx, "plinth"))); JS::Rooted array(cx, JS_NewArrayObject(cx, JS::HandleValueArray(plinth))); breakpoint(); diff --git a/js/src/jsapi-tests/testParseJSON.cpp b/js/src/jsapi-tests/testParseJSON.cpp index 7fc56d183de..142b2090a7e 100644 --- a/js/src/jsapi-tests/testParseJSON.cpp +++ b/js/src/jsapi-tests/testParseJSON.cpp @@ -74,26 +74,26 @@ BEGIN_TEST(testParseJSON_success) const char16_t emptystr[] = { '\0' }; str = js::NewStringCopyN(cx, emptystr, 0); CHECK(str); - expected = STRING_TO_JSVAL(str); + expected = JS::StringValue(str); CHECK(TryParse(cx, "\"\"", expected)); const char16_t nullstr[] = { '\0' }; str = NewString(cx, nullstr); CHECK(str); - expected = STRING_TO_JSVAL(str); + expected = JS::StringValue(str); CHECK(TryParse(cx, "\"\\u0000\"", expected)); const char16_t backstr[] = { '\b' }; str = NewString(cx, backstr); CHECK(str); - expected = STRING_TO_JSVAL(str); + expected = JS::StringValue(str); CHECK(TryParse(cx, "\"\\b\"", expected)); CHECK(TryParse(cx, "\"\\u0008\"", expected)); const char16_t newlinestr[] = { '\n', }; str = NewString(cx, newlinestr); CHECK(str); - expected = STRING_TO_JSVAL(str); + expected = JS::StringValue(str); CHECK(TryParse(cx, "\"\\n\"", expected)); CHECK(TryParse(cx, "\"\\u000A\"", expected)); diff --git a/js/src/jsapi.cpp b/js/src/jsapi.cpp index bc5a561332c..2ec9a4cbcde 100644 --- a/js/src/jsapi.cpp +++ b/js/src/jsapi.cpp @@ -287,7 +287,7 @@ JS_GetPositiveInfinityValue(JSContext* cx) JS_PUBLIC_API(jsval) JS_GetEmptyStringValue(JSContext* cx) { - return STRING_TO_JSVAL(cx->runtime()->emptyString); + return 
StringValue(cx->runtime()->emptyString); } JS_PUBLIC_API(JSString*) diff --git a/js/xpconnect/src/XPCJSRuntime.cpp b/js/xpconnect/src/XPCJSRuntime.cpp index 178e787e0d1..60c573e8a71 100644 --- a/js/xpconnect/src/XPCJSRuntime.cpp +++ b/js/xpconnect/src/XPCJSRuntime.cpp @@ -3532,7 +3532,7 @@ XPCJSRuntime::OnJSContextNew(JSContext* cx) return false; } mStrIDs[i] = INTERNED_STRING_TO_JSID(cx, str); - mStrJSVals[i] = STRING_TO_JSVAL(str); + mStrJSVals[i].setString(str); } if (!mozilla::dom::DefineStaticJSVals(cx)) { diff --git a/js/xpconnect/src/XPCLocale.cpp b/js/xpconnect/src/XPCLocale.cpp index 2179554e43e..37b504f20b1 100644 --- a/js/xpconnect/src/XPCLocale.cpp +++ b/js/xpconnect/src/XPCLocale.cpp @@ -115,7 +115,7 @@ private: return false; } - rval.set(STRING_TO_JSVAL(ucstr)); + rval.setString(ucstr); return true; } diff --git a/js/xpconnect/src/XPCShellImpl.cpp b/js/xpconnect/src/XPCShellImpl.cpp index 710400be1d4..87bad4e6475 100644 --- a/js/xpconnect/src/XPCShellImpl.cpp +++ b/js/xpconnect/src/XPCShellImpl.cpp @@ -712,7 +712,7 @@ env_setProperty(JSContext* cx, HandleObject obj, HandleId id, MutableHandleValue JS_ReportError(cx, "can't set envariable %s to %s", name.ptr(), value.ptr()); return false; } - vp.set(STRING_TO_JSVAL(valstr)); + vp.setString(valstr); #endif /* !defined SOLARIS */ return result.succeed(); } diff --git a/js/xpconnect/src/XPCWrappedNativeJSOps.cpp b/js/xpconnect/src/XPCWrappedNativeJSOps.cpp index 71a5c94f5b6..8808a55e0d6 100644 --- a/js/xpconnect/src/XPCWrappedNativeJSOps.cpp +++ b/js/xpconnect/src/XPCWrappedNativeJSOps.cpp @@ -61,7 +61,7 @@ ToStringGuts(XPCCallContext& ccx) if (!str) return false; - ccx.SetRetVal(STRING_TO_JSVAL(str)); + ccx.SetRetVal(JS::StringValue(str)); return true; } diff --git a/storage/mozStorageStatementJSHelper.cpp b/storage/mozStorageStatementJSHelper.cpp index 9db095e55b1..59db1f4454f 100644 --- a/storage/mozStorageStatementJSHelper.cpp +++ b/storage/mozStorageStatementJSHelper.cpp @@ -71,7 +71,7 @@ 
stepFunc(JSContext *aCtx, return false; } - *_vp = BOOLEAN_TO_JSVAL(hasMore); + _vp->setBoolean(hasMore); return true; } diff --git a/storage/mozStorageStatementRow.cpp b/storage/mozStorageStatementRow.cpp index ee5409831c1..f582a31a573 100644 --- a/storage/mozStorageStatementRow.cpp +++ b/storage/mozStorageStatementRow.cpp @@ -80,7 +80,7 @@ StatementRow::GetProperty(nsIXPConnectWrappedNative *aWrapper, *_retval = false; return NS_OK; } - *_vp = STRING_TO_JSVAL(str); + _vp->setString(str); } else if (type == mozIStorageValueArray::VALUE_TYPE_BLOB) { uint32_t length; From 2d19dfc5f622d6b27dedbaf5e3f2da733e8605c5 Mon Sep 17 00:00:00 2001 From: Jan de Mooij Date: Tue, 30 Jun 2015 11:20:58 -0700 Subject: [PATCH 14/61] Bug 1177892 part 2 - Remove PRIVATE_TO_JSVAL. r=evilpie --- dom/bindings/BindingUtils.h | 2 +- dom/xbl/nsXBLBinding.cpp | 2 +- js/public/Value.h | 8 -------- js/src/ctypes/CTypes.cpp | 16 ++++++++-------- js/src/ctypes/Library.cpp | 6 +++--- js/src/shell/js.cpp | 2 +- js/xpconnect/wrappers/XrayWrapper.cpp | 2 +- 7 files changed, 15 insertions(+), 23 deletions(-) diff --git a/dom/bindings/BindingUtils.h b/dom/bindings/BindingUtils.h index b66477717b1..6379c6a3aa4 100644 --- a/dom/bindings/BindingUtils.h +++ b/dom/bindings/BindingUtils.h @@ -3028,7 +3028,7 @@ CreateGlobal(JSContext* aCx, T* aNative, nsWrapperCache* aCache, JSAutoCompartment ac(aCx, aGlobal); { - js::SetReservedSlot(aGlobal, DOM_OBJECT_SLOT, PRIVATE_TO_JSVAL(aNative)); + js::SetReservedSlot(aGlobal, DOM_OBJECT_SLOT, JS::PrivateValue(aNative)); NS_ADDREF(aNative); aCache->SetWrapper(aGlobal); diff --git a/dom/xbl/nsXBLBinding.cpp b/dom/xbl/nsXBLBinding.cpp index 063ad50ab4b..222d2c76eaa 100644 --- a/dom/xbl/nsXBLBinding.cpp +++ b/dom/xbl/nsXBLBinding.cpp @@ -1029,7 +1029,7 @@ nsXBLBinding::DoInitJSClass(JSContext *cx, nsXBLDocumentInfo* docInfo = aProtoBinding->XBLDocumentInfo(); ::JS_SetPrivate(proto, docInfo); NS_ADDREF(docInfo); - JS_SetReservedSlot(proto, 0, 
PRIVATE_TO_JSVAL(aProtoBinding)); + JS_SetReservedSlot(proto, 0, JS::PrivateValue(aProtoBinding)); // Next, enter the compartment of the property holder, wrap the proto, and // stick it on. diff --git a/js/public/Value.h b/js/public/Value.h index a790c4aacf0..c775ffc4698 100644 --- a/js/public/Value.h +++ b/js/public/Value.h @@ -1979,14 +1979,6 @@ OBJECT_TO_JSVAL(JSObject* obj) return IMPL_TO_JSVAL(BUILD_JSVAL(JSVAL_TAG_NULL, 0)); } -/* To be GC-safe, privates are tagged as doubles. */ - -static inline jsval -PRIVATE_TO_JSVAL(void* ptr) -{ - return IMPL_TO_JSVAL(PRIVATE_PTR_TO_JSVAL_IMPL(ptr)); -} - namespace JS { extern JS_PUBLIC_DATA(const HandleValue) NullHandleValue; diff --git a/js/src/ctypes/CTypes.cpp b/js/src/ctypes/CTypes.cpp index 912f4749756..ccc3a20fc77 100644 --- a/js/src/ctypes/CTypes.cpp +++ b/js/src/ctypes/CTypes.cpp @@ -2010,7 +2010,7 @@ JS_SetCTypesCallbacks(JSObject* ctypesObj, const JSCTypesCallbacks* callbacks) // Set the callbacks on a reserved slot. JS_SetReservedSlot(ctypesObj, SLOT_CALLBACKS, - PRIVATE_TO_JSVAL(const_cast(callbacks))); + PrivateValue(const_cast(callbacks))); } namespace js { @@ -3929,7 +3929,7 @@ CType::Create(JSContext* cx, // Set up the reserved slots. 
JS_SetReservedSlot(typeObj, SLOT_TYPECODE, INT_TO_JSVAL(type)); if (ffiType) - JS_SetReservedSlot(typeObj, SLOT_FFITYPE, PRIVATE_TO_JSVAL(ffiType)); + JS_SetReservedSlot(typeObj, SLOT_FFITYPE, PrivateValue(ffiType)); if (name) JS_SetReservedSlot(typeObj, SLOT_NAME, StringValue(name)); JS_SetReservedSlot(typeObj, SLOT_SIZE, size); @@ -4274,7 +4274,7 @@ CType::GetFFIType(JSContext* cx, JSObject* obj) if (!result) return nullptr; - JS_SetReservedSlot(obj, SLOT_FFITYPE, PRIVATE_TO_JSVAL(result.get())); + JS_SetReservedSlot(obj, SLOT_FFITYPE, PrivateValue(result.get())); return result.release(); } @@ -5636,7 +5636,7 @@ StructType::DefineInternal(JSContext* cx, JSObject* typeObj_, JSObject* fieldsOb if (!SizeTojsval(cx, structSize, &sizeVal)) return false; - JS_SetReservedSlot(typeObj, SLOT_FIELDINFO, PRIVATE_TO_JSVAL(fields.release())); + JS_SetReservedSlot(typeObj, SLOT_FIELDINFO, PrivateValue(fields.release())); JS_SetReservedSlot(typeObj, SLOT_SIZE, sizeVal); JS_SetReservedSlot(typeObj, SLOT_ALIGN, INT_TO_JSVAL(structAlign)); @@ -6297,7 +6297,7 @@ CreateFunctionInfo(JSContext* cx, } // Stash the FunctionInfo in a reserved slot. - JS_SetReservedSlot(typeObj, SLOT_FNINFO, PRIVATE_TO_JSVAL(fninfo)); + JS_SetReservedSlot(typeObj, SLOT_FNINFO, PrivateValue(fninfo)); ffi_abi abi; if (!GetABI(cx, abiType, &abi)) { @@ -6840,7 +6840,7 @@ CClosure::Create(JSContext* cx, cinfo->jsfnObj = fnObj; // Stash the ClosureInfo struct on our new object. - JS_SetReservedSlot(result, SLOT_CLOSUREINFO, PRIVATE_TO_JSVAL(cinfo)); + JS_SetReservedSlot(result, SLOT_CLOSUREINFO, PrivateValue(cinfo)); // Create an ffi_closure object and initialize it. 
void* code; @@ -7117,7 +7117,7 @@ CData::Create(JSContext* cx, } *buffer = data; - JS_SetReservedSlot(dataObj, SLOT_DATA, PRIVATE_TO_JSVAL(buffer)); + JS_SetReservedSlot(dataObj, SLOT_DATA, PrivateValue(buffer)); return dataObj; } @@ -8061,7 +8061,7 @@ Int64Base::Construct(JSContext* cx, return nullptr; } - JS_SetReservedSlot(result, SLOT_INT64, PRIVATE_TO_JSVAL(buffer)); + JS_SetReservedSlot(result, SLOT_INT64, PrivateValue(buffer)); if (!JS_FreezeObject(cx, result)) return nullptr; diff --git a/js/src/ctypes/Library.cpp b/js/src/ctypes/Library.cpp index 394fa8a5882..c6f80c56b69 100644 --- a/js/src/ctypes/Library.cpp +++ b/js/src/ctypes/Library.cpp @@ -89,7 +89,7 @@ Library::Create(JSContext* cx, jsval path_, const JSCTypesCallbacks* callbacks) return nullptr; // initialize the library - JS_SetReservedSlot(libraryObj, SLOT_LIBRARY, PRIVATE_TO_JSVAL(nullptr)); + JS_SetReservedSlot(libraryObj, SLOT_LIBRARY, PrivateValue(nullptr)); // attach API functions if (!JS_DefineFunctions(cx, libraryObj, sLibraryFunctions)) @@ -166,7 +166,7 @@ Library::Create(JSContext* cx, jsval path_, const JSCTypesCallbacks* callbacks) #endif // stash the library - JS_SetReservedSlot(libraryObj, SLOT_LIBRARY, PRIVATE_TO_JSVAL(library)); + JS_SetReservedSlot(libraryObj, SLOT_LIBRARY, PrivateValue(library)); return libraryObj; } @@ -244,7 +244,7 @@ Library::Close(JSContext* cx, unsigned argc, jsval* vp) // delete our internal objects UnloadLibrary(obj); - JS_SetReservedSlot(obj, SLOT_LIBRARY, PRIVATE_TO_JSVAL(nullptr)); + JS_SetReservedSlot(obj, SLOT_LIBRARY, PrivateValue(nullptr)); args.rval().setUndefined(); return true; diff --git a/js/src/shell/js.cpp b/js/src/shell/js.cpp index f77a98b5733..82800cf1104 100644 --- a/js/src/shell/js.cpp +++ b/js/src/shell/js.cpp @@ -5436,7 +5436,7 @@ static void InitDOMObject(HandleObject obj) { /* Fow now just initialize to a constant we can check. 
*/ - SetReservedSlot(obj, DOM_OBJECT_SLOT, PRIVATE_TO_JSVAL((void*)0x1234)); + SetReservedSlot(obj, DOM_OBJECT_SLOT, PrivateValue((void*)0x1234)); } static bool diff --git a/js/xpconnect/wrappers/XrayWrapper.cpp b/js/xpconnect/wrappers/XrayWrapper.cpp index 8da20efffba..63b05588af8 100644 --- a/js/xpconnect/wrappers/XrayWrapper.cpp +++ b/js/xpconnect/wrappers/XrayWrapper.cpp @@ -998,7 +998,7 @@ XrayTraits::attachExpandoObject(JSContext* cx, HandleObject target, // AddRef and store the principal. NS_ADDREF(origin); - JS_SetReservedSlot(expandoObject, JSSLOT_EXPANDO_ORIGIN, PRIVATE_TO_JSVAL(origin)); + JS_SetReservedSlot(expandoObject, JSSLOT_EXPANDO_ORIGIN, JS::PrivateValue(origin)); // Note the exclusive global, if any. JS_SetReservedSlot(expandoObject, JSSLOT_EXPANDO_EXCLUSIVE_GLOBAL, From 04f24125c592d7458aff09e823bd80729616614a Mon Sep 17 00:00:00 2001 From: Jean-Yves Avenard Date: Wed, 24 Jun 2015 13:30:15 -0700 Subject: [PATCH 15/61] Bug 1177147: Set mediasource duration to group end timestamp. 
r=karlt --- dom/media/mediasource/SourceBuffer.cpp | 2 +- dom/media/mediasource/SourceBufferContentManager.h | 1 + dom/media/mediasource/TrackBuffer.h | 6 ++++++ dom/media/mediasource/TrackBuffersManager.cpp | 9 +++++++++ dom/media/mediasource/TrackBuffersManager.h | 2 ++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/dom/media/mediasource/SourceBuffer.cpp b/dom/media/mediasource/SourceBuffer.cpp index 3148fd571cf..18c99f53f87 100644 --- a/dom/media/mediasource/SourceBuffer.cpp +++ b/dom/media/mediasource/SourceBuffer.cpp @@ -427,7 +427,7 @@ SourceBuffer::CheckEndTime() { MOZ_ASSERT(NS_IsMainThread()); // Check if we need to update mMediaSource duration - double endTime = GetBufferedEnd(); + double endTime = mContentManager->GroupEndTimestamp().ToSeconds(); double duration = mMediaSource->Duration(); if (endTime > duration) { mMediaSource->SetDuration(endTime, MSRangeRemovalAction::SKIP); diff --git a/dom/media/mediasource/SourceBufferContentManager.h b/dom/media/mediasource/SourceBufferContentManager.h index 291dadf20e0..3d49f7a5efe 100644 --- a/dom/media/mediasource/SourceBufferContentManager.h +++ b/dom/media/mediasource/SourceBufferContentManager.h @@ -100,6 +100,7 @@ public: virtual void SetGroupStartTimestamp(const TimeUnit& aGroupStartTimestamp) {} virtual void RestartGroupStartTimestamp() {} + virtual TimeUnit GroupEndTimestamp() = 0; #if defined(DEBUG) virtual void Dump(const char* aPath) { } diff --git a/dom/media/mediasource/TrackBuffer.h b/dom/media/mediasource/TrackBuffer.h index 750b796d4a7..09bdf03eeb1 100644 --- a/dom/media/mediasource/TrackBuffer.h +++ b/dom/media/mediasource/TrackBuffer.h @@ -68,6 +68,12 @@ public: void Detach() override; + TimeUnit GroupEndTimestamp() override + { + return Buffered().GetEnd(); + } + + // Mark the current decoder's resource as ended, clear mCurrentDecoder and // reset mLast{Start,End}Timestamp. Main thread only. 
void DiscardCurrentDecoder(); diff --git a/dom/media/mediasource/TrackBuffersManager.cpp b/dom/media/mediasource/TrackBuffersManager.cpp index 911f10a04ee..c7e0b4a5cf1 100644 --- a/dom/media/mediasource/TrackBuffersManager.cpp +++ b/dom/media/mediasource/TrackBuffersManager.cpp @@ -1228,6 +1228,8 @@ TrackBuffersManager::CompleteCodedFrameProcessing() MSE_DEBUG("video new buffered range = %s", DumpTimeRanges(mVideoBufferedRanges).get()); } + + mOfficialGroupEndTimestamp = mGroupEndTimestamp; } // Update our reported total size. @@ -1673,6 +1675,13 @@ TrackBuffersManager::RestartGroupStartTimestamp() mGroupStartTimestamp = Some(mGroupEndTimestamp); } +TimeUnit +TrackBuffersManager::GroupEndTimestamp() +{ + MonitorAutoLock mon(mMonitor); + return mOfficialGroupEndTimestamp; +} + MediaInfo TrackBuffersManager::GetMetadata() { diff --git a/dom/media/mediasource/TrackBuffersManager.h b/dom/media/mediasource/TrackBuffersManager.h index 916119e3c3d..d9f803809cf 100644 --- a/dom/media/mediasource/TrackBuffersManager.h +++ b/dom/media/mediasource/TrackBuffersManager.h @@ -71,6 +71,7 @@ public: void SetGroupStartTimestamp(const TimeUnit& aGroupStartTimestamp) override; void RestartGroupStartTimestamp() override; + TimeUnit GroupEndTimestamp() override; // Interface for MediaSourceDemuxer MediaInfo GetMetadata(); @@ -318,6 +319,7 @@ private: // Stable audio and video track time ranges. TimeIntervals mVideoBufferedRanges; TimeIntervals mAudioBufferedRanges; + TimeUnit mOfficialGroupEndTimestamp; // MediaInfo of the first init segment read. MediaInfo mInfo; }; From 14e97a550f1328cda1f42c61529a63b52adb4be8 Mon Sep 17 00:00:00 2001 From: Jean-Yves Avenard Date: Sat, 27 Jun 2015 19:07:16 -0700 Subject: [PATCH 16/61] Bug 1171760: P1. Add IntervalSet operator-=. 
r=mattwoodrow --- dom/media/Intervals.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dom/media/Intervals.h b/dom/media/Intervals.h index e0e1796bb00..a4f14019f4a 100644 --- a/dom/media/Intervals.h +++ b/dom/media/Intervals.h @@ -427,6 +427,14 @@ public: return Intersection(intervals); } + SelfType& operator-= (const SelfType& aIntervals) + { + for (const auto& interval : aIntervals.mIntervals) { + *this -= interval; + } + return *this; + } + SelfType operator- (const ElemType& aInterval) { SelfType intervals(*this); From c59d9802a6d0c399c0b3c7e38bb491a1246bf5b6 Mon Sep 17 00:00:00 2001 From: Jean-Yves Avenard Date: Sat, 27 Jun 2015 19:09:14 -0700 Subject: [PATCH 17/61] Bug 1171760: P2. Add TimeUnit operator/= (int). r=mattwoodrow --- dom/media/TimeUnits.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dom/media/TimeUnits.h b/dom/media/TimeUnits.h index 7d3287f8204..cbb3ede1abf 100644 --- a/dom/media/TimeUnits.h +++ b/dom/media/TimeUnits.h @@ -188,6 +188,9 @@ public: friend TimeUnit operator* (const TimeUnit& aUnit, int aVal) { return TimeUnit(aUnit.mValue * aVal); } + friend TimeUnit operator/ (const TimeUnit& aUnit, int aVal) { + return TimeUnit(aUnit.mValue / aVal); + } bool IsValid() const { From 2a5dcdb97082328c10c164e5ccd52ada9a43344e Mon Sep 17 00:00:00 2001 From: Jean-Yves Avenard Date: Sat, 27 Jun 2015 19:17:35 -0700 Subject: [PATCH 18/61] Bug 1171760: P3. Returns error for invalid web mimetype. r=kentuckyfriedtakahe Invalid error rather than not supported. 
--- dom/media/mediasource/MediaSource.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dom/media/mediasource/MediaSource.cpp b/dom/media/mediasource/MediaSource.cpp index b8a9facf9cc..5c56a715efc 100644 --- a/dom/media/mediasource/MediaSource.cpp +++ b/dom/media/mediasource/MediaSource.cpp @@ -85,10 +85,6 @@ IsTypeSupported(const nsAString& aType) if (NS_FAILED(rv)) { return NS_ERROR_DOM_NOT_SUPPORTED_ERR; } - if (Preferences::GetBool("media.mediasource.format-reader", false) && - !mimeType.EqualsASCII("video/mp4") && !mimeType.EqualsASCII("audio/mp4")) { - return NS_ERROR_DOM_NOT_SUPPORTED_ERR; - } bool found = false; for (uint32_t i = 0; gMediaSourceTypes[i]; ++i) { if (mimeType.EqualsASCII(gMediaSourceTypes[i])) { @@ -120,6 +116,12 @@ IsTypeSupported(const nsAString& aType) if (dom::HTMLMediaElement::GetCanPlay(aType) == CANPLAY_NO) { return NS_ERROR_DOM_NOT_SUPPORTED_ERR; } + + if (Preferences::GetBool("media.mediasource.format-reader", false) && + !mimeType.EqualsASCII("video/mp4") && !mimeType.EqualsASCII("audio/mp4")) { + return NS_ERROR_DOM_NOT_SUPPORTED_ERR; + } + return NS_OK; } From e39a6f68a63afb020c33fd049301a10102a0f1f9 Mon Sep 17 00:00:00 2001 From: Jean-Yves Avenard Date: Sat, 27 Jun 2015 19:15:06 -0700 Subject: [PATCH 19/61] Bug 1171760: P4. Process an entire media segment at a time rather than frame by frame. r=gerald This provides significant speed improvement, halving the CPU usage. Rationalise similar methods. This assumes that frames within a media segment are continuous. 
--- dom/media/mediasource/SourceBuffer.cpp | 8 +- dom/media/mediasource/SourceBuffer.h | 2 +- dom/media/mediasource/TrackBuffersManager.cpp | 757 +++++++++--------- dom/media/mediasource/TrackBuffersManager.h | 13 +- 4 files changed, 392 insertions(+), 388 deletions(-) diff --git a/dom/media/mediasource/SourceBuffer.cpp b/dom/media/mediasource/SourceBuffer.cpp index 18c99f53f87..3365f0fd7e4 100644 --- a/dom/media/mediasource/SourceBuffer.cpp +++ b/dom/media/mediasource/SourceBuffer.cpp @@ -73,7 +73,7 @@ SourceBuffer::SetMode(SourceBufferAppendMode aMode, ErrorResult& aRv) aRv.Throw(NS_ERROR_DOM_NOT_SUPPORTED_ERR); return; } - if (mIsUsingFormatReader && mGenerateTimestamp && + if (mIsUsingFormatReader && mGenerateTimestamps && aMode == SourceBufferAppendMode::Segments) { aRv.Throw(NS_ERROR_DOM_INVALID_ACCESS_ERR); return; @@ -331,14 +331,14 @@ SourceBuffer::SourceBuffer(MediaSource* aMediaSource, const nsACString& aType) mContentManager.get()); if (aType.LowerCaseEqualsLiteral("audio/mpeg") || aType.LowerCaseEqualsLiteral("audio/aac")) { - mGenerateTimestamp = true; + mGenerateTimestamps = true; } else { - mGenerateTimestamp = false; + mGenerateTimestamps = false; } mIsUsingFormatReader = Preferences::GetBool("media.mediasource.format-reader", false); ErrorResult dummy; - if (mGenerateTimestamp) { + if (mGenerateTimestamps) { SetMode(SourceBufferAppendMode::Sequence, dummy); } else { SetMode(SourceBufferAppendMode::Segments, dummy); diff --git a/dom/media/mediasource/SourceBuffer.h b/dom/media/mediasource/SourceBuffer.h index 652a4a72a0a..cdaf3a8e540 100644 --- a/dom/media/mediasource/SourceBuffer.h +++ b/dom/media/mediasource/SourceBuffer.h @@ -184,7 +184,7 @@ private: SourceBufferAppendMode mAppendMode; bool mUpdating; - bool mGenerateTimestamp; + bool mGenerateTimestamps; bool mIsUsingFormatReader; mozilla::Atomic mActive; diff --git a/dom/media/mediasource/TrackBuffersManager.cpp b/dom/media/mediasource/TrackBuffersManager.cpp index c7e0b4a5cf1..c8745de494f 
100644 --- a/dom/media/mediasource/TrackBuffersManager.cpp +++ b/dom/media/mediasource/TrackBuffersManager.cpp @@ -475,6 +475,7 @@ TrackBuffersManager::CodedFrameRemoval(TimeInterval aInterval) } TimeUnit duration{TimeUnit::FromSeconds(mMediaSourceDuration.Ref().ref())}; +#if DEBUG MSE_DEBUG("duration:%.2f", duration.ToSeconds()); if (HasVideo()) { MSE_DEBUG("before video ranges=%s", @@ -484,6 +485,7 @@ TrackBuffersManager::CodedFrameRemoval(TimeInterval aInterval) MSE_DEBUG("before audio ranges=%s", DumpTimeRanges(mAudioTracks.mBufferedRanges).get()); } +#endif // 1. Let start be the starting presentation timestamp for the removal range. TimeUnit start = aInterval.mStart; @@ -510,96 +512,12 @@ TrackBuffersManager::CodedFrameRemoval(TimeInterval aInterval) } } - bool removeCurrentCodedFrameGroup = false; - // 3. Remove all media data, from this track buffer, that contain starting // timestamps greater than or equal to start and less than the remove end timestamp. - TimeInterval removedInterval; - Maybe firstRemovedIndex; - TrackBuffer& data = track->mBuffers.LastElement(); - for (uint32_t i = 0; i < data.Length();) { - const auto& frame = data[i]; - if (frame->mTime >= start.ToMicroseconds() && - frame->mTime < removeEndTimestamp.ToMicroseconds()) { - if (firstRemovedIndex.isNothing()) { - removedInterval = - TimeInterval(TimeUnit::FromMicroseconds(frame->mTime), - TimeUnit::FromMicroseconds(frame->mTime + frame->mDuration)); - firstRemovedIndex = Some(i); - } else { - removedInterval = removedInterval.Span( - TimeInterval(TimeUnit::FromMicroseconds(frame->mTime), - TimeUnit::FromMicroseconds(frame->mTime + frame->mDuration))); - } - track->mSizeBuffer -= sizeof(*frame) + frame->mSize; - data.RemoveElementAt(i); - removeCurrentCodedFrameGroup |= - track->mNextInsertionIndex.isSome() && - track->mNextInsertionIndex.ref() == i; - if (!removeCurrentCodedFrameGroup && - track->mNextInsertionIndex.isSome() && - track->mNextInsertionIndex.ref() > i) { - 
track->mNextInsertionIndex.ref()--; - } - - if (track->mNextGetSampleIndex.isSome()) { - if (track->mNextGetSampleIndex.ref() == i) { - MSE_DEBUG("Next sample to be played got evicted"); - track->mNextGetSampleIndex.reset(); - } else if (track->mNextGetSampleIndex.ref() > i) { - track->mNextGetSampleIndex.ref()--; - } - } - } else { - i++; - } - } // 4. Remove decoding dependencies of the coded frames removed in the previous step: // Remove all coded frames between the coded frames removed in the previous step and the next random access point after those removed frames. - if (firstRemovedIndex.isSome()) { - uint32_t start = firstRemovedIndex.ref(); - uint32_t end = start; - for (;end < data.Length(); end++) { - MediaRawData* sample = data[end].get(); - if (sample->mKeyframe) { - break; - } - removedInterval = removedInterval.Span( - TimeInterval(TimeUnit::FromMicroseconds(sample->mTime), - TimeUnit::FromMicroseconds(sample->GetEndTime()))); - track->mSizeBuffer -= sizeof(*sample) + sample->mSize; - } - data.RemoveElementsAt(start, end - start); - - removeCurrentCodedFrameGroup |= - track->mNextInsertionIndex.isSome() && - track->mNextInsertionIndex.ref() >= start && - track->mNextInsertionIndex.ref() < end; - if (!removeCurrentCodedFrameGroup && - track->mNextInsertionIndex.isSome() && - track->mNextInsertionIndex.ref() >= end) { - track->mNextInsertionIndex.ref() -= end - start; - } - - if (track->mNextGetSampleIndex.isSome()) { - if (track->mNextGetSampleIndex.ref() >= start && - track->mNextGetSampleIndex.ref() < end) { - MSE_DEBUG("Next sample to be played got evicted"); - track->mNextGetSampleIndex.reset(); - } else if (track->mNextGetSampleIndex.ref() >= end) { - track->mNextGetSampleIndex.ref() -= end - start; - } - } - - MSE_DEBUG("Removing undecodable frames from:%u (frames:%d) ([%f, %f))", - start, end - start, - removedInterval.mStart.ToSeconds(), removedInterval.mEnd.ToSeconds()); - track->mBufferedRanges -= removedInterval; - dataRemoved = true; - if 
(removeCurrentCodedFrameGroup) { - track->ResetAppendState(); - } - } + TimeIntervals removedInterval{TimeInterval(start, removeEndTimestamp)}; + RemoveFrames(removedInterval, *track, 0); // 5. If this object is in activeSourceBuffers, the current playback position // is greater than or equal to start and less than the remove end timestamp, @@ -609,20 +527,7 @@ TrackBuffersManager::CodedFrameRemoval(TimeInterval aInterval) // TODO properly, so it works even if paused. } - { - MonitorAutoLock mon(mMonitor); - mVideoBufferedRanges = mVideoTracks.mBufferedRanges; - mAudioBufferedRanges = mAudioTracks.mBufferedRanges; - } - - if (HasVideo()) { - MSE_DEBUG("after video ranges=%s", - DumpTimeRanges(mVideoTracks.mBufferedRanges).get()); - } - if (HasAudio()) { - MSE_DEBUG("after audio ranges=%s", - DumpTimeRanges(mAudioTracks.mBufferedRanges).get()); - } + UpdateBufferedRanges(); // Update our reported total size. mSizeSourceBuffer = mVideoTracks.mSizeBuffer + mAudioTracks.mSizeBuffer; @@ -639,6 +544,28 @@ TrackBuffersManager::CodedFrameRemoval(TimeInterval aInterval) return dataRemoved; } +void +TrackBuffersManager::UpdateBufferedRanges() +{ + MonitorAutoLock mon(mMonitor); + + mVideoBufferedRanges = mVideoTracks.mBufferedRanges; + mAudioBufferedRanges = mAudioTracks.mBufferedRanges; + +#if DEBUG + if (HasVideo()) { + MSE_DEBUG("after video ranges=%s", + DumpTimeRanges(mVideoTracks.mBufferedRanges).get()); + } + if (HasAudio()) { + MSE_DEBUG("after audio ranges=%s", + DumpTimeRanges(mAudioTracks.mBufferedRanges).get()); + } +#endif + + mOfficialGroupEndTimestamp = mGroupEndTimestamp; +} + nsRefPtr TrackBuffersManager::InitSegmentParserLoop() { @@ -668,6 +595,10 @@ TrackBuffersManager::AppendIncomingBuffers() mLastTimestampOffset = mTimestampOffset; } mIncomingBuffers.Clear(); + + mAppendWindow = + TimeInterval(TimeUnit::FromSeconds(mParent->AppendWindowStart()), + TimeUnit::FromSeconds(mParent->AppendWindowEnd())); } void @@ -1175,15 +1106,10 @@ 
TrackBuffersManager::CompleteCodedFrameProcessing() MSE_DEBUG("mAbort:%d", static_cast(mAbort)); // 1. For each coded frame in the media segment run the following steps: - - for (auto& sample : mVideoTracks.mQueuedSamples) { - while (true) { - if (!ProcessFrame(sample, mVideoTracks)) { - break; - } - } - } + // Coded Frame Processing steps 1.1 to 1.21. + ProcessFrames(mVideoTracks.mQueuedSamples, mVideoTracks); mVideoTracks.mQueuedSamples.Clear(); + #if defined(DEBUG) if (HasVideo()) { const auto& track = mVideoTracks.mBuffers.LastElement(); @@ -1195,14 +1121,9 @@ TrackBuffersManager::CompleteCodedFrameProcessing() } #endif - for (auto& sample : mAudioTracks.mQueuedSamples) { - while (true) { - if (!ProcessFrame(sample, mAudioTracks)) { - break; - } - } - } + ProcessFrames(mAudioTracks.mQueuedSamples, mAudioTracks); mAudioTracks.mQueuedSamples.Clear(); + #if defined(DEBUG) if (HasAudio()) { const auto& track = mAudioTracks.mBuffers.LastElement(); @@ -1214,23 +1135,7 @@ TrackBuffersManager::CompleteCodedFrameProcessing() } #endif - { - MonitorAutoLock mon(mMonitor); - - // Save our final tracks buffered ranges. - mVideoBufferedRanges = mVideoTracks.mBufferedRanges; - mAudioBufferedRanges = mAudioTracks.mBufferedRanges; - if (HasAudio()) { - MSE_DEBUG("audio new buffered range = %s", - DumpTimeRanges(mAudioBufferedRanges).get()); - } - if (HasVideo()) { - MSE_DEBUG("video new buffered range = %s", - DumpTimeRanges(mVideoBufferedRanges).get()); - } - - mOfficialGroupEndTimestamp = mGroupEndTimestamp; - } + UpdateBufferedRanges(); // Update our reported total size. 
mSizeSourceBuffer = mVideoTracks.mSizeBuffer + mAudioTracks.mSizeBuffer; @@ -1288,22 +1193,9 @@ TrackBuffersManager::ResolveProcessing(bool aResolveValue, const char* aName) mProcessingPromise.ResolveIfExists(aResolveValue, __func__); } -bool -TrackBuffersManager::ProcessFrame(MediaRawData* aSample, - TrackData& aTrackData) +void +TrackBuffersManager::CheckSequenceDiscontinuity() { - TimeUnit presentationTimestamp; - TimeUnit decodeTimestamp; - - if (!mParent->mGenerateTimestamp) { - presentationTimestamp = TimeUnit::FromMicroseconds(aSample->mTime); - decodeTimestamp = TimeUnit::FromMicroseconds(aSample->mTimecode); - } - - // 2. Let frame duration be a double precision floating point representation of the coded frame's duration in seconds. - TimeUnit frameDuration{TimeUnit::FromMicroseconds(aSample->mDuration)}; - - // 3. If mode equals "sequence" and group start timestamp is set, then run the following steps: if (mParent->mAppendMode == SourceBufferAppendMode::Sequence && mGroupStartTimestamp.isSome()) { mTimestampOffset = mGroupStartTimestamp.ref(); @@ -1312,93 +1204,253 @@ TrackBuffersManager::ProcessFrame(MediaRawData* aSample, mAudioTracks.mNeedRandomAccessPoint = true; mGroupStartTimestamp.reset(); } +} - // 4. If timestampOffset is not 0, then run the following steps: - if (mTimestampOffset != TimeUnit::FromSeconds(0)) { - presentationTimestamp += mTimestampOffset; - decodeTimestamp += mTimestampOffset; +void +TrackBuffersManager::ProcessFrames(TrackBuffer& aSamples, TrackData& aTrackData) +{ + if (!aSamples.Length()) { + return; } - MSE_DEBUGV("Processing %s frame(pts:%lld end:%lld, dts:%lld, duration:%lld, " - "kf:%d)", - aTrackData.mInfo->mMimeType.get(), - presentationTimestamp.ToMicroseconds(), - (presentationTimestamp + frameDuration).ToMicroseconds(), - decodeTimestamp.ToMicroseconds(), - frameDuration.ToMicroseconds(), - aSample->mKeyframe); + // 3. 
If mode equals "sequence" and group start timestamp is set, then run the following steps: + CheckSequenceDiscontinuity(); // 5. Let track buffer equal the track buffer that the coded frame will be added to. auto& trackBuffer = aTrackData; - // 6. If last decode timestamp for track buffer is set and decode timestamp is less than last decode timestamp: - // OR - // If last decode timestamp for track buffer is set and the difference between decode timestamp and last decode timestamp is greater than 2 times last frame duration: - - // TODO: Maybe we should be using TimeStamp and TimeDuration instead? - - // Some MP4 content may exhibit an extremely short frame duration. - // As such, we can't use the last frame duration as a way to detect - // discontinuities as required per step 6 above. - // Instead we use the biggest duration seen so far in this run (init + media - // segment). - if ((trackBuffer.mLastDecodeTimestamp.isSome() && - decodeTimestamp < trackBuffer.mLastDecodeTimestamp.ref()) || - (trackBuffer.mLastDecodeTimestamp.isSome() && - decodeTimestamp - trackBuffer.mLastDecodeTimestamp.ref() > 2*trackBuffer.mLongestFrameDuration.ref())) { - - // 1a. If mode equals "segments": - if (mParent->mAppendMode == SourceBufferAppendMode::Segments) { - // Set group end timestamp to presentation timestamp. - mGroupEndTimestamp = presentationTimestamp; - } - // 1b. If mode equals "sequence": - if (mParent->mAppendMode == SourceBufferAppendMode::Sequence) { - // Set group start timestamp equal to the group end timestamp. - mGroupStartTimestamp = Some(mGroupEndTimestamp); - } - for (auto& track : GetTracksList()) { - // 2. Unset the last decode timestamp on all track buffers. - // 3. Unset the last frame duration on all track buffers. - // 4. Unset the highest end timestamp on all track buffers. - // 5. Set the need random access point flag on all track buffers to true. - track->ResetAppendState(); - } - - MSE_DEBUG("Discontinuity detected. Restarting process"); - // 6. 
Jump to the Loop Top step above to restart processing of the current coded frame. - return true; - } - - // 7. Let frame end timestamp equal the sum of presentation timestamp and frame duration. - TimeUnit frameEndTimestamp = presentationTimestamp + frameDuration; - - // 8. If presentation timestamp is less than appendWindowStart, then set the need random access point flag to true, drop the coded frame, and jump to the top of the loop to start processing the next coded frame. - // 9. If frame end timestamp is greater than appendWindowEnd, then set the need random access point flag to true, drop the coded frame, and jump to the top of the loop to start processing the next coded frame. - - // We apply a fuzz search += mLongestFrameDuration to get around videos where + // We apply a fuzz search +- mLongestFrameDuration to get around videos where // the start time is negative but close to 0. TimeInterval targetWindow{ - TimeInterval(TimeUnit::FromSeconds(mParent->mAppendWindowStart), - TimeUnit::FromSeconds(mParent->mAppendWindowEnd), - trackBuffer.mLongestFrameDuration.valueOr(frameDuration))}; - TimeInterval frameInterval{presentationTimestamp, frameEndTimestamp}; + TimeInterval(mAppendWindow.mStart, mAppendWindow.mEnd, + trackBuffer.mLongestFrameDuration.refOr(TimeUnit::FromMicroseconds(aSamples[0]->mDuration)))}; - if (!targetWindow.Contains(frameInterval)) { - trackBuffer.mNeedRandomAccessPoint = true; - return false; - } + TimeIntervals samplesRange; + uint32_t sizeNewSamples = 0; + TrackBuffer samples; // array that will contain the frames to be added + // to our track buffer. - // 10. If the need random access point flag on track buffer equals true, then run the following steps: - if (trackBuffer.mNeedRandomAccessPoint) { - // 1. If the coded frame is not a random access point, then drop the coded frame and jump to the top of the loop to start processing the next coded frame. 
- if (!aSample->mKeyframe) { - return false; + } + + // We assume that frames are contiguous within a media segment and as such + // don't need to check for discontinuity except for the first frame and should + // a frame be ignored due to the target window. + bool needDiscontinuityCheck = true; + + for (auto& sample : aSamples) { + MSE_DEBUGV("Processing %s frame(pts:%lld end:%lld, dts:%lld, duration:%lld, " + "kf:%d)", + aTrackData.mInfo->mMimeType.get(), + sample->mTime, + sample->GetEndTime(), + sample->mTimecode, + sample->mDuration, + sample->mKeyframe); + + // We perform step 10 right away as we can't do anything should a keyframe + // be needed until we have one. + + // 10. If the need random access point flag on track buffer equals true, then run the following steps: + if (trackBuffer.mNeedRandomAccessPoint) { + // 1. If the coded frame is not a random access point, then drop the coded frame and jump to the top of the loop to start processing the next coded frame. + if (!sample->mKeyframe) { + continue; + } + // 2. Set the need random access point flag on track buffer to false. + trackBuffer.mNeedRandomAccessPoint = false; + } + + // We perform step 1,2 and 4 at once: + // 1. If generate timestamps flag equals true: + // Let presentation timestamp equal 0. + // Let decode timestamp equal 0. + // Otherwise: + // Let presentation timestamp be a double precision floating point representation of the coded frame's presentation timestamp in seconds. + // Let decode timestamp be a double precision floating point representation of the coded frame's decode timestamp in seconds. + + // 2. Let frame duration be a double precision floating point representation of the coded frame's duration in seconds. + // Step 3 is performed earlier or when a discontinuity has been detected. + // 4. If timestampOffset is not 0, then run the following steps: + + TimeInterval sampleInterval = + mParent->mGenerateTimestamps + ? 
TimeInterval(mTimestampOffset, + mTimestampOffset + TimeUnit::FromMicroseconds(sample->mDuration)) + : TimeInterval(TimeUnit::FromMicroseconds(sample->mTime) + mTimestampOffset, + TimeUnit::FromMicroseconds(sample->GetEndTime()) + mTimestampOffset); + TimeUnit decodeTimestamp = + mParent->mGenerateTimestamps + ? mTimestampOffset + : TimeUnit::FromMicroseconds(sample->mTimecode) + mTimestampOffset; + + // 6. If last decode timestamp for track buffer is set and decode timestamp is less than last decode timestamp: + // OR + // If last decode timestamp for track buffer is set and the difference between decode timestamp and last decode timestamp is greater than 2 times last frame duration: + + if (needDiscontinuityCheck && trackBuffer.mLastDecodeTimestamp.isSome() && + (decodeTimestamp < trackBuffer.mLastDecodeTimestamp.ref() || + decodeTimestamp - trackBuffer.mLastDecodeTimestamp.ref() > 2*trackBuffer.mLongestFrameDuration.ref())) { + MSE_DEBUG("Discontinuity detected."); + // 1a. If mode equals "segments": + if (mParent->mAppendMode == SourceBufferAppendMode::Segments) { + // Set group end timestamp to presentation timestamp. + mGroupEndTimestamp = sampleInterval.mStart; + } + // 1b. If mode equals "sequence": + if (mParent->mAppendMode == SourceBufferAppendMode::Sequence) { + // Set group start timestamp equal to the group end timestamp. + mGroupStartTimestamp = Some(mGroupEndTimestamp); + } + for (auto& track : GetTracksList()) { + // 2. Unset the last decode timestamp on all track buffers. + // 3. Unset the last frame duration on all track buffers. + // 4. Unset the highest end timestamp on all track buffers. + // 5. Set the need random access point flag on all track buffers to true. + track->ResetAppendState(); + } + // 6. Jump to the Loop Top step above to restart processing of the current coded frame. + // Rather than restarting the process for the frame, we run the first + // steps again instead. + // 3. 
If mode equals "sequence" and group start timestamp is set, then run the following steps: + CheckSequenceDiscontinuity(); + + if (!sample->mKeyframe) { + continue; + } + if (mParent->mAppendMode == SourceBufferAppendMode::Sequence) { + // mTimestampOffset was modified during CheckSequenceDiscontinuity. + // We need to update our variables. + sampleInterval = + mParent->mGenerateTimestamps + ? TimeInterval(mTimestampOffset, + mTimestampOffset + TimeUnit::FromMicroseconds(sample->mDuration)) + : TimeInterval(TimeUnit::FromMicroseconds(sample->mTime) + mTimestampOffset, + TimeUnit::FromMicroseconds(sample->GetEndTime()) + mTimestampOffset); + decodeTimestamp = + mParent->mGenerateTimestamps + ? mTimestampOffset + : TimeUnit::FromMicroseconds(sample->mTimecode) + mTimestampOffset; + } + trackBuffer.mNeedRandomAccessPoint = false; + needDiscontinuityCheck = false; + } + + // 7. Let frame end timestamp equal the sum of presentation timestamp and frame duration. + // This is sampleInterval.mEnd + + // 8. If presentation timestamp is less than appendWindowStart, then set the need random access point flag to true, drop the coded frame, and jump to the top of the loop to start processing the next coded frame. + // 9. If frame end timestamp is greater than appendWindowEnd, then set the need random access point flag to true, drop the coded frame, and jump to the top of the loop to start processing the next coded frame. + if (!targetWindow.Contains(sampleInterval)) { + if (samples.Length()) { + // We are creating a discontinuity in the samples. + // Insert the samples processed so far. 
+ InsertFrames(samples, samplesRange, trackBuffer); + samples.Clear(); + samplesRange = TimeIntervals(); + trackBuffer.mSizeBuffer += sizeNewSamples; + sizeNewSamples = 0; + } + trackBuffer.mNeedRandomAccessPoint = true; + needDiscontinuityCheck = true; + continue; + } + + samplesRange += sampleInterval; + sizeNewSamples += sizeof(*sample) + sample->mSize; + sample->mTime = sampleInterval.mStart.ToMicroseconds(); + sample->mTimecode = decodeTimestamp.ToMicroseconds(); + sample->mTrackInfo = trackBuffer.mLastInfo; + samples.AppendElement(sample); + + // Steps 11,12,13,14, 15 and 16 will be done in one block in InsertFrames. + + // 17. Set last decode timestamp for track buffer to decode timestamp. + trackBuffer.mLastDecodeTimestamp = + Some(TimeUnit::FromMicroseconds(sample->mTimecode)); + // 18. Set last frame duration for track buffer to frame duration. + trackBuffer.mLastFrameDuration = + Some(TimeUnit::FromMicroseconds(sample->mDuration)); + + trackBuffer.mLongestFrameDuration = + Some(trackBuffer.mLongestFrameDuration.isNothing() + ? trackBuffer.mLastFrameDuration.ref() + : std::max(trackBuffer.mLastFrameDuration.ref(), + trackBuffer.mLongestFrameDuration.ref())); + + // 19. If highest end timestamp for track buffer is unset or frame end timestamp is greater than highest end timestamp, then set highest end timestamp for track buffer to frame end timestamp. + if (trackBuffer.mHighestEndTimestamp.isNothing() || + sampleInterval.mEnd > trackBuffer.mHighestEndTimestamp.ref()) { + trackBuffer.mHighestEndTimestamp = Some(sampleInterval.mEnd); + } + // 20. If frame end timestamp is greater than group end timestamp, then set group end timestamp equal to frame end timestamp. + if (sampleInterval.mEnd > mGroupEndTimestamp) { + mGroupEndTimestamp = sampleInterval.mEnd; + } + // 21. If generate timestamps flag equals true, then set timestampOffset equal to frame end timestamp. + if (mParent->mGenerateTimestamps) { + mTimestampOffset = sampleInterval.mEnd; } - // 2. 
Set the need random access point flag on track buffer to false. - trackBuffer.mNeedRandomAccessPoint = false; } + if (samples.Length()) { + InsertFrames(samples, samplesRange, trackBuffer); + trackBuffer.mSizeBuffer += sizeNewSamples; + } +} + +void +TrackBuffersManager::CheckNextInsertionIndex(TrackData& aTrackData, + const TimeUnit& aSampleTime) +{ + if (aTrackData.mNextInsertionIndex.isSome()) { + return; + } + + TrackBuffer& data = aTrackData.mBuffers.LastElement(); + + if (data.IsEmpty() || aSampleTime < aTrackData.mBufferedRanges.GetStart()) { + aTrackData.mNextInsertionIndex = Some(size_t(0)); + return; + } + + // Find which discontinuity we should insert the frame before. + TimeInterval target; + for (const auto& interval : aTrackData.mBufferedRanges) { + if (aSampleTime < interval.mStart) { + target = interval; + break; + } + } + if (target.IsEmpty()) { + // No target found, it will be added at the end of the track buffer. + aTrackData.mNextInsertionIndex = Some(data.Length()); + return; + } + for (uint32_t i = 0; i < data.Length(); i++) { + const nsRefPtr& sample = data[i]; + TimeInterval sampleInterval{ + TimeUnit::FromMicroseconds(sample->mTime), + TimeUnit::FromMicroseconds(sample->GetEndTime())}; + if (target.Intersects(sampleInterval)) { + aTrackData.mNextInsertionIndex = Some(size_t(i)); + return; + } + } + MOZ_CRASH("Insertion Index Not Found"); +} + +void +TrackBuffersManager::InsertFrames(TrackBuffer& aSamples, + const TimeIntervals& aIntervals, + TrackData& aTrackData) +{ + // 5. Let track buffer equal the track buffer that the coded frame will be added to. + auto& trackBuffer = aTrackData; + + MSE_DEBUGV("Processing %d %s frames(start:%lld end:%lld)", + aSamples.Length(), + aTrackData.mInfo->mMimeType.get(), + aIntervals.GetStart().ToMicroseconds(), + aIntervals.GetEnd().ToMicroseconds()); + // TODO: Handle splicing of audio (and text) frames. // 11. Let spliced audio frame be an unset variable for holding audio splice information // 12. 
Let spliced timed text frame be an unset variable for holding timed text splice information @@ -1416,180 +1468,121 @@ TrackBuffersManager::ProcessFrame(MediaRawData* aSample, // There is an ambiguity on how to remove frames, which was lodged with: // https://www.w3.org/Bugs/Public/show_bug.cgi?id=28710, implementing as per // bug description. - Maybe firstRemovedIndex; - TimeInterval removedInterval; - TrackBuffer& data = trackBuffer.mBuffers.LastElement(); - bool removeCodedFrames = - trackBuffer.mHighestEndTimestamp.isSome() - ? trackBuffer.mHighestEndTimestamp.ref() <= presentationTimestamp - : true; - if (removeCodedFrames) { - TimeUnit lowerBound = - trackBuffer.mHighestEndTimestamp.valueOr(presentationTimestamp); - if (trackBuffer.mBufferedRanges.ContainsStrict(lowerBound)) { - for (uint32_t i = 0; i < data.Length();) { - MediaRawData* sample = data[i].get(); - if (sample->mTime >= lowerBound.ToMicroseconds() && - sample->mTime < frameEndTimestamp.ToMicroseconds()) { - if (firstRemovedIndex.isNothing()) { - removedInterval = - TimeInterval(TimeUnit::FromMicroseconds(sample->mTime), - TimeUnit::FromMicroseconds(sample->GetEndTime())); - firstRemovedIndex = Some(i); - } else { - removedInterval = removedInterval.Span( - TimeInterval(TimeUnit::FromMicroseconds(sample->mTime), - TimeUnit::FromMicroseconds(sample->GetEndTime()))); - } - trackBuffer.mSizeBuffer -= sizeof(*sample) + sample->mSize; - MSE_DEBUGV("Overlapping frame:%u ([%f, %f))", - i, - TimeUnit::FromMicroseconds(sample->mTime).ToSeconds(), - TimeUnit::FromMicroseconds(sample->GetEndTime()).ToSeconds()); - data.RemoveElementAt(i); - if (trackBuffer.mNextGetSampleIndex.isSome()) { - if (trackBuffer.mNextGetSampleIndex.ref() == i) { - MSE_DEBUG("Next sample to be played got evicted"); - trackBuffer.mNextGetSampleIndex.reset(); - } else if (trackBuffer.mNextGetSampleIndex.ref() > i) { - trackBuffer.mNextGetSampleIndex.ref()--; - } - } - } else { - i++; - } - } - } - // 15. 
Remove decoding dependencies of the coded frames removed in the previous step: - // Remove all coded frames between the coded frames removed in the previous step and the next random access point after those removed frames. - if (firstRemovedIndex.isSome()) { - uint32_t start = firstRemovedIndex.ref(); - uint32_t end = start; - for (;end < data.Length(); end++) { - MediaRawData* sample = data[end].get(); - if (sample->mKeyframe) { - break; - } - removedInterval = removedInterval.Span( - TimeInterval(TimeUnit::FromMicroseconds(sample->mTime), - TimeUnit::FromMicroseconds(sample->GetEndTime()))); - trackBuffer.mSizeBuffer -= sizeof(*sample) + sample->mSize; - } - data.RemoveElementsAt(start, end - start); + // 15. Remove decoding dependencies of the coded frames removed in the previous step: + // Remove all coded frames between the coded frames removed in the previous step and the next random access point after those removed frames. - MSE_DEBUG("Removing undecodable frames from:%u (frames:%u) ([%f, %f))", - start, end - start, - removedInterval.mStart.ToSeconds(), removedInterval.mEnd.ToSeconds()); + TimeIntervals intersection = trackBuffer.mBufferedRanges; + intersection.Intersection(aIntervals); - if (trackBuffer.mNextGetSampleIndex.isSome()) { - if (trackBuffer.mNextGetSampleIndex.ref() >= start && - trackBuffer.mNextGetSampleIndex.ref() < end) { - MSE_DEBUG("Next sample to be played got evicted"); - trackBuffer.mNextGetSampleIndex.reset(); - } else if (trackBuffer.mNextGetSampleIndex.ref() >= end) { - trackBuffer.mNextGetSampleIndex.ref() -= end - start; - } - } - - // Update our buffered range to exclude the range just removed. - trackBuffer.mBufferedRanges -= removedInterval; - MOZ_ASSERT(trackBuffer.mNextInsertionIndex.isNothing() || - trackBuffer.mNextInsertionIndex.ref() <= start); - } + if (intersection.Length()) { + RemoveFrames(aIntervals, trackBuffer, trackBuffer.mNextInsertionIndex.refOr(0)); } // 16. 
Add the coded frame with the presentation timestamp, decode timestamp, and frame duration to the track buffer. - aSample->mTime = presentationTimestamp.ToMicroseconds(); - aSample->mTimecode = decodeTimestamp.ToMicroseconds(); - aSample->mTrackInfo = trackBuffer.mLastInfo; + CheckNextInsertionIndex(aTrackData, + TimeUnit::FromMicroseconds(aSamples[0]->mTime)); - if (data.IsEmpty()) { - data.AppendElement(aSample); - MOZ_ASSERT(aSample->mKeyframe); - trackBuffer.mNextInsertionIndex = Some(data.Length()); - } else if (trackBuffer.mNextInsertionIndex.isSome()) { - data.InsertElementAt(trackBuffer.mNextInsertionIndex.ref(), aSample); - MOZ_ASSERT(trackBuffer.mNextInsertionIndex.ref() == 0 || - data[trackBuffer.mNextInsertionIndex.ref()]->mTrackInfo->GetID() == data[trackBuffer.mNextInsertionIndex.ref()-1]->mTrackInfo->GetID() || - data[trackBuffer.mNextInsertionIndex.ref()]->mKeyframe); - trackBuffer.mNextInsertionIndex.ref()++; - } else if (presentationTimestamp < trackBuffer.mBufferedRanges.GetStart()) { - data.InsertElementAt(0, aSample); - MOZ_ASSERT(aSample->mKeyframe); - trackBuffer.mNextInsertionIndex = Some(size_t(1)); - } else { - // Find which discontinuity we should insert the frame before. - TimeInterval target; - for (const auto& interval : trackBuffer.mBufferedRanges) { - if (presentationTimestamp < interval.mStart) { - target = interval; - break; - } - } - if (target.IsEmpty()) { - // No existing ranges found after our frame presentation time. - // Insert frame at the end of array. 
- data.AppendElement(aSample); - MOZ_ASSERT(data.Length() <= 2 || - data[data.Length()-1]->mTrackInfo->GetID() == data[data.Length()-2]->mTrackInfo->GetID() || - data[data.Length()-1]->mKeyframe); - trackBuffer.mNextInsertionIndex = Some(data.Length()); - } - for (uint32_t i = 0; i < data.Length(); i++) { - const nsRefPtr& sample = data[i]; - TimeInterval sampleInterval{ - TimeUnit::FromMicroseconds(sample->mTime), - TimeUnit::FromMicroseconds(sample->GetEndTime())}; - if (target.Intersects(sampleInterval)) { - data.InsertElementAt(i, aSample); - MOZ_ASSERT(i != 0 && - (data[i]->mTrackInfo->GetID() == data[i-1]->mTrackInfo->GetID() || - data[i]->mKeyframe)); - trackBuffer.mNextInsertionIndex = Some(size_t(i) + 1); - break; - } - } - MOZ_ASSERT(aSample->mKeyframe); - } - trackBuffer.mSizeBuffer += sizeof(*aSample) + aSample->mSize; - - // 17. Set last decode timestamp for track buffer to decode timestamp. - trackBuffer.mLastDecodeTimestamp = Some(decodeTimestamp); - // 18. Set last frame duration for track buffer to frame duration. - trackBuffer.mLastFrameDuration = - Some(TimeUnit::FromMicroseconds(aSample->mDuration)); - - if (trackBuffer.mLongestFrameDuration.isNothing()) { - trackBuffer.mLongestFrameDuration = trackBuffer.mLastFrameDuration; - } else { - trackBuffer.mLongestFrameDuration = - Some(std::max(trackBuffer.mLongestFrameDuration.ref(), - trackBuffer.mLastFrameDuration.ref())); - } - - // 19. If highest end timestamp for track buffer is unset or frame end timestamp is greater than highest end timestamp, then set highest end timestamp for track buffer to frame end timestamp. - if (trackBuffer.mHighestEndTimestamp.isNothing() || - frameEndTimestamp > trackBuffer.mHighestEndTimestamp.ref()) { - trackBuffer.mHighestEndTimestamp = Some(frameEndTimestamp); - } - // 20. If frame end timestamp is greater than group end timestamp, then set group end timestamp equal to frame end timestamp. 
- if (frameEndTimestamp > mGroupEndTimestamp) { - mGroupEndTimestamp = frameEndTimestamp; - } - // 21. If generate timestamps flag equals true, then set timestampOffset equal to frame end timestamp. - if (mParent->mGenerateTimestamp) { - mTimestampOffset = frameEndTimestamp; - } + TrackBuffer& data = trackBuffer.mBuffers.LastElement(); + data.InsertElementsAt(trackBuffer.mNextInsertionIndex.ref(), aSamples); + trackBuffer.mNextInsertionIndex.ref() += aSamples.Length(); // Update our buffered range with new sample interval. // We allow a fuzz factor in our interval of half a frame length, // as fuzz is +/- value, giving an effective leeway of a full frame // length. - trackBuffer.mBufferedRanges += - TimeInterval(presentationTimestamp, frameEndTimestamp, - TimeUnit::FromMicroseconds(aSample->mDuration / 2)); - return false; + TimeIntervals range(aIntervals); + range.SetFuzz(trackBuffer.mLongestFrameDuration.ref() / 2); + trackBuffer.mBufferedRanges += range; +} + +void +TrackBuffersManager::RemoveFrames(const TimeIntervals& aIntervals, + TrackData& aTrackData, + uint32_t aStartIndex) +{ + TrackBuffer& data = aTrackData.mBuffers.LastElement(); + Maybe firstRemovedIndex; + uint32_t lastRemovedIndex; + + // We loop from aStartIndex to avoid removing frames that we inserted earlier + // and part of the current coded frame group. This is allows to handle step + // 14 of the coded frame processing algorithm without having to check the value + // of highest end timestamp: + // "Remove existing coded frames in track buffer: + // If highest end timestamp for track buffer is not set: + // Remove all coded frames from track buffer that have a presentation timestamp greater than or equal to presentation timestamp and less than frame end timestamp. 
+ // If highest end timestamp for track buffer is set and less than or equal to presentation timestamp: + // Remove all coded frames from track buffer that have a presentation timestamp greater than or equal to highest end timestamp and less than frame end timestamp" + for (uint32_t i = aStartIndex; i < data.Length(); i++) { + MediaRawData* sample = data[i].get(); + TimeInterval sampleInterval = + TimeInterval(TimeUnit::FromMicroseconds(sample->mTime), + TimeUnit::FromMicroseconds(sample->GetEndTime())); + if (aIntervals.Contains(sampleInterval)) { + if (firstRemovedIndex.isNothing()) { + firstRemovedIndex = Some(i); + } + lastRemovedIndex = i; + } + } + + if (firstRemovedIndex.isNothing()) { + return; + } + + // Remove decoding dependencies of the coded frames removed in the previous step: + // Remove all coded frames between the coded frames removed in the previous step and the next random access point after those removed frames. + for (uint32_t i = lastRemovedIndex + 1; i < data.Length(); i++) { + MediaRawData* sample = data[i].get(); + if (sample->mKeyframe) { + break; + } + lastRemovedIndex = i; + } + + TimeIntervals removedIntervals; + for (uint32_t i = firstRemovedIndex.ref(); i <= lastRemovedIndex; i++) { + MediaRawData* sample = data[i].get(); + TimeInterval sampleInterval = + TimeInterval(TimeUnit::FromMicroseconds(sample->mTime), + TimeUnit::FromMicroseconds(sample->GetEndTime())); + removedIntervals += sampleInterval; + aTrackData.mSizeBuffer -= sizeof(*sample) + sample->mSize; + } + + MSE_DEBUG("Removing frames from:%u (frames:%u) ([%f, %f))", + firstRemovedIndex.ref(), + lastRemovedIndex - firstRemovedIndex.ref() + 1, + removedIntervals.GetStart().ToSeconds(), + removedIntervals.GetEnd().ToSeconds()); + + if (aTrackData.mNextGetSampleIndex.isSome()) { + if (aTrackData.mNextGetSampleIndex.ref() >= firstRemovedIndex.ref() && + aTrackData.mNextGetSampleIndex.ref() <= lastRemovedIndex) { + MSE_DEBUG("Next sample to be played got evicted"); + 
aTrackData.mNextGetSampleIndex.reset(); + } else if (aTrackData.mNextGetSampleIndex.ref() > lastRemovedIndex) { + aTrackData.mNextGetSampleIndex.ref() -= + lastRemovedIndex - firstRemovedIndex.ref() + 1; + } + } + + if (aTrackData.mNextInsertionIndex.isSome()) { + if (aTrackData.mNextInsertionIndex.ref() > firstRemovedIndex.ref() && + aTrackData.mNextInsertionIndex.ref() <= lastRemovedIndex + 1) { + aTrackData.ResetAppendState(); + } else if (aTrackData.mNextInsertionIndex.ref() > lastRemovedIndex + 1) { + aTrackData.mNextInsertionIndex.ref() -= + lastRemovedIndex - firstRemovedIndex.ref() + 1; + } + } + + // Update our buffered range to exclude the range just removed. + aTrackData.mBufferedRanges -= removedIntervals; + + data.RemoveElementsAt(firstRemovedIndex.ref(), + lastRemovedIndex - firstRemovedIndex.ref() + 1); } void diff --git a/dom/media/mediasource/TrackBuffersManager.h b/dom/media/mediasource/TrackBuffersManager.h index d9f803809cf..4486b7f22a8 100644 --- a/dom/media/mediasource/TrackBuffersManager.h +++ b/dom/media/mediasource/TrackBuffersManager.h @@ -255,7 +255,17 @@ private: } }; - bool ProcessFrame(MediaRawData* aSample, TrackData& aTrackData); + void CheckSequenceDiscontinuity(); + void ProcessFrames(TrackBuffer& aSamples, TrackData& aTrackData); + void CheckNextInsertionIndex(TrackData& aTrackData, + const TimeUnit& aSampleTime); + void InsertFrames(TrackBuffer& aSamples, + const TimeIntervals& aIntervals, + TrackData& aTrackData); + void RemoveFrames(const TimeIntervals& aIntervals, + TrackData& aTrackData, + uint32_t aStartIndex); + void UpdateBufferedRanges(); void RejectProcessing(nsresult aRejectValue, const char* aName); void ResolveProcessing(bool aResolveValue, const char* aName); MediaPromiseRequestHolder mProcessingRequest; @@ -292,6 +302,7 @@ private: } RefPtr mTaskQueue; + TimeInterval mAppendWindow; TimeUnit mTimestampOffset; TimeUnit mLastTimestampOffset; void RestoreCachedVariables(); From 1bfb6b99d3dc21a1ce3eefa82c0860d5f0c73861 
Mon Sep 17 00:00:00 2001 From: Jean-Yves Avenard Date: Mon, 29 Jun 2015 06:46:11 +1200 Subject: [PATCH 20/61] Bug 1171760: P5. Add MediaSourceSamples logging. r=cpearce This allows for MediaSource log to be usable as displaying log samples is extremely verbose. Fix static initialization. --- dom/media/mediasource/MediaSource.cpp | 4 ++-- dom/media/mediasource/SourceBufferResource.cpp | 2 +- dom/media/mediasource/TrackBuffersManager.cpp | 14 +++++++++++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/dom/media/mediasource/MediaSource.cpp b/dom/media/mediasource/MediaSource.cpp index 5c56a715efc..d6b4db85c23 100644 --- a/dom/media/mediasource/MediaSource.cpp +++ b/dom/media/mediasource/MediaSource.cpp @@ -41,7 +41,7 @@ class JSObject; PRLogModuleInfo* GetMediaSourceLog() { - static PRLogModuleInfo* sLogModule; + static PRLogModuleInfo* sLogModule = nullptr; if (!sLogModule) { sLogModule = PR_NewLogModule("MediaSource"); } @@ -50,7 +50,7 @@ PRLogModuleInfo* GetMediaSourceLog() PRLogModuleInfo* GetMediaSourceAPILog() { - static PRLogModuleInfo* sLogModule; + static PRLogModuleInfo* sLogModule = nullptr; if (!sLogModule) { sLogModule = PR_NewLogModule("MediaSource"); } diff --git a/dom/media/mediasource/SourceBufferResource.cpp b/dom/media/mediasource/SourceBufferResource.cpp index 4f91ed3afaa..3f787ecbf18 100644 --- a/dom/media/mediasource/SourceBufferResource.cpp +++ b/dom/media/mediasource/SourceBufferResource.cpp @@ -15,7 +15,7 @@ PRLogModuleInfo* GetSourceBufferResourceLog() { - static PRLogModuleInfo* sLogModule; + static PRLogModuleInfo* sLogModule = nullptr; if (!sLogModule) { sLogModule = PR_NewLogModule("SourceBufferResource"); } diff --git a/dom/media/mediasource/TrackBuffersManager.cpp b/dom/media/mediasource/TrackBuffersManager.cpp index c8745de494f..2fc8cfd9a78 100644 --- a/dom/media/mediasource/TrackBuffersManager.cpp +++ b/dom/media/mediasource/TrackBuffersManager.cpp @@ -20,6 +20,16 @@ extern PRLogModuleInfo* GetMediaSourceLog(); 
#define MSE_DEBUG(arg, ...) MOZ_LOG(GetMediaSourceLog(), mozilla::LogLevel::Debug, ("TrackBuffersManager(%p:%s)::%s: " arg, this, mType.get(), __func__, ##__VA_ARGS__)) #define MSE_DEBUGV(arg, ...) MOZ_LOG(GetMediaSourceLog(), mozilla::LogLevel::Verbose, ("TrackBuffersManager(%p:%s)::%s: " arg, this, mType.get(), __func__, ##__VA_ARGS__)) +PRLogModuleInfo* GetMediaSourceSamplesLog() +{ + static PRLogModuleInfo* sLogModule = nullptr; + if (!sLogModule) { + sLogModule = PR_NewLogModule("MediaSourceSamples"); + } + return sLogModule; +} +#define SAMPLE_DEBUG(arg, ...) MOZ_LOG(GetMediaSourceSamplesLog(), mozilla::LogLevel::Debug, ("TrackBuffersManager(%p:%s)::%s: " arg, this, mType.get(), __func__, ##__VA_ARGS__)) + namespace mozilla { static const char* @@ -1236,7 +1246,7 @@ TrackBuffersManager::ProcessFrames(TrackBuffer& aSamples, TrackData& aTrackData) bool needDiscontinuityCheck = true; for (auto& sample : aSamples) { - MSE_DEBUGV("Processing %s frame(pts:%lld end:%lld, dts:%lld, duration:%lld, " + SAMPLE_DEBUG("Processing %s frame(pts:%lld end:%lld, dts:%lld, duration:%lld, " "kf:%d)", aTrackData.mInfo->mMimeType.get(), sample->mTime, @@ -1875,3 +1885,5 @@ TrackBuffersManager::GetNextRandomAccessPoint(TrackInfo::TrackType aTrack) } #undef MSE_DEBUG +#undef MSE_DEBUGV +#undef SAMPLE_DEBUG From c8e9e025718425535967caf39d26f92f1bba39aa Mon Sep 17 00:00:00 2001 From: Jean-Yves Avenard Date: Mon, 29 Jun 2015 07:31:29 +1200 Subject: [PATCH 21/61] Bug 1171760: P6. Use MediaSourceSamples logging in ContainerParser. 
r=cpearce --- dom/media/mediasource/ContainerParser.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dom/media/mediasource/ContainerParser.cpp b/dom/media/mediasource/ContainerParser.cpp index 3ee7a756e0e..833b2b3e92c 100644 --- a/dom/media/mediasource/ContainerParser.cpp +++ b/dom/media/mediasource/ContainerParser.cpp @@ -19,7 +19,7 @@ #endif #include "SourceBufferResource.h" -extern PRLogModuleInfo* GetMediaSourceLog(); +extern PRLogModuleInfo* GetMediaSourceSamplesLog(); /* Polyfill __func__ on MSVC to pass to the log. */ #ifdef _MSC_VER @@ -28,8 +28,8 @@ extern PRLogModuleInfo* GetMediaSourceLog(); #define STRINGIFY(x) #x #define TOSTRING(x) STRINGIFY(x) -#define MSE_DEBUG(name, arg, ...) MOZ_LOG(GetMediaSourceLog(), mozilla::LogLevel::Debug, (TOSTRING(name) "(%p:%s)::%s: " arg, this, mType.get(), __func__, ##__VA_ARGS__)) -#define MSE_DEBUGV(name, arg, ...) MOZ_LOG(GetMediaSourceLog(), mozilla::LogLevel::Verbose, (TOSTRING(name) "(%p:%s)::%s: " arg, this, mType.get(), __func__, ##__VA_ARGS__)) +#define MSE_DEBUG(name, arg, ...) MOZ_LOG(GetMediaSourceSamplesLog(), mozilla::LogLevel::Debug, (TOSTRING(name) "(%p:%s)::%s: " arg, this, mType.get(), __func__, ##__VA_ARGS__)) +#define MSE_DEBUGV(name, arg, ...) MOZ_LOG(GetMediaSourceSamplesLog(), mozilla::LogLevel::Verbose, (TOSTRING(name) "(%p:%s)::%s: " arg, this, mType.get(), __func__, ##__VA_ARGS__)) namespace mozilla { From 236fc43884bc4a032cd719dff7f113bed7bf0e3b Mon Sep 17 00:00:00 2001 From: Ben Turner Date: Tue, 30 Jun 2015 18:55:53 -0700 Subject: [PATCH 22/61] Bug 1167431 - Kill child processes that misuse PBackground, r=billm. 
--- ipc/glue/BackgroundImpl.cpp | 100 ++++++++++++++++++++++++++++-------- 1 file changed, 79 insertions(+), 21 deletions(-) diff --git a/ipc/glue/BackgroundImpl.cpp b/ipc/glue/BackgroundImpl.cpp index 10db4325826..7c96f485438 100644 --- a/ipc/glue/BackgroundImpl.cpp +++ b/ipc/glue/BackgroundImpl.cpp @@ -292,7 +292,13 @@ private: mLiveActorArray->AppendElement(this); } + already_AddRefed + GetContentParent() const; + // These methods are only called by IPDL. + virtual void + ProcessingError(Result aCode, const char* aReason) override; + virtual IToplevelProtocol* CloneToplevel(const InfallibleTArray& aFds, ProcessHandle aPeerProcess, @@ -1014,27 +1020,7 @@ ParentImpl::GetContentParent(PBackgroundParent* aBackgroundActor) AssertIsOnBackgroundThread(); MOZ_ASSERT(aBackgroundActor); - auto actor = static_cast(aBackgroundActor); - if (actor->mActorDestroyed) { - MOZ_ASSERT(false, "GetContentParent called after ActorDestroy was called!"); - return nullptr; - } - - if (actor->mContent) { - // We need to hand out a reference to our ContentParent but we also need to - // keep the one we have. We can't call AddRef here because ContentParent is - // not threadsafe so instead we dispatch a runnable to the main thread to do - // it for us. This is safe since we are guaranteed that our AddRef runnable - // will run before the reference we hand out can be released, and the - // ContentParent can't die as long as the existing reference is maintained. 
- nsCOMPtr runnable = - NS_NewNonOwningRunnableMethod(actor->mContent, &ContentParent::AddRef); - MOZ_ASSERT(runnable); - - MOZ_ALWAYS_TRUE(NS_SUCCEEDED(NS_DispatchToMainThread(runnable))); - } - - return already_AddRefed(actor->mContent.get()); + return static_cast(aBackgroundActor)->GetContentParent(); } // static @@ -1328,6 +1314,78 @@ ParentImpl::MainThreadActorDestroy() Release(); } +already_AddRefed +ParentImpl::GetContentParent() const +{ + if (mActorDestroyed) { + MOZ_ASSERT(false, "GetContentParent called after ActorDestroy was called!"); + return nullptr; + } + + if (mContent) { + // We need to hand out a reference to our ContentParent but we also need to + // keep the one we have. We can't call AddRef here because ContentParent is + // not threadsafe so instead we dispatch a runnable to the main thread to do + // it for us. This is safe since we are guaranteed that our AddRef runnable + // will run before the reference we hand out can be released, and the + // ContentParent can't die as long as the existing reference is maintained. + nsCOMPtr runnable = + NS_NewNonOwningRunnableMethod(mContent, &ContentParent::AddRef); + MOZ_ASSERT(runnable); + + MOZ_ALWAYS_TRUE(NS_SUCCEEDED(NS_DispatchToMainThread(runnable))); + } + + return already_AddRefed(mContent.get()); +} + +void +ParentImpl::ProcessingError(Result aCode, const char* aReason) +{ + AssertIsInMainProcess(); + AssertIsOnBackgroundThread(); + MOZ_ASSERT(!mActorDestroyed); + + BackgroundParentImpl::ProcessingError(aCode, aReason); + + if (!mIsOtherProcessActor) { + // Warning is about all we can really do here, short of intentionally + // crashing the parent process. + return; + } + + if (aCode == MsgDropped) { + // Ignore this; it just means that the child process can't receive any + // more messages. + return; + } + + nsRefPtr content = GetContentParent(); + if (NS_WARN_IF(!content)) { + return; + } + + // Transfer ownership to the lambda. 
+ ContentParent* owningContent = content.forget().take(); + nsCString owningReason(aReason); + + nsCOMPtr runnable = NS_NewRunnableFunction( + [owningContent, owningReason]() + { + MOZ_ASSERT(NS_IsMainThread()); + + // Transfer ownership back to the stack. + nsRefPtr content = dont_AddRef(owningContent); + MOZ_ASSERT(content); + + content->KillHard(owningReason.get()); + } + ); + MOZ_ASSERT(runnable); + + MOZ_ALWAYS_TRUE(NS_SUCCEEDED(NS_DispatchToMainThread(runnable))); +} + IToplevelProtocol* ParentImpl::CloneToplevel(const InfallibleTArray& aFds, ProcessHandle aPeerProcess, From 22cae9bc6b0826c187f836ec9bf97ff4b35957fc Mon Sep 17 00:00:00 2001 From: Seth Fowler Date: Tue, 30 Jun 2015 18:57:03 -0700 Subject: [PATCH 23/61] Bug 1139641 - Return more information from SurfaceCache::Lookup and SurfaceCache::LookupBestMatch. r=dholbert --- image/FrameAnimator.cpp | 19 +++++++----- image/FrameAnimator.h | 4 +-- image/LookupResult.h | 69 +++++++++++++++++++++++++++++++++++++++++ image/RasterImage.cpp | 48 ++++++++++++++-------------- image/RasterImage.h | 7 +++-- image/SurfaceCache.cpp | 51 +++++++++++++++++------------- image/SurfaceCache.h | 33 +++++++++++--------- image/VectorImage.cpp | 9 +++--- image/imgFrame.h | 4 +++ 9 files changed, 168 insertions(+), 76 deletions(-) create mode 100644 image/LookupResult.h diff --git a/image/FrameAnimator.cpp b/image/FrameAnimator.cpp index 1589792ba17..c4cf5fb3a60 100644 --- a/image/FrameAnimator.cpp +++ b/image/FrameAnimator.cpp @@ -8,6 +8,7 @@ #include "mozilla/MemoryReporting.h" #include "mozilla/Move.h" #include "imgIContainer.h" +#include "LookupResult.h" #include "MainThreadUtils.h" #include "RasterImage.h" @@ -265,25 +266,27 @@ FrameAnimator::GetFirstFrameRefreshArea() const return mFirstFrameRefreshArea; } -DrawableFrameRef +LookupResult FrameAnimator::GetCompositedFrame(uint32_t aFrameNum) { MOZ_ASSERT(aFrameNum != 0, "First frame is never composited"); // If we have a composited version of this frame, return that. 
if (mLastCompositedFrameIndex == int32_t(aFrameNum)) { - return mCompositingFrame->DrawableRef(); + return LookupResult(mCompositingFrame->DrawableRef(), + /* aIsExactMatch = */ true); } // Otherwise return the raw frame. DoBlend is required to ensure that we only // hit this case if the frame is not paletted and doesn't require compositing. - DrawableFrameRef ref = + LookupResult result = SurfaceCache::Lookup(ImageKey(mImage), RasterSurfaceKey(mSize, 0, // Default decode flags. aFrameNum)); - MOZ_ASSERT(!ref || !ref->GetIsPaletted(), "About to return a paletted frame"); - return ref; + MOZ_ASSERT(!result || !result.DrawableRef()->GetIsPaletted(), + "About to return a paletted frame"); + return result; } int32_t @@ -367,13 +370,13 @@ FrameAnimator::CollectSizeOfCompositingSurfaces( RawAccessFrameRef FrameAnimator::GetRawFrame(uint32_t aFrameNum) const { - DrawableFrameRef ref = + LookupResult result = SurfaceCache::Lookup(ImageKey(mImage), RasterSurfaceKey(mSize, 0, // Default decode flags. aFrameNum)); - return ref ? ref->RawAccessRef() - : RawAccessFrameRef(); + return result ? result.DrawableRef()->RawAccessRef() + : RawAccessFrameRef(); } //****************************************************************************** diff --git a/image/FrameAnimator.h b/image/FrameAnimator.h index 00fccf54776..a404d01c714 100644 --- a/image/FrameAnimator.h +++ b/image/FrameAnimator.h @@ -128,10 +128,10 @@ public: /** * If we have a composited frame for @aFrameNum, returns it. Otherwise, - * returns an empty DrawableFrameRef. It is an error to call this method with + * returns an empty LookupResult. It is an error to call this method with * aFrameNum == 0, because the first frame is never composited. */ - DrawableFrameRef GetCompositedFrame(uint32_t aFrameNum); + LookupResult GetCompositedFrame(uint32_t aFrameNum); /* * Returns the frame's adjusted timeout. 
If the animation loops and the diff --git a/image/LookupResult.h b/image/LookupResult.h new file mode 100644 index 00000000000..91fb2b4033a --- /dev/null +++ b/image/LookupResult.h @@ -0,0 +1,69 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * LookupResult is the return type of SurfaceCache's Lookup*() functions. It + * combines a surface with relevant metadata tracked by SurfaceCache. + */ + +#ifndef mozilla_image_LookupResult_h +#define mozilla_image_LookupResult_h + +#include "mozilla/Attributes.h" +#include "mozilla/Move.h" +#include "imgFrame.h" + +namespace mozilla { +namespace image { + +/** + * LookupResult is the return type of SurfaceCache's Lookup*() functions. It + * combines a surface with relevant metadata tracked by SurfaceCache. + */ +class MOZ_STACK_CLASS LookupResult +{ +public: + LookupResult() + : mIsExactMatch(false) + { } + + LookupResult(LookupResult&& aOther) + : mDrawableRef(Move(aOther.mDrawableRef)) + , mIsExactMatch(aOther.mIsExactMatch) + { } + + LookupResult(DrawableFrameRef&& aDrawableRef, bool aIsExactMatch) + : mDrawableRef(Move(aDrawableRef)) + , mIsExactMatch(aIsExactMatch) + { } + + LookupResult& operator=(LookupResult&& aOther) + { + MOZ_ASSERT(&aOther != this, "Self-move-assignment is not supported"); + mDrawableRef = Move(aOther.mDrawableRef); + mIsExactMatch = aOther.mIsExactMatch; + return *this; + } + + DrawableFrameRef& DrawableRef() { return mDrawableRef; } + const DrawableFrameRef& DrawableRef() const { return mDrawableRef; } + + /// @return true if this LookupResult contains a surface. + explicit operator bool() const { return bool(mDrawableRef); } + + /// @return true if the surface is an exact match for the Lookup*() arguments. 
+ bool IsExactMatch() const { return mIsExactMatch; } + +private: + LookupResult(const LookupResult&) = delete; + + DrawableFrameRef mDrawableRef; + bool mIsExactMatch; +}; + +} // namespace image +} // namespace mozilla + +#endif // mozilla_image_LookupResult_h diff --git a/image/RasterImage.cpp b/image/RasterImage.cpp index 06bf5e9395d..603af3d88e0 100644 --- a/image/RasterImage.cpp +++ b/image/RasterImage.cpp @@ -20,6 +20,7 @@ #include "ImageContainer.h" #include "ImageRegion.h" #include "Layers.h" +#include "LookupResult.h" #include "nsPresContext.h" #include "SourceBuffer.h" #include "SurfaceCache.h" @@ -469,7 +470,7 @@ RasterImage::GetType(uint16_t* aType) return NS_OK; } -DrawableFrameRef +LookupResult RasterImage::LookupFrameInternal(uint32_t aFrameNum, const IntSize& aSize, uint32_t aFlags) @@ -522,14 +523,14 @@ RasterImage::LookupFrame(uint32_t aFrameNum, IntSize requestedSize = CanDownscaleDuringDecode(aSize, aFlags) ? aSize : mSize; - DrawableFrameRef ref = LookupFrameInternal(aFrameNum, requestedSize, aFlags); + LookupResult result = LookupFrameInternal(aFrameNum, requestedSize, aFlags); - if (!ref && !mHasSize) { + if (!result && !mHasSize) { // We can't request a decode without knowing our intrinsic size. Give up. return DrawableFrameRef(); } - if (!ref || ref->GetImageSize() != requestedSize) { + if (!result || !result.IsExactMatch()) { // The OS threw this frame away. We need to redecode if we can. MOZ_ASSERT(!mAnim, "Animated frames should be locked"); @@ -537,29 +538,30 @@ RasterImage::LookupFrame(uint32_t aFrameNum, // If we can sync decode, we should already have the frame. if (aFlags & FLAG_SYNC_DECODE) { - ref = LookupFrameInternal(aFrameNum, requestedSize, aFlags); + result = LookupFrameInternal(aFrameNum, requestedSize, aFlags); } } - if (!ref) { + if (!result) { // We still weren't able to get a frame. Give up. 
return DrawableFrameRef(); } - if (ref->GetCompositingFailed()) { + if (result.DrawableRef()->GetCompositingFailed()) { return DrawableFrameRef(); } - MOZ_ASSERT(!ref || !ref->GetIsPaletted(), "Should not have paletted frame"); + MOZ_ASSERT(!result.DrawableRef()->GetIsPaletted(), + "Should not have a paletted frame"); // Sync decoding guarantees that we got the frame, but if it's owned by an // async decoder that's currently running, the contents of the frame may not // be available yet. Make sure we get everything. - if (ref && mHasSourceData && (aFlags & FLAG_SYNC_DECODE)) { - ref->WaitUntilComplete(); + if (mHasSourceData && (aFlags & FLAG_SYNC_DECODE)) { + result.DrawableRef()->WaitUntilComplete(); } - return ref; + return Move(result.DrawableRef()); } uint32_t @@ -1835,19 +1837,19 @@ RasterImage::DrawWithPreDownscaleIfNeeded(DrawableFrameRef&& aFrameRef, DrawableFrameRef frameRef; if (CanScale(aFilter, aSize, aFlags)) { - frameRef = + LookupResult result = SurfaceCache::Lookup(ImageKey(this), RasterSurfaceKey(aSize, DecodeFlags(aFlags), 0)); - if (!frameRef) { + if (!result) { // We either didn't have a matching scaled frame or the OS threw it away. // Request a new one so we'll be ready next time. For now, we'll fall back // to aFrameRef below. RequestScale(aFrameRef.get(), aFlags, aSize); } - if (frameRef && !frameRef->IsImageComplete()) { - frameRef.reset(); // We're still scaling, so we can't use this yet. + if (result && result.DrawableRef()->IsImageComplete()) { + frameRef = Move(result.DrawableRef()); // The scaled version is ready. 
} } @@ -2247,21 +2249,21 @@ RasterImage::OptimalImageSizeForDest(const gfxSize& aDest, uint32_t aWhichFrame, CanDownscaleDuringDecode(destSize, aFlags)) { return destSize; } else if (CanScale(aFilter, destSize, aFlags)) { - DrawableFrameRef frameRef = + LookupResult result = SurfaceCache::Lookup(ImageKey(this), RasterSurfaceKey(destSize, DecodeFlags(aFlags), 0)); - if (frameRef && frameRef->IsImageComplete()) { - return destSize; // We have an existing HQ scale for this size. + if (result && result.DrawableRef()->IsImageComplete()) { + return destSize; // We have an existing HQ scale for this size. } - if (!frameRef) { + if (!result) { // We could HQ scale to this size, but we haven't. Request a scale now. - frameRef = LookupFrame(GetRequestedFrameIndex(aWhichFrame), - mSize, aFlags); - if (frameRef) { - RequestScale(frameRef.get(), aFlags, destSize); + DrawableFrameRef ref = LookupFrame(GetRequestedFrameIndex(aWhichFrame), + mSize, aFlags); + if (ref) { + RequestScale(ref.get(), aFlags, destSize); } } } diff --git a/image/RasterImage.h b/image/RasterImage.h index b579f05df22..cc060345be7 100644 --- a/image/RasterImage.h +++ b/image/RasterImage.h @@ -23,6 +23,7 @@ #include "nsIProperties.h" #include "nsTArray.h" #include "imgFrame.h" +#include "LookupResult.h" #include "nsThreadUtils.h" #include "DecodePool.h" #include "Orientation.h" @@ -301,9 +302,9 @@ private: Pair> GetFrameInternal(uint32_t aWhichFrame, uint32_t aFlags); - DrawableFrameRef LookupFrameInternal(uint32_t aFrameNum, - const gfx::IntSize& aSize, - uint32_t aFlags); + LookupResult LookupFrameInternal(uint32_t aFrameNum, + const gfx::IntSize& aSize, + uint32_t aFlags); DrawableFrameRef LookupFrame(uint32_t aFrameNum, const nsIntSize& aSize, uint32_t aFlags); diff --git a/image/SurfaceCache.cpp b/image/SurfaceCache.cpp index a919ad1c20a..5d0b3850343 100644 --- a/image/SurfaceCache.cpp +++ b/image/SurfaceCache.cpp @@ -25,6 +25,7 @@ #include "gfxPrefs.h" #include "imgFrame.h" #include "Image.h" +#include 
"LookupResult.h" #include "nsAutoPtr.h" #include "nsExpirationTracker.h" #include "nsHashKeys.h" @@ -543,17 +544,17 @@ public: "More available cost than we started with"); } - DrawableFrameRef Lookup(const ImageKey aImageKey, - const SurfaceKey& aSurfaceKey) + LookupResult Lookup(const ImageKey aImageKey, + const SurfaceKey& aSurfaceKey) { nsRefPtr cache = GetImageCache(aImageKey); if (!cache) { - return DrawableFrameRef(); // No cached surfaces for this image. + return LookupResult(); // No cached surfaces for this image. } nsRefPtr surface = cache->Lookup(aSurfaceKey); if (!surface) { - return DrawableFrameRef(); // Lookup in the per-image cache missed. + return LookupResult(); // Lookup in the per-image cache missed. } DrawableFrameRef ref = surface->DrawableRef(); @@ -561,7 +562,7 @@ public: // The surface was released by the operating system. Remove the cache // entry as well. Remove(surface); - return DrawableFrameRef(); + return LookupResult(); } if (cache->IsLocked()) { @@ -570,16 +571,16 @@ public: mExpirationTracker.MarkUsed(surface); } - return ref; + return LookupResult(Move(ref), /* aIsExactMatch = */ true); } - DrawableFrameRef LookupBestMatch(const ImageKey aImageKey, - const SurfaceKey& aSurfaceKey, - const Maybe& aAlternateFlags) + LookupResult LookupBestMatch(const ImageKey aImageKey, + const SurfaceKey& aSurfaceKey, + const Maybe& aAlternateFlags) { nsRefPtr cache = GetImageCache(aImageKey); if (!cache) { - return DrawableFrameRef(); // No cached surfaces for this image. + return LookupResult(); // No cached surfaces for this image. } // Repeatedly look up the best match, trying again if the resulting surface @@ -593,7 +594,7 @@ public: while (true) { surface = cache->LookupBestMatch(aSurfaceKey, aAlternateFlags); if (!surface) { - return DrawableFrameRef(); // Lookup in the per-image cache missed. + return LookupResult(); // Lookup in the per-image cache missed. 
} ref = surface->DrawableRef(); @@ -612,7 +613,15 @@ public: mExpirationTracker.MarkUsed(surface); } - return ref; + SurfaceKey key = surface->GetSurfaceKey(); + const bool isExactMatch = key.Size() == aSurfaceKey.Size(); + + MOZ_ASSERT(isExactMatch == + (key == aSurfaceKey || + (aAlternateFlags && key == aSurfaceKey.WithNewFlags(*aAlternateFlags))), + "Result differs in a way other than size or alternate flags"); + + return LookupResult(Move(ref), isExactMatch); } void RemoveSurface(const ImageKey aImageKey, @@ -970,34 +979,34 @@ SurfaceCache::Shutdown() sInstance = nullptr; } -/* static */ DrawableFrameRef +/* static */ LookupResult SurfaceCache::Lookup(const ImageKey aImageKey, const SurfaceKey& aSurfaceKey, const Maybe& aAlternateFlags /* = Nothing() */) { if (!sInstance) { - return DrawableFrameRef(); + return LookupResult(); } MutexAutoLock lock(sInstance->GetMutex()); - DrawableFrameRef ref = sInstance->Lookup(aImageKey, aSurfaceKey); - if (!ref && aAlternateFlags) { - ref = sInstance->Lookup(aImageKey, - aSurfaceKey.WithNewFlags(*aAlternateFlags)); + LookupResult result = sInstance->Lookup(aImageKey, aSurfaceKey); + if (!result && aAlternateFlags) { + result = sInstance->Lookup(aImageKey, + aSurfaceKey.WithNewFlags(*aAlternateFlags)); } - return ref; + return result; } -/* static */ DrawableFrameRef +/* static */ LookupResult SurfaceCache::LookupBestMatch(const ImageKey aImageKey, const SurfaceKey& aSurfaceKey, const Maybe& aAlternateFlags /* = Nothing() */) { if (!sInstance) { - return DrawableFrameRef(); + return LookupResult(); } MutexAutoLock lock(sInstance->GetMutex()); diff --git a/image/SurfaceCache.h b/image/SurfaceCache.h index bd600d17e71..cedf38d0837 100644 --- a/image/SurfaceCache.h +++ b/image/SurfaceCache.h @@ -24,9 +24,9 @@ namespace mozilla { namespace image { -class DrawableFrameRef; class Image; class imgFrame; +class LookupResult; struct SurfaceMemoryCounter; /* @@ -174,7 +174,7 @@ struct SurfaceCache * * If the imgFrame was found in the 
cache, but had stored its surface in a * volatile buffer which was discarded by the OS, then it is automatically - * removed from the cache and an empty DrawableFrameRef is returned. Note that + * removed from the cache and an empty LookupResult is returned. Note that * this will never happen to persistent surfaces associated with a locked * image; the cache keeps a strong reference to such surfaces internally. * @@ -190,14 +190,13 @@ struct SurfaceCache * than calling Lookup() twice, which requires taking a * lock each time. * - * @return a DrawableFrameRef to the imgFrame wrapping the - * requested surface, or an empty DrawableFrameRef if - * not found. + * @return a LookupResult, which will either contain a + * DrawableFrameRef to the requested surface, or an + * empty DrawableFrameRef if the surface was not found. */ - static DrawableFrameRef Lookup(const ImageKey aImageKey, - const SurfaceKey& aSurfaceKey, - const Maybe& aAlternateFlags - = Nothing()); + static LookupResult Lookup(const ImageKey aImageKey, + const SurfaceKey& aSurfaceKey, + const Maybe& aAlternateFlags = Nothing()); /** * Looks up the best matching surface in the cache and returns a drawable @@ -216,13 +215,17 @@ struct SurfaceCache * acceptable to the caller. This is much more * efficient than calling LookupBestMatch() twice. * - * @return a DrawableFrameRef to the imgFrame wrapping a surface similar to - * the requested surface, or an empty DrawableFrameRef if not found. + * @return a LookupResult, which will either contain a + * DrawableFrameRef to a surface similar to the + * requested surface, or an empty DrawableFrameRef if + * the surface was not found. Callers can use + * LookupResult::IsExactMatch() to check whether the + * returned surface exactly matches @aSurfaceKey. 
*/ - static DrawableFrameRef LookupBestMatch(const ImageKey aImageKey, - const SurfaceKey& aSurfaceKey, - const Maybe& aAlternateFlags - = Nothing()); + static LookupResult LookupBestMatch(const ImageKey aImageKey, + const SurfaceKey& aSurfaceKey, + const Maybe& aAlternateFlags + = Nothing()); /** * Insert a surface into the cache. If a surface with the same ImageKey and diff --git a/image/VectorImage.cpp b/image/VectorImage.cpp index 469ea1b9fcc..5c60a7ce5fd 100644 --- a/image/VectorImage.cpp +++ b/image/VectorImage.cpp @@ -27,6 +27,7 @@ #include "nsSVGEffects.h" // for nsSVGRenderingObserver #include "nsWindowMemoryReporter.h" #include "ImageRegion.h" +#include "LookupResult.h" #include "Orientation.h" #include "SVGDocumentWrapper.h" #include "nsIDOMEventListener.h" @@ -829,18 +830,18 @@ VectorImage::Draw(gfxContext* aContext, return DrawResult::SUCCESS; } - DrawableFrameRef frameRef = + LookupResult result = SurfaceCache::Lookup(ImageKey(this), VectorSurfaceKey(params.size, params.svgContext, params.animationTime)); // Draw. 
- if (frameRef) { - RefPtr surface = frameRef->GetSurface(); + if (result) { + RefPtr surface = result.DrawableRef()->GetSurface(); if (surface) { nsRefPtr svgDrawable = - new gfxSurfaceDrawable(surface, frameRef->GetSize()); + new gfxSurfaceDrawable(surface, result.DrawableRef()->GetSize()); Show(svgDrawable, params); return DrawResult::SUCCESS; } diff --git a/image/imgFrame.h b/image/imgFrame.h index 43b212edb10..460ff837b81 100644 --- a/image/imgFrame.h +++ b/image/imgFrame.h @@ -444,6 +444,8 @@ public: } private: + DrawableFrameRef(const DrawableFrameRef& aOther) = delete; + nsRefPtr mFrame; VolatileBufferPtr mRef; }; @@ -526,6 +528,8 @@ public: } private: + RawAccessFrameRef(const RawAccessFrameRef& aOther) = delete; + nsRefPtr mFrame; }; From 8012bf37885f5b015426213737df2873fed889e5 Mon Sep 17 00:00:00 2001 From: Ben Turner Date: Tue, 30 Jun 2015 19:45:58 -0700 Subject: [PATCH 24/61] Backout 316c0601d7db (bug 1167431) for static analysis bustage on CLOSED TREE --- ipc/glue/BackgroundImpl.cpp | 100 ++++++++---------------------------- 1 file changed, 21 insertions(+), 79 deletions(-) diff --git a/ipc/glue/BackgroundImpl.cpp b/ipc/glue/BackgroundImpl.cpp index 7c96f485438..10db4325826 100644 --- a/ipc/glue/BackgroundImpl.cpp +++ b/ipc/glue/BackgroundImpl.cpp @@ -292,13 +292,7 @@ private: mLiveActorArray->AppendElement(this); } - already_AddRefed - GetContentParent() const; - // These methods are only called by IPDL. 
- virtual void - ProcessingError(Result aCode, const char* aReason) override; - virtual IToplevelProtocol* CloneToplevel(const InfallibleTArray& aFds, ProcessHandle aPeerProcess, @@ -1020,7 +1014,27 @@ ParentImpl::GetContentParent(PBackgroundParent* aBackgroundActor) AssertIsOnBackgroundThread(); MOZ_ASSERT(aBackgroundActor); - return static_cast(aBackgroundActor)->GetContentParent(); + auto actor = static_cast(aBackgroundActor); + if (actor->mActorDestroyed) { + MOZ_ASSERT(false, "GetContentParent called after ActorDestroy was called!"); + return nullptr; + } + + if (actor->mContent) { + // We need to hand out a reference to our ContentParent but we also need to + // keep the one we have. We can't call AddRef here because ContentParent is + // not threadsafe so instead we dispatch a runnable to the main thread to do + // it for us. This is safe since we are guaranteed that our AddRef runnable + // will run before the reference we hand out can be released, and the + // ContentParent can't die as long as the existing reference is maintained. + nsCOMPtr runnable = + NS_NewNonOwningRunnableMethod(actor->mContent, &ContentParent::AddRef); + MOZ_ASSERT(runnable); + + MOZ_ALWAYS_TRUE(NS_SUCCEEDED(NS_DispatchToMainThread(runnable))); + } + + return already_AddRefed(actor->mContent.get()); } // static @@ -1314,78 +1328,6 @@ ParentImpl::MainThreadActorDestroy() Release(); } -already_AddRefed -ParentImpl::GetContentParent() const -{ - if (mActorDestroyed) { - MOZ_ASSERT(false, "GetContentParent called after ActorDestroy was called!"); - return nullptr; - } - - if (mContent) { - // We need to hand out a reference to our ContentParent but we also need to - // keep the one we have. We can't call AddRef here because ContentParent is - // not threadsafe so instead we dispatch a runnable to the main thread to do - // it for us. 
This is safe since we are guaranteed that our AddRef runnable - // will run before the reference we hand out can be released, and the - // ContentParent can't die as long as the existing reference is maintained. - nsCOMPtr runnable = - NS_NewNonOwningRunnableMethod(mContent, &ContentParent::AddRef); - MOZ_ASSERT(runnable); - - MOZ_ALWAYS_TRUE(NS_SUCCEEDED(NS_DispatchToMainThread(runnable))); - } - - return already_AddRefed(mContent.get()); -} - -void -ParentImpl::ProcessingError(Result aCode, const char* aReason) -{ - AssertIsInMainProcess(); - AssertIsOnBackgroundThread(); - MOZ_ASSERT(!mActorDestroyed); - - BackgroundParentImpl::ProcessingError(aCode, aReason); - - if (!mIsOtherProcessActor) { - // Warning is about all we can really do here, short of intentionally - // crashing the parent process. - return; - } - - if (aCode == MsgDropped) { - // Ignore this; it just means that the child process can't receive any - // more messages. - return; - } - - nsRefPtr content = GetContentParent(); - if (NS_WARN_IF(!content)) { - return; - } - - // Transfer ownership to the lambda. - ContentParent* owningContent = content.forget().take(); - nsCString owningReason(aReason); - - nsCOMPtr runnable = NS_NewRunnableFunction( - [owningContent, owningReason]() - { - MOZ_ASSERT(NS_IsMainThread()); - - // Transfer ownership back to the stack. - nsRefPtr content = dont_AddRef(owningContent); - MOZ_ASSERT(content); - - content->KillHard(owningReason.get()); - } - ); - MOZ_ASSERT(runnable); - - MOZ_ALWAYS_TRUE(NS_SUCCEEDED(NS_DispatchToMainThread(runnable))); -} - IToplevelProtocol* ParentImpl::CloneToplevel(const InfallibleTArray& aFds, ProcessHandle aPeerProcess, From 0f319b82883581474c03d69f6de6ecb5dd4b0ad8 Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Wed, 1 Jul 2015 05:07:06 +0200 Subject: [PATCH 25/61] Bug 1141994 - implement Atomics.isLockFree. 
r=h4writer r=luke --- js/src/asmjs/AsmJSLink.cpp | 1 + js/src/asmjs/AsmJSModule.h | 3 +- js/src/asmjs/AsmJSValidate.cpp | 24 ++++++- js/src/builtin/AtomicsObject.cpp | 14 ++++ js/src/builtin/AtomicsObject.h | 1 + js/src/jit-test/tests/asm.js/testAtomics.js | 72 ++++++++++++++++++++ js/src/jit-test/tests/atomics/basic-tests.js | 37 ++++++++++ js/src/jit/AtomicOperations-inl.h | 17 +++++ js/src/jit/AtomicOperations.h | 13 +++- js/src/jit/CodeGenerator.cpp | 25 +++++++ js/src/jit/CodeGenerator.h | 1 + js/src/jit/IonBuilder.h | 1 + js/src/jit/LIR-Common.h | 13 ++++ js/src/jit/LOpcodes.h | 1 + js/src/jit/Lowering.cpp | 6 ++ js/src/jit/Lowering.h | 1 + js/src/jit/MCallOptimize.cpp | 20 ++++++ js/src/jit/MIR.cpp | 17 +++++ js/src/jit/MIR.h | 36 ++++++++++ js/src/jit/MOpcodes.h | 1 + js/src/jit/Recover.cpp | 26 +++++++ js/src/jit/Recover.h | 15 +++- js/src/vm/Interpreter.cpp | 12 ++++ js/src/vm/Interpreter.h | 3 + 24 files changed, 355 insertions(+), 5 deletions(-) diff --git a/js/src/asmjs/AsmJSLink.cpp b/js/src/asmjs/AsmJSLink.cpp index bd991f9a28f..04dcbc49493 100644 --- a/js/src/asmjs/AsmJSLink.cpp +++ b/js/src/asmjs/AsmJSLink.cpp @@ -425,6 +425,7 @@ ValidateAtomicsBuiltinFunction(JSContext* cx, AsmJSModule::Global& global, Handl case AsmJSAtomicsBuiltin_and: native = atomics_and; break; case AsmJSAtomicsBuiltin_or: native = atomics_or; break; case AsmJSAtomicsBuiltin_xor: native = atomics_xor; break; + case AsmJSAtomicsBuiltin_isLockFree: native = atomics_isLockFree; break; } if (!IsNativeFunction(v, native)) diff --git a/js/src/asmjs/AsmJSModule.h b/js/src/asmjs/AsmJSModule.h index ed573143296..e19a071c080 100644 --- a/js/src/asmjs/AsmJSModule.h +++ b/js/src/asmjs/AsmJSModule.h @@ -77,7 +77,8 @@ enum AsmJSAtomicsBuiltinFunction AsmJSAtomicsBuiltin_sub, AsmJSAtomicsBuiltin_and, AsmJSAtomicsBuiltin_or, - AsmJSAtomicsBuiltin_xor + AsmJSAtomicsBuiltin_xor, + AsmJSAtomicsBuiltin_isLockFree }; // Set of known global object SIMD's attributes, i.e. 
types diff --git a/js/src/asmjs/AsmJSValidate.cpp b/js/src/asmjs/AsmJSValidate.cpp index bb5f5c5e483..78fe82df9ee 100644 --- a/js/src/asmjs/AsmJSValidate.cpp +++ b/js/src/asmjs/AsmJSValidate.cpp @@ -35,6 +35,7 @@ #include "asmjs/AsmJSSignalHandlers.h" #include "builtin/SIMD.h" #include "frontend/Parser.h" +#include "jit/AtomicOperations.h" #include "jit/CodeGenerator.h" #include "jit/CompileWrappers.h" #include "jit/MIR.h" @@ -49,6 +50,7 @@ #include "frontend/ParseNode-inl.h" #include "frontend/Parser-inl.h" +#include "jit/AtomicOperations-inl.h" #include "jit/MacroAssembler-inl.h" using namespace js; @@ -1540,7 +1542,8 @@ class MOZ_STACK_CLASS ModuleCompiler !addStandardLibraryAtomicsName("sub", AsmJSAtomicsBuiltin_sub) || !addStandardLibraryAtomicsName("and", AsmJSAtomicsBuiltin_and) || !addStandardLibraryAtomicsName("or", AsmJSAtomicsBuiltin_or) || - !addStandardLibraryAtomicsName("xor", AsmJSAtomicsBuiltin_xor)) + !addStandardLibraryAtomicsName("xor", AsmJSAtomicsBuiltin_xor) || + !addStandardLibraryAtomicsName("isLockFree", AsmJSAtomicsBuiltin_isLockFree)) { return false; } @@ -5037,6 +5040,23 @@ CheckAtomicsBinop(FunctionCompiler& f, ParseNode* call, MDefinition** def, Type* return true; } +static bool +CheckAtomicsIsLockFree(FunctionCompiler& f, ParseNode* call, MDefinition** def, Type* type) +{ + if (CallArgListLength(call) != 1) + return f.fail(call, "Atomics.isLockFree must be passed 1 argument"); + + ParseNode* sizeArg = CallArgList(call); + + uint32_t size; + if (!IsLiteralInt(f.m(), sizeArg, &size)) + return f.fail(sizeArg, "Atomics.isLockFree requires an integer literal argument"); + + *def = f.constant(Int32Value(AtomicOperations::isLockfree(size)), Type::Int); + *type = Type::Int; + return true; +} + static bool CheckAtomicsCompareExchange(FunctionCompiler& f, ParseNode* call, MDefinition** def, Type* type) { @@ -5102,6 +5122,8 @@ CheckAtomicsBuiltinCall(FunctionCompiler& f, ParseNode* callNode, AsmJSAtomicsBu return CheckAtomicsBinop(f, callNode, 
resultDef, resultType, AtomicFetchOrOp); case AsmJSAtomicsBuiltin_xor: return CheckAtomicsBinop(f, callNode, resultDef, resultType, AtomicFetchXorOp); + case AsmJSAtomicsBuiltin_isLockFree: + return CheckAtomicsIsLockFree(f, callNode, resultDef, resultType); default: MOZ_CRASH("unexpected atomicsBuiltin function"); } diff --git a/js/src/builtin/AtomicsObject.cpp b/js/src/builtin/AtomicsObject.cpp index 07965225ab3..17de24521b5 100644 --- a/js/src/builtin/AtomicsObject.cpp +++ b/js/src/builtin/AtomicsObject.cpp @@ -507,6 +507,19 @@ js::atomics_xor(JSContext* cx, unsigned argc, Value* vp) return atomics_binop_impl(cx, args.get(0), args.get(1), args.get(2), args.rval()); } +bool +js::atomics_isLockFree(JSContext* cx, unsigned argc, Value* vp) +{ + CallArgs args = CallArgsFromVp(argc, vp); + HandleValue v = args.get(0); + if (!v.isInt32()) { + args.rval().setBoolean(false); + return true; + } + args.rval().setBoolean(jit::AtomicOperations::isLockfree(v.toInt32())); + return true; +} + // asm.js callouts for platforms that do not have non-word-sized // atomics where we don't want to inline the logic for the atomics. 
// @@ -1195,6 +1208,7 @@ const JSFunctionSpec AtomicsMethods[] = { JS_FN("and", atomics_and, 3,0), JS_FN("or", atomics_or, 3,0), JS_FN("xor", atomics_xor, 3,0), + JS_FN("isLockFree", atomics_isLockFree, 1,0), JS_FN("futexWait", atomics_futexWait, 4,0), JS_FN("futexWake", atomics_futexWake, 3,0), JS_FN("futexWakeOrRequeue", atomics_futexWakeOrRequeue, 5,0), diff --git a/js/src/builtin/AtomicsObject.h b/js/src/builtin/AtomicsObject.h index ef8ae926c5b..21e78c1d043 100644 --- a/js/src/builtin/AtomicsObject.h +++ b/js/src/builtin/AtomicsObject.h @@ -40,6 +40,7 @@ bool atomics_sub(JSContext* cx, unsigned argc, Value* vp); bool atomics_and(JSContext* cx, unsigned argc, Value* vp); bool atomics_or(JSContext* cx, unsigned argc, Value* vp); bool atomics_xor(JSContext* cx, unsigned argc, Value* vp); +bool atomics_isLockFree(JSContext* cx, unsigned argc, Value* vp); bool atomics_futexWait(JSContext* cx, unsigned argc, Value* vp); bool atomics_futexWake(JSContext* cx, unsigned argc, Value* vp); bool atomics_futexWakeOrRequeue(JSContext* cx, unsigned argc, Value* vp); diff --git a/js/src/jit-test/tests/asm.js/testAtomics.js b/js/src/jit-test/tests/asm.js/testAtomics.js index 4b222d14cf1..db383c6331e 100644 --- a/js/src/jit-test/tests/asm.js/testAtomics.js +++ b/js/src/jit-test/tests/asm.js/testAtomics.js @@ -498,4 +498,76 @@ assertEq(i8m.and_i(80001), 0); assertEq(i8m.add_i(80001), 0); assertEq(i8m.sub_i(80001), 0); +function loadModule_misc(stdlib, foreign, heap) { + "use asm"; + + var atomic_isLockFree = stdlib.Atomics.isLockFree; + + function ilf1() { + return atomic_isLockFree(1)|0; + } + + function ilf2() { + return atomic_isLockFree(2)|0; + } + + function ilf3() { + return atomic_isLockFree(3)|0; + } + + function ilf4() { + return atomic_isLockFree(4)|0; + } + + function ilf5() { + return atomic_isLockFree(5)|0; + } + + function ilf6() { + return atomic_isLockFree(6)|0; + } + + function ilf7() { + return atomic_isLockFree(7)|0; + } + + function ilf8() { + return 
atomic_isLockFree(8)|0; + } + + function ilf9() { + return atomic_isLockFree(9)|0; + } + + return { ilf1: ilf1, + ilf2: ilf2, + ilf3: ilf3, + ilf4: ilf4, + ilf5: ilf5, + ilf6: ilf6, + ilf7: ilf7, + ilf8: ilf8, + ilf9: ilf9 }; +} + +if (isAsmJSCompilationAvailable()) + assertEq(isAsmJSModule(loadModule_misc), true); + +function test_misc(heap) { + var misc = loadModule_misc(this, {}, heap); + + assertEq(misc.ilf1(), 1); + assertEq(misc.ilf2(), 1); + assertEq(misc.ilf3(), 0); + assertEq(misc.ilf4(), 1); + assertEq(misc.ilf5(), 0); + assertEq(misc.ilf6(), 0); + assertEq(misc.ilf7(), 0); + var v = misc.ilf8(); + assertEq(v === 0 || v === 1, true); + assertEq(misc.ilf9(), 0); +} + +test_misc(heap); + print("Done"); diff --git a/js/src/jit-test/tests/atomics/basic-tests.js b/js/src/jit-test/tests/atomics/basic-tests.js index edb5d9b8a32..7b807110814 100644 --- a/js/src/jit-test/tests/atomics/basic-tests.js +++ b/js/src/jit-test/tests/atomics/basic-tests.js @@ -333,6 +333,40 @@ function testUint32(a) { assertEq(sum, k); } +var sizes = [ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12]; +var answers = [ true, true, false, true, false, false, false, {}, + false, false, false, false]; + +function testIsLockFree() { + var saved8 = "Invalid"; + + // This ought to defeat most compile-time resolution. + for ( var i=0 ; i < sizes.length ; i++ ) { + var v = Atomics.isLockFree(sizes[i]); + var a = answers[i]; + assertEq(typeof v, 'boolean'); + if (typeof a == 'boolean') + assertEq(v, a); + else + saved8 = v; + } + + // This ought to be optimizable. 
+ assertEq(Atomics.isLockFree(1), true); + assertEq(Atomics.isLockFree(2), true); + assertEq(Atomics.isLockFree(3), false); + assertEq(Atomics.isLockFree(4), true); + assertEq(Atomics.isLockFree(5), false); + assertEq(Atomics.isLockFree(6), false); + assertEq(Atomics.isLockFree(7), false); + assertEq(Atomics.isLockFree(8), saved8); + assertEq(Atomics.isLockFree(9), false); + assertEq(Atomics.isLockFree(10), false); + assertEq(Atomics.isLockFree(11), false); + assertEq(Atomics.isLockFree(12), false); +} + function isLittleEndian() { var xxx = new ArrayBuffer(2); var xxa = new Int16Array(xxx); @@ -417,6 +451,9 @@ function runTests() { testUint8Extremes(new SharedUint8Array(sab)); testInt16Extremes(new SharedInt16Array(sab)); testUint32(new SharedUint32Array(sab)); + + // Misc + testIsLockFree(); } if (this.Atomics && this.SharedArrayBuffer && this.SharedInt32Array) diff --git a/js/src/jit/AtomicOperations-inl.h b/js/src/jit/AtomicOperations-inl.h index 5683ee3e9c7..7ae0ce06bf6 100644 --- a/js/src/jit/AtomicOperations-inl.h +++ b/js/src/jit/AtomicOperations-inl.h @@ -21,4 +21,21 @@ # error "Atomic operations must be defined for this platform" #endif +inline bool +js::jit::AtomicOperations::isLockfree(int32_t size) +{ + // Keep this in sync with visitAtomicIsLockFree() in jit/CodeGenerator.cpp. + + switch (size) { + case 1: + case 2: + case 4: + return true; + case 8: + return AtomicOperations::isLockfree8(); + default: + return false; + } +} + #endif // jit_AtomicOperations_inl_h diff --git a/js/src/jit/AtomicOperations.h b/js/src/jit/AtomicOperations.h index 367aa615e95..de34447cee1 100644 --- a/js/src/jit/AtomicOperations.h +++ b/js/src/jit/AtomicOperations.h @@ -35,8 +35,14 @@ class AtomicOperations { public: - // Execute a full memory barrier (LoadLoad+LoadStore+StoreLoad+StoreStore). - static inline void fenceSeqCst(); + // Test lock-freedom for any integer value. 
+ // + // This implements a platform-independent pattern, as follows: + // + // 1, 2, and 4 bytes are always lock free, lock-freedom for 8 + // bytes is determined by the platform's isLockfree8(), and there + // is no lock-freedom for any other values on any platform. + static inline bool isLockfree(int32_t n); // If the return value is true then a call to the 64-bit (8-byte) // routines below will work, otherwise those functions will assert in @@ -45,6 +51,9 @@ class AtomicOperations // during a run. static inline bool isLockfree8(); + // Execute a full memory barrier (LoadLoad+LoadStore+StoreLoad+StoreStore). + static inline void fenceSeqCst(); + // The following functions are defined for T = int8_t, uint8_t, // int16_t, uint16_t, int32_t, uint32_t, int64_t, and uint64_t diff --git a/js/src/jit/CodeGenerator.cpp b/js/src/jit/CodeGenerator.cpp index 34027f91442..7d5f87ad01d 100644 --- a/js/src/jit/CodeGenerator.cpp +++ b/js/src/jit/CodeGenerator.cpp @@ -22,6 +22,7 @@ #include "builtin/TypedObject.h" #include "gc/Nursery.h" #include "irregexp/NativeRegExpMacroAssembler.h" +#include "jit/AtomicOperations.h" #include "jit/BaselineCompiler.h" #include "jit/IonBuilder.h" #include "jit/IonCaches.h" @@ -39,6 +40,7 @@ #include "jsboolinlines.h" +#include "jit/AtomicOperations-inl.h" #include "jit/MacroAssembler-inl.h" #include "jit/shared/CodeGenerator-shared-inl.h" #include "vm/Interpreter-inl.h" @@ -9231,6 +9233,29 @@ CodeGenerator::visitStoreTypedArrayElementHole(LStoreTypedArrayElementHole* lir) masm.bind(&skip); } +void +CodeGenerator::visitAtomicIsLockFree(LAtomicIsLockFree* lir) +{ + Register value = ToRegister(lir->value()); + Register output = ToRegister(lir->output()); + + // Keep this in sync with isLockfree() in jit/AtomicOperations-inl.h. 
+ + Label Ldone, Lfailed; + masm.move32(Imm32(1), output); + if (AtomicOperations::isLockfree8()) + masm.branch32(Assembler::Equal, value, Imm32(8), &Ldone); + else + masm.branch32(Assembler::Equal, value, Imm32(8), &Lfailed); + masm.branch32(Assembler::Equal, value, Imm32(4), &Ldone); + masm.branch32(Assembler::Equal, value, Imm32(2), &Ldone); + masm.branch32(Assembler::Equal, value, Imm32(1), &Ldone); + if (!AtomicOperations::isLockfree8()) + masm.bind(&Lfailed); + masm.move32(Imm32(0), output); + masm.bind(&Ldone); +} + void CodeGenerator::visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement* lir) { diff --git a/js/src/jit/CodeGenerator.h b/js/src/jit/CodeGenerator.h index a09358b1ff5..708288c5582 100644 --- a/js/src/jit/CodeGenerator.h +++ b/js/src/jit/CodeGenerator.h @@ -278,6 +278,7 @@ class CodeGenerator : public CodeGeneratorSpecific void visitLoadTypedArrayElementHole(LLoadTypedArrayElementHole* lir); void visitStoreUnboxedScalar(LStoreUnboxedScalar* lir); void visitStoreTypedArrayElementHole(LStoreTypedArrayElementHole* lir); + void visitAtomicIsLockFree(LAtomicIsLockFree* lir); void visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement* lir); void visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop* lir); void visitAtomicTypedArrayElementBinopForEffect(LAtomicTypedArrayElementBinopForEffect* lir); diff --git a/js/src/jit/IonBuilder.h b/js/src/jit/IonBuilder.h index a6c5ea6fdae..90ef34d3060 100644 --- a/js/src/jit/IonBuilder.h +++ b/js/src/jit/IonBuilder.h @@ -781,6 +781,7 @@ class IonBuilder InliningStatus inlineAtomicsStore(CallInfo& callInfo); InliningStatus inlineAtomicsFence(CallInfo& callInfo); InliningStatus inlineAtomicsBinop(CallInfo& callInfo, JSFunction* target); + InliningStatus inlineAtomicsIsLockFree(CallInfo& callInfo); // Slot intrinsics. 
InliningStatus inlineUnsafeSetReservedSlot(CallInfo& callInfo); diff --git a/js/src/jit/LIR-Common.h b/js/src/jit/LIR-Common.h index 9bdcf837cab..9c55ed26568 100644 --- a/js/src/jit/LIR-Common.h +++ b/js/src/jit/LIR-Common.h @@ -5108,6 +5108,19 @@ class LStoreTypedArrayElementStatic : public LInstructionHelper<0, 2, 0> } }; +class LAtomicIsLockFree : public LInstructionHelper<1, 1, 0> +{ + public: + LIR_HEADER(AtomicIsLockFree) + + explicit LAtomicIsLockFree(const LAllocation& value) { + setOperand(0, value); + } + const LAllocation* value() { + return getOperand(0); + } +}; + class LCompareExchangeTypedArrayElement : public LInstructionHelper<1, 4, 1> { public: diff --git a/js/src/jit/LOpcodes.h b/js/src/jit/LOpcodes.h index 692a405b2ca..24b80b3b9bf 100644 --- a/js/src/jit/LOpcodes.h +++ b/js/src/jit/LOpcodes.h @@ -246,6 +246,7 @@ _(LoadTypedArrayElementStatic) \ _(StoreTypedArrayElementHole) \ _(StoreTypedArrayElementStatic) \ + _(AtomicIsLockFree) \ _(CompareExchangeTypedArrayElement) \ _(AtomicTypedArrayElementBinop) \ _(AtomicTypedArrayElementBinopForEffect) \ diff --git a/js/src/jit/Lowering.cpp b/js/src/jit/Lowering.cpp index a78c11bc351..b1968bb40ca 100644 --- a/js/src/jit/Lowering.cpp +++ b/js/src/jit/Lowering.cpp @@ -4207,6 +4207,12 @@ LIRGenerator::visitDebugger(MDebugger* ins) add(lir, ins); } +void +LIRGenerator::visitAtomicIsLockFree(MAtomicIsLockFree* ins) +{ + define(new(alloc()) LAtomicIsLockFree(useRegister(ins->input())), ins); +} + static void SpewResumePoint(MBasicBlock* block, MInstruction* ins, MResumePoint* resumePoint) { diff --git a/js/src/jit/Lowering.h b/js/src/jit/Lowering.h index b4f2fefad49..be27a8a0bc1 100644 --- a/js/src/jit/Lowering.h +++ b/js/src/jit/Lowering.h @@ -300,6 +300,7 @@ class LIRGenerator : public LIRGeneratorSpecific void visitDebugger(MDebugger* ins); void visitNewTarget(MNewTarget* ins); void visitArrowNewTarget(MArrowNewTarget* ins); + void visitAtomicIsLockFree(MAtomicIsLockFree* ins); }; } // namespace jit diff 
--git a/js/src/jit/MCallOptimize.cpp b/js/src/jit/MCallOptimize.cpp index bf647952df6..81a7fb94831 100644 --- a/js/src/jit/MCallOptimize.cpp +++ b/js/src/jit/MCallOptimize.cpp @@ -71,6 +71,8 @@ IonBuilder::inlineNativeCall(CallInfo& callInfo, JSFunction* target) { return inlineAtomicsBinop(callInfo, target); } + if (native == atomics_isLockFree) + return inlineAtomicsIsLockFree(callInfo); // Array natives. if (native == ArrayConstructor) @@ -2955,6 +2957,24 @@ IonBuilder::inlineAtomicsBinop(CallInfo& callInfo, JSFunction* target) return InliningStatus_Inlined; } +IonBuilder::InliningStatus +IonBuilder::inlineAtomicsIsLockFree(CallInfo& callInfo) +{ + if (callInfo.argc() != 1 || callInfo.constructing()) { + trackOptimizationOutcome(TrackedOutcome::CantInlineNativeBadForm); + return InliningStatus_NotInlined; + } + + callInfo.setImplicitlyUsedUnchecked(); + + MAtomicIsLockFree* ilf = + MAtomicIsLockFree::New(alloc(), callInfo.getArg(0)); + current->add(ilf); + current->push(ilf); + + return InliningStatus_Inlined; +} + bool IonBuilder::atomicsMeetsPreconditions(CallInfo& callInfo, Scalar::Type* arrayType, AtomicCheckResult checkResult) diff --git a/js/src/jit/MIR.cpp b/js/src/jit/MIR.cpp index e5f0b0cf115..fb27e95f99a 100644 --- a/js/src/jit/MIR.cpp +++ b/js/src/jit/MIR.cpp @@ -15,6 +15,7 @@ #include "jslibmath.h" #include "jsstr.h" +#include "jit/AtomicOperations.h" #include "jit/BaselineInspector.h" #include "jit/IonBuilder.h" #include "jit/JitSpewer.h" @@ -25,6 +26,8 @@ #include "jsatominlines.h" #include "jsobjinlines.h" +#include "jit/AtomicOperations-inl.h" + using namespace js; using namespace js::jit; @@ -1211,6 +1214,20 @@ MMathFunction::foldsTo(TempAllocator& alloc) return MConstant::New(alloc, DoubleValue(out)); } +MDefinition* +MAtomicIsLockFree::foldsTo(TempAllocator& alloc) +{ + MDefinition* input = getOperand(0); + if (!input->isConstantValue()) + return this; + + Value val = input->constantValue(); + if (!val.isInt32()) + return this; + + return 
MConstant::New(alloc, BooleanValue(AtomicOperations::isLockfree(val.toInt32()))); +} + MParameter* MParameter::New(TempAllocator& alloc, int32_t index, TemporaryTypeSet* types) { diff --git a/js/src/jit/MIR.h b/js/src/jit/MIR.h index 6d8133624fa..a60e00997ec 100644 --- a/js/src/jit/MIR.h +++ b/js/src/jit/MIR.h @@ -12879,6 +12879,42 @@ class MMemoryBarrier } }; +class MAtomicIsLockFree + : public MUnaryInstruction, + public ConvertToInt32Policy<0>::Data +{ + explicit MAtomicIsLockFree(MDefinition* value) + : MUnaryInstruction(value) + { + setResultType(MIRType_Boolean); + setMovable(); + } + + public: + INSTRUCTION_HEADER(AtomicIsLockFree) + + static MAtomicIsLockFree* New(TempAllocator& alloc, MDefinition* value) { + return new(alloc) MAtomicIsLockFree(value); + } + + MDefinition* foldsTo(TempAllocator& alloc) override; + + AliasSet getAliasSet() const override { + return AliasSet::None(); + } + + bool congruentTo(const MDefinition* ins) const override { + return congruentIfOperandsEqual(ins); + } + + bool writeRecoverData(CompactBufferWriter& writer) const override; + bool canRecoverOnBailout() const override { + return true; + } + + ALLOW_CLONE(MAtomicIsLockFree) +}; + class MCompareExchangeTypedArrayElement : public MAryInstruction<4>, public Mix4Policy, IntPolicy<1>, IntPolicy<2>, IntPolicy<3>>::Data diff --git a/js/src/jit/MOpcodes.h b/js/src/jit/MOpcodes.h index 7eebfadc78f..af4d856afd8 100644 --- a/js/src/jit/MOpcodes.h +++ b/js/src/jit/MOpcodes.h @@ -206,6 +206,7 @@ namespace jit { _(LoadTypedArrayElementStatic) \ _(StoreTypedArrayElementHole) \ _(StoreTypedArrayElementStatic) \ + _(AtomicIsLockFree) \ _(CompareExchangeTypedArrayElement) \ _(AtomicTypedArrayElementBinop) \ _(EffectiveAddress) \ diff --git a/js/src/jit/Recover.cpp b/js/src/jit/Recover.cpp index eb048adf4e0..54a2e4bbfdc 100644 --- a/js/src/jit/Recover.cpp +++ b/js/src/jit/Recover.cpp @@ -1496,3 +1496,29 @@ bool RStringReplace::recover(JSContext* cx, SnapshotIterator& iter) const 
iter.storeInstructionResult(result); return true; } + +bool +MAtomicIsLockFree::writeRecoverData(CompactBufferWriter& writer) const +{ + MOZ_ASSERT(canRecoverOnBailout()); + writer.writeUnsigned(uint32_t(RInstruction::Recover_AtomicIsLockFree)); + return true; +} + +RAtomicIsLockFree::RAtomicIsLockFree(CompactBufferReader& reader) +{ } + +bool +RAtomicIsLockFree::recover(JSContext* cx, SnapshotIterator& iter) const +{ + RootedValue operand(cx, iter.read()); + MOZ_ASSERT(operand.isInt32()); + + int32_t result; + if (!js::AtomicIsLockFree(cx, operand, &result)) + return false; + + RootedValue rootedResult(cx, js::Int32Value(result)); + iter.storeInstructionResult(rootedResult); + return true; +} diff --git a/js/src/jit/Recover.h b/js/src/jit/Recover.h index 81473c07dba..5dedbc56c86 100644 --- a/js/src/jit/Recover.h +++ b/js/src/jit/Recover.h @@ -35,7 +35,7 @@ namespace jit { // For each MIR instruction where |canRecoverOnBailout| might return true, we // have a RInstruction of the same name. // -// Recover instructions are encoded by code generator into a compact buffer +// Recover instructions are encoded by the code generator into a compact buffer // (RecoverWriter). 
The MIR instruction method |writeRecoverData| should write a // tag in the |CompactBufferWriter| which is used by // |RInstruction::readRecoverData| to dispatch to the right Recover @@ -104,6 +104,7 @@ namespace jit { _(SimdBox) \ _(ObjectState) \ _(ArrayState) \ + _(AtomicIsLockFree) \ _(AssertRecoveredOnBailout) class RResumePoint; @@ -773,6 +774,18 @@ class RArrayState final : public RInstruction bool recover(JSContext* cx, SnapshotIterator& iter) const; }; +class RAtomicIsLockFree final : public RInstruction +{ + public: + RINSTRUCTION_HEADER_(AtomicIsLockFree) + + virtual uint32_t numOperands() const { + return 1; + } + + bool recover(JSContext* cx, SnapshotIterator& iter) const; +}; + class RAssertRecoveredOnBailout final : public RInstruction { public: diff --git a/js/src/vm/Interpreter.cpp b/js/src/vm/Interpreter.cpp index a20c9577613..06194a17a13 100644 --- a/js/src/vm/Interpreter.cpp +++ b/js/src/vm/Interpreter.cpp @@ -33,6 +33,7 @@ #include "jsstr.h" #include "builtin/Eval.h" +#include "jit/AtomicOperations.h" #include "jit/BaselineJIT.h" #include "jit/Ion.h" #include "jit/IonAnalysis.h" @@ -47,6 +48,7 @@ #include "jsfuninlines.h" #include "jsscriptinlines.h" +#include "jit/AtomicOperations-inl.h" #include "jit/JitFrames-inl.h" #include "vm/Debugger-inl.h" #include "vm/NativeObject-inl.h" @@ -4433,6 +4435,16 @@ js::UrshValues(JSContext* cx, MutableHandleValue lhs, MutableHandleValue rhs, Mu return UrshOperation(cx, lhs, rhs, res); } +bool +js::AtomicIsLockFree(JSContext* cx, HandleValue in, int* out) +{ + int i; + if (!ToInt32(cx, in, &i)) + return false; + *out = js::jit::AtomicOperations::isLockfree(i); + return true; +} + bool js::DeleteNameOperation(JSContext* cx, HandlePropertyName name, HandleObject scopeObj, MutableHandleValue res) diff --git a/js/src/vm/Interpreter.h b/js/src/vm/Interpreter.h index 2ed6fc23ad5..2940843d664 100644 --- a/js/src/vm/Interpreter.h +++ b/js/src/vm/Interpreter.h @@ -394,6 +394,9 @@ ModValues(JSContext* cx, 
MutableHandleValue lhs, MutableHandleValue rhs, Mutable bool UrshValues(JSContext* cx, MutableHandleValue lhs, MutableHandleValue rhs, MutableHandleValue res); +bool +AtomicIsLockFree(JSContext* cx, HandleValue in, int* out); + template bool DeletePropertyJit(JSContext* ctx, HandleValue val, HandlePropertyName name, bool* bv); From 78e2eff30c7cc3d0a4cc9497ad95cc98c8208bc6 Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Wed, 1 Jul 2015 05:07:09 +0200 Subject: [PATCH 26/61] Bug 1155176 - correct return types for atomics. r=luke --- js/src/asmjs/AsmJSValidate.cpp | 8 +- .../tests/asm.js/testAtomic-effect.js | 3 + js/src/jit-test/tests/asm.js/testAtomics.js | 78 ++++++++++--------- .../jit-test/tests/asm.js/testBug1155176.js | 49 ++++++++++++ .../jit-test/tests/asm.js/testBug1164391.js | 5 +- 5 files changed, 102 insertions(+), 41 deletions(-) create mode 100644 js/src/jit-test/tests/asm.js/testBug1155176.js diff --git a/js/src/asmjs/AsmJSValidate.cpp b/js/src/asmjs/AsmJSValidate.cpp index 78fe82df9ee..4458eb3611e 100644 --- a/js/src/asmjs/AsmJSValidate.cpp +++ b/js/src/asmjs/AsmJSValidate.cpp @@ -4970,7 +4970,7 @@ CheckAtomicsLoad(FunctionCompiler& f, ParseNode* call, MDefinition** def, Type* PrepareArrayIndex(f, &pointerDef, needsBoundsCheck, mask); *def = f.atomicLoadHeap(viewType, pointerDef, needsBoundsCheck); - *type = Type::Signed; + *type = Type::Intish; return true; } @@ -5004,7 +5004,7 @@ CheckAtomicsStore(FunctionCompiler& f, ParseNode* call, MDefinition** def, Type* f.atomicStoreHeap(viewType, pointerDef, rhsDef, needsBoundsCheck); *def = rhsDef; - *type = Type::Signed; + *type = rhsType; return true; } @@ -5036,7 +5036,7 @@ CheckAtomicsBinop(FunctionCompiler& f, ParseNode* call, MDefinition** def, Type* PrepareArrayIndex(f, &pointerDef, needsBoundsCheck, mask); *def = f.atomicBinopHeap(op, viewType, pointerDef, valueArgDef, needsBoundsCheck); - *type = Type::Signed; + *type = Type::Intish; return true; } @@ -5095,7 +5095,7 @@ 
CheckAtomicsCompareExchange(FunctionCompiler& f, ParseNode* call, MDefinition** *def = f.atomicCompareExchangeHeap(viewType, pointerDef, oldValueArgDef, newValueArgDef, needsBoundsCheck); - *type = Type::Signed; + *type = Type::Intish; return true; } diff --git a/js/src/jit-test/tests/asm.js/testAtomic-effect.js b/js/src/jit-test/tests/asm.js/testAtomic-effect.js index 2cabd206327..632e104fa84 100644 --- a/js/src/jit-test/tests/asm.js/testAtomic-effect.js +++ b/js/src/jit-test/tests/asm.js/testAtomic-effect.js @@ -30,6 +30,9 @@ function m(stdlib, ffi, heap) return {add_sharedEv:add_sharedEv}; } +if (isAsmJSCompilationAvailable()) + assertEq(isAsmJSModule(m), true); + var x; var sab = new SharedArrayBuffer(65536); diff --git a/js/src/jit-test/tests/asm.js/testAtomics.js b/js/src/jit-test/tests/asm.js/testAtomics.js index db383c6331e..0f988bb55ad 100644 --- a/js/src/jit-test/tests/asm.js/testAtomics.js +++ b/js/src/jit-test/tests/asm.js/testAtomics.js @@ -24,7 +24,7 @@ function loadModule_int32(stdlib, foreign, heap) { // Load element 0 function do_load() { var v = 0; - v = atomic_load(i32a, 0); + v = atomic_load(i32a, 0)|0; return v|0; } @@ -32,14 +32,14 @@ function loadModule_int32(stdlib, foreign, heap) { function do_load_i(i) { i = i|0; var v = 0; - v = atomic_load(i32a, i>>2); + v = atomic_load(i32a, i>>2)|0; return v|0; } // Store 37 in element 0 function do_store() { var v = 0; - v = atomic_store(i32a, 0, 37); + v = atomic_store(i32a, 0, 37)|0; return v|0; } @@ -47,14 +47,14 @@ function loadModule_int32(stdlib, foreign, heap) { function do_store_i(i) { i = i|0; var v = 0; - v = atomic_store(i32a, i>>2, 37); + v = atomic_store(i32a, i>>2, 37)|0; return v|0; } // Add 37 to element 10 function do_add() { var v = 0; - v = atomic_add(i32a, 10, 37); + v = atomic_add(i32a, 10, 37)|0; return v|0; } @@ -62,14 +62,14 @@ function loadModule_int32(stdlib, foreign, heap) { function do_add_i(i) { i = i|0; var v = 0; - v = atomic_add(i32a, i>>2, 37); + v = atomic_add(i32a, 
i>>2, 37)|0; return v|0; } // Subtract 148 from element 20 function do_sub() { var v = 0; - v = atomic_sub(i32a, 20, 148); + v = atomic_sub(i32a, 20, 148)|0; return v|0; } @@ -77,14 +77,14 @@ function loadModule_int32(stdlib, foreign, heap) { function do_sub_i(i) { i = i|0; var v = 0; - v = atomic_sub(i32a, i>>2, 148); + v = atomic_sub(i32a, i>>2, 148)|0; return v|0; } // AND 0x33333333 into element 30 function do_and() { var v = 0; - v = atomic_and(i32a, 30, 0x33333333); + v = atomic_and(i32a, 30, 0x33333333)|0; return v|0; } @@ -92,14 +92,14 @@ function loadModule_int32(stdlib, foreign, heap) { function do_and_i(i) { i = i|0; var v = 0; - v = atomic_and(i32a, i>>2, 0x33333333); + v = atomic_and(i32a, i>>2, 0x33333333)|0; return v|0; } // OR 0x33333333 into element 40 function do_or() { var v = 0; - v = atomic_or(i32a, 40, 0x33333333); + v = atomic_or(i32a, 40, 0x33333333)|0; return v|0; } @@ -107,14 +107,14 @@ function loadModule_int32(stdlib, foreign, heap) { function do_or_i(i) { i = i|0; var v = 0; - v = atomic_or(i32a, i>>2, 0x33333333); + v = atomic_or(i32a, i>>2, 0x33333333)|0; return v|0; } // XOR 0x33333333 into element 50 function do_xor() { var v = 0; - v = atomic_xor(i32a, 50, 0x33333333); + v = atomic_xor(i32a, 50, 0x33333333)|0; return v|0; } @@ -122,21 +122,21 @@ function loadModule_int32(stdlib, foreign, heap) { function do_xor_i(i) { i = i|0; var v = 0; - v = atomic_xor(i32a, i>>2, 0x33333333); + v = atomic_xor(i32a, i>>2, 0x33333333)|0; return v|0; } // CAS element 100: 0 -> -1 function do_cas1() { var v = 0; - v = atomic_cmpxchg(i32a, 100, 0, -1); + v = atomic_cmpxchg(i32a, 100, 0, -1)|0; return v|0; } // CAS element 100: -1 -> 0x5A5A5A5A function do_cas2() { var v = 0; - v = atomic_cmpxchg(i32a, 100, -1, 0x5A5A5A5A); + v = atomic_cmpxchg(i32a, 100, -1, 0x5A5A5A5A)|0; return v|0; } @@ -144,7 +144,7 @@ function loadModule_int32(stdlib, foreign, heap) { function do_cas1_i(i) { i = i|0; var v = 0; - v = atomic_cmpxchg(i32a, i>>2, 0, -1); + v = 
atomic_cmpxchg(i32a, i>>2, 0, -1)|0; return v|0; } @@ -152,7 +152,7 @@ function loadModule_int32(stdlib, foreign, heap) { function do_cas2_i(i) { i = i|0; var v = 0; - v = atomic_cmpxchg(i32a, i>>2, -1, 0x5A5A5A5A); + v = atomic_cmpxchg(i32a, i>>2, -1, 0x5A5A5A5A)|0; return v|0; } @@ -177,6 +177,9 @@ function loadModule_int32(stdlib, foreign, heap) { cas2_i: do_cas2_i }; } +if (isAsmJSCompilationAvailable()) + assertEq(isAsmJSModule(loadModule_int32), true); + function loadModule_int8(stdlib, foreign, heap) { "use asm"; @@ -194,7 +197,7 @@ function loadModule_int8(stdlib, foreign, heap) { // Load element 0 function do_load() { var v = 0; - v = atomic_load(i8a, 0); + v = atomic_load(i8a, 0)|0; return v|0; } @@ -202,14 +205,14 @@ function loadModule_int8(stdlib, foreign, heap) { function do_load_i(i) { i = i|0; var v = 0; - v = atomic_load(i8a, i); + v = atomic_load(i8a, i)|0; return v|0; } // Store 37 in element 0 function do_store() { var v = 0; - v = atomic_store(i8a, 0, 37); + v = atomic_store(i8a, 0, 37)|0; return v|0; } @@ -217,14 +220,14 @@ function loadModule_int8(stdlib, foreign, heap) { function do_store_i(i) { i = i|0; var v = 0; - v = atomic_store(i8a, i, 37); + v = atomic_store(i8a, i, 37)|0; return v|0; } // Add 37 to element 10 function do_add() { var v = 0; - v = atomic_add(i8a, 10, 37); + v = atomic_add(i8a, 10, 37)|0; return v|0; } @@ -232,14 +235,14 @@ function loadModule_int8(stdlib, foreign, heap) { function do_add_i(i) { i = i|0; var v = 0; - v = atomic_add(i8a, i, 37); + v = atomic_add(i8a, i, 37)|0; return v|0; } // Subtract 108 from element 20 function do_sub() { var v = 0; - v = atomic_sub(i8a, 20, 108); + v = atomic_sub(i8a, 20, 108)|0; return v|0; } @@ -247,14 +250,14 @@ function loadModule_int8(stdlib, foreign, heap) { function do_sub_i(i) { i = i|0; var v = 0; - v = atomic_sub(i8a, i, 108); + v = atomic_sub(i8a, i, 108)|0; return v|0; } // AND 0x33 into element 30 function do_and() { var v = 0; - v = atomic_and(i8a, 30, 0x33); + v = 
atomic_and(i8a, 30, 0x33)|0; return v|0; } @@ -262,14 +265,14 @@ function loadModule_int8(stdlib, foreign, heap) { function do_and_i(i) { i = i|0; var v = 0; - v = atomic_and(i8a, i, 0x33); + v = atomic_and(i8a, i, 0x33)|0; return v|0; } // OR 0x33 into element 40 function do_or() { var v = 0; - v = atomic_or(i8a, 40, 0x33); + v = atomic_or(i8a, 40, 0x33)|0; return v|0; } @@ -277,14 +280,14 @@ function loadModule_int8(stdlib, foreign, heap) { function do_or_i(i) { i = i|0; var v = 0; - v = atomic_or(i8a, i, 0x33); + v = atomic_or(i8a, i, 0x33)|0; return v|0; } // XOR 0x33 into element 50 function do_xor() { var v = 0; - v = atomic_xor(i8a, 50, 0x33); + v = atomic_xor(i8a, 50, 0x33)|0; return v|0; } @@ -292,21 +295,21 @@ function loadModule_int8(stdlib, foreign, heap) { function do_xor_i(i) { i = i|0; var v = 0; - v = atomic_xor(i8a, i, 0x33); + v = atomic_xor(i8a, i, 0x33)|0; return v|0; } // CAS element 100: 0 -> -1 function do_cas1() { var v = 0; - v = atomic_cmpxchg(i8a, 100, 0, -1); + v = atomic_cmpxchg(i8a, 100, 0, -1)|0; return v|0; } // CAS element 100: -1 -> 0x5A function do_cas2() { var v = 0; - v = atomic_cmpxchg(i8a, 100, -1, 0x5A); + v = atomic_cmpxchg(i8a, 100, -1, 0x5A)|0; return v|0; } @@ -314,7 +317,7 @@ function loadModule_int8(stdlib, foreign, heap) { function do_cas1_i(i) { i = i|0; var v = 0; - v = atomic_cmpxchg(i8a, i, 0, -1); + v = atomic_cmpxchg(i8a, i, 0, -1)|0; return v|0; } @@ -322,7 +325,7 @@ function loadModule_int8(stdlib, foreign, heap) { function do_cas2_i(i) { i = i|0; var v = 0; - v = atomic_cmpxchg(i8a, i, -1, 0x5A); + v = atomic_cmpxchg(i8a, i, -1, 0x5A)|0; return v|0; } @@ -346,6 +349,9 @@ function loadModule_int8(stdlib, foreign, heap) { cas2_i: do_cas2_i }; } +if (isAsmJSCompilationAvailable()) + assertEq(isAsmJSModule(loadModule_int8), true); + // TODO: halfword arrays // TODO: signed vs unsigned; negative results diff --git a/js/src/jit-test/tests/asm.js/testBug1155176.js b/js/src/jit-test/tests/asm.js/testBug1155176.js new 
file mode 100644 index 00000000000..ad545df84f3 --- /dev/null +++ b/js/src/jit-test/tests/asm.js/testBug1155176.js @@ -0,0 +1,49 @@ +if (!this.SharedArrayBuffer || !isAsmJSCompilationAvailable()) + quit(0); + +// The way this is constructed, either the first module does not +// verify as asm.js (if the >>>0 is left off, which was legal prior to +// bug 1155176), or the results of the two modules have to be equal. + +function m(stdlib, ffi, heap) { + "use asm"; + + var view = new stdlib.SharedUint32Array(heap); + var cas = stdlib.Atomics.compareExchange; + var hi = ffi.hi; + + function run() { + hi(+(cas(view, 37, 0, 0)>>>0)); + } + + return run; +} + +assertEq(isAsmJSModule(m), true); + +function nonm(stdlib, ffi, heap) { + + var view = new stdlib.SharedUint32Array(heap); + var cas = stdlib.Atomics.compareExchange; + var hi = ffi.hi; + + function run() { + hi(+cas(view, 37, 0, 0)); + } + + return run; +} + +var sab = new SharedArrayBuffer(65536); +var ua = new SharedUint32Array(sab); +var results = []; +var mrun = m(this, {hi: function (x) { results.push(x) }}, sab); +var nonmrun = nonm(this, {hi: function (x) { results.push(x) }}, sab); + +ua[37] = 0x80000001; + +mrun(); +nonmrun(); + +assertEq(results[0], ua[37]); +assertEq(results[0], results[1]); diff --git a/js/src/jit-test/tests/asm.js/testBug1164391.js b/js/src/jit-test/tests/asm.js/testBug1164391.js index 6acdd88b2c0..7a160753ba7 100644 --- a/js/src/jit-test/tests/asm.js/testBug1164391.js +++ b/js/src/jit-test/tests/asm.js/testBug1164391.js @@ -14,7 +14,10 @@ function m(stdlib, ffi, heap) { } return {add_sharedEv:add_sharedEv}; } + +if (isAsmJSCompilationAvailable()) + assertEq(isAsmJSModule(m), true); + var sab = new SharedArrayBuffer(65536); var {add_sharedEv} = m(this, {}, sab); add_sharedEv(sab.byteLength); - From 737be459978b8b5202ff3f2677e95b82a1a42341 Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Wed, 1 Jul 2015 05:07:12 +0200 Subject: [PATCH 27/61] Bug 1175494 - comprehensive atomics tests for 
asm.js. r=luke --- js/src/jit-test/tests/asm.js/testAtomics.js | 1252 +++++++++++++++++-- 1 file changed, 1132 insertions(+), 120 deletions(-) diff --git a/js/src/jit-test/tests/asm.js/testAtomics.js b/js/src/jit-test/tests/asm.js/testAtomics.js index 0f988bb55ad..439cf96f1b2 100644 --- a/js/src/jit-test/tests/asm.js/testAtomics.js +++ b/js/src/jit-test/tests/asm.js/testAtomics.js @@ -2,6 +2,9 @@ if (!this.SharedArrayBuffer || !this.SharedInt32Array || !this.Atomics) quit(); +// The code duplication below is very far from elegant but provides +// flexibility that comes in handy several places. + function loadModule_int32(stdlib, foreign, heap) { "use asm"; @@ -180,6 +183,833 @@ function loadModule_int32(stdlib, foreign, heap) { if (isAsmJSCompilationAvailable()) assertEq(isAsmJSModule(loadModule_int32), true); +function test_int32(heap) { + var i32a = new SharedInt32Array(heap); + var i32m = loadModule_int32(this, {}, heap); + + var size = SharedInt32Array.BYTES_PER_ELEMENT; + + i32m.fence(); + + i32a[0] = 12345; + assertEq(i32m.load(), 12345); + assertEq(i32m.load_i(size*0), 12345); + + assertEq(i32m.store(), 37); + assertEq(i32a[0], 37); + assertEq(i32m.store_i(size*0), 37); + + i32a[10] = 18; + assertEq(i32m.add(), 18); + assertEq(i32a[10], 18+37); + assertEq(i32m.add_i(size*10), 18+37); + assertEq(i32a[10], 18+37+37); + + i32a[20] = 4972; + assertEq(i32m.sub(), 4972); + assertEq(i32a[20], 4972 - 148); + assertEq(i32m.sub_i(size*20), 4972 - 148); + assertEq(i32a[20], 4972 - 148 - 148); + + i32a[30] = 0x66666666; + assertEq(i32m.and(), 0x66666666); + assertEq(i32a[30], 0x22222222); + i32a[30] = 0x66666666; + assertEq(i32m.and_i(size*30), 0x66666666); + assertEq(i32a[30], 0x22222222); + + i32a[40] = 0x22222222; + assertEq(i32m.or(), 0x22222222); + assertEq(i32a[40], 0x33333333); + i32a[40] = 0x22222222; + assertEq(i32m.or_i(size*40), 0x22222222); + assertEq(i32a[40], 0x33333333); + + i32a[50] = 0x22222222; + assertEq(i32m.xor(), 0x22222222); + assertEq(i32a[50], 
0x11111111); + i32a[50] = 0x22222222; + assertEq(i32m.xor_i(size*50), 0x22222222); + assertEq(i32a[50], 0x11111111); + + i32a[100] = 0; + assertEq(i32m.cas1(), 0); + assertEq(i32m.cas2(), -1); + assertEq(i32a[100], 0x5A5A5A5A); + + i32a[100] = 0; + assertEq(i32m.cas1_i(size*100), 0); + assertEq(i32m.cas2_i(size*100), -1); + assertEq(i32a[100], 0x5A5A5A5A); + + // Out-of-bounds accesses. + + var oob = (heap.byteLength * 2) & ~7; + + assertEq(i32m.cas1_i(oob), 0); + assertEq(i32m.cas2_i(oob), 0); + + assertEq(i32m.or_i(oob), 0); + assertEq(i32m.xor_i(oob), 0); + assertEq(i32m.and_i(oob), 0); + assertEq(i32m.add_i(oob), 0); + assertEq(i32m.sub_i(oob), 0); +} + +function loadModule_uint32(stdlib, foreign, heap) { + "use asm"; + + var atomic_fence = stdlib.Atomics.fence; + var atomic_load = stdlib.Atomics.load; + var atomic_store = stdlib.Atomics.store; + var atomic_cmpxchg = stdlib.Atomics.compareExchange; + var atomic_add = stdlib.Atomics.add; + var atomic_sub = stdlib.Atomics.sub; + var atomic_and = stdlib.Atomics.and; + var atomic_or = stdlib.Atomics.or; + var atomic_xor = stdlib.Atomics.xor; + + var i32a = new stdlib.SharedUint32Array(heap); + + // Load element 0 + function do_load() { + var v = 0; + v = atomic_load(i32a, 0)|0; + return +(v>>>0); + } + + // Load element i + function do_load_i(i) { + i = i|0; + var v = 0; + v = atomic_load(i32a, i>>2)|0; + return +(v>>>0); + } + + // Store 37 in element 0 + function do_store() { + var v = 0; + v = atomic_store(i32a, 0, 37)|0; + return +(v>>>0); + } + + // Store 37 in element i + function do_store_i(i) { + i = i|0; + var v = 0; + v = atomic_store(i32a, i>>2, 37)|0; + return +(v>>>0); + } + + // Add 37 to element 10 + function do_add() { + var v = 0; + v = atomic_add(i32a, 10, 37)|0; + return +(v>>>0); + } + + // Add 37 to element i + function do_add_i(i) { + i = i|0; + var v = 0; + v = atomic_add(i32a, i>>2, 37)|0; + return +(v>>>0); + } + + // Subtract 148 from element 20 + function do_sub() { + var v = 0; + v = 
atomic_sub(i32a, 20, 148)|0; + return +(v>>>0); + } + + // Subtract 148 from element i + function do_sub_i(i) { + i = i|0; + var v = 0; + v = atomic_sub(i32a, i>>2, 148)|0; + return +(v>>>0); + } + + // AND 0x33333333 into element 30 + function do_and() { + var v = 0; + v = atomic_and(i32a, 30, 0x33333333)|0; + return +(v>>>0); + } + + // AND 0x33333333 into element i + function do_and_i(i) { + i = i|0; + var v = 0; + v = atomic_and(i32a, i>>2, 0x33333333)|0; + return +(v>>>0); + } + + // OR 0x33333333 into element 40 + function do_or() { + var v = 0; + v = atomic_or(i32a, 40, 0x33333333)|0; + return +(v>>>0); + } + + // OR 0x33333333 into element i + function do_or_i(i) { + i = i|0; + var v = 0; + v = atomic_or(i32a, i>>2, 0x33333333)|0; + return +(v>>>0); + } + + // XOR 0x33333333 into element 50 + function do_xor() { + var v = 0; + v = atomic_xor(i32a, 50, 0x33333333)|0; + return +(v>>>0); + } + + // XOR 0x33333333 into element i + function do_xor_i(i) { + i = i|0; + var v = 0; + v = atomic_xor(i32a, i>>2, 0x33333333)|0; + return +(v>>>0); + } + + // CAS element 100: 0 -> -1 + function do_cas1() { + var v = 0; + v = atomic_cmpxchg(i32a, 100, 0, -1)|0; + return +(v>>>0); + } + + // CAS element 100: -1 -> 0x5A5A5A5A + function do_cas2() { + var v = 0; + v = atomic_cmpxchg(i32a, 100, -1, 0x5A5A5A5A)|0; + return +(v>>>0); + } + + // CAS element i: 0 -> -1 + function do_cas1_i(i) { + i = i|0; + var v = 0; + v = atomic_cmpxchg(i32a, i>>2, 0, -1)|0; + return +(v>>>0); + } + + // CAS element i: -1 -> 0x5A5A5A5A + function do_cas2_i(i) { + i = i|0; + var v = 0; + v = atomic_cmpxchg(i32a, i>>2, -1, 0x5A5A5A5A)|0; + return +(v>>>0); + } + + return { load: do_load, + load_i: do_load_i, + store: do_store, + store_i: do_store_i, + add: do_add, + add_i: do_add_i, + sub: do_sub, + sub_i: do_sub_i, + and: do_and, + and_i: do_and_i, + or: do_or, + or_i: do_or_i, + xor: do_xor, + xor_i: do_xor_i, + cas1: do_cas1, + cas2: do_cas2, + cas1_i: do_cas1_i, + cas2_i: do_cas2_i }; +} + 
+if (isAsmJSCompilationAvailable()) + assertEq(isAsmJSModule(loadModule_uint32), true); + +function test_uint32(heap) { + var i32a = new SharedUint32Array(heap); + var i32m = loadModule_uint32(this, {}, heap); + + var size = SharedUint32Array.BYTES_PER_ELEMENT; + + i32a[0] = 12345; + assertEq(i32m.load(), 12345); + assertEq(i32m.load_i(size*0), 12345); + + assertEq(i32m.store(), 37); + assertEq(i32a[0], 37); + assertEq(i32m.store_i(size*0), 37); + + i32a[10] = 18; + assertEq(i32m.add(), 18); + assertEq(i32a[10], 18+37); + assertEq(i32m.add_i(size*10), 18+37); + assertEq(i32a[10], 18+37+37); + + i32a[20] = 4972; + assertEq(i32m.sub(), 4972); + assertEq(i32a[20], 4972 - 148); + assertEq(i32m.sub_i(size*20), 4972 - 148); + assertEq(i32a[20], 4972 - 148 - 148); + + i32a[30] = 0x66666666; + assertEq(i32m.and(), 0x66666666); + assertEq(i32a[30], 0x22222222); + i32a[30] = 0x66666666; + assertEq(i32m.and_i(size*30), 0x66666666); + assertEq(i32a[30], 0x22222222); + + i32a[40] = 0x22222222; + assertEq(i32m.or(), 0x22222222); + assertEq(i32a[40], 0x33333333); + i32a[40] = 0x22222222; + assertEq(i32m.or_i(size*40), 0x22222222); + assertEq(i32a[40], 0x33333333); + + i32a[50] = 0x22222222; + assertEq(i32m.xor(), 0x22222222); + assertEq(i32a[50], 0x11111111); + i32a[50] = 0x22222222; + assertEq(i32m.xor_i(size*50), 0x22222222); + assertEq(i32a[50], 0x11111111); + + i32a[100] = 0; + assertEq(i32m.cas1(), 0); + assertEq(i32m.cas2(), 0xFFFFFFFF); + assertEq(i32a[100], 0x5A5A5A5A); + + i32a[100] = 0; + assertEq(i32m.cas1_i(size*100), 0); + assertEq(i32m.cas2_i(size*100), 0xFFFFFFFF); + assertEq(i32a[100], 0x5A5A5A5A); + + // Out-of-bounds accesses. 
+ + var oob = (heap.byteLength * 2) & ~7; + + assertEq(i32m.cas1_i(oob), 0); + assertEq(i32m.cas2_i(oob), 0); + + assertEq(i32m.or_i(oob), 0); + assertEq(i32m.xor_i(oob), 0); + assertEq(i32m.and_i(oob), 0); + assertEq(i32m.add_i(oob), 0); + assertEq(i32m.sub_i(oob), 0); +} + +function loadModule_int16(stdlib, foreign, heap) { + "use asm"; + + var atomic_fence = stdlib.Atomics.fence; + var atomic_load = stdlib.Atomics.load; + var atomic_store = stdlib.Atomics.store; + var atomic_cmpxchg = stdlib.Atomics.compareExchange; + var atomic_add = stdlib.Atomics.add; + var atomic_sub = stdlib.Atomics.sub; + var atomic_and = stdlib.Atomics.and; + var atomic_or = stdlib.Atomics.or; + var atomic_xor = stdlib.Atomics.xor; + + var i16a = new stdlib.SharedInt16Array(heap); + + function do_fence() { + atomic_fence(); + } + + // Load element 0 + function do_load() { + var v = 0; + v = atomic_load(i16a, 0)|0; + return v|0; + } + + // Load element i + function do_load_i(i) { + i = i|0; + var v = 0; + v = atomic_load(i16a, i>>1)|0; + return v|0; + } + + // Store 37 in element 0 + function do_store() { + var v = 0; + v = atomic_store(i16a, 0, 37)|0; + return v|0; + } + + // Store 37 in element i + function do_store_i(i) { + i = i|0; + var v = 0; + v = atomic_store(i16a, i>>1, 37)|0; + return v|0; + } + + // Add 37 to element 10 + function do_add() { + var v = 0; + v = atomic_add(i16a, 10, 37)|0; + return v|0; + } + + // Add 37 to element i + function do_add_i(i) { + i = i|0; + var v = 0; + v = atomic_add(i16a, i>>1, 37)|0; + return v|0; + } + + // Subtract 148 from element 20 + function do_sub() { + var v = 0; + v = atomic_sub(i16a, 20, 148)|0; + return v|0; + } + + // Subtract 148 from element i + function do_sub_i(i) { + i = i|0; + var v = 0; + v = atomic_sub(i16a, i>>1, 148)|0; + return v|0; + } + + // AND 0x3333 into element 30 + function do_and() { + var v = 0; + v = atomic_and(i16a, 30, 0x3333)|0; + return v|0; + } + + // AND 0x3333 into element i + function do_and_i(i) { + i = 
i|0; + var v = 0; + v = atomic_and(i16a, i>>1, 0x3333)|0; + return v|0; + } + + // OR 0x3333 into element 40 + function do_or() { + var v = 0; + v = atomic_or(i16a, 40, 0x3333)|0; + return v|0; + } + + // OR 0x3333 into element i + function do_or_i(i) { + i = i|0; + var v = 0; + v = atomic_or(i16a, i>>1, 0x3333)|0; + return v|0; + } + + // XOR 0x3333 into element 50 + function do_xor() { + var v = 0; + v = atomic_xor(i16a, 50, 0x3333)|0; + return v|0; + } + + // XOR 0x3333 into element i + function do_xor_i(i) { + i = i|0; + var v = 0; + v = atomic_xor(i16a, i>>1, 0x3333)|0; + return v|0; + } + + // CAS element 100: 0 -> -1 + function do_cas1() { + var v = 0; + v = atomic_cmpxchg(i16a, 100, 0, -1)|0; + return v|0; + } + + // CAS element 100: -1 -> 0x5A5A + function do_cas2() { + var v = 0; + v = atomic_cmpxchg(i16a, 100, -1, 0x5A5A)|0; + return v|0; + } + + // CAS element i: 0 -> -1 + function do_cas1_i(i) { + i = i|0; + var v = 0; + v = atomic_cmpxchg(i16a, i>>1, 0, -1)|0; + return v|0; + } + + // CAS element i: -1 -> 0x5A5A + function do_cas2_i(i) { + i = i|0; + var v = 0; + v = atomic_cmpxchg(i16a, i>>1, -1, 0x5A5A)|0; + return v|0; + } + + return { fence: do_fence, + load: do_load, + load_i: do_load_i, + store: do_store, + store_i: do_store_i, + add: do_add, + add_i: do_add_i, + sub: do_sub, + sub_i: do_sub_i, + and: do_and, + and_i: do_and_i, + or: do_or, + or_i: do_or_i, + xor: do_xor, + xor_i: do_xor_i, + cas1: do_cas1, + cas2: do_cas2, + cas1_i: do_cas1_i, + cas2_i: do_cas2_i }; +} + +if (isAsmJSCompilationAvailable()) + assertEq(isAsmJSModule(loadModule_int16), true); + +function test_int16(heap) { + var i16a = new SharedInt16Array(heap); + var i16m = loadModule_int16(this, {}, heap); + + var size = SharedInt16Array.BYTES_PER_ELEMENT; + + i16m.fence(); + + i16a[0] = 12345; + assertEq(i16m.load(), 12345); + assertEq(i16m.load_i(size*0), 12345); + + i16a[0] = -38; + assertEq(i16m.load(), -38); + assertEq(i16m.load_i(size*0), -38); + + assertEq(i16m.store(), 
37); + assertEq(i16a[0], 37); + assertEq(i16m.store_i(size*0), 37); + + i16a[10] = 18; + assertEq(i16m.add(), 18); + assertEq(i16a[10], 18+37); + assertEq(i16m.add_i(size*10), 18+37); + assertEq(i16a[10], 18+37+37); + + i16a[10] = -38; + assertEq(i16m.add(), -38); + assertEq(i16a[10], -38+37); + assertEq(i16m.add_i(size*10), -38+37); + assertEq(i16a[10], -38+37+37); + + i16a[20] = 4972; + assertEq(i16m.sub(), 4972); + assertEq(i16a[20], 4972 - 148); + assertEq(i16m.sub_i(size*20), 4972 - 148); + assertEq(i16a[20], 4972 - 148 - 148); + + i16a[30] = 0x6666; + assertEq(i16m.and(), 0x6666); + assertEq(i16a[30], 0x2222); + i16a[30] = 0x6666; + assertEq(i16m.and_i(size*30), 0x6666); + assertEq(i16a[30], 0x2222); + + i16a[40] = 0x2222; + assertEq(i16m.or(), 0x2222); + assertEq(i16a[40], 0x3333); + i16a[40] = 0x2222; + assertEq(i16m.or_i(size*40), 0x2222); + assertEq(i16a[40], 0x3333); + + i16a[50] = 0x2222; + assertEq(i16m.xor(), 0x2222); + assertEq(i16a[50], 0x1111); + i16a[50] = 0x2222; + assertEq(i16m.xor_i(size*50), 0x2222); + assertEq(i16a[50], 0x1111); + + i16a[100] = 0; + assertEq(i16m.cas1(), 0); + assertEq(i16m.cas2(), -1); + assertEq(i16a[100], 0x5A5A); + + i16a[100] = 0; + assertEq(i16m.cas1_i(size*100), 0); + assertEq(i16m.cas2_i(size*100), -1); + assertEq(i16a[100], 0x5A5A); + + var oob = (heap.byteLength * 2) & ~7; + + assertEq(i16m.cas1_i(oob), 0); + assertEq(i16m.cas2_i(oob), 0); + + assertEq(i16m.or_i(oob), 0); + assertEq(i16m.xor_i(oob), 0); + assertEq(i16m.and_i(oob), 0); + assertEq(i16m.add_i(oob), 0); + assertEq(i16m.sub_i(oob), 0); +} + +function loadModule_uint16(stdlib, foreign, heap) { + "use asm"; + + var atomic_load = stdlib.Atomics.load; + var atomic_store = stdlib.Atomics.store; + var atomic_cmpxchg = stdlib.Atomics.compareExchange; + var atomic_add = stdlib.Atomics.add; + var atomic_sub = stdlib.Atomics.sub; + var atomic_and = stdlib.Atomics.and; + var atomic_or = stdlib.Atomics.or; + var atomic_xor = stdlib.Atomics.xor; + + var i16a = new 
stdlib.SharedUint16Array(heap); + + // Load element 0 + function do_load() { + var v = 0; + v = atomic_load(i16a, 0)|0; + return v|0; + } + + // Load element i + function do_load_i(i) { + i = i|0; + var v = 0; + v = atomic_load(i16a, i>>1)|0; + return v|0; + } + + // Store 37 in element 0 + function do_store() { + var v = 0; + v = atomic_store(i16a, 0, 37)|0; + return v|0; + } + + // Store 37 in element i + function do_store_i(i) { + i = i|0; + var v = 0; + v = atomic_store(i16a, i>>1, 37)|0; + return v|0; + } + + // Add 37 to element 10 + function do_add() { + var v = 0; + v = atomic_add(i16a, 10, 37)|0; + return v|0; + } + + // Add 37 to element i + function do_add_i(i) { + i = i|0; + var v = 0; + v = atomic_add(i16a, i>>1, 37)|0; + return v|0; + } + + // Subtract 148 from element 20 + function do_sub() { + var v = 0; + v = atomic_sub(i16a, 20, 148)|0; + return v|0; + } + + // Subtract 148 from element i + function do_sub_i(i) { + i = i|0; + var v = 0; + v = atomic_sub(i16a, i>>1, 148)|0; + return v|0; + } + + // AND 0x3333 into element 30 + function do_and() { + var v = 0; + v = atomic_and(i16a, 30, 0x3333)|0; + return v|0; + } + + // AND 0x3333 into element i + function do_and_i(i) { + i = i|0; + var v = 0; + v = atomic_and(i16a, i>>1, 0x3333)|0; + return v|0; + } + + // OR 0x3333 into element 40 + function do_or() { + var v = 0; + v = atomic_or(i16a, 40, 0x3333)|0; + return v|0; + } + + // OR 0x3333 into element i + function do_or_i(i) { + i = i|0; + var v = 0; + v = atomic_or(i16a, i>>1, 0x3333)|0; + return v|0; + } + + // XOR 0x3333 into element 50 + function do_xor() { + var v = 0; + v = atomic_xor(i16a, 50, 0x3333)|0; + return v|0; + } + + // XOR 0x3333 into element i + function do_xor_i(i) { + i = i|0; + var v = 0; + v = atomic_xor(i16a, i>>1, 0x3333)|0; + return v|0; + } + + // CAS element 100: 0 -> -1 + function do_cas1() { + var v = 0; + v = atomic_cmpxchg(i16a, 100, 0, -1)|0; + return v|0; + } + + // CAS element 100: -1 -> 0x5A5A + function do_cas2() 
{ + var v = 0; + v = atomic_cmpxchg(i16a, 100, -1, 0x5A5A)|0; + return v|0; + } + + // CAS element i: 0 -> -1 + function do_cas1_i(i) { + i = i|0; + var v = 0; + v = atomic_cmpxchg(i16a, i>>1, 0, -1)|0; + return v|0; + } + + // CAS element i: -1 -> 0x5A5A + function do_cas2_i(i) { + i = i|0; + var v = 0; + v = atomic_cmpxchg(i16a, i>>1, -1, 0x5A5A)|0; + return v|0; + } + + return { load: do_load, + load_i: do_load_i, + store: do_store, + store_i: do_store_i, + add: do_add, + add_i: do_add_i, + sub: do_sub, + sub_i: do_sub_i, + and: do_and, + and_i: do_and_i, + or: do_or, + or_i: do_or_i, + xor: do_xor, + xor_i: do_xor_i, + cas1: do_cas1, + cas2: do_cas2, + cas1_i: do_cas1_i, + cas2_i: do_cas2_i }; +} + +if (isAsmJSCompilationAvailable()) + assertEq(isAsmJSModule(loadModule_uint16), true); + +function test_uint16(heap) { + var i16a = new SharedUint16Array(heap); + var i16m = loadModule_uint16(this, {}, heap); + + var size = SharedUint16Array.BYTES_PER_ELEMENT; + + i16a[0] = 12345; + assertEq(i16m.load(), 12345); + assertEq(i16m.load_i(size*0), 12345); + + i16a[0] = -38; + assertEq(i16m.load(), (0x10000-38)); + assertEq(i16m.load_i(size*0), (0x10000-38)); + + assertEq(i16m.store(), 37); + assertEq(i16a[0], 37); + assertEq(i16m.store_i(size*0), 37); + + i16a[10] = 18; + assertEq(i16m.add(), 18); + assertEq(i16a[10], 18+37); + assertEq(i16m.add_i(size*10), 18+37); + assertEq(i16a[10], 18+37+37); + + i16a[10] = -38; + assertEq(i16m.add(), (0x10000-38)); + assertEq(i16a[10], (0x10000-38)+37); + assertEq(i16m.add_i(size*10), (0x10000-38)+37); + assertEq(i16a[10], ((0x10000-38)+37+37) & 0xFFFF); + + i16a[20] = 4972; + assertEq(i16m.sub(), 4972); + assertEq(i16a[20], 4972 - 148); + assertEq(i16m.sub_i(size*20), 4972 - 148); + assertEq(i16a[20], 4972 - 148 - 148); + + i16a[30] = 0x6666; + assertEq(i16m.and(), 0x6666); + assertEq(i16a[30], 0x2222); + i16a[30] = 0x6666; + assertEq(i16m.and_i(size*30), 0x6666); + assertEq(i16a[30], 0x2222); + + i16a[40] = 0x2222; + 
assertEq(i16m.or(), 0x2222); + assertEq(i16a[40], 0x3333); + i16a[40] = 0x2222; + assertEq(i16m.or_i(size*40), 0x2222); + assertEq(i16a[40], 0x3333); + + i16a[50] = 0x2222; + assertEq(i16m.xor(), 0x2222); + assertEq(i16a[50], 0x1111); + i16a[50] = 0x2222; + assertEq(i16m.xor_i(size*50), 0x2222); + assertEq(i16a[50], 0x1111); + + i16a[100] = 0; + assertEq(i16m.cas1(), 0); + assertEq(i16m.cas2(), -1 & 0xFFFF); + assertEq(i16a[100], 0x5A5A); + + i16a[100] = 0; + assertEq(i16m.cas1_i(size*100), 0); + assertEq(i16m.cas2_i(size*100), -1 & 0xFFFF); + assertEq(i16a[100], 0x5A5A); + + var oob = (heap.byteLength * 2) & ~7; + + assertEq(i16m.cas1_i(oob), 0); + assertEq(i16m.cas2_i(oob), 0); + + assertEq(i16m.or_i(oob), 0); + assertEq(i16m.xor_i(oob), 0); + assertEq(i16m.and_i(oob), 0); + assertEq(i16m.add_i(oob), 0); + assertEq(i16m.sub_i(oob), 0); +} + function loadModule_int8(stdlib, foreign, heap) { "use asm"; @@ -352,157 +1182,331 @@ function loadModule_int8(stdlib, foreign, heap) { if (isAsmJSCompilationAvailable()) assertEq(isAsmJSModule(loadModule_int8), true); -// TODO: halfword arrays -// TODO: signed vs unsigned; negative results +function test_int8(heap) { + var i8a = new SharedInt8Array(heap); + var i8m = loadModule_int8(this, {}, heap); -var heap = new SharedArrayBuffer(65536); + for ( var i=0 ; i < i8a.length ; i++ ) + i8a[i] = 0; -//////////////////////////////////////////////////////////// -// -// int32 tests + var size = SharedInt8Array.BYTES_PER_ELEMENT; -var i32a = new SharedInt32Array(heap); -var i32m = loadModule_int32(this, {}, heap); + i8a[0] = 123; + assertEq(i8m.load(), 123); + assertEq(i8m.load_i(0), 123); -var size = 4; + assertEq(i8m.store(), 37); + assertEq(i8a[0], 37); + assertEq(i8m.store_i(0), 37); -i32m.fence(); + i8a[10] = 18; + assertEq(i8m.add(), 18); + assertEq(i8a[10], 18+37); + assertEq(i8m.add_i(10), 18+37); + assertEq(i8a[10], 18+37+37); -i32a[0] = 12345; -assertEq(i32m.load(), 12345); -assertEq(i32m.load_i(size*0), 12345); + i8a[20] = 
49; + assertEq(i8m.sub(), 49); + assertEq(i8a[20], 49 - 108); + assertEq(i8m.sub_i(20), 49 - 108); + assertEq(i8a[20], ((49 - 108 - 108) << 24) >> 24); // Byte, sign extended -assertEq(i32m.store(), 37); -assertEq(i32a[0], 37); -assertEq(i32m.store_i(size*0), 37); + i8a[30] = 0x66; + assertEq(i8m.and(), 0x66); + assertEq(i8a[30], 0x22); + i8a[30] = 0x66; + assertEq(i8m.and_i(30), 0x66); + assertEq(i8a[30], 0x22); -i32a[10] = 18; -assertEq(i32m.add(), 18); -assertEq(i32a[10], 18+37); -assertEq(i32m.add_i(size*10), 18+37); -assertEq(i32a[10], 18+37+37); + i8a[40] = 0x22; + assertEq(i8m.or(), 0x22); + assertEq(i8a[40], 0x33); + i8a[40] = 0x22; + assertEq(i8m.or_i(40), 0x22); + assertEq(i8a[40], 0x33); -i32a[20] = 4972; -assertEq(i32m.sub(), 4972); -assertEq(i32a[20], 4972 - 148); -assertEq(i32m.sub_i(size*20), 4972 - 148); -assertEq(i32a[20], 4972 - 148 - 148); + i8a[50] = 0x22; + assertEq(i8m.xor(), 0x22); + assertEq(i8a[50], 0x11); + i8a[50] = 0x22; + assertEq(i8m.xor_i(50), 0x22); + assertEq(i8a[50], 0x11); -i32a[30] = 0x66666666; -assertEq(i32m.and(), 0x66666666); -assertEq(i32a[30], 0x22222222); -i32a[30] = 0x66666666; -assertEq(i32m.and_i(size*30), 0x66666666); -assertEq(i32a[30], 0x22222222); + i8a[100] = 0; + assertEq(i8m.cas1(), 0); + assertEq(i8m.cas2(), -1); + assertEq(i8a[100], 0x5A); -i32a[40] = 0x22222222; -assertEq(i32m.or(), 0x22222222); -assertEq(i32a[40], 0x33333333); -i32a[40] = 0x22222222; -assertEq(i32m.or_i(size*40), 0x22222222); -assertEq(i32a[40], 0x33333333); + i8a[100] = 0; + assertEq(i8m.cas1_i(100), 0); + assertEq(i8m.cas2_i(100), -1); + assertEq(i8a[100], 0x5A); -i32a[50] = 0x22222222; -assertEq(i32m.xor(), 0x22222222); -assertEq(i32a[50], 0x11111111); -i32a[50] = 0x22222222; -assertEq(i32m.xor_i(size*50), 0x22222222); -assertEq(i32a[50], 0x11111111); + var oob = (heap.byteLength * 2) & ~7; -i32a[100] = 0; -assertEq(i32m.cas1(), 0); -assertEq(i32m.cas2(), -1); -assertEq(i32a[100], 0x5A5A5A5A); + assertEq(i8m.cas1_i(oob), 0); + 
assertEq(i8m.cas2_i(oob), 0); -i32a[100] = 0; -assertEq(i32m.cas1_i(size*100), 0); -assertEq(i32m.cas2_i(size*100), -1); -assertEq(i32a[100], 0x5A5A5A5A); + assertEq(i8m.or_i(oob), 0); + assertEq(i8m.xor_i(oob), 0); + assertEq(i8m.and_i(oob), 0); + assertEq(i8m.add_i(oob), 0); + assertEq(i8m.sub_i(oob), 0); +} -// Out-of-bounds accesses. +function loadModule_uint8(stdlib, foreign, heap) { + "use asm"; -assertEq(i32m.cas1_i(size*20000), 0); -assertEq(i32m.cas2_i(size*20000), 0); + var atomic_load = stdlib.Atomics.load; + var atomic_store = stdlib.Atomics.store; + var atomic_cmpxchg = stdlib.Atomics.compareExchange; + var atomic_add = stdlib.Atomics.add; + var atomic_sub = stdlib.Atomics.sub; + var atomic_and = stdlib.Atomics.and; + var atomic_or = stdlib.Atomics.or; + var atomic_xor = stdlib.Atomics.xor; -assertEq(i32m.or_i(size*20001), 0); -assertEq(i32m.xor_i(size*20001), 0); -assertEq(i32m.and_i(size*20001), 0); -assertEq(i32m.add_i(size*20001), 0); -assertEq(i32m.sub_i(size*20001), 0); + var i8a = new stdlib.SharedUint8Array(heap); -//////////////////////////////////////////////////////////// -// -// int8 tests + // Load element 0 + function do_load() { + var v = 0; + v = atomic_load(i8a, 0)|0; + return v|0; + } -var i8a = new SharedInt8Array(heap); -var i8m = loadModule_int8(this, {}, heap); + // Load element i + function do_load_i(i) { + i = i|0; + var v = 0; + v = atomic_load(i8a, i)|0; + return v|0; + } -for ( var i=0 ; i < i8a.length ; i++ ) - i8a[i] = 0; + // Store 37 in element 0 + function do_store() { + var v = 0; + v = atomic_store(i8a, 0, 37)|0; + return v|0; + } -var size = 1; + // Store 37 in element i + function do_store_i(i) { + i = i|0; + var v = 0; + v = atomic_store(i8a, i, 37)|0; + return v|0; + } -i8a[0] = 123; -assertEq(i8m.load(), 123); -assertEq(i8m.load_i(0), 123); + // Add 37 to element 10 + function do_add() { + var v = 0; + v = atomic_add(i8a, 10, 37)|0; + return v|0; + } -assertEq(i8m.store(), 37); -assertEq(i8a[0], 37); 
-assertEq(i8m.store_i(0), 37); + // Add 37 to element i + function do_add_i(i) { + i = i|0; + var v = 0; + v = atomic_add(i8a, i, 37)|0; + return v|0; + } -i8a[10] = 18; -assertEq(i8m.add(), 18); -assertEq(i8a[10], 18+37); -assertEq(i8m.add_i(10), 18+37); -assertEq(i8a[10], 18+37+37); + // Subtract 108 from element 20 + function do_sub() { + var v = 0; + v = atomic_sub(i8a, 20, 108)|0; + return v|0; + } -i8a[20] = 49; -assertEq(i8m.sub(), 49); -assertEq(i8a[20], 49 - 108); -assertEq(i8m.sub_i(20), 49 - 108); -assertEq(i8a[20], ((49 - 108 - 108) << 24) >> 24); // Byte, sign extended + // Subtract 108 from element i + function do_sub_i(i) { + i = i|0; + var v = 0; + v = atomic_sub(i8a, i, 108)|0; + return v|0; + } -i8a[30] = 0x66; -assertEq(i8m.and(), 0x66); -assertEq(i8a[30], 0x22); -i8a[30] = 0x66; -assertEq(i8m.and_i(30), 0x66); -assertEq(i8a[30], 0x22); + // AND 0x33 into element 30 + function do_and() { + var v = 0; + v = atomic_and(i8a, 30, 0x33)|0; + return v|0; + } -i8a[40] = 0x22; -assertEq(i8m.or(), 0x22); -assertEq(i8a[40], 0x33); -i8a[40] = 0x22; -assertEq(i8m.or_i(40), 0x22); -assertEq(i8a[40], 0x33); + // AND 0x33 into element i + function do_and_i(i) { + i = i|0; + var v = 0; + v = atomic_and(i8a, i, 0x33)|0; + return v|0; + } -i8a[50] = 0x22; -assertEq(i8m.xor(), 0x22); -assertEq(i8a[50], 0x11); -i8a[50] = 0x22; -assertEq(i8m.xor_i(50), 0x22); -assertEq(i8a[50], 0x11); + // OR 0x33 into element 40 + function do_or() { + var v = 0; + v = atomic_or(i8a, 40, 0x33)|0; + return v|0; + } -i8a[100] = 0; -assertEq(i8m.cas1(), 0); -assertEq(i8m.cas2(), -1); -assertEq(i8a[100], 0x5A); + // OR 0x33 into element i + function do_or_i(i) { + i = i|0; + var v = 0; + v = atomic_or(i8a, i, 0x33)|0; + return v|0; + } -i8a[100] = 0; -assertEq(i8m.cas1_i(100), 0); -assertEq(i8m.cas2_i(100), -1); -assertEq(i8a[100], 0x5A); + // XOR 0x33 into element 50 + function do_xor() { + var v = 0; + v = atomic_xor(i8a, 50, 0x33)|0; + return v|0; + } -// Out-of-bounds accesses. 
+ // XOR 0x33 into element i + function do_xor_i(i) { + i = i|0; + var v = 0; + v = atomic_xor(i8a, i, 0x33)|0; + return v|0; + } -assertEq(i8m.cas1_i(80000), 0); -assertEq(i8m.cas2_i(80000), 0); + // CAS element 100: 0 -> -1 + function do_cas1() { + var v = 0; + v = atomic_cmpxchg(i8a, 100, 0, -1)|0; + return v|0; + } -assertEq(i8m.or_i(80001), 0); -assertEq(i8m.xor_i(80001), 0); -assertEq(i8m.and_i(80001), 0); -assertEq(i8m.add_i(80001), 0); -assertEq(i8m.sub_i(80001), 0); + // CAS element 100: -1 -> 0x5A + function do_cas2() { + var v = 0; + v = atomic_cmpxchg(i8a, 100, -1, 0x5A)|0; + return v|0; + } + + // CAS element i: 0 -> -1 + function do_cas1_i(i) { + i = i|0; + var v = 0; + v = atomic_cmpxchg(i8a, i, 0, -1)|0; + return v|0; + } + + // CAS element i: -1 -> 0x5A + function do_cas2_i(i) { + i = i|0; + var v = 0; + v = atomic_cmpxchg(i8a, i, -1, 0x5A)|0; + return v|0; + } + + return { load: do_load, + load_i: do_load_i, + store: do_store, + store_i: do_store_i, + add: do_add, + add_i: do_add_i, + sub: do_sub, + sub_i: do_sub_i, + and: do_and, + and_i: do_and_i, + or: do_or, + or_i: do_or_i, + xor: do_xor, + xor_i: do_xor_i, + cas1: do_cas1, + cas2: do_cas2, + cas1_i: do_cas1_i, + cas2_i: do_cas2_i }; +} + +if (isAsmJSCompilationAvailable()) + assertEq(isAsmJSModule(loadModule_uint8), true); + +function test_uint8(heap) { + var i8a = new SharedUint8Array(heap); + var i8m = loadModule_uint8(this, {}, heap); + + for ( var i=0 ; i < i8a.length ; i++ ) + i8a[i] = 0; + + var size = SharedUint8Array.BYTES_PER_ELEMENT; + + i8a[0] = 123; + assertEq(i8m.load(), 123); + assertEq(i8m.load_i(0), 123); + + i8a[0] = -38; + assertEq(i8m.load(), (0x100-38)); + assertEq(i8m.load_i(size*0), (0x100-38)); + + assertEq(i8m.store(), 37); + assertEq(i8a[0], 37); + assertEq(i8m.store_i(0), 37); + + i8a[10] = 18; + assertEq(i8m.add(), 18); + assertEq(i8a[10], 18+37); + assertEq(i8m.add_i(10), 18+37); + assertEq(i8a[10], 18+37+37); + + i8a[10] = -38; + assertEq(i8m.add(), (0x100-38)); 
+ assertEq(i8a[10], (0x100-38)+37); + assertEq(i8m.add_i(size*10), (0x100-38)+37); + assertEq(i8a[10], ((0x100-38)+37+37) & 0xFF); + + i8a[20] = 49; + assertEq(i8m.sub(), 49); + assertEq(i8a[20], (49 - 108) & 255); + assertEq(i8m.sub_i(20), (49 - 108) & 255); + assertEq(i8a[20], (49 - 108 - 108) & 255); // Byte, zero extended + + i8a[30] = 0x66; + assertEq(i8m.and(), 0x66); + assertEq(i8a[30], 0x22); + i8a[30] = 0x66; + assertEq(i8m.and_i(30), 0x66); + assertEq(i8a[30], 0x22); + + i8a[40] = 0x22; + assertEq(i8m.or(), 0x22); + assertEq(i8a[40], 0x33); + i8a[40] = 0x22; + assertEq(i8m.or_i(40), 0x22); + assertEq(i8a[40], 0x33); + + i8a[50] = 0x22; + assertEq(i8m.xor(), 0x22); + assertEq(i8a[50], 0x11); + i8a[50] = 0x22; + assertEq(i8m.xor_i(50), 0x22); + assertEq(i8a[50], 0x11); + + i8a[100] = 0; + assertEq(i8m.cas1(), 0); + assertEq(i8m.cas2(), 255); + assertEq(i8a[100], 0x5A); + + i8a[100] = 0; + assertEq(i8m.cas1_i(100), 0); + assertEq(i8m.cas2_i(100), 255); + assertEq(i8a[100], 0x5A); + + var oob = (heap.byteLength * 2) & ~7; + + assertEq(i8m.cas1_i(oob), 0); + assertEq(i8m.cas2_i(oob), 0); + + assertEq(i8m.or_i(oob), 0); + assertEq(i8m.xor_i(oob), 0); + assertEq(i8m.and_i(oob), 0); + assertEq(i8m.add_i(oob), 0); + assertEq(i8m.sub_i(oob), 0); +} function loadModule_misc(stdlib, foreign, heap) { "use asm"; @@ -574,6 +1578,14 @@ function test_misc(heap) { assertEq(misc.ilf9(), 0); } -test_misc(heap); +// SharedUint8ClampedArray is not supported for asm.js. -print("Done"); +var heap = new SharedArrayBuffer(65536); + +test_int8(heap); +test_uint8(heap); +test_int16(heap); +test_uint16(heap); +test_int32(heap); +test_uint32(heap); +test_misc(heap); From d72a34a531877b8958746f960c77c235166a5c63 Mon Sep 17 00:00:00 2001 From: JW Wang Date: Tue, 30 Jun 2015 15:58:05 +0800 Subject: [PATCH 28/61] Bug 1178622 - Fix enum-to-string mismatch in MediaDecoder.cpp. r=jya. 
--- dom/media/MediaDecoder.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/dom/media/MediaDecoder.cpp b/dom/media/MediaDecoder.cpp index 7c8db919cf2..12a039f226c 100644 --- a/dom/media/MediaDecoder.cpp +++ b/dom/media/MediaDecoder.cpp @@ -58,15 +58,20 @@ PRLogModuleInfo* gMediaDecoderLog; #define DECODER_LOG(x, ...) \ MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, ("Decoder=%p " x, this, ##__VA_ARGS__)) -static const char* const gPlayStateStr[] = { - "START", - "LOADING", - "PAUSED", - "PLAYING", - "SEEKING", - "ENDED", - "SHUTDOWN" -}; +static const char* +ToPlayStateStr(MediaDecoder::PlayState aState) +{ + switch (aState) { + case MediaDecoder::PLAY_STATE_START: return "START"; + case MediaDecoder::PLAY_STATE_LOADING: return "LOADING"; + case MediaDecoder::PLAY_STATE_PAUSED: return "PAUSED"; + case MediaDecoder::PLAY_STATE_PLAYING: return "PLAYING"; + case MediaDecoder::PLAY_STATE_ENDED: return "ENDED"; + case MediaDecoder::PLAY_STATE_SHUTDOWN: return "SHUTDOWN"; + default: MOZ_ASSERT_UNREACHABLE("Invalid playState."); + } + return "UNKNOWN"; +} class MediaMemoryTracker : public nsIMemoryReporter { @@ -1019,7 +1024,7 @@ void MediaDecoder::ChangeState(PlayState aState) } DECODER_LOG("ChangeState %s => %s", - gPlayStateStr[mPlayState], gPlayStateStr[aState]); + ToPlayStateStr(mPlayState), ToPlayStateStr(aState)); mPlayState = aState; if (mPlayState == PLAY_STATE_PLAYING) { From af040b1cc3811dc9ce136b0eb0fd58759792fbf8 Mon Sep 17 00:00:00 2001 From: JW Wang Date: Wed, 1 Jul 2015 11:46:38 +0800 Subject: [PATCH 29/61] Bug 1178680 - Remove MediaDecoder::Observe as we have MediaShutdownManager. r=cpearce. 
--- dom/media/MediaDecoder.cpp | 14 +------------- dom/media/MediaDecoder.h | 1 - 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/dom/media/MediaDecoder.cpp b/dom/media/MediaDecoder.cpp index 12a039f226c..08910fb9580 100644 --- a/dom/media/MediaDecoder.cpp +++ b/dom/media/MediaDecoder.cpp @@ -141,7 +141,7 @@ MediaDecoder::InitStatics() NS_IMPL_ISUPPORTS(MediaMemoryTracker, nsIMemoryReporter) -NS_IMPL_ISUPPORTS(MediaDecoder, nsIObserver) +NS_IMPL_ISUPPORTS0(MediaDecoder) void MediaDecoder::NotifyOwnerActivityChanged() { @@ -812,18 +812,6 @@ void MediaDecoder::PlaybackEnded() } } -NS_IMETHODIMP MediaDecoder::Observe(nsISupports *aSubjet, - const char *aTopic, - const char16_t *someData) -{ - MOZ_ASSERT(NS_IsMainThread()); - if (strcmp(aTopic, NS_XPCOM_SHUTDOWN_OBSERVER_ID) == 0) { - Shutdown(); - } - - return NS_OK; -} - MediaDecoder::Statistics MediaDecoder::GetStatistics() { diff --git a/dom/media/MediaDecoder.h b/dom/media/MediaDecoder.h index a92fa11d9cb..beb520c2b8f 100644 --- a/dom/media/MediaDecoder.h +++ b/dom/media/MediaDecoder.h @@ -277,7 +277,6 @@ public: typedef MediaPromise SeekPromise; NS_DECL_THREADSAFE_ISUPPORTS - NS_DECL_NSIOBSERVER // Enumeration for the valid play states (see mPlayState) enum PlayState { From 91656f1d39f2ca14858dee91195cd41beda197c7 Mon Sep 17 00:00:00 2001 From: Jan de Mooij Date: Tue, 30 Jun 2015 21:09:46 -0700 Subject: [PATCH 30/61] Bug 1177892 part 3 - Remove OBJECT_TO_JSVAL. 
r=evilpie --- dom/archivereader/ArchiveRequest.cpp | 2 +- dom/base/WebSocket.cpp | 2 +- dom/base/nsDOMClassInfo.cpp | 2 +- dom/base/nsDOMDataChannel.cpp | 2 +- dom/indexedDB/KeyPath.cpp | 2 +- dom/plugins/base/nsJSNPRuntime.cpp | 2 +- dom/workers/WorkerScope.cpp | 2 +- js/public/Value.h | 8 ---- js/src/ctypes/CTypes.cpp | 39 +++++++++---------- .../jsapi-tests/testAddPropertyPropcache.cpp | 4 +- .../testDefineGetterSetterNonEnumerable.cpp | 6 +-- js/src/jsapi-tests/testLooselyEqual.cpp | 2 +- js/src/jsapi-tests/testMappedArrayBuffer.cpp | 6 +-- js/src/jsapi-tests/testNewObject.cpp | 6 +-- js/src/jsapi-tests/testOps.cpp | 2 +- js/src/jsapi-tests/testParseJSON.cpp | 2 +- js/src/shell/js.cpp | 2 +- js/xpconnect/src/Sandbox.cpp | 2 +- js/xpconnect/src/XPCWrappedJSClass.cpp | 2 +- js/xpconnect/src/XPCWrappedNativeInfo.cpp | 2 +- js/xpconnect/src/XPCWrappedNativeJSOps.cpp | 4 +- js/xpconnect/wrappers/XrayWrapper.cpp | 4 +- storage/mozStorageAsyncStatementJSHelper.cpp | 2 +- storage/mozStorageStatementJSHelper.cpp | 4 +- storage/mozStorageStatementRow.cpp | 2 +- .../directory/nsDirectoryViewer.cpp | 2 +- 26 files changed, 53 insertions(+), 62 deletions(-) diff --git a/dom/archivereader/ArchiveRequest.cpp b/dom/archivereader/ArchiveRequest.cpp index b840a82b2d1..57ddfb90226 100644 --- a/dom/archivereader/ArchiveRequest.cpp +++ b/dom/archivereader/ArchiveRequest.cpp @@ -201,7 +201,7 @@ ArchiveRequest::GetFilenamesResult(JSContext* aCx, return NS_ERROR_FAILURE; } - *aValue = OBJECT_TO_JSVAL(array); + aValue->setObject(*array); return NS_OK; } diff --git a/dom/base/WebSocket.cpp b/dom/base/WebSocket.cpp index 1522c6549be..7cb0dc1d9e7 100644 --- a/dom/base/WebSocket.cpp +++ b/dom/base/WebSocket.cpp @@ -1764,7 +1764,7 @@ WebSocket::CreateAndDispatchMessageEvent(JSContext* aCx, nsresult rv = nsContentUtils::CreateArrayBuffer(aCx, aData, arrayBuf.address()); NS_ENSURE_SUCCESS(rv, rv); - jsData = OBJECT_TO_JSVAL(arrayBuf); + jsData.setObject(*arrayBuf); } else { 
NS_RUNTIMEABORT("Unknown binary type!"); return NS_ERROR_UNEXPECTED; diff --git a/dom/base/nsDOMClassInfo.cpp b/dom/base/nsDOMClassInfo.cpp index 25a78b517f2..f40283c3de9 100644 --- a/dom/base/nsDOMClassInfo.cpp +++ b/dom/base/nsDOMClassInfo.cpp @@ -1885,7 +1885,7 @@ ResolvePrototype(nsIXPConnect *aXPConnect, nsGlobalWindow *aWin, JSContext *cx, } } - v = OBJECT_TO_JSVAL(dot_prototype); + v.setObject(*dot_prototype); JSAutoCompartment ac(cx, class_obj); diff --git a/dom/base/nsDOMDataChannel.cpp b/dom/base/nsDOMDataChannel.cpp index 9a6765c715a..a96da7373ed 100644 --- a/dom/base/nsDOMDataChannel.cpp +++ b/dom/base/nsDOMDataChannel.cpp @@ -389,7 +389,7 @@ nsDOMDataChannel::DoOnMessageAvailable(const nsACString& aData, JS::Rooted arrayBuf(cx); rv = nsContentUtils::CreateArrayBuffer(cx, aData, arrayBuf.address()); NS_ENSURE_SUCCESS(rv, rv); - jsData = OBJECT_TO_JSVAL(arrayBuf); + jsData.setObject(*arrayBuf); } else { NS_RUNTIMEABORT("Unknown binary type!"); return NS_ERROR_UNEXPECTED; diff --git a/dom/indexedDB/KeyPath.cpp b/dom/indexedDB/KeyPath.cpp index 19c488c8d84..690540e8c6e 100644 --- a/dom/indexedDB/KeyPath.cpp +++ b/dom/indexedDB/KeyPath.cpp @@ -365,7 +365,7 @@ KeyPath::ExtractKeyAsJSVal(JSContext* aCx, const JS::Value& aValue, } } - *aOutVal = OBJECT_TO_JSVAL(arrayObj); + aOutVal->setObject(*arrayObj); return NS_OK; } diff --git a/dom/plugins/base/nsJSNPRuntime.cpp b/dom/plugins/base/nsJSNPRuntime.cpp index c0dd8ca3c00..4bf5b38d1f8 100644 --- a/dom/plugins/base/nsJSNPRuntime.cpp +++ b/dom/plugins/base/nsJSNPRuntime.cpp @@ -521,7 +521,7 @@ NPVariantToJSVal(NPP npp, JSContext *cx, const NPVariant *variant) nsNPObjWrapper::GetNewOrUsed(npp, cx, NPVARIANT_TO_OBJECT(*variant)); if (obj) { - return OBJECT_TO_JSVAL(obj); + return JS::ObjectValue(*obj); } } diff --git a/dom/workers/WorkerScope.cpp b/dom/workers/WorkerScope.cpp index 98dde6b188f..d10decd96da 100644 --- a/dom/workers/WorkerScope.cpp +++ b/dom/workers/WorkerScope.cpp @@ -742,7 +742,7 @@ 
workerdebuggersandbox_convert(JSContext *cx, JS::Handle obj, JSType type, JS::MutableHandle vp) { if (type == JSTYPE_OBJECT) { - vp.set(OBJECT_TO_JSVAL(obj)); + vp.setObject(*obj); return true; } diff --git a/js/public/Value.h b/js/public/Value.h index c775ffc4698..4c769e04910 100644 --- a/js/public/Value.h +++ b/js/public/Value.h @@ -1971,14 +1971,6 @@ UINT_TO_JSVAL(uint32_t i) : DOUBLE_TO_JSVAL((double)i); } -static inline jsval -OBJECT_TO_JSVAL(JSObject* obj) -{ - if (obj) - return IMPL_TO_JSVAL(OBJECT_TO_JSVAL_IMPL(obj)); - return IMPL_TO_JSVAL(BUILD_JSVAL(JSVAL_TAG_NULL, 0)); -} - namespace JS { extern JS_PUBLIC_DATA(const HandleValue) NullHandleValue; diff --git a/js/src/ctypes/CTypes.cpp b/js/src/ctypes/CTypes.cpp index ccc3a20fc77..537959d0deb 100644 --- a/js/src/ctypes/CTypes.cpp +++ b/js/src/ctypes/CTypes.cpp @@ -1645,7 +1645,7 @@ InitTypeConstructor(JSContext* cx, // Stash ctypes.{Pointer,Array,Struct}Type.prototype on a reserved slot of // the type constructor, for faster lookup. - js::SetFunctionNativeReserved(obj, SLOT_FN_CTORPROTO, OBJECT_TO_JSVAL(typeProto)); + js::SetFunctionNativeReserved(obj, SLOT_FN_CTORPROTO, ObjectValue(*typeProto)); // Create an object to serve as the common ancestor for all CData objects // created from the given type constructor. This has ctypes.CData.prototype @@ -1665,7 +1665,7 @@ InitTypeConstructor(JSContext* cx, return false; // Link the type prototype to the data prototype. - JS_SetReservedSlot(typeProto, SLOT_OURDATAPROTO, OBJECT_TO_JSVAL(dataProto)); + JS_SetReservedSlot(typeProto, SLOT_OURDATAPROTO, ObjectValue(*dataProto)); if (!JS_FreezeObject(cx, obj) || //!JS_FreezeObject(cx, dataProto) || // XXX fixme - see bug 541212! 
@@ -1702,8 +1702,7 @@ InitInt64Class(JSContext* cx, if (!fun) return nullptr; - js::SetFunctionNativeReserved(fun, SLOT_FN_INT64PROTO, - OBJECT_TO_JSVAL(prototype)); + js::SetFunctionNativeReserved(fun, SLOT_FN_INT64PROTO, ObjectValue(*prototype)); if (!JS_FreezeObject(cx, ctor)) return nullptr; @@ -1720,7 +1719,7 @@ AttachProtos(JSObject* proto, const AutoObjectVector& protos) // to the appropriate CTypeProtoSlot. (SLOT_CTYPES is the last slot // of [[Class]] "CTypeProto" that we fill in this automated manner.) for (uint32_t i = 0; i <= SLOT_CTYPES; ++i) - JS_SetReservedSlot(proto, i, OBJECT_TO_JSVAL(protos[i])); + JS_SetReservedSlot(proto, i, ObjectOrNullValue(protos[i])); } static bool @@ -1759,7 +1758,7 @@ InitTypeClasses(JSContext* cx, HandleObject ctypesObj) return false; // Link CTypeProto to CDataProto. - JS_SetReservedSlot(CTypeProto, SLOT_OURDATAPROTO, OBJECT_TO_JSVAL(CDataProto)); + JS_SetReservedSlot(CTypeProto, SLOT_OURDATAPROTO, ObjectValue(*CDataProto)); // Create and attach the special class constructors: ctypes.PointerType, // ctypes.ArrayType, ctypes.StructType, and ctypes.FunctionType. @@ -3948,7 +3947,7 @@ CType::Create(JSContext* cx, // Set the 'prototype' object. //if (!JS_FreezeObject(cx, prototype)) // XXX fixme - see bug 541212! // return nullptr; - JS_SetReservedSlot(typeObj, SLOT_PROTO, OBJECT_TO_JSVAL(prototype)); + JS_SetReservedSlot(typeObj, SLOT_PROTO, ObjectValue(*prototype)); } if (!JS_FreezeObject(cx, typeObj)) @@ -4630,10 +4629,10 @@ PointerType::CreateInternal(JSContext* cx, HandleObject baseType) return nullptr; // Set the target type. (This will be 'null' for an opaque pointer type.) - JS_SetReservedSlot(typeObj, SLOT_TARGET_T, OBJECT_TO_JSVAL(baseType)); + JS_SetReservedSlot(typeObj, SLOT_TARGET_T, ObjectValue(*baseType)); // Finally, cache our newly-created PointerType on our pointed-to CType. 
- JS_SetReservedSlot(baseType, SLOT_PTR, OBJECT_TO_JSVAL(typeObj)); + JS_SetReservedSlot(baseType, SLOT_PTR, ObjectValue(*typeObj)); return typeObj; } @@ -4955,7 +4954,7 @@ ArrayType::CreateInternal(JSContext* cx, return nullptr; // Set the element type. - JS_SetReservedSlot(typeObj, SLOT_ELEMENT_T, OBJECT_TO_JSVAL(baseType)); + JS_SetReservedSlot(typeObj, SLOT_ELEMENT_T, ObjectValue(*baseType)); // Set the length. JS_SetReservedSlot(typeObj, SLOT_LENGTH, lengthVal); @@ -5642,7 +5641,7 @@ StructType::DefineInternal(JSContext* cx, JSObject* typeObj_, JSObject* fieldsOb JS_SetReservedSlot(typeObj, SLOT_ALIGN, INT_TO_JSVAL(structAlign)); //if (!JS_FreezeObject(cx, prototype)0 // XXX fixme - see bug 541212! // return false; - JS_SetReservedSlot(typeObj, SLOT_PROTO, OBJECT_TO_JSVAL(prototype)); + JS_SetReservedSlot(typeObj, SLOT_PROTO, ObjectValue(*prototype)); return true; } @@ -5922,7 +5921,7 @@ StructType::FieldsArrayGetter(JSContext* cx, JS::CallArgs args) JSObject* fields = BuildFieldsArray(cx, obj); if (!fields) return false; - JS_SetReservedSlot(obj, SLOT_FIELDS, OBJECT_TO_JSVAL(fields)); + JS_SetReservedSlot(obj, SLOT_FIELDS, ObjectValue(*fields)); args.rval().setObject(*fields); } @@ -6207,7 +6206,7 @@ PrepareCIF(JSContext* cx, FunctionInfo* fninfo) { ffi_abi abi; - if (!GetABI(cx, OBJECT_TO_JSVAL(fninfo->mABI), &abi)) { + if (!GetABI(cx, ObjectOrNullValue(fninfo->mABI), &abi)) { JS_ReportError(cx, "Invalid ABI specification"); return false; } @@ -6473,7 +6472,7 @@ FunctionType::ConstructData(JSContext* cx, return false; // Set the closure object as the referent of the new CData object. - JS_SetReservedSlot(dataObj, SLOT_REFERENT, OBJECT_TO_JSVAL(closureObj)); + JS_SetReservedSlot(dataObj, SLOT_REFERENT, ObjectValue(*closureObj)); // Seal the CData object, to prevent modification of the function pointer. 
// This permanently associates this object with the closure, and avoids @@ -6595,7 +6594,7 @@ FunctionType::Call(JSContext* cx, return false; } if (!(type = CData::GetCType(obj)) || - !(type = PrepareType(cx, OBJECT_TO_JSVAL(type))) || + !(type = PrepareType(cx, ObjectValue(*type))) || // Relying on ImplicitConvert only for the limited purpose of // converting one CType to another (e.g., T[] to T*). !ConvertArgument(cx, obj, i, args[i], type, &values[i], &strings) || @@ -7079,11 +7078,11 @@ CData::Create(JSContext* cx, return nullptr; // set the CData's associated type - JS_SetReservedSlot(dataObj, SLOT_CTYPE, OBJECT_TO_JSVAL(typeObj)); + JS_SetReservedSlot(dataObj, SLOT_CTYPE, ObjectValue(*typeObj)); // Stash the referent object, if any, for GC safety. if (refObj) - JS_SetReservedSlot(dataObj, SLOT_REFERENT, OBJECT_TO_JSVAL(refObj)); + JS_SetReservedSlot(dataObj, SLOT_REFERENT, ObjectValue(*refObj)); // Set our ownership flag. JS_SetReservedSlot(dataObj, SLOT_OWNS, BooleanValue(ownResult)); @@ -7777,15 +7776,15 @@ CDataFinalizer::Construct(JSContext* cx, unsigned argc, jsval* vp) // Used by GetCType JS_SetReservedSlot(objResult, SLOT_DATAFINALIZER_VALTYPE, - OBJECT_TO_JSVAL(objBestArgType)); + ObjectOrNullValue(objBestArgType)); // Used by ToSource JS_SetReservedSlot(objResult, SLOT_DATAFINALIZER_CODETYPE, - OBJECT_TO_JSVAL(objCodePtrType)); + ObjectValue(*objCodePtrType)); ffi_abi abi; - if (!GetABI(cx, OBJECT_TO_JSVAL(funInfoFinalizer->mABI), &abi)) { + if (!GetABI(cx, ObjectOrNullValue(funInfoFinalizer->mABI), &abi)) { JS_ReportError(cx, "Internal Error: " "Invalid ABI specification in CDataFinalizer"); return false; diff --git a/js/src/jsapi-tests/testAddPropertyPropcache.cpp b/js/src/jsapi-tests/testAddPropertyPropcache.cpp index d3212fdcd5f..a079c8c437d 100644 --- a/js/src/jsapi-tests/testAddPropertyPropcache.cpp +++ b/js/src/jsapi-tests/testAddPropertyPropcache.cpp @@ -32,13 +32,13 @@ BEGIN_TEST(testAddPropertyHook) JS::RootedObject obj(cx, 
JS_NewPlainObject(cx)); CHECK(obj); - JS::RootedValue proto(cx, OBJECT_TO_JSVAL(obj)); + JS::RootedValue proto(cx, JS::ObjectValue(*obj)); JS_InitClass(cx, global, obj, &AddPropertyClass, nullptr, 0, nullptr, nullptr, nullptr, nullptr); obj = JS_NewArrayObject(cx, 0); CHECK(obj); - JS::RootedValue arr(cx, OBJECT_TO_JSVAL(obj)); + JS::RootedValue arr(cx, JS::ObjectValue(*obj)); CHECK(JS_DefineProperty(cx, global, "arr", arr, JSPROP_ENUMERATE, diff --git a/js/src/jsapi-tests/testDefineGetterSetterNonEnumerable.cpp b/js/src/jsapi-tests/testDefineGetterSetterNonEnumerable.cpp index 58729b44ca5..adf2bffc41b 100644 --- a/js/src/jsapi-tests/testDefineGetterSetterNonEnumerable.cpp +++ b/js/src/jsapi-tests/testDefineGetterSetterNonEnumerable.cpp @@ -20,17 +20,17 @@ BEGIN_TEST(testDefineGetterSetterNonEnumerable) JS::RootedValue vobj(cx); JS::RootedObject obj(cx, JS_NewPlainObject(cx)); CHECK(obj); - vobj = OBJECT_TO_JSVAL(obj); + vobj.setObject(*obj); JSFunction* funGet = JS_NewFunction(cx, NativeGetterSetter, 0, 0, "get"); CHECK(funGet); JS::RootedObject funGetObj(cx, JS_GetFunctionObject(funGet)); - JS::RootedValue vget(cx, OBJECT_TO_JSVAL(funGetObj)); + JS::RootedValue vget(cx, JS::ObjectValue(*funGetObj)); JSFunction* funSet = JS_NewFunction(cx, NativeGetterSetter, 1, 0, "set"); CHECK(funSet); JS::RootedObject funSetObj(cx, JS_GetFunctionObject(funSet)); - JS::RootedValue vset(cx, OBJECT_TO_JSVAL(funSetObj)); + JS::RootedValue vset(cx, JS::ObjectValue(*funSetObj)); JS::RootedObject vObject(cx, vobj.toObjectOrNull()); CHECK(JS_DefineProperty(cx, vObject, PROPERTY_NAME, diff --git a/js/src/jsapi-tests/testLooselyEqual.cpp b/js/src/jsapi-tests/testLooselyEqual.cpp index 0bbca1a7ae1..4e4ffa27c7b 100644 --- a/js/src/jsapi-tests/testLooselyEqual.cpp +++ b/js/src/jsapi-tests/testLooselyEqual.cpp @@ -57,7 +57,7 @@ struct LooseEqualityData i42 = INT_TO_JSVAL(42); undef = JS::UndefinedValue(); null = JS::NullValue(); - obj = OBJECT_TO_JSVAL(JS::CurrentGlobalOrNull(cx)); + obj = 
JS::ObjectOrNullValue(JS::CurrentGlobalOrNull(cx)); poszero = DOUBLE_TO_JSVAL(0.0); negzero = DOUBLE_TO_JSVAL(-0.0); #ifdef XP_WIN diff --git a/js/src/jsapi-tests/testMappedArrayBuffer.cpp b/js/src/jsapi-tests/testMappedArrayBuffer.cpp index 0e858e714a1..fe598ca10a8 100644 --- a/js/src/jsapi-tests/testMappedArrayBuffer.cpp +++ b/js/src/jsapi-tests/testMappedArrayBuffer.cpp @@ -125,7 +125,7 @@ bool TestCloneObject() JS::RootedObject obj1(cx, CreateNewObject(8, 12)); CHECK(obj1); JSAutoStructuredCloneBuffer cloned_buffer; - JS::RootedValue v1(cx, OBJECT_TO_JSVAL(obj1)); + JS::RootedValue v1(cx, JS::ObjectValue(*obj1)); const JSStructuredCloneCallbacks* callbacks = js::GetContextStructuredCloneCallbacks(cx); CHECK(cloned_buffer.write(cx, v1, callbacks, nullptr)); JS::RootedValue v2(cx); @@ -152,14 +152,14 @@ bool TestTransferObject() { JS::RootedObject obj1(cx, CreateNewObject(8, 12)); CHECK(obj1); - JS::RootedValue v1(cx, OBJECT_TO_JSVAL(obj1)); + JS::RootedValue v1(cx, JS::ObjectValue(*obj1)); // Create an Array of transferable values. 
JS::AutoValueVector argv(cx); argv.append(v1); JS::RootedObject obj(cx, JS_NewArrayObject(cx, JS::HandleValueArray::subarray(argv, 0, 1))); CHECK(obj); - JS::RootedValue transferable(cx, OBJECT_TO_JSVAL(obj)); + JS::RootedValue transferable(cx, JS::ObjectValue(*obj)); JSAutoStructuredCloneBuffer cloned_buffer; const JSStructuredCloneCallbacks* callbacks = js::GetContextStructuredCloneCallbacks(cx); diff --git a/js/src/jsapi-tests/testNewObject.cpp b/js/src/jsapi-tests/testNewObject.cpp index 77c115d9be8..52b97f86002 100644 --- a/js/src/jsapi-tests/testNewObject.cpp +++ b/js/src/jsapi-tests/testNewObject.cpp @@ -75,7 +75,7 @@ BEGIN_TEST(testNewObject_1) argv[0].setInt32(4); obj = JS_New(cx, Array, JS::HandleValueArray::subarray(argv, 0, 1)); CHECK(obj); - rt = OBJECT_TO_JSVAL(obj); + rt = JS::ObjectValue(*obj); CHECK(JS_IsArrayObject(cx, obj)); CHECK(JS_GetArrayLength(cx, obj, &len)); CHECK_EQUAL(len, 4u); @@ -85,7 +85,7 @@ BEGIN_TEST(testNewObject_1) argv[i].setInt32(i); obj = JS_New(cx, Array, JS::HandleValueArray::subarray(argv, 0, N)); CHECK(obj); - rt = OBJECT_TO_JSVAL(obj); + rt = JS::ObjectValue(*obj); CHECK(JS_IsArrayObject(cx, obj)); CHECK(JS_GetArrayLength(cx, obj, &len)); CHECK_EQUAL(len, N); @@ -102,7 +102,7 @@ BEGIN_TEST(testNewObject_1) }; JS::RootedObject ctor(cx, JS_NewObject(cx, &cls)); CHECK(ctor); - JS::RootedValue rt2(cx, OBJECT_TO_JSVAL(ctor)); + JS::RootedValue rt2(cx, JS::ObjectValue(*ctor)); obj = JS_New(cx, ctor, JS::HandleValueArray::subarray(argv, 0, 3)); CHECK(obj); CHECK(JS_GetElement(cx, ctor, 0, &v)); diff --git a/js/src/jsapi-tests/testOps.cpp b/js/src/jsapi-tests/testOps.cpp index c3030f7c619..a1fe6a18d26 100644 --- a/js/src/jsapi-tests/testOps.cpp +++ b/js/src/jsapi-tests/testOps.cpp @@ -34,7 +34,7 @@ createMyObject(JSContext* context, unsigned argc, jsval* vp) //JS_GC(context); //<- if we make GC here, all is ok JSObject* myObject = JS_NewObject(context, &myClass); - *vp = OBJECT_TO_JSVAL(myObject); + *vp = 
JS::ObjectOrNullValue(myObject); JS_EndRequest(context); diff --git a/js/src/jsapi-tests/testParseJSON.cpp b/js/src/jsapi-tests/testParseJSON.cpp index 142b2090a7e..d70e065ada9 100644 --- a/js/src/jsapi-tests/testParseJSON.cpp +++ b/js/src/jsapi-tests/testParseJSON.cpp @@ -341,7 +341,7 @@ BEGIN_TEST(testParseJSON_reviver) JSFunction* fun = JS_NewFunction(cx, Censor, 0, 0, "censor"); CHECK(fun); - JS::RootedValue filter(cx, OBJECT_TO_JSVAL(JS_GetFunctionObject(fun))); + JS::RootedValue filter(cx, JS::ObjectValue(*JS_GetFunctionObject(fun))); CHECK(TryParse(cx, "true", filter)); CHECK(TryParse(cx, "false", filter)); diff --git a/js/src/shell/js.cpp b/js/src/shell/js.cpp index 82800cf1104..bf5854afd0e 100644 --- a/js/src/shell/js.cpp +++ b/js/src/shell/js.cpp @@ -990,7 +990,7 @@ CacheEntry_setBytecode(JSContext* cx, HandleObject cache, uint8_t* buffer, uint3 if (!arrayBuffer) return false; - SetReservedSlot(cache, CacheEntry_BYTECODE, OBJECT_TO_JSVAL(arrayBuffer)); + SetReservedSlot(cache, CacheEntry_BYTECODE, ObjectValue(*arrayBuffer)); return true; } diff --git a/js/xpconnect/src/Sandbox.cpp b/js/xpconnect/src/Sandbox.cpp index 402afcd03a6..23cfd653e96 100644 --- a/js/xpconnect/src/Sandbox.cpp +++ b/js/xpconnect/src/Sandbox.cpp @@ -440,7 +440,7 @@ static bool sandbox_convert(JSContext* cx, HandleObject obj, JSType type, MutableHandleValue vp) { if (type == JSTYPE_OBJECT) { - vp.set(OBJECT_TO_JSVAL(obj)); + vp.setObject(*obj); return true; } diff --git a/js/xpconnect/src/XPCWrappedJSClass.cpp b/js/xpconnect/src/XPCWrappedJSClass.cpp index 6209a81cc35..1619108c4cb 100644 --- a/js/xpconnect/src/XPCWrappedJSClass.cpp +++ b/js/xpconnect/src/XPCWrappedJSClass.cpp @@ -1189,7 +1189,7 @@ nsXPCWrappedJSClass::CallMethod(nsXPCWrappedJS* wrapper, uint16_t methodIndex, goto pre_call_clean_up; } } - *sp++ = OBJECT_TO_JSVAL(out_obj); + *sp++ = JS::ObjectValue(*out_obj); } else *sp++ = val; } diff --git a/js/xpconnect/src/XPCWrappedNativeInfo.cpp 
b/js/xpconnect/src/XPCWrappedNativeInfo.cpp index 0ee6778b0a6..edb47eda924 100644 --- a/js/xpconnect/src/XPCWrappedNativeInfo.cpp +++ b/js/xpconnect/src/XPCWrappedNativeInfo.cpp @@ -99,7 +99,7 @@ XPCNativeMember::Resolve(XPCCallContext& ccx, XPCNativeInterface* iface, js::SetFunctionNativeReserved(funobj, XPC_FUNCTION_PARENT_OBJECT_SLOT, ObjectValue(*parent)); - *vp = OBJECT_TO_JSVAL(funobj); + vp->setObject(*funobj); return true; } diff --git a/js/xpconnect/src/XPCWrappedNativeJSOps.cpp b/js/xpconnect/src/XPCWrappedNativeJSOps.cpp index 8808a55e0d6..a3f62b34098 100644 --- a/js/xpconnect/src/XPCWrappedNativeJSOps.cpp +++ b/js/xpconnect/src/XPCWrappedNativeJSOps.cpp @@ -454,7 +454,7 @@ static bool XPC_WN_Shared_Convert(JSContext* cx, HandleObject obj, JSType type, MutableHandleValue vp) { if (type == JSTYPE_OBJECT) { - vp.set(OBJECT_TO_JSVAL(obj)); + vp.setObject(*obj); return true; } @@ -469,7 +469,7 @@ XPC_WN_Shared_Convert(JSContext* cx, HandleObject obj, JSType type, MutableHandl XPCNativeScriptableInfo* si = wrapper->GetScriptableInfo(); if (si && (si->GetFlags().WantCall() || si->GetFlags().WantConstruct())) { - vp.set(OBJECT_TO_JSVAL(obj)); + vp.setObject(*obj); return true; } } diff --git a/js/xpconnect/wrappers/XrayWrapper.cpp b/js/xpconnect/wrappers/XrayWrapper.cpp index 63b05588af8..58ee6f67c3c 100644 --- a/js/xpconnect/wrappers/XrayWrapper.cpp +++ b/js/xpconnect/wrappers/XrayWrapper.cpp @@ -1002,7 +1002,7 @@ XrayTraits::attachExpandoObject(JSContext* cx, HandleObject target, // Note the exclusive global, if any. JS_SetReservedSlot(expandoObject, JSSLOT_EXPANDO_EXCLUSIVE_GLOBAL, - OBJECT_TO_JSVAL(exclusiveGlobal)); + ObjectOrNullValue(exclusiveGlobal)); // If this is our first expando object, take the opportunity to preserve // the wrapper. This keeps our expandos alive even if the Xray wrapper gets @@ -1012,7 +1012,7 @@ XrayTraits::attachExpandoObject(JSContext* cx, HandleObject target, preserveWrapper(target); // Insert it at the front of the chain. 
- JS_SetReservedSlot(expandoObject, JSSLOT_EXPANDO_NEXT, OBJECT_TO_JSVAL(chain)); + JS_SetReservedSlot(expandoObject, JSSLOT_EXPANDO_NEXT, ObjectOrNullValue(chain)); setExpandoChain(cx, target, expandoObject); return expandoObject; diff --git a/storage/mozStorageAsyncStatementJSHelper.cpp b/storage/mozStorageAsyncStatementJSHelper.cpp index f8efa16283d..1eac10796d7 100644 --- a/storage/mozStorageAsyncStatementJSHelper.cpp +++ b/storage/mozStorageAsyncStatementJSHelper.cpp @@ -66,7 +66,7 @@ AsyncStatementJSHelper::getParams(AsyncStatement *aStatement, obj = aStatement->mStatementParamsHolder->GetJSObject(); NS_ENSURE_STATE(obj); - *_params = OBJECT_TO_JSVAL(obj); + _params->setObject(*obj); return NS_OK; } diff --git a/storage/mozStorageStatementJSHelper.cpp b/storage/mozStorageStatementJSHelper.cpp index 59db1f4454f..ed4ef9d24ef 100644 --- a/storage/mozStorageStatementJSHelper.cpp +++ b/storage/mozStorageStatementJSHelper.cpp @@ -118,7 +118,7 @@ StatementJSHelper::getRow(Statement *aStatement, obj = aStatement->mStatementRowHolder->GetJSObject(); NS_ENSURE_STATE(obj); - *_row = OBJECT_TO_JSVAL(obj); + _row->setObject(*obj); return NS_OK; } @@ -164,7 +164,7 @@ StatementJSHelper::getParams(Statement *aStatement, obj = aStatement->mStatementParamsHolder->GetJSObject(); NS_ENSURE_STATE(obj); - *_params = OBJECT_TO_JSVAL(obj); + _params->setObject(*obj); return NS_OK; } diff --git a/storage/mozStorageStatementRow.cpp b/storage/mozStorageStatementRow.cpp index f582a31a573..2436b6e32ae 100644 --- a/storage/mozStorageStatementRow.cpp +++ b/storage/mozStorageStatementRow.cpp @@ -91,7 +91,7 @@ StatementRow::GetProperty(nsIXPConnectWrappedNative *aWrapper, *_retval = false; return NS_OK; } - *_vp = OBJECT_TO_JSVAL(obj); + _vp->setObject(*obj); // Copy the blob over to the JS array. 
for (uint32_t i = 0; i < length; i++) { diff --git a/xpfe/components/directory/nsDirectoryViewer.cpp b/xpfe/components/directory/nsDirectoryViewer.cpp index f20c5d02458..9f4e82aab47 100644 --- a/xpfe/components/directory/nsDirectoryViewer.cpp +++ b/xpfe/components/directory/nsDirectoryViewer.cpp @@ -254,7 +254,7 @@ nsHTTPIndex::OnStartRequest(nsIRequest *request, nsISupports* aContext) "unable to get jsobj from xpconnect wrapper"); if (!jsobj) return NS_ERROR_UNEXPECTED; - JS::Rooted jslistener(cx, OBJECT_TO_JSVAL(jsobj)); + JS::Rooted jslistener(cx, JS::ObjectValue(*jsobj)); // ...and stuff it into the global context bool ok = JS_SetProperty(cx, global, "HTTPIndex", jslistener); From 1578d8d59fe97b582f263d66f84329b3471b1d9c Mon Sep 17 00:00:00 2001 From: Jan de Mooij Date: Tue, 30 Jun 2015 21:10:04 -0700 Subject: [PATCH 31/61] Bug 1177892 part 4 - Remove INT_TO_JSVAL. r=evilpie --- dom/base/nsJSEnvironment.cpp | 10 +-- dom/bindings/Codegen.py | 2 +- dom/bluetooth/BluetoothUtils.cpp | 2 +- dom/system/OSFileConstants.cpp | 82 +++++++++++----------- js/public/Value.h | 14 +--- js/src/ctypes/CTypes.cpp | 24 +++---- js/src/jsapi-tests/testArgumentsObject.cpp | 4 +- js/src/jsapi-tests/testArrayBuffer.cpp | 16 ++--- js/src/jsapi-tests/testClassGetter.cpp | 4 +- js/src/jsapi-tests/testDebugger.cpp | 2 +- js/src/jsapi-tests/testLooselyEqual.cpp | 2 +- js/src/jsapi-tests/testNewObject.cpp | 2 +- js/src/jsapi-tests/testOps.cpp | 2 +- js/src/jsapi-tests/testParseJSON.cpp | 8 +-- js/src/jsapi-tests/testSetProperty.cpp | 2 +- js/src/jsapi-tests/testTypedArrays.cpp | 10 +-- js/src/jsapi.h | 2 +- js/xpconnect/src/XPCLocale.cpp | 2 +- layout/xul/tree/TreeBoxObject.cpp | 8 +-- toolkit/components/telemetry/Telemetry.cpp | 2 +- 20 files changed, 96 insertions(+), 104 deletions(-) diff --git a/dom/base/nsJSEnvironment.cpp b/dom/base/nsJSEnvironment.cpp index 459d3354cc1..877ab1da97b 100644 --- a/dom/base/nsJSEnvironment.cpp +++ b/dom/base/nsJSEnvironment.cpp @@ -972,7 +972,7 @@ 
nsJSContext::AddSupportsPrimitiveTojsvals(nsISupports *aArg, JS::Value *aArgv) p->GetData(&data); - *aArgv = INT_TO_JSVAL(data); + aArgv->setInt32(data); break; } @@ -984,7 +984,7 @@ nsJSContext::AddSupportsPrimitiveTojsvals(nsISupports *aArg, JS::Value *aArgv) p->GetData(&data); - *aArgv = INT_TO_JSVAL(data); + aArgv->setInt32(data); break; } @@ -996,7 +996,7 @@ nsJSContext::AddSupportsPrimitiveTojsvals(nsISupports *aArg, JS::Value *aArgv) p->GetData(&data); - *aArgv = INT_TO_JSVAL(data); + aArgv->setInt32(data); break; } @@ -1023,7 +1023,7 @@ nsJSContext::AddSupportsPrimitiveTojsvals(nsISupports *aArg, JS::Value *aArgv) p->GetData(&data); - *aArgv = INT_TO_JSVAL(data); + aArgv->setInt32(data); break; } @@ -1035,7 +1035,7 @@ nsJSContext::AddSupportsPrimitiveTojsvals(nsISupports *aArg, JS::Value *aArgv) p->GetData(&data); - *aArgv = INT_TO_JSVAL(data); + aArgv->setInt32(data); break; } diff --git a/dom/bindings/Codegen.py b/dom/bindings/Codegen.py index 4253a759c47..1af40129f59 100644 --- a/dom/bindings/Codegen.py +++ b/dom/bindings/Codegen.py @@ -5570,7 +5570,7 @@ def convertConstIDLValueToJSVal(value): tag = value.type.tag() if tag in [IDLType.Tags.int8, IDLType.Tags.uint8, IDLType.Tags.int16, IDLType.Tags.uint16, IDLType.Tags.int32]: - return "INT_TO_JSVAL(%s)" % (value.value) + return "JS::Int32Value(%s)" % (value.value) if tag == IDLType.Tags.uint32: return "UINT_TO_JSVAL(%sU)" % (value.value) if tag in [IDLType.Tags.int64, IDLType.Tags.uint64]: diff --git a/dom/bluetooth/BluetoothUtils.cpp b/dom/bluetooth/BluetoothUtils.cpp index 92bfc31fbf5..cf8c4dafafe 100644 --- a/dom/bluetooth/BluetoothUtils.cpp +++ b/dom/bluetooth/BluetoothUtils.cpp @@ -159,7 +159,7 @@ SetJsObject(JSContext* aContext, break; } case BluetoothValue::Tuint32_t: - val = INT_TO_JSVAL(v.get_uint32_t()); + val.setInt32(v.get_uint32_t()); break; case BluetoothValue::Tbool: val.setBoolean(v.get_bool()); diff --git a/dom/system/OSFileConstants.cpp b/dom/system/OSFileConstants.cpp index 
8594cfb61ba..ca98d218229 100644 --- a/dom/system/OSFileConstants.cpp +++ b/dom/system/OSFileConstants.cpp @@ -357,7 +357,7 @@ void CleanupOSFileConstants() * Produces a |ConstantSpec|. */ #define INT_CONSTANT(name) \ - { #name, INT_TO_JSVAL(name) } + { #name, JS::Int32Value(name) } /** * Define a simple read-only property holding an unsigned integer. @@ -576,94 +576,94 @@ static const dom::ConstantSpec gLibcProperties[] = #if defined(XP_UNIX) // The size of |mode_t|. - { "OSFILE_SIZEOF_MODE_T", INT_TO_JSVAL(sizeof (mode_t)) }, + { "OSFILE_SIZEOF_MODE_T", JS::Int32Value(sizeof (mode_t)) }, // The size of |gid_t|. - { "OSFILE_SIZEOF_GID_T", INT_TO_JSVAL(sizeof (gid_t)) }, + { "OSFILE_SIZEOF_GID_T", JS::Int32Value(sizeof (gid_t)) }, // The size of |uid_t|. - { "OSFILE_SIZEOF_UID_T", INT_TO_JSVAL(sizeof (uid_t)) }, + { "OSFILE_SIZEOF_UID_T", JS::Int32Value(sizeof (uid_t)) }, // The size of |time_t|. - { "OSFILE_SIZEOF_TIME_T", INT_TO_JSVAL(sizeof (time_t)) }, + { "OSFILE_SIZEOF_TIME_T", JS::Int32Value(sizeof (time_t)) }, // The size of |fsblkcnt_t|. - { "OSFILE_SIZEOF_FSBLKCNT_T", INT_TO_JSVAL(sizeof (fsblkcnt_t)) }, + { "OSFILE_SIZEOF_FSBLKCNT_T", JS::Int32Value(sizeof (fsblkcnt_t)) }, #if !defined(ANDROID) // The size of |posix_spawn_file_actions_t|. - { "OSFILE_SIZEOF_POSIX_SPAWN_FILE_ACTIONS_T", INT_TO_JSVAL(sizeof (posix_spawn_file_actions_t)) }, + { "OSFILE_SIZEOF_POSIX_SPAWN_FILE_ACTIONS_T", JS::Int32Value(sizeof (posix_spawn_file_actions_t)) }, #endif // !defined(ANDROID) // Defining |dirent|. // Size - { "OSFILE_SIZEOF_DIRENT", INT_TO_JSVAL(sizeof (dirent)) }, + { "OSFILE_SIZEOF_DIRENT", JS::Int32Value(sizeof (dirent)) }, // Defining |flock|. 
#if defined(XP_UNIX) - { "OSFILE_SIZEOF_FLOCK", INT_TO_JSVAL(sizeof (struct flock)) }, - { "OSFILE_OFFSETOF_FLOCK_L_START", INT_TO_JSVAL(offsetof (struct flock, l_start)) }, - { "OSFILE_OFFSETOF_FLOCK_L_LEN", INT_TO_JSVAL(offsetof (struct flock, l_len)) }, - { "OSFILE_OFFSETOF_FLOCK_L_PID", INT_TO_JSVAL(offsetof (struct flock, l_pid)) }, - { "OSFILE_OFFSETOF_FLOCK_L_TYPE", INT_TO_JSVAL(offsetof (struct flock, l_type)) }, - { "OSFILE_OFFSETOF_FLOCK_L_WHENCE", INT_TO_JSVAL(offsetof (struct flock, l_whence)) }, + { "OSFILE_SIZEOF_FLOCK", JS::Int32Value(sizeof (struct flock)) }, + { "OSFILE_OFFSETOF_FLOCK_L_START", JS::Int32Value(offsetof (struct flock, l_start)) }, + { "OSFILE_OFFSETOF_FLOCK_L_LEN", JS::Int32Value(offsetof (struct flock, l_len)) }, + { "OSFILE_OFFSETOF_FLOCK_L_PID", JS::Int32Value(offsetof (struct flock, l_pid)) }, + { "OSFILE_OFFSETOF_FLOCK_L_TYPE", JS::Int32Value(offsetof (struct flock, l_type)) }, + { "OSFILE_OFFSETOF_FLOCK_L_WHENCE", JS::Int32Value(offsetof (struct flock, l_whence)) }, #endif // defined(XP_UNIX) // Offset of field |d_name|. - { "OSFILE_OFFSETOF_DIRENT_D_NAME", INT_TO_JSVAL(offsetof (struct dirent, d_name)) }, + { "OSFILE_OFFSETOF_DIRENT_D_NAME", JS::Int32Value(offsetof (struct dirent, d_name)) }, // An upper bound to the length of field |d_name| of struct |dirent|. // (may not be exact, depending on padding). - { "OSFILE_SIZEOF_DIRENT_D_NAME", INT_TO_JSVAL(sizeof (struct dirent) - offsetof (struct dirent, d_name)) }, + { "OSFILE_SIZEOF_DIRENT_D_NAME", JS::Int32Value(sizeof (struct dirent) - offsetof (struct dirent, d_name)) }, // Defining |timeval|. 
- { "OSFILE_SIZEOF_TIMEVAL", INT_TO_JSVAL(sizeof (struct timeval)) }, - { "OSFILE_OFFSETOF_TIMEVAL_TV_SEC", INT_TO_JSVAL(offsetof (struct timeval, tv_sec)) }, - { "OSFILE_OFFSETOF_TIMEVAL_TV_USEC", INT_TO_JSVAL(offsetof (struct timeval, tv_usec)) }, + { "OSFILE_SIZEOF_TIMEVAL", JS::Int32Value(sizeof (struct timeval)) }, + { "OSFILE_OFFSETOF_TIMEVAL_TV_SEC", JS::Int32Value(offsetof (struct timeval, tv_sec)) }, + { "OSFILE_OFFSETOF_TIMEVAL_TV_USEC", JS::Int32Value(offsetof (struct timeval, tv_usec)) }, #if defined(DT_UNKNOWN) // Position of field |d_type| in |dirent| // Not strictly posix, but seems defined on all platforms // except mingw32. - { "OSFILE_OFFSETOF_DIRENT_D_TYPE", INT_TO_JSVAL(offsetof (struct dirent, d_type)) }, + { "OSFILE_OFFSETOF_DIRENT_D_TYPE", JS::Int32Value(offsetof (struct dirent, d_type)) }, #endif // defined(DT_UNKNOWN) // Under MacOS X and BSDs, |dirfd| is a macro rather than a // function, so we need a little help to get it to work #if defined(dirfd) - { "OSFILE_SIZEOF_DIR", INT_TO_JSVAL(sizeof (DIR)) }, + { "OSFILE_SIZEOF_DIR", JS::Int32Value(sizeof (DIR)) }, - { "OSFILE_OFFSETOF_DIR_DD_FD", INT_TO_JSVAL(offsetof (DIR, __dd_fd)) }, + { "OSFILE_OFFSETOF_DIR_DD_FD", JS::Int32Value(offsetof (DIR, __dd_fd)) }, #endif // Defining |stat| - { "OSFILE_SIZEOF_STAT", INT_TO_JSVAL(sizeof (struct stat)) }, + { "OSFILE_SIZEOF_STAT", JS::Int32Value(sizeof (struct stat)) }, - { "OSFILE_OFFSETOF_STAT_ST_MODE", INT_TO_JSVAL(offsetof (struct stat, st_mode)) }, - { "OSFILE_OFFSETOF_STAT_ST_UID", INT_TO_JSVAL(offsetof (struct stat, st_uid)) }, - { "OSFILE_OFFSETOF_STAT_ST_GID", INT_TO_JSVAL(offsetof (struct stat, st_gid)) }, - { "OSFILE_OFFSETOF_STAT_ST_SIZE", INT_TO_JSVAL(offsetof (struct stat, st_size)) }, + { "OSFILE_OFFSETOF_STAT_ST_MODE", JS::Int32Value(offsetof (struct stat, st_mode)) }, + { "OSFILE_OFFSETOF_STAT_ST_UID", JS::Int32Value(offsetof (struct stat, st_uid)) }, + { "OSFILE_OFFSETOF_STAT_ST_GID", JS::Int32Value(offsetof (struct stat, st_gid)) 
}, + { "OSFILE_OFFSETOF_STAT_ST_SIZE", JS::Int32Value(offsetof (struct stat, st_size)) }, #if defined(HAVE_ST_ATIMESPEC) - { "OSFILE_OFFSETOF_STAT_ST_ATIME", INT_TO_JSVAL(offsetof (struct stat, st_atimespec)) }, - { "OSFILE_OFFSETOF_STAT_ST_MTIME", INT_TO_JSVAL(offsetof (struct stat, st_mtimespec)) }, - { "OSFILE_OFFSETOF_STAT_ST_CTIME", INT_TO_JSVAL(offsetof (struct stat, st_ctimespec)) }, + { "OSFILE_OFFSETOF_STAT_ST_ATIME", JS::Int32Value(offsetof (struct stat, st_atimespec)) }, + { "OSFILE_OFFSETOF_STAT_ST_MTIME", JS::Int32Value(offsetof (struct stat, st_mtimespec)) }, + { "OSFILE_OFFSETOF_STAT_ST_CTIME", JS::Int32Value(offsetof (struct stat, st_ctimespec)) }, #else - { "OSFILE_OFFSETOF_STAT_ST_ATIME", INT_TO_JSVAL(offsetof (struct stat, st_atime)) }, - { "OSFILE_OFFSETOF_STAT_ST_MTIME", INT_TO_JSVAL(offsetof (struct stat, st_mtime)) }, - { "OSFILE_OFFSETOF_STAT_ST_CTIME", INT_TO_JSVAL(offsetof (struct stat, st_ctime)) }, + { "OSFILE_OFFSETOF_STAT_ST_ATIME", JS::Int32Value(offsetof (struct stat, st_atime)) }, + { "OSFILE_OFFSETOF_STAT_ST_MTIME", JS::Int32Value(offsetof (struct stat, st_mtime)) }, + { "OSFILE_OFFSETOF_STAT_ST_CTIME", JS::Int32Value(offsetof (struct stat, st_ctime)) }, #endif // defined(HAVE_ST_ATIME) // Several OSes have a birthtime field. For the moment, supporting only Darwin. 
#if defined(_DARWIN_FEATURE_64_BIT_INODE) - { "OSFILE_OFFSETOF_STAT_ST_BIRTHTIME", INT_TO_JSVAL(offsetof (struct stat, st_birthtime)) }, + { "OSFILE_OFFSETOF_STAT_ST_BIRTHTIME", JS::Int32Value(offsetof (struct stat, st_birthtime)) }, #endif // defined(_DARWIN_FEATURE_64_BIT_INODE) // Defining |statvfs| - { "OSFILE_SIZEOF_STATVFS", INT_TO_JSVAL(sizeof (struct statvfs)) }, + { "OSFILE_SIZEOF_STATVFS", JS::Int32Value(sizeof (struct statvfs)) }, - { "OSFILE_OFFSETOF_STATVFS_F_BSIZE", INT_TO_JSVAL(offsetof (struct statvfs, f_bsize)) }, - { "OSFILE_OFFSETOF_STATVFS_F_BAVAIL", INT_TO_JSVAL(offsetof (struct statvfs, f_bavail)) }, + { "OSFILE_OFFSETOF_STATVFS_F_BSIZE", JS::Int32Value(offsetof (struct statvfs, f_bsize)) }, + { "OSFILE_OFFSETOF_STATVFS_F_BAVAIL", JS::Int32Value(offsetof (struct statvfs, f_bavail)) }, #endif // defined(XP_UNIX) @@ -678,7 +678,7 @@ static const dom::ConstantSpec gLibcProperties[] = // whenever macro _DARWIN_FEATURE_64_BIT_INODE is set. We export // this value to be able to do so from JavaScript. 
#if defined(_DARWIN_FEATURE_64_BIT_INODE) - { "_DARWIN_FEATURE_64_BIT_INODE", INT_TO_JSVAL(1) }, + { "_DARWIN_FEATURE_64_BIT_INODE", JS::Int32Value(1) }, #endif // defined(_DARWIN_FEATURE_64_BIT_INODE) // Similar feature for Linux @@ -738,7 +738,7 @@ static const dom::ConstantSpec gWinProperties[] = INT_CONSTANT(FILE_FLAG_BACKUP_SEMANTICS), // CreateFile error constant - { "INVALID_HANDLE_VALUE", INT_TO_JSVAL(INT_PTR(INVALID_HANDLE_VALUE)) }, + { "INVALID_HANDLE_VALUE", JS::Int32Value(INT_PTR(INVALID_HANDLE_VALUE)) }, // CreateFile flags @@ -922,9 +922,9 @@ bool DefineOSFileConstants(JSContext *cx, JS::Handle global) #endif #if defined(HAVE_64BIT_BUILD) - JS::Rooted valBits(cx, INT_TO_JSVAL(64)); + JS::Rooted valBits(cx, JS::Int32Value(64)); #else - JS::Rooted valBits(cx, INT_TO_JSVAL(32)); + JS::Rooted valBits(cx, JS::Int32Value(32)); #endif //defined (HAVE_64BIT_BUILD) if (!JS_SetProperty(cx, objSys, "bits", valBits)) { return false; diff --git a/js/public/Value.h b/js/public/Value.h index 4c769e04910..ddade8a3844 100644 --- a/js/public/Value.h +++ b/js/public/Value.h @@ -1392,12 +1392,10 @@ UndefinedValue() #endif } -static inline Value +static inline JS_VALUE_CONSTEXPR Value Int32Value(int32_t i32) { - Value v; - v.setInt32(i32); - return v; + return IMPL_TO_JSVAL(INT32_TO_JSVAL_IMPL(i32)); } static inline Value @@ -1934,12 +1932,6 @@ static_assert(sizeof(jsval_layout) == sizeof(JS::Value), /************************************************************************/ -static inline JS_VALUE_CONSTEXPR jsval -INT_TO_JSVAL(int32_t i) -{ - return IMPL_TO_JSVAL(INT32_TO_JSVAL_IMPL(i)); -} - static inline JS_VALUE_CONSTEXPR jsval DOUBLE_TO_JSVAL(double d) { @@ -1967,7 +1959,7 @@ static inline JS_VALUE_CONSTEXPR jsval UINT_TO_JSVAL(uint32_t i) { return i <= JSVAL_INT_MAX - ? INT_TO_JSVAL((int32_t)i) + ? 
JS::Int32Value(int32_t(i)) : DOUBLE_TO_JSVAL((double)i); } diff --git a/js/src/ctypes/CTypes.cpp b/js/src/ctypes/CTypes.cpp index 537959d0deb..7ea0634827b 100644 --- a/js/src/ctypes/CTypes.cpp +++ b/js/src/ctypes/CTypes.cpp @@ -1590,7 +1590,7 @@ DefineABIConstant(JSContext* cx, RootedObject obj(cx, JS_NewObjectWithGivenProto(cx, &sCABIClass, prototype)); if (!obj) return false; - JS_SetReservedSlot(obj, SLOT_ABICODE, INT_TO_JSVAL(code)); + JS_SetReservedSlot(obj, SLOT_ABICODE, Int32Value(code)); if (!JS_FreezeObject(cx, obj)) return false; @@ -1866,8 +1866,8 @@ InitTypeClasses(JSContext* cx, HandleObject ctypesObj) #define DEFINE_TYPE(name, type, ffiType) \ RootedObject typeObj_##name(cx, \ CType::DefineBuiltin(cx, ctypesObj, #name, CTypeProto, CDataProto, #name, \ - TYPE_##name, INT_TO_JSVAL(sizeof(type)), \ - INT_TO_JSVAL(ffiType.alignment), &ffiType)); \ + TYPE_##name, Int32Value(sizeof(type)), \ + Int32Value(ffiType.alignment), &ffiType)); \ if (!typeObj_##name) \ return false; CTYPES_FOR_EACH_TYPE(DEFINE_TYPE) @@ -3926,7 +3926,7 @@ CType::Create(JSContext* cx, return nullptr; // Set up the reserved slots. - JS_SetReservedSlot(typeObj, SLOT_TYPECODE, INT_TO_JSVAL(type)); + JS_SetReservedSlot(typeObj, SLOT_TYPECODE, Int32Value(type)); if (ffiType) JS_SetReservedSlot(typeObj, SLOT_FFITYPE, PrivateValue(ffiType)); if (name) @@ -4622,8 +4622,8 @@ PointerType::CreateInternal(JSContext* cx, HandleObject baseType) // Create a new CType object with the common properties and slots. JSObject* typeObj = CType::Create(cx, typeProto, dataProto, TYPE_pointer, - nullptr, INT_TO_JSVAL(sizeof(void*)), - INT_TO_JSVAL(ffi_type_pointer.alignment), + nullptr, Int32Value(sizeof(void*)), + Int32Value(ffi_type_pointer.alignment), &ffi_type_pointer); if (!typeObj) return nullptr; @@ -4949,7 +4949,7 @@ ArrayType::CreateInternal(JSContext* cx, // Create a new CType object with the common properties and slots. 
JSObject* typeObj = CType::Create(cx, typeProto, dataProto, TYPE_array, nullptr, - sizeVal, INT_TO_JSVAL(align), nullptr); + sizeVal, Int32Value(align), nullptr); if (!typeObj) return nullptr; @@ -5638,7 +5638,7 @@ StructType::DefineInternal(JSContext* cx, JSObject* typeObj_, JSObject* fieldsOb JS_SetReservedSlot(typeObj, SLOT_FIELDINFO, PrivateValue(fields.release())); JS_SetReservedSlot(typeObj, SLOT_SIZE, sizeVal); - JS_SetReservedSlot(typeObj, SLOT_ALIGN, INT_TO_JSVAL(structAlign)); + JS_SetReservedSlot(typeObj, SLOT_ALIGN, Int32Value(structAlign)); //if (!JS_FreezeObject(cx, prototype)0 // XXX fixme - see bug 541212! // return false; JS_SetReservedSlot(typeObj, SLOT_PROTO, ObjectValue(*prototype)); @@ -6654,9 +6654,9 @@ FunctionType::Call(JSContext* cx, if (!objCTypes) return false; - JS_SetReservedSlot(objCTypes, SLOT_ERRNO, INT_TO_JSVAL(errnoStatus)); + JS_SetReservedSlot(objCTypes, SLOT_ERRNO, Int32Value(errnoStatus)); #if defined(XP_WIN) - JS_SetReservedSlot(objCTypes, SLOT_LASTERROR, INT_TO_JSVAL(lastErrorStatus)); + JS_SetReservedSlot(objCTypes, SLOT_LASTERROR, Int32Value(lastErrorStatus)); #endif // defined(XP_WIN) // Small integer types get returned as a word-sized ffi_arg. 
Coerce it back @@ -7965,9 +7965,9 @@ CDataFinalizer::Methods::Dispose(JSContext* cx, unsigned argc, jsval* vp) CDataFinalizer::CallFinalizer(p, &errnoStatus, nullptr); #endif // defined(XP_WIN) - JS_SetReservedSlot(objCTypes, SLOT_ERRNO, INT_TO_JSVAL(errnoStatus)); + JS_SetReservedSlot(objCTypes, SLOT_ERRNO, Int32Value(errnoStatus)); #if defined(XP_WIN) - JS_SetReservedSlot(objCTypes, SLOT_LASTERROR, INT_TO_JSVAL(lastErrorStatus)); + JS_SetReservedSlot(objCTypes, SLOT_LASTERROR, Int32Value(lastErrorStatus)); #endif // defined(XP_WIN) if (ConvertToJS(cx, resultType, nullptr, p->rvalue, false, true, &result)) { diff --git a/js/src/jsapi-tests/testArgumentsObject.cpp b/js/src/jsapi-tests/testArgumentsObject.cpp index 5880d21ceab..af7b8ce3ad5 100644 --- a/js/src/jsapi-tests/testArgumentsObject.cpp +++ b/js/src/jsapi-tests/testArgumentsObject.cpp @@ -92,10 +92,10 @@ ExhaustiveTest(const char funcode[]) ClearElements(elems); CHECK(argsobj->maybeGetElements(i, j, elems.begin())); for (size_t k = 0; k < j; k++) - CHECK_SAME(elems[k], INT_TO_JSVAL(i + k)); + CHECK(elems[k].isInt32(i + k)); for (size_t k = j; k < MAX_ELEMS - 1; k++) CHECK(elems[k].isNull()); - CHECK_SAME(elems[MAX_ELEMS - 1], INT_TO_JSVAL(42)); + CHECK(elems[MAX_ELEMS - 1].isInt32(42)); } } diff --git a/js/src/jsapi-tests/testArrayBuffer.cpp b/js/src/jsapi-tests/testArrayBuffer.cpp index f83a2b3698f..db84d0e7aa0 100644 --- a/js/src/jsapi-tests/testArrayBuffer.cpp +++ b/js/src/jsapi-tests/testArrayBuffer.cpp @@ -39,9 +39,9 @@ BEGIN_TEST(testArrayBuffer_bug720949_steal) CHECK(JS_IsArrayBufferObject(obj)); CHECK_EQUAL(JS_GetArrayBufferByteLength(obj), size); JS_GetProperty(cx, obj, "byteLength", &v); - CHECK_SAME(v, INT_TO_JSVAL(size)); + CHECK(v.isInt32(size)); JS_GetProperty(cx, view, "byteLength", &v); - CHECK_SAME(v, INT_TO_JSVAL(size)); + CHECK(v.isInt32(size)); // Modifying the underlying data should update the value returned through the view { @@ -51,7 +51,7 @@ BEGIN_TEST(testArrayBuffer_bug720949_steal) 
*reinterpret_cast(data) = MAGIC_VALUE_2; } CHECK(JS_GetElement(cx, view, 0, &v)); - CHECK_SAME(v, INT_TO_JSVAL(MAGIC_VALUE_2)); + CHECK(v.isInt32(MAGIC_VALUE_2)); // Steal the contents void* contents = JS_StealArrayBufferContents(cx, obj); @@ -60,13 +60,13 @@ BEGIN_TEST(testArrayBuffer_bug720949_steal) // Check that the original ArrayBuffer is neutered CHECK_EQUAL(JS_GetArrayBufferByteLength(obj), 0u); CHECK(JS_GetProperty(cx, obj, "byteLength", &v)); - CHECK_SAME(v, INT_TO_JSVAL(0)); + CHECK(v.isInt32(0)); CHECK(JS_GetProperty(cx, view, "byteLength", &v)); - CHECK_SAME(v, INT_TO_JSVAL(0)); + CHECK(v.isInt32(0)); CHECK(JS_GetProperty(cx, view, "byteOffset", &v)); - CHECK_SAME(v, INT_TO_JSVAL(0)); + CHECK(v.isInt32(0)); CHECK(JS_GetProperty(cx, view, "length", &v)); - CHECK_SAME(v, INT_TO_JSVAL(0)); + CHECK(v.isInt32(0)); CHECK_EQUAL(JS_GetArrayBufferByteLength(obj), 0u); v.setUndefined(); JS_GetElement(cx, obj, 0, &v); @@ -91,7 +91,7 @@ BEGIN_TEST(testArrayBuffer_bug720949_steal) CHECK_EQUAL(*reinterpret_cast(data), MAGIC_VALUE_2); } CHECK(JS_GetElement(cx, dstview, 0, &v)); - CHECK_SAME(v, INT_TO_JSVAL(MAGIC_VALUE_2)); + CHECK(v.isInt32(MAGIC_VALUE_2)); } return true; diff --git a/js/src/jsapi-tests/testClassGetter.cpp b/js/src/jsapi-tests/testClassGetter.cpp index 2fab23b8961..b94feebd9fc 100644 --- a/js/src/jsapi-tests/testClassGetter.cpp +++ b/js/src/jsapi-tests/testClassGetter.cpp @@ -62,8 +62,8 @@ BEGIN_TEST(testClassGetter_isCalled) JS::RootedValue rval(cx); CHECK(JS_CallFunctionName(cx, global, "check", JS::HandleValueArray::empty(), &rval)); - CHECK_SAME(INT_TO_JSVAL(called_test_fn), INT_TO_JSVAL(i)); - CHECK_SAME(INT_TO_JSVAL(called_test_prop_get), INT_TO_JSVAL(4 * i)); + CHECK(called_test_fn == i); + CHECK(called_test_prop_get == 4 * i); } return true; } diff --git a/js/src/jsapi-tests/testDebugger.cpp b/js/src/jsapi-tests/testDebugger.cpp index 44e70f7ec1c..aaecf9f4223 100644 --- a/js/src/jsapi-tests/testDebugger.cpp +++ 
b/js/src/jsapi-tests/testDebugger.cpp @@ -57,7 +57,7 @@ bool testIndirectEval(JS::HandleObject scope, const char* code) JS::RootedValue hitsv(cx); EVAL("hits", &hitsv); - CHECK_SAME(hitsv, INT_TO_JSVAL(1)); + CHECK(hitsv.isInt32(1)); return true; } END_TEST(testDebugger_newScriptHook) diff --git a/js/src/jsapi-tests/testLooselyEqual.cpp b/js/src/jsapi-tests/testLooselyEqual.cpp index 4e4ffa27c7b..888ec485cb6 100644 --- a/js/src/jsapi-tests/testLooselyEqual.cpp +++ b/js/src/jsapi-tests/testLooselyEqual.cpp @@ -54,7 +54,7 @@ struct LooseEqualityData qNaN = DOUBLE_TO_JSVAL(numeric_limits::quiet_NaN()); sNaN = DOUBLE_TO_JSVAL(numeric_limits::signaling_NaN()); d42 = DOUBLE_TO_JSVAL(42.0); - i42 = INT_TO_JSVAL(42); + i42 = JS::Int32Value(42); undef = JS::UndefinedValue(); null = JS::NullValue(); obj = JS::ObjectOrNullValue(JS::CurrentGlobalOrNull(cx)); diff --git a/js/src/jsapi-tests/testNewObject.cpp b/js/src/jsapi-tests/testNewObject.cpp index 52b97f86002..3178140f479 100644 --- a/js/src/jsapi-tests/testNewObject.cpp +++ b/js/src/jsapi-tests/testNewObject.cpp @@ -90,7 +90,7 @@ BEGIN_TEST(testNewObject_1) CHECK(JS_GetArrayLength(cx, obj, &len)); CHECK_EQUAL(len, N); CHECK(JS_GetElement(cx, obj, N - 1, &v)); - CHECK_SAME(v, INT_TO_JSVAL(N - 1)); + CHECK(v.isInt32(N - 1)); // With JSClass.construct. 
static const JSClass cls = { diff --git a/js/src/jsapi-tests/testOps.cpp b/js/src/jsapi-tests/testOps.cpp index a1fe6a18d26..1cd34cac6e5 100644 --- a/js/src/jsapi-tests/testOps.cpp +++ b/js/src/jsapi-tests/testOps.cpp @@ -57,7 +57,7 @@ BEGIN_TEST(testOps_bug559006) JS::RootedValue rval(cx); CHECK(JS_CallFunctionName(cx, global, "main", JS::HandleValueArray::empty(), &rval)); - CHECK_SAME(rval, INT_TO_JSVAL(123)); + CHECK(rval.isInt32(123)); } return true; } diff --git a/js/src/jsapi-tests/testParseJSON.cpp b/js/src/jsapi-tests/testParseJSON.cpp index d70e065ada9..f54a891409b 100644 --- a/js/src/jsapi-tests/testParseJSON.cpp +++ b/js/src/jsapi-tests/testParseJSON.cpp @@ -48,13 +48,13 @@ BEGIN_TEST(testParseJSON_success) expected = JS::NullValue(); CHECK(TryParse(cx, "null", expected)); - expected = INT_TO_JSVAL(0); + expected.setInt32(0); CHECK(TryParse(cx, "0", expected)); - expected = INT_TO_JSVAL(1); + expected.setInt32(1); CHECK(TryParse(cx, "1", expected)); - expected = INT_TO_JSVAL(-1); + expected.setInt32(-1); CHECK(TryParse(cx, "-1", expected)); expected = DOUBLE_TO_JSVAL(1); @@ -130,7 +130,7 @@ BEGIN_TEST(testParseJSON_success) obj = &v.toObject(); CHECK(!JS_IsArrayObject(cx, obj)); CHECK(JS_GetProperty(cx, obj, "f", &v2)); - CHECK_SAME(v2, INT_TO_JSVAL(17)); + CHECK(v2.isInt32(17)); return true; } diff --git a/js/src/jsapi-tests/testSetProperty.cpp b/js/src/jsapi-tests/testSetProperty.cpp index 69556d49944..a3c65c6b8e0 100644 --- a/js/src/jsapi-tests/testSetProperty.cpp +++ b/js/src/jsapi-tests/testSetProperty.cpp @@ -58,7 +58,7 @@ BEGIN_TEST(testSetProperty_NativeGetterStubSetter) static bool NativeGet(JSContext* cx, JS::HandleObject obj, JS::HandleId id, JS::MutableHandleValue vp) { - vp.set(INT_TO_JSVAL(17)); + vp.setInt32(17); return true; } END_TEST(testSetProperty_NativeGetterStubSetter) diff --git a/js/src/jsapi-tests/testTypedArrays.cpp b/js/src/jsapi-tests/testTypedArrays.cpp index fc31e3721a0..031c8425346 100644 --- 
a/js/src/jsapi-tests/testTypedArrays.cpp +++ b/js/src/jsapi-tests/testTypedArrays.cpp @@ -83,7 +83,7 @@ TestPlainTypedArray(JSContext* cx) } RootedValue v(cx); CHECK(JS_GetElement(cx, array, 0, &v)); - CHECK_SAME(v, INT_TO_JSVAL(13)); + CHECK_SAME(v, Int32Value(13)); return true; } @@ -135,7 +135,7 @@ TestArrayFromBuffer(JSContext* cx) CHECK_EQUAL(JS_GetTypedArrayByteLength(ofsArray), nbytes / 2); // Make sure all 3 views reflect the same buffer at the expected locations - JS::RootedValue v(cx, INT_TO_JSVAL(39)); + JS::RootedValue v(cx, JS::Int32Value(39)); JS_SetElement(cx, array, 0, v); JS::RootedValue v2(cx); CHECK(JS_GetElement(cx, array, 0, &v2)); @@ -149,7 +149,7 @@ TestArrayFromBuffer(JSContext* cx) CHECK_EQUAL(long(v.toInt32()), long(reinterpret_cast(data)[0])); } - v = INT_TO_JSVAL(40); + v.setInt32(40); JS_SetElement(cx, array, elts / 2, v); CHECK(JS_GetElement(cx, array, elts / 2, &v2)); CHECK_SAME(v, v2); @@ -162,7 +162,7 @@ TestArrayFromBuffer(JSContext* cx) CHECK_EQUAL(long(v.toInt32()), long(reinterpret_cast(data)[elts / 2])); } - v = INT_TO_JSVAL(41); + v.setInt32(41); JS_SetElement(cx, array, elts - 1, v); CHECK(JS_GetElement(cx, array, elts - 1, &v2)); CHECK_SAME(v, v2); @@ -181,7 +181,7 @@ TestArrayFromBuffer(JSContext* cx) CHECK_SAME(v, v2); /* The copy should not see changes in the original */ - v2 = INT_TO_JSVAL(42); + v2.setInt32(42); JS_SetElement(cx, array, 0, v2); CHECK(JS_GetElement(cx, copy, 0, &v2)); CHECK_SAME(v2, v); /* v is still the original value from 'array' */ diff --git a/js/src/jsapi.h b/js/src/jsapi.h index a2492935c2d..11fecd5b370 100644 --- a/js/src/jsapi.h +++ b/js/src/jsapi.h @@ -740,7 +740,7 @@ JS_NumberValue(double d) int32_t i; d = JS::CanonicalizeNaN(d); if (mozilla::NumberIsInt32(d, &i)) - return INT_TO_JSVAL(i); + return JS::Int32Value(i); return DOUBLE_TO_JSVAL(d); } diff --git a/js/xpconnect/src/XPCLocale.cpp b/js/xpconnect/src/XPCLocale.cpp index 37b504f20b1..220d76fbd5d 100644 --- a/js/xpconnect/src/XPCLocale.cpp 
+++ b/js/xpconnect/src/XPCLocale.cpp @@ -162,7 +162,7 @@ private: return false; } - rval.set(INT_TO_JSVAL(result)); + rval.setInt32(result); return true; } diff --git a/layout/xul/tree/TreeBoxObject.cpp b/layout/xul/tree/TreeBoxObject.cpp index e7f1b5b722c..61f3db24f1a 100644 --- a/layout/xul/tree/TreeBoxObject.cpp +++ b/layout/xul/tree/TreeBoxObject.cpp @@ -592,22 +592,22 @@ TreeBoxObject::GetCoordsForCellItem(JSContext* cx, { int32_t x, y, w, h; GetCoordsForCellItem(row, &col, element, &x, &y, &w, &h); - JS::Rooted v(cx, INT_TO_JSVAL(x)); + JS::Rooted v(cx, JS::Int32Value(x)); if (!JS_SetProperty(cx, xOut, "value", v)) { aRv.Throw(NS_ERROR_XPC_CANT_SET_OUT_VAL); return; } - v = INT_TO_JSVAL(y); + v.setInt32(y); if (!JS_SetProperty(cx, yOut, "value", v)) { aRv.Throw(NS_ERROR_XPC_CANT_SET_OUT_VAL); return; } - v = INT_TO_JSVAL(w); + v.setInt32(w); if (!JS_SetProperty(cx, widthOut, "value", v)) { aRv.Throw(NS_ERROR_XPC_CANT_SET_OUT_VAL); return; } - v = INT_TO_JSVAL(h); + v.setInt32(h); if (!JS_SetProperty(cx, heightOut, "value", v)) { aRv.Throw(NS_ERROR_XPC_CANT_SET_OUT_VAL); return; diff --git a/toolkit/components/telemetry/Telemetry.cpp b/toolkit/components/telemetry/Telemetry.cpp index 259b8a80d2d..e4a88f85860 100644 --- a/toolkit/components/telemetry/Telemetry.cpp +++ b/toolkit/components/telemetry/Telemetry.cpp @@ -1139,7 +1139,7 @@ FillRanges(JSContext *cx, JS::Handle array, Histogram *h) { JS::Rooted range(cx); for (size_t i = 0; i < h->bucket_count(); i++) { - range = INT_TO_JSVAL(h->ranges(i)); + range.setInt32(h->ranges(i)); if (!JS_DefineElement(cx, array, i, range, JSPROP_ENUMERATE)) return false; } From 368a7512741314decc4f11cec2f64210877a399b Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Wed, 1 Jul 2015 00:15:30 -0400 Subject: [PATCH 32/61] Bug 1178803. Fix the handling of the 'length' key in IDB keypaths when operating on a string. 
r=bent --- dom/indexedDB/KeyPath.cpp | 20 ++++++++++++------- .../test/unit/test_indexes_funny_things.js | 2 +- .../meta/IndexedDB/keypath.htm.ini | 8 -------- 3 files changed, 14 insertions(+), 16 deletions(-) delete mode 100644 testing/web-platform/meta/IndexedDB/keypath.htm.ini diff --git a/dom/indexedDB/KeyPath.cpp b/dom/indexedDB/KeyPath.cpp index 690540e8c6e..27eac5e4c82 100644 --- a/dom/indexedDB/KeyPath.cpp +++ b/dom/indexedDB/KeyPath.cpp @@ -89,8 +89,8 @@ GetJSValFromKeyPathString(JSContext* aCx, nsString targetObjectPropName; JS::Rooted targetObject(aCx, nullptr); - JS::Rooted obj(aCx, - aValue.isPrimitive() ? nullptr : aValue.toObjectOrNull()); + JS::Rooted currentVal(aCx, aValue); + JS::Rooted obj(aCx); while (tokenizer.hasMoreTokens()) { const nsDependentSubstring& token = tokenizer.nextToken(); @@ -103,9 +103,18 @@ GetJSValFromKeyPathString(JSContext* aCx, bool hasProp; if (!targetObject) { // We're still walking the chain of existing objects - if (!obj) { + // http://w3c.github.io/IndexedDB/#dfn-evaluate-a-key-path-on-a-value + // step 4 substep 1: check for .length on a String value. + if (currentVal.isString() && !tokenizer.hasMoreTokens() && + token.EqualsLiteral("length") && aOptions == DoNotCreateProperties) { + aKeyJSVal->setNumber(double(JS_GetStringLength(currentVal.toString()))); + break; + } + + if (!currentVal.isObject()) { return NS_ERROR_DOM_INDEXEDDB_DATA_ERR; } + obj = ¤tVal.toObject(); bool ok = JS_HasUCProperty(aCx, obj, keyPathChars, keyPathLen, &hasProp); @@ -123,10 +132,7 @@ GetJSValFromKeyPathString(JSContext* aCx, } if (tokenizer.hasMoreTokens()) { // ...and walk to it if there are more steps... 
- if (intermediate.isPrimitive()) { - return NS_ERROR_DOM_INDEXEDDB_DATA_ERR; - } - obj = intermediate.toObjectOrNull(); + currentVal = intermediate; } else { // ...otherwise use it as key diff --git a/dom/indexedDB/test/unit/test_indexes_funny_things.js b/dom/indexedDB/test/unit/test_indexes_funny_things.js index 33386e079f9..af0384c7719 100644 --- a/dom/indexedDB/test/unit/test_indexes_funny_things.js +++ b/dom/indexedDB/test/unit/test_indexes_funny_things.js @@ -45,7 +45,7 @@ function testSteps() const objectStoreDataLengthSort = [ { key: "5", value: arr}, - //{ key: "4", value: str}, + { key: "4", value: str}, ]; let request = indexedDB.open(name, 1); diff --git a/testing/web-platform/meta/IndexedDB/keypath.htm.ini b/testing/web-platform/meta/IndexedDB/keypath.htm.ini deleted file mode 100644 index 511ccc70a16..00000000000 --- a/testing/web-platform/meta/IndexedDB/keypath.htm.ini +++ /dev/null @@ -1,8 +0,0 @@ -[keypath.htm] - type: testharness - [Keypath - str.length] - expected: FAIL - - [Keypath - length] - expected: FAIL - From c70e842138710384d28991a8bf034057dc0fca91 Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Wed, 1 Jul 2015 00:15:30 -0400 Subject: [PATCH 33/61] Bug 1178806. IndexedDB autoincrement overflow should be throwing ConstraintError per spec. 
r=bent --- dom/indexedDB/ActorsParent.cpp | 3 +-- .../meta/IndexedDB/keygenerator-overflow.htm.ini | 5 ----- 2 files changed, 1 insertion(+), 7 deletions(-) delete mode 100644 testing/web-platform/meta/IndexedDB/keygenerator-overflow.htm.ini diff --git a/dom/indexedDB/ActorsParent.cpp b/dom/indexedDB/ActorsParent.cpp index b75d7b75ea6..bb4e7e22634 100644 --- a/dom/indexedDB/ActorsParent.cpp +++ b/dom/indexedDB/ActorsParent.cpp @@ -23070,8 +23070,7 @@ ObjectStoreAddOrPutRequestOp::DoDatabaseWork(DatabaseConnection* aConnection) MOZ_ASSERT(autoIncrementNum > 0); if (autoIncrementNum > (1LL << 53)) { - IDB_REPORT_INTERNAL_ERR(); - return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR; + return NS_ERROR_DOM_INDEXEDDB_CONSTRAINT_ERR; } key.SetFromInteger(autoIncrementNum); diff --git a/testing/web-platform/meta/IndexedDB/keygenerator-overflow.htm.ini b/testing/web-platform/meta/IndexedDB/keygenerator-overflow.htm.ini deleted file mode 100644 index 9caef9570fb..00000000000 --- a/testing/web-platform/meta/IndexedDB/keygenerator-overflow.htm.ini +++ /dev/null @@ -1,5 +0,0 @@ -[keygenerator-overflow.htm] - type: testharness - [Keygenerator overflow] - expected: FAIL - From e0a63ff0a154c0f522bc72f06ee80d902977c3c5 Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Wed, 1 Jul 2015 00:15:30 -0400 Subject: [PATCH 34/61] Bug 1176800 part 1. When getting all types, include the types Promises are parametrized over. 
r=peterv --- dom/bindings/Configuration.py | 12 +++++++++--- dom/bindings/parser/WebIDL.py | 14 +++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/dom/bindings/Configuration.py b/dom/bindings/Configuration.py index 0df6d65bdbe..2a43369a819 100644 --- a/dom/bindings/Configuration.py +++ b/dom/bindings/Configuration.py @@ -133,9 +133,15 @@ class Configuration: self.unionsPerFilename = defaultdict(list) for (t, descriptor, _) in getAllTypes(self.descriptors, self.dictionaries, self.callbacks): - if t.isMozMap(): - t = t.inner - t = t.unroll() + while True: + if t.isMozMap(): + t = t.inner + elif t.isPromise(): + t = t.promiseInnerType() + elif t.unroll() != t: + t = t.unroll() + else: + break if t.isUnion(): filenamesForUnion = self.filenamesPerUnion[t.name] if t.filename() not in filenamesForUnion: diff --git a/dom/bindings/parser/WebIDL.py b/dom/bindings/parser/WebIDL.py index 0b288e8df6e..5ddb2b56086 100644 --- a/dom/bindings/parser/WebIDL.py +++ b/dom/bindings/parser/WebIDL.py @@ -1273,7 +1273,11 @@ class IDLInterface(IDLObjectWithScope, IDLExposureMixins): args = attr.args() if attr.hasArgs() else [] - retType = IDLWrapperType(self.location, self) + if self.identifier.name == "Promise": + promiseType = BuiltinTypes[IDLBuiltinType.Types.any] + else: + promiseType = None + retType = IDLWrapperType(self.location, self, promiseType) if identifier == "Constructor" or identifier == "ChromeConstructor": name = "constructor" @@ -2633,6 +2637,10 @@ class IDLWrapperType(IDLType): return isinstance(self.inner, IDLInterface) and \ self.inner.identifier.name == "Promise" + def promiseInnerType(self): + assert self.isPromise() + return self._promiseInnerType + def isSerializable(self): if self.isInterface(): if self.inner.isExternal(): @@ -2709,9 +2717,9 @@ class IDLWrapperType(IDLType): # Let's say true, though ideally we'd only do this when # exposureSet contains the primary global's name. 
return True - if (iface.identifier.name == "Promise" and + if (self.isPromise() and # Check the internal type - not self._promiseInnerType.unroll().isExposedInAllOf(exposureSet)): + not self.promiseInnerType().unroll().isExposedInAllOf(exposureSet)): return False return iface.exposureSet.issuperset(exposureSet) From 9c534d67df1fd7cc5842f7ccbbcd50cc38ca29fb Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Wed, 1 Jul 2015 00:15:30 -0400 Subject: [PATCH 35/61] Bug 1176800 part 2. Add a ToJSValue overload for owning unions. r=peterv --- dom/bindings/ToJSValue.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dom/bindings/ToJSValue.h b/dom/bindings/ToJSValue.h index d72fee023d2..6767bf7fdd0 100644 --- a/dom/bindings/ToJSValue.h +++ b/dom/bindings/ToJSValue.h @@ -281,6 +281,18 @@ ToJSValue(JSContext* aCx, ErrorResult& aArgument, JS::MutableHandle aValue); +// Accept owning WebIDL unions. +template +MOZ_WARN_UNUSED_RESULT +typename EnableIf::value, bool>::Type +ToJSValue(JSContext* aCx, + const T& aArgument, + JS::MutableHandle aValue) +{ + JS::Rooted global(aCx, JS::CurrentGlobalOrNull(aCx)); + return aArgument.ToJSVal(aCx, global, aValue); +} + // Accept pointers to other things we accept template MOZ_WARN_UNUSED_RESULT From c7035c05563c8e2bc830c5c48fdbafca36011e84 Mon Sep 17 00:00:00 2001 From: "L. David Baron" Date: Tue, 30 Jun 2015 21:35:30 -0700 Subject: [PATCH 36/61] Bug 1176969 followup - Increase error amounts for intermittent fuzzy-if(cocoaWidget). 
--- layout/reftests/transform-3d/reftest.list | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/layout/reftests/transform-3d/reftest.list b/layout/reftests/transform-3d/reftest.list index ad1b4df3ad9..feead0fa9a4 100644 --- a/layout/reftests/transform-3d/reftest.list +++ b/layout/reftests/transform-3d/reftest.list @@ -63,5 +63,5 @@ fuzzy(3,99) fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccele fuzzy(3,99) == animate-cube-degrees.html animate-cube-degrees-ref.html # subpixel AA == animate-cube-degrees-zoom.html animate-cube-degrees-zoom-ref.html != animate-cube-degrees-ref.html animate-cube-degrees-zoom-ref.html -fuzzy-if(cocoaWidget,3,1) skip-if(Android) == animate-preserve3d-parent.html animate-preserve3d-ref.html # intermittently fuzzy on Mac -fuzzy-if(cocoaWidget,3,1) == animate-preserve3d-child.html animate-preserve3d-ref.html # intermittently fuzzy on Mac +fuzzy-if(cocoaWidget,128,3) skip-if(Android) == animate-preserve3d-parent.html animate-preserve3d-ref.html # intermittently fuzzy on Mac +fuzzy-if(cocoaWidget,128,3) == animate-preserve3d-child.html animate-preserve3d-ref.html # intermittently fuzzy on Mac From 173e0f1423d0db6697657dff9b6119e510a893aa Mon Sep 17 00:00:00 2001 From: David Anderson Date: Sat, 20 Jun 2015 19:47:57 -0400 Subject: [PATCH 37/61] Bug 1148582 - Factor mask layer creation out of ContainerState::SetupMaskLayer. r=mstange --- layout/base/FrameLayerBuilder.cpp | 44 ++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/layout/base/FrameLayerBuilder.cpp b/layout/base/FrameLayerBuilder.cpp index 9a08ee759d2..0f9b05b8101 100644 --- a/layout/base/FrameLayerBuilder.cpp +++ b/layout/base/FrameLayerBuilder.cpp @@ -1089,7 +1089,7 @@ protected: * mask layer which has been used for aLayer before), or create one if such * a layer doesn't exist. 
*/ - already_AddRefed CreateOrRecycleMaskImageLayerFor(Layer* aLayer); + already_AddRefed CreateOrRecycleMaskImageLayerFor(Layer* aLayer, const Maybe& aForAncestorMaskLayer); /** * Grabs all PaintedLayers and ColorLayers from the ContainerLayer and makes them * available for recycling. @@ -1191,6 +1191,12 @@ protected: const nsIntRegion& aLayerVisibleRegion, uint32_t aRoundedRectClipCount = UINT32_MAX); + already_AddRefed CreateMaskLayer( + Layer *aLayer, const DisplayItemClip& aClip, + const nsIntRegion& aLayerVisibleRegion, + const Maybe& aForAncestorMaskLayer, + uint32_t aRoundedRectClipCount = UINT32_MAX); + bool ChooseAnimatedGeometryRoot(const nsDisplayList& aList, const nsIFrame **aAnimatedGeometryRoot); @@ -1927,10 +1933,10 @@ ContainerState::CreateOrRecycleImageLayer(PaintedLayer *aPainted) } already_AddRefed -ContainerState::CreateOrRecycleMaskImageLayerFor(Layer* aLayer) +ContainerState::CreateOrRecycleMaskImageLayerFor(Layer* aLayer, const Maybe& aForAncestorMaskLayer) { nsRefPtr result = mRecycledMaskImageLayers.Get(aLayer); - if (result) { + if (result && !aForAncestorMaskLayer) { mRecycledMaskImageLayers.Remove(aLayer); aLayer->ClearExtraDumpInfo(); // XXX if we use clip on mask layers, null it out here @@ -5637,8 +5643,27 @@ ContainerState::SetupMaskLayer(Layer *aLayer, return; } + nsRefPtr maskLayer = + CreateMaskLayer(aLayer, aClip, aLayerVisibleRegion, Nothing(), aRoundedRectClipCount); + + if (!maskLayer) { + SetClipCount(paintedData, 0); + return; + } + + aLayer->SetMaskLayer(maskLayer); + SetClipCount(paintedData, aRoundedRectClipCount); +} + +already_AddRefed +ContainerState::CreateMaskLayer(Layer *aLayer, + const DisplayItemClip& aClip, + const nsIntRegion& aLayerVisibleRegion, + const Maybe& aForAncestorMaskLayer, + uint32_t aRoundedRectClipCount) +{ // check if we can re-use the mask layer - nsRefPtr maskLayer = CreateOrRecycleMaskImageLayerFor(aLayer); + nsRefPtr maskLayer = CreateOrRecycleMaskImageLayerFor(aLayer, aForAncestorMaskLayer); 
MaskLayerUserData* userData = GetMaskLayerUserData(maskLayer); MaskLayerUserData newData; @@ -5649,9 +5674,7 @@ ContainerState::SetupMaskLayer(Layer *aLayer, newData.mAppUnitsPerDevPixel = mContainerFrame->PresContext()->AppUnitsPerDevPixel(); if (*userData == newData) { - aLayer->SetMaskLayer(maskLayer); - SetClipCount(paintedData, aRoundedRectClipCount); - return; + return maskLayer.forget(); } // calculate a more precise bounding rect @@ -5704,8 +5727,7 @@ ContainerState::SetupMaskLayer(Layer *aLayer, // fail if we can't get the right surface if (!dt) { NS_WARNING("Could not create DrawTarget for mask layer."); - SetClipCount(paintedData, 0); - return; + return nullptr; } nsRefPtr context = new gfxContext(dt); @@ -5750,9 +5772,7 @@ ContainerState::SetupMaskLayer(Layer *aLayer, userData->mRoundedClipRects.SwapElements(newData.mRoundedClipRects); userData->mImageKey = lookupKey; - aLayer->SetMaskLayer(maskLayer); - SetClipCount(paintedData, aRoundedRectClipCount); - return; + return maskLayer.forget(); } } // namespace mozilla From 48bab4c4b7c5e254b38df19fd1f50b1b78c1e6ad Mon Sep 17 00:00:00 2001 From: David Anderson Date: Sun, 21 Jun 2015 12:27:31 -0400 Subject: [PATCH 38/61] Bug 1148582 - Add mask layers to FrameMetrics for ancestor scroll frame clips. 
r=mattwoodrow --- gfx/ipc/GfxMessageUtils.h | 2 + gfx/layers/FrameMetrics.h | 13 +++++++ gfx/layers/LayerTreeInvalidation.cpp | 35 ++++++++++++++++- gfx/layers/Layers.cpp | 11 +++++- gfx/layers/Layers.h | 26 +++++++++++++ gfx/layers/ReadbackProcessor.cpp | 4 +- gfx/layers/apz/src/AsyncPanZoomController.cpp | 1 + gfx/layers/client/ClientCanvasLayer.cpp | 4 +- gfx/layers/client/ClientColorLayer.cpp | 4 +- gfx/layers/client/ClientContainerLayer.h | 4 +- gfx/layers/client/ClientImageLayer.cpp | 4 +- gfx/layers/client/ClientLayerManager.h | 10 +++++ gfx/layers/client/ClientPaintedLayer.cpp | 4 +- gfx/layers/client/ClientTiledPaintedLayer.cpp | 6 +-- .../composite/LayerManagerComposite.cpp | 2 +- gfx/layers/ipc/CompositorParent.cpp | 3 ++ gfx/layers/ipc/LayerTransactionParent.cpp | 7 ++++ gfx/layers/ipc/LayersMessages.ipdlh | 1 + gfx/layers/ipc/ShadowLayers.cpp | 4 ++ layout/base/FrameLayerBuilder.cpp | 38 ++++++++++++++++++- layout/generic/nsGfxScrollFrame.cpp | 23 ++++++----- layout/generic/nsGfxScrollFrame.h | 26 ++++++------- layout/generic/nsIScrollableFrame.h | 18 ++++++--- 23 files changed, 196 insertions(+), 54 deletions(-) diff --git a/gfx/ipc/GfxMessageUtils.h b/gfx/ipc/GfxMessageUtils.h index d83dac7abda..9a5a0dbaaf1 100644 --- a/gfx/ipc/GfxMessageUtils.h +++ b/gfx/ipc/GfxMessageUtils.h @@ -733,6 +733,7 @@ struct ParamTraits WriteParam(aMsg, aParam.GetPageScrollAmount()); WriteParam(aMsg, aParam.AllowVerticalScrollWithWheel()); WriteParam(aMsg, aParam.mClipRect); + WriteParam(aMsg, aParam.mMaskLayerIndex); WriteParam(aMsg, aParam.mIsLayersIdRoot); WriteParam(aMsg, aParam.mUsesContainerScrolling); WriteParam(aMsg, aParam.GetContentDescription()); @@ -778,6 +779,7 @@ struct ParamTraits ReadParam(aMsg, aIter, &aResult->mPageScrollAmount) && ReadParam(aMsg, aIter, &aResult->mAllowVerticalScrollWithWheel) && ReadParam(aMsg, aIter, &aResult->mClipRect) && + ReadParam(aMsg, aIter, &aResult->mMaskLayerIndex) && ReadParam(aMsg, aIter, &aResult->mIsLayersIdRoot) && 
ReadParam(aMsg, aIter, &aResult->mUsesContainerScrolling) && ReadContentDescription(aMsg, aIter, aResult)); diff --git a/gfx/layers/FrameMetrics.h b/gfx/layers/FrameMetrics.h index ef335a4f4d0..09ce3d3489f 100644 --- a/gfx/layers/FrameMetrics.h +++ b/gfx/layers/FrameMetrics.h @@ -104,6 +104,7 @@ public: mPageScrollAmount == aOther.mPageScrollAmount && mAllowVerticalScrollWithWheel == aOther.mAllowVerticalScrollWithWheel && mClipRect == aOther.mClipRect && + mMaskLayerIndex == aOther.mMaskLayerIndex && mIsLayersIdRoot == aOther.mIsLayersIdRoot && mUsesContainerScrolling == aOther.mUsesContainerScrolling; } @@ -525,6 +526,13 @@ public: return mClipRect.ref(); } + void SetMaskLayerIndex(const Maybe& aIndex) { + mMaskLayerIndex = aIndex; + } + const Maybe& GetMaskLayerIndex() const { + return mMaskLayerIndex; + } + void SetIsLayersIdRoot(bool aValue) { mIsLayersIdRoot = aValue; } @@ -714,6 +722,11 @@ private: // The clip rect to use when compositing a layer with this FrameMetrics. Maybe mClipRect; + // An extra clip mask layer to use when compositing a layer with this + // FrameMetrics. This is an index into the MetricsMaskLayers array on + // the Layer. + Maybe mMaskLayerIndex; + // Whether these framemetrics are for the root scroll frame (root element if // we don't have a root scroll frame) for its layers id. 
bool mIsLayersIdRoot; diff --git a/gfx/layers/LayerTreeInvalidation.cpp b/gfx/layers/LayerTreeInvalidation.cpp index 7154639943b..da69db054cf 100644 --- a/gfx/layers/LayerTreeInvalidation.cpp +++ b/gfx/layers/LayerTreeInvalidation.cpp @@ -83,6 +83,10 @@ NotifySubdocumentInvalidationRecursive(Layer* aLayer, NotifySubDocInvalidationFu if (aLayer->GetMaskLayer()) { NotifySubdocumentInvalidationRecursive(aLayer->GetMaskLayer(), aCallback); } + for (size_t i = 0; i < aLayer->GetAncestorMaskLayerCount(); i++) { + Layer* maskLayer = aLayer->GetAncestorMaskLayerAt(i); + NotifySubdocumentInvalidationRecursive(maskLayer, aCallback); + } if (!container) { return; @@ -111,6 +115,10 @@ struct LayerPropertiesBase : public LayerProperties if (aLayer->GetMaskLayer()) { mMaskLayer = CloneLayerTreePropertiesInternal(aLayer->GetMaskLayer(), true); } + for (size_t i = 0; i < aLayer->GetAncestorMaskLayerCount(); i++) { + Layer* maskLayer = aLayer->GetAncestorMaskLayerAt(i); + mAncestorMaskLayers.AppendElement(CloneLayerTreePropertiesInternal(maskLayer, true)); + } if (mUseClipRect) { mClipRect = *aLayer->GetClipRect(); } @@ -139,10 +147,22 @@ struct LayerPropertiesBase : public LayerProperties bool transformChanged = !mTransform.FuzzyEqualsMultiplicative(mLayer->GetLocalTransform()) || mLayer->GetPostXScale() != mPostXScale || mLayer->GetPostYScale() != mPostYScale; - Layer* otherMask = mLayer->GetMaskLayer(); const Maybe& otherClip = mLayer->GetClipRect(); nsIntRegion result; + + bool ancestorMaskChanged = mAncestorMaskLayers.Length() != mLayer->GetAncestorMaskLayerCount(); + if (!ancestorMaskChanged) { + for (size_t i = 0; i < mAncestorMaskLayers.Length(); i++) { + if (mLayer->GetAncestorMaskLayerAt(i) != mAncestorMaskLayers[i]->mLayer) { + ancestorMaskChanged = true; + break; + } + } + } + + Layer* otherMask = mLayer->GetMaskLayer(); if ((mMaskLayer ? 
mMaskLayer->mLayer : nullptr) != otherMask || + ancestorMaskChanged || (mUseClipRect != !!otherClip) || mLayer->GetLocalOpacity() != mOpacity || transformChanged) @@ -162,6 +182,15 @@ struct LayerPropertiesBase : public LayerProperties mTransform); } + for (size_t i = 0; + i < std::min(mAncestorMaskLayers.Length(), mLayer->GetAncestorMaskLayerCount()); + i++) + { + AddTransformedRegion(result, + mAncestorMaskLayers[i]->ComputeChange(aCallback, aGeometryChanged), + mTransform); + } + if (mUseClipRect && otherClip) { if (!mClipRect.IsEqualInterior(*otherClip)) { aGeometryChanged = true; @@ -193,6 +222,7 @@ struct LayerPropertiesBase : public LayerProperties nsRefPtr mLayer; UniquePtr mMaskLayer; + nsTArray> mAncestorMaskLayers; nsIntRegion mVisibleRegion; nsIntRegion mInvalidRegion; Matrix4x4 mTransform; @@ -443,6 +473,9 @@ LayerProperties::ClearInvalidations(Layer *aLayer) if (aLayer->GetMaskLayer()) { ClearInvalidations(aLayer->GetMaskLayer()); } + for (size_t i = 0; i < aLayer->GetAncestorMaskLayerCount(); i++) { + ClearInvalidations(aLayer->GetAncestorMaskLayerAt(i)); + } ContainerLayer* container = aLayer->AsContainerLayer(); if (!container) { diff --git a/gfx/layers/Layers.cpp b/gfx/layers/Layers.cpp index 9e5a77fe30a..80e9d9600ed 100644 --- a/gfx/layers/Layers.cpp +++ b/gfx/layers/Layers.cpp @@ -1190,7 +1190,7 @@ ContainerLayer::DefaultComputeEffectiveTransforms(const Matrix4x4& aTransformToS mEffectiveTransform = SnapTransformTranslation(idealTransform, &residual); bool useIntermediateSurface; - if (GetMaskLayer() || + if (HasMaskLayers() || GetForceIsolatedGroup()) { useIntermediateSurface = true; #ifdef MOZ_DUMP_PAINTING @@ -1219,7 +1219,7 @@ ContainerLayer::DefaultComputeEffectiveTransforms(const Matrix4x4& aTransformToS * Nor for a child with a mask layer. 
*/ if ((clipRect && !clipRect->IsEmpty() && !child->GetVisibleRegion().IsEmpty()) || - child->GetMaskLayer()) { + child->HasMaskLayers()) { useIntermediateSurface = true; break; } @@ -1521,6 +1521,13 @@ Layer::Dump(std::stringstream& aStream, const char* aPrefix, bool aDumpHtml) mask->Dump(aStream, pfx.get(), aDumpHtml); } + for (size_t i = 0; i < GetAncestorMaskLayerCount(); i++) { + aStream << nsPrintfCString("%s Ancestor mask layer %d:\n", aPrefix, uint32_t(i)).get(); + nsAutoCString pfx(aPrefix); + pfx += " "; + GetAncestorMaskLayerAt(i)->Dump(aStream, pfx.get(), aDumpHtml); + } + #ifdef MOZ_DUMP_PAINTING for (size_t i = 0; i < mExtraDumpInfo.Length(); i++) { const nsCString& str = mExtraDumpInfo[i]; diff --git a/gfx/layers/Layers.h b/gfx/layers/Layers.h index db72e254f55..6f63988f52a 100644 --- a/gfx/layers/Layers.h +++ b/gfx/layers/Layers.h @@ -1058,6 +1058,18 @@ public: } } + /** + * CONSTRUCTION PHASE ONLY + * Add a FrameMetrics-associated mask layer. + */ + void SetAncestorMaskLayers(const nsTArray>& aLayers) { + if (aLayers != mAncestorMaskLayers) { + MOZ_LAYERS_LOG_IF_SHADOWABLE(this, ("Layer::Mutated(%p) AncestorMaskLayers", this)); + mAncestorMaskLayers = aLayers; + Mutated(); + } + } + /** * CONSTRUCTION PHASE ONLY * Tell this layer what its transform should be. The transformation @@ -1271,6 +1283,19 @@ public: bool IsScrollbarContainer() { return mIsScrollbarContainer; } Layer* GetMaskLayer() const { return mMaskLayer; } + // Ancestor mask layers are associated with FrameMetrics, but for simplicity + // in maintaining the layer tree structure we attach them to the layer. + size_t GetAncestorMaskLayerCount() const { + return mAncestorMaskLayers.Length(); + } + Layer* GetAncestorMaskLayerAt(size_t aIndex) const { + return mAncestorMaskLayers.ElementAt(aIndex); + } + + bool HasMaskLayers() const { + return GetMaskLayer() || mAncestorMaskLayers.Length() > 0; + } + /* * Get the combined clip rect of the Layer clip and all clips on FrameMetrics. 
* This is intended for use in Layout. The compositor needs to apply async @@ -1695,6 +1720,7 @@ protected: Layer* mPrevSibling; void* mImplData; nsRefPtr mMaskLayer; + nsTArray> mAncestorMaskLayers; gfx::UserData mUserData; gfx::IntRect mLayerBounds; nsIntRegion mVisibleRegion; diff --git a/gfx/layers/ReadbackProcessor.cpp b/gfx/layers/ReadbackProcessor.cpp index 263f54ce090..732ab115526 100644 --- a/gfx/layers/ReadbackProcessor.cpp +++ b/gfx/layers/ReadbackProcessor.cpp @@ -73,9 +73,11 @@ FindBackgroundLayer(ReadbackLayer* aLayer, nsIntPoint* aOffset) return nullptr; if (l->GetEffectiveOpacity() != 1.0 || - l->GetMaskLayer() || + l->HasMaskLayers() || !(l->GetContentFlags() & Layer::CONTENT_OPAQUE)) + { return nullptr; + } // cliprects are post-transform const Maybe& clipRect = l->GetEffectiveClipRect(); diff --git a/gfx/layers/apz/src/AsyncPanZoomController.cpp b/gfx/layers/apz/src/AsyncPanZoomController.cpp index c60df57fe31..ff07d7a4a4e 100644 --- a/gfx/layers/apz/src/AsyncPanZoomController.cpp +++ b/gfx/layers/apz/src/AsyncPanZoomController.cpp @@ -2917,6 +2917,7 @@ void AsyncPanZoomController::NotifyLayersUpdated(const FrameMetrics& aLayerMetri mFrameMetrics.SetLineScrollAmount(aLayerMetrics.GetLineScrollAmount()); mFrameMetrics.SetPageScrollAmount(aLayerMetrics.GetPageScrollAmount()); mFrameMetrics.SetClipRect(aLayerMetrics.GetClipRect()); + mFrameMetrics.SetMaskLayerIndex(aLayerMetrics.GetMaskLayerIndex()); mFrameMetrics.SetIsLayersIdRoot(aLayerMetrics.IsLayersIdRoot()); mFrameMetrics.SetUsesContainerScrolling(aLayerMetrics.UsesContainerScrolling()); diff --git a/gfx/layers/client/ClientCanvasLayer.cpp b/gfx/layers/client/ClientCanvasLayer.cpp index a80b9a5c360..3ef5a9399f0 100644 --- a/gfx/layers/client/ClientCanvasLayer.cpp +++ b/gfx/layers/client/ClientCanvasLayer.cpp @@ -133,9 +133,7 @@ ClientCanvasLayer::RenderLayer() PROFILER_LABEL("ClientCanvasLayer", "RenderLayer", js::ProfileEntry::Category::GRAPHICS); - if (GetMaskLayer()) { - 
ToClientLayer(GetMaskLayer())->RenderLayer(); - } + RenderMaskLayers(this); if (!IsDirty()) { return; diff --git a/gfx/layers/client/ClientColorLayer.cpp b/gfx/layers/client/ClientColorLayer.cpp index 0a21737524e..708ee8a4026 100644 --- a/gfx/layers/client/ClientColorLayer.cpp +++ b/gfx/layers/client/ClientColorLayer.cpp @@ -43,9 +43,7 @@ public: virtual void RenderLayer() { - if (GetMaskLayer()) { - ToClientLayer(GetMaskLayer())->RenderLayer(); - } + RenderMaskLayers(this); } virtual void FillSpecificAttributes(SpecificLayerAttributes& aAttrs) diff --git a/gfx/layers/client/ClientContainerLayer.h b/gfx/layers/client/ClientContainerLayer.h index cb6cafc07ba..0b2f3a0d807 100644 --- a/gfx/layers/client/ClientContainerLayer.h +++ b/gfx/layers/client/ClientContainerLayer.h @@ -47,9 +47,7 @@ protected: public: virtual void RenderLayer() override { - if (GetMaskLayer()) { - ToClientLayer(GetMaskLayer())->RenderLayer(); - } + RenderMaskLayers(this); DefaultComputeSupportsComponentAlphaChildren(); diff --git a/gfx/layers/client/ClientImageLayer.cpp b/gfx/layers/client/ClientImageLayer.cpp index a2c006cccd4..c18f9cfba52 100644 --- a/gfx/layers/client/ClientImageLayer.cpp +++ b/gfx/layers/client/ClientImageLayer.cpp @@ -126,9 +126,7 @@ protected: void ClientImageLayer::RenderLayer() { - if (GetMaskLayer()) { - ToClientLayer(GetMaskLayer())->RenderLayer(); - } + RenderMaskLayers(this); if (!mContainer) { return; diff --git a/gfx/layers/client/ClientLayerManager.h b/gfx/layers/client/ClientLayerManager.h index 482e2caf82f..8efc3c5abef 100644 --- a/gfx/layers/client/ClientLayerManager.h +++ b/gfx/layers/client/ClientLayerManager.h @@ -390,6 +390,16 @@ public: { return static_cast(aLayer->ImplData()); } + + template + static inline void RenderMaskLayers(LayerType* aLayer) { + if (aLayer->GetMaskLayer()) { + ToClientLayer(aLayer->GetMaskLayer())->RenderLayer(); + } + for (size_t i = 0; i < aLayer->GetAncestorMaskLayerCount(); i++) { + 
ToClientLayer(aLayer->GetAncestorMaskLayerAt(i))->RenderLayer(); + } + } }; // Create a shadow layer (PLayerChild) for aLayer, if we're forwarding diff --git a/gfx/layers/client/ClientPaintedLayer.cpp b/gfx/layers/client/ClientPaintedLayer.cpp index dff60c0cb91..160e35e45a1 100644 --- a/gfx/layers/client/ClientPaintedLayer.cpp +++ b/gfx/layers/client/ClientPaintedLayer.cpp @@ -117,9 +117,7 @@ ClientPaintedLayer::PaintThebes() void ClientPaintedLayer::RenderLayerWithReadback(ReadbackProcessor *aReadback) { - if (GetMaskLayer()) { - ToClientLayer(GetMaskLayer())->RenderLayer(); - } + RenderMaskLayers(this); if (!mContentClient) { mContentClient = ContentClient::CreateContentClient(ClientManager()->AsShadowForwarder()); diff --git a/gfx/layers/client/ClientTiledPaintedLayer.cpp b/gfx/layers/client/ClientTiledPaintedLayer.cpp index ee46f0b6f1a..0e0cd5dd530 100644 --- a/gfx/layers/client/ClientTiledPaintedLayer.cpp +++ b/gfx/layers/client/ClientTiledPaintedLayer.cpp @@ -443,10 +443,8 @@ ClientTiledPaintedLayer::RenderLayer() } if (!ClientManager()->IsRepeatTransaction()) { - // Only paint the mask layer on the first transaction. - if (GetMaskLayer()) { - ToClientLayer(GetMaskLayer())->RenderLayer(); - } + // Only paint the mask layers on the first transaction. + RenderMaskLayers(this); // For more complex cases we need to calculate a bunch of metrics before we // can do the paint. diff --git a/gfx/layers/composite/LayerManagerComposite.cpp b/gfx/layers/composite/LayerManagerComposite.cpp index f713dd4a567..8224c97e30b 100644 --- a/gfx/layers/composite/LayerManagerComposite.cpp +++ b/gfx/layers/composite/LayerManagerComposite.cpp @@ -234,7 +234,7 @@ LayerManagerComposite::ApplyOcclusionCulling(Layer* aLayer, nsIntRegion& aOpaque // If we have a simple transform, then we can add our opaque area into // aOpaqueRegion. 
if (isTranslation && - !aLayer->GetMaskLayer() && + !aLayer->HasMaskLayers() && aLayer->GetLocalOpacity() == 1.0f) { if (aLayer->GetContentFlags() & Layer::CONTENT_OPAQUE) { localOpaque.Or(localOpaque, composite->GetFullyRenderedRegion()); diff --git a/gfx/layers/ipc/CompositorParent.cpp b/gfx/layers/ipc/CompositorParent.cpp index f98739a47c9..47dcb0172cb 100644 --- a/gfx/layers/ipc/CompositorParent.cpp +++ b/gfx/layers/ipc/CompositorParent.cpp @@ -1071,6 +1071,9 @@ CompositorParent::SetShadowProperties(Layer* aLayer) if (Layer* maskLayer = aLayer->GetMaskLayer()) { SetShadowProperties(maskLayer); } + for (size_t i = 0; i < aLayer->GetAncestorMaskLayerCount(); i++) { + SetShadowProperties(aLayer->GetAncestorMaskLayerAt(i)); + } // FIXME: Bug 717688 -- Do these updates in LayerTransactionParent::RecvUpdate. LayerComposite* layerComposite = aLayer->AsLayerComposite(); diff --git a/gfx/layers/ipc/LayerTransactionParent.cpp b/gfx/layers/ipc/LayerTransactionParent.cpp index 24643b60f70..6a910fa6b0a 100644 --- a/gfx/layers/ipc/LayerTransactionParent.cpp +++ b/gfx/layers/ipc/LayerTransactionParent.cpp @@ -352,6 +352,13 @@ LayerTransactionParent::RecvUpdate(InfallibleTArray&& cset, layer->SetInvalidRegion(common.invalidRegion()); layer->SetFrameMetrics(common.metrics()); + nsTArray> maskLayers; + for (size_t i = 0; i < common.ancestorMaskLayersParent().Length(); i++) { + Layer* maskLayer = cast(common.ancestorMaskLayersParent().ElementAt(i))->AsLayer(); + maskLayers.AppendElement(maskLayer); + } + layer->SetAncestorMaskLayers(maskLayers); + typedef SpecificLayerAttributes Specific; const SpecificLayerAttributes& specific = attrs.specific(); switch (specific.type()) { diff --git a/gfx/layers/ipc/LayersMessages.ipdlh b/gfx/layers/ipc/LayersMessages.ipdlh index 22fdd827bd3..857b155c43e 100644 --- a/gfx/layers/ipc/LayersMessages.ipdlh +++ b/gfx/layers/ipc/LayersMessages.ipdlh @@ -225,6 +225,7 @@ struct CommonLayerAttributes { int8_t mixBlendMode; bool forceIsolatedGroup; 
nullable PLayer maskLayer; + PLayer[] ancestorMaskLayers; // Animated colors will only honored for ColorLayers. Animation[] animations; nsIntRegion invalidRegion; diff --git a/gfx/layers/ipc/ShadowLayers.cpp b/gfx/layers/ipc/ShadowLayers.cpp index 5c932b755a9..744c8d6fc37 100644 --- a/gfx/layers/ipc/ShadowLayers.cpp +++ b/gfx/layers/ipc/ShadowLayers.cpp @@ -607,6 +607,10 @@ ShadowLayerForwarder::EndTransaction(InfallibleTArray* aReplies, common.animations() = mutant->GetAnimations(); common.invalidRegion() = mutant->GetInvalidRegion(); common.metrics() = mutant->GetAllFrameMetrics(); + for (size_t i = 0; i < mutant->GetAncestorMaskLayerCount(); i++) { + auto layer = Shadow(mutant->GetAncestorMaskLayerAt(i)->AsShadowableLayer()); + common.ancestorMaskLayersChild().AppendElement(layer); + } attrs.specific() = null_t(); mutant->FillSpecificAttributes(attrs.specific()); diff --git a/layout/base/FrameLayerBuilder.cpp b/layout/base/FrameLayerBuilder.cpp index 0f9b05b8101..ab2b3b0afa6 100644 --- a/layout/base/FrameLayerBuilder.cpp +++ b/layout/base/FrameLayerBuilder.cpp @@ -4364,9 +4364,16 @@ ContainerState::SetupScrollingMetadata(NewLayerEntry* aEntry) nsAutoTArray metricsArray; if (aEntry->mBaseFrameMetrics) { metricsArray.AppendElement(*aEntry->mBaseFrameMetrics); + + // The base FrameMetrics was not computed by the nsIScrollableframe, so it + // should not have a mask layer. + MOZ_ASSERT(!aEntry->mBaseFrameMetrics->GetMaskLayerIndex()); } uint32_t baseLength = metricsArray.Length(); + // Any extra mask layers we need to attach to FrameMetrics. 
+ nsTArray> maskLayers; + nsIFrame* fParent; for (const nsIFrame* f = aEntry->mAnimatedGeometryRoot; f != mContainerAnimatedGeometryRoot; @@ -4391,11 +4398,38 @@ ContainerState::SetupScrollingMetadata(NewLayerEntry* aEntry) continue; } - scrollFrame->ComputeFrameMetrics(aEntry->mLayer, mContainerReferenceFrame, - mParameters, &metricsArray); + Maybe info = + scrollFrame->ComputeFrameMetrics(aEntry->mLayer, mContainerReferenceFrame, mParameters); + if (!info) { + continue; + } + + FrameMetrics& metrics = info->metrics; + const DisplayItemClip* clip = info->clip; + + if (clip && + clip->HasClip() && + clip->GetRoundedRectCount() > 0) + { + // The clip in between this scrollframe and its ancestor scrollframe + // requires a mask layer. Since this mask layer should not move with + // the APZC associated with this FrameMetrics, we attach the mask + // layer as an additional, separate clip. + Maybe nextIndex = Some(maskLayers.Length()); + nsRefPtr maskLayer = + CreateMaskLayer(aEntry->mLayer, *clip, aEntry->mVisibleRegion, nextIndex, clip->GetRoundedRectCount()); + if (maskLayer) { + metrics.SetMaskLayerIndex(nextIndex); + maskLayers.AppendElement(maskLayer); + } + } + + metricsArray.AppendElement(metrics); } + // Watch out for FrameMetrics copies in profiles aEntry->mLayer->SetFrameMetrics(metricsArray); + aEntry->mLayer->SetAncestorMaskLayers(maskLayers); } static void diff --git a/layout/generic/nsGfxScrollFrame.cpp b/layout/generic/nsGfxScrollFrame.cpp index efe9a5d7880..ce67362905c 100644 --- a/layout/generic/nsGfxScrollFrame.cpp +++ b/layout/generic/nsGfxScrollFrame.cpp @@ -3052,14 +3052,13 @@ ScrollFrameHelper::BuildDisplayList(nsDisplayListBuilder* aBuilder, scrolledContent.MoveTo(aLists); } -void +Maybe ScrollFrameHelper::ComputeFrameMetrics(Layer* aLayer, nsIFrame* aContainerReferenceFrame, - const ContainerLayerParameters& aParameters, - nsTArray* aOutput) const + const ContainerLayerParameters& aParameters) const { if (!mShouldBuildScrollableLayer || 
mIsScrollableLayerInRootContainer) { - return; + return Nothing(); } bool needsParentLayerClip = true; @@ -3117,17 +3116,21 @@ ScrollFrameHelper::ComputeFrameMetrics(Layer* aLayer, } // Return early, since if we don't use APZ we don't need FrameMetrics. - return; + return Nothing(); } MOZ_ASSERT(mScrolledFrame->GetContent()); + FrameMetricsAndClip result; + nsRect scrollport = mScrollPort + toReferenceFrame; - *aOutput->AppendElement() = - nsLayoutUtils::ComputeFrameMetrics( - mScrolledFrame, mOuter, mOuter->GetContent(), - aContainerReferenceFrame, aLayer, mScrollParentID, - scrollport, parentLayerClip, isRootContent, aParameters); + result.metrics = nsLayoutUtils::ComputeFrameMetrics( + mScrolledFrame, mOuter, mOuter->GetContent(), + aContainerReferenceFrame, aLayer, mScrollParentID, + scrollport, parentLayerClip, isRootContent, aParameters); + result.clip = mAncestorClip; + + return Some(result); } bool diff --git a/layout/generic/nsGfxScrollFrame.h b/layout/generic/nsGfxScrollFrame.h index 55d94e344b6..fe2f79caac8 100644 --- a/layout/generic/nsGfxScrollFrame.h +++ b/layout/generic/nsGfxScrollFrame.h @@ -377,9 +377,9 @@ public: } } bool WantAsyncScroll() const; - void ComputeFrameMetrics(Layer* aLayer, nsIFrame* aContainerReferenceFrame, - const ContainerLayerParameters& aParameters, - nsTArray* aOutput) const; + Maybe ComputeFrameMetrics( + Layer* aLayer, nsIFrame* aContainerReferenceFrame, + const ContainerLayerParameters& aParameters) const; // nsIScrollbarMediator void ScrollByPage(nsScrollbarFrame* aScrollbar, int32_t aDirection, @@ -834,11 +834,11 @@ public: virtual bool WantAsyncScroll() const override { return mHelper.WantAsyncScroll(); } - virtual void ComputeFrameMetrics(Layer* aLayer, nsIFrame* aContainerReferenceFrame, - const ContainerLayerParameters& aParameters, - nsTArray* aOutput) const override { - mHelper.ComputeFrameMetrics(aLayer, aContainerReferenceFrame, - aParameters, aOutput); + virtual mozilla::Maybe ComputeFrameMetrics( + Layer* 
aLayer, nsIFrame* aContainerReferenceFrame, + const ContainerLayerParameters& aParameters) const override + { + return mHelper.ComputeFrameMetrics(aLayer, aContainerReferenceFrame, aParameters); } virtual bool IsIgnoringViewportClipping() const override { return mHelper.IsIgnoringViewportClipping(); @@ -1229,11 +1229,11 @@ public: virtual bool WantAsyncScroll() const override { return mHelper.WantAsyncScroll(); } - virtual void ComputeFrameMetrics(Layer* aLayer, nsIFrame* aContainerReferenceFrame, - const ContainerLayerParameters& aParameters, - nsTArray* aOutput) const override { - mHelper.ComputeFrameMetrics(aLayer, aContainerReferenceFrame, - aParameters, aOutput); + virtual mozilla::Maybe ComputeFrameMetrics( + Layer* aLayer, nsIFrame* aContainerReferenceFrame, + const ContainerLayerParameters& aParameters) const override + { + return mHelper.ComputeFrameMetrics(aLayer, aContainerReferenceFrame, aParameters); } virtual bool IsIgnoringViewportClipping() const override { return mHelper.IsIgnoringViewportClipping(); diff --git a/layout/generic/nsIScrollableFrame.h b/layout/generic/nsIScrollableFrame.h index 523175be067..e35e985901e 100644 --- a/layout/generic/nsIScrollableFrame.h +++ b/layout/generic/nsIScrollableFrame.h @@ -32,10 +32,18 @@ class nsDisplayListBuilder; namespace mozilla { struct ContainerLayerParameters; +class DisplayItemClip; namespace layers { class Layer; } -} + +struct FrameMetricsAndClip +{ + layers::FrameMetrics metrics; + const DisplayItemClip* clip; +}; + +} // namespace mozilla /** * Interface for frames that are scrollable. This interface exposes @@ -415,10 +423,10 @@ public: * aLayer's animated geometry root is this frame. If there needs to be a * FrameMetrics contributed by this frame, append it to aOutput. 
*/ - virtual void ComputeFrameMetrics(mozilla::layers::Layer* aLayer, - nsIFrame* aContainerReferenceFrame, - const ContainerLayerParameters& aParameters, - nsTArray* aOutput) const = 0; + virtual mozilla::Maybe ComputeFrameMetrics( + mozilla::layers::Layer* aLayer, + nsIFrame* aContainerReferenceFrame, + const ContainerLayerParameters& aParameters) const = 0; /** * If this scroll frame is ignoring viewporting clipping From e18ceb4c2afd7d1dee87967c71b052cc30df232b Mon Sep 17 00:00:00 2001 From: David Anderson Date: Sun, 21 Jun 2015 12:28:10 -0400 Subject: [PATCH 39/61] Bug 1148582 - Recycle mask layers attached to FrameMetrics. r=mattwoodrow --- layout/base/FrameLayerBuilder.cpp | 56 +++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/layout/base/FrameLayerBuilder.cpp b/layout/base/FrameLayerBuilder.cpp index ab2b3b0afa6..beaa60f5a76 100644 --- a/layout/base/FrameLayerBuilder.cpp +++ b/layout/base/FrameLayerBuilder.cpp @@ -1088,8 +1088,14 @@ protected: * Grab a recyclable ImageLayer for use as a mask layer for aLayer (that is a * mask layer which has been used for aLayer before), or create one if such * a layer doesn't exist. + * + * Since mask layers can exist either on the layer directly, or as a side- + * attachment to FrameMetrics (for ancestor scrollframe clips), we key the + * recycle operation on both the originating layer and the mask layer's + * index in the layer, if any. */ - already_AddRefed CreateOrRecycleMaskImageLayerFor(Layer* aLayer, const Maybe& aForAncestorMaskLayer); + struct MaskLayerKey; + already_AddRefed CreateOrRecycleMaskImageLayerFor(const MaskLayerKey& aKey); /** * Grabs all PaintedLayers and ColorLayers from the ContainerLayer and makes them * available for recycling. 
@@ -1229,11 +1235,33 @@ protected: typedef nsAutoTArray AutoLayersArray; AutoLayersArray mNewChildLayers; nsTHashtable> mPaintedLayersAvailableForRecycling; - nsDataHashtable, nsRefPtr > - mRecycledMaskImageLayers; nscoord mAppUnitsPerDevPixel; bool mSnappingEnabled; bool mFlattenToSingleLayer; + + struct MaskLayerKey { + MaskLayerKey() : mLayer(nullptr) {} + MaskLayerKey(Layer* aLayer, const Maybe& aAncestorIndex) + : mLayer(aLayer), + mAncestorIndex(aAncestorIndex) + {} + + PLDHashNumber Hash() const { + // Hash the layer and add the layer index to the hash. + return (NS_PTR_TO_UINT32(mLayer) >> 2) + + (mAncestorIndex ? (*mAncestorIndex + 1) : 0); + } + bool operator ==(const MaskLayerKey& aOther) const { + return mLayer == aOther.mLayer && + mAncestorIndex == aOther.mAncestorIndex; + } + + Layer* mLayer; + Maybe mAncestorIndex; + }; + + nsDataHashtable, nsRefPtr> + mRecycledMaskImageLayers; }; class PaintedDisplayItemLayerUserData : public LayerUserData @@ -1933,12 +1961,12 @@ ContainerState::CreateOrRecycleImageLayer(PaintedLayer *aPainted) } already_AddRefed -ContainerState::CreateOrRecycleMaskImageLayerFor(Layer* aLayer, const Maybe& aForAncestorMaskLayer) +ContainerState::CreateOrRecycleMaskImageLayerFor(const MaskLayerKey& aKey) { - nsRefPtr result = mRecycledMaskImageLayers.Get(aLayer); - if (result && !aForAncestorMaskLayer) { - mRecycledMaskImageLayers.Remove(aLayer); - aLayer->ClearExtraDumpInfo(); + nsRefPtr result = mRecycledMaskImageLayers.Get(aKey); + if (result) { + mRecycledMaskImageLayers.Remove(aKey); + aKey.mLayer->ClearExtraDumpInfo(); // XXX if we use clip on mask layers, null it out here } else { // Create a new layer @@ -4326,7 +4354,14 @@ ContainerState::CollectOldLayers() if (Layer* maskLayer = layer->GetMaskLayer()) { NS_ASSERTION(maskLayer->GetType() == Layer::TYPE_IMAGE, "Could not recycle mask layer, unsupported layer type."); - mRecycledMaskImageLayers.Put(layer, static_cast(maskLayer)); + 
mRecycledMaskImageLayers.Put(MaskLayerKey(layer, Nothing()), static_cast(maskLayer)); + } + for (size_t i = 0; i < layer->GetAncestorMaskLayerCount(); i++) { + Layer* maskLayer = layer->GetAncestorMaskLayerAt(i); + + NS_ASSERTION(maskLayer->GetType() == Layer::TYPE_IMAGE, + "Could not recycle mask layer, unsupported layer type."); + mRecycledMaskImageLayers.Put(MaskLayerKey(layer, Some(i)), static_cast(maskLayer)); } } } @@ -5697,7 +5732,8 @@ ContainerState::CreateMaskLayer(Layer *aLayer, uint32_t aRoundedRectClipCount) { // check if we can re-use the mask layer - nsRefPtr maskLayer = CreateOrRecycleMaskImageLayerFor(aLayer, aForAncestorMaskLayer); + MaskLayerKey recycleKey(aLayer, aForAncestorMaskLayer); + nsRefPtr maskLayer = CreateOrRecycleMaskImageLayerFor(recycleKey); MaskLayerUserData* userData = GetMaskLayerUserData(maskLayer); MaskLayerUserData newData; From 5cff589f5bef725738d6526201bb00e089e8353f Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 30 Jun 2015 21:07:09 -0400 Subject: [PATCH 40/61] Bug 1148582 - Apply async transforms to (ancestor) mask layers correctly. 
r=botond --- gfx/layers/ImageLayers.cpp | 2 +- gfx/layers/Layers.cpp | 36 ++++++++++++------- gfx/layers/Layers.h | 12 ++++--- gfx/layers/basic/BasicContainerLayer.cpp | 4 +-- gfx/layers/basic/BasicPaintedLayer.h | 2 +- .../composite/AsyncCompositionManager.cpp | 33 +++++++++++++++++ gfx/layers/composite/ImageLayerComposite.cpp | 2 +- 7 files changed, 68 insertions(+), 23 deletions(-) diff --git a/gfx/layers/ImageLayers.cpp b/gfx/layers/ImageLayers.cpp index 693c47c5e92..1fe0672a6b7 100644 --- a/gfx/layers/ImageLayers.cpp +++ b/gfx/layers/ImageLayers.cpp @@ -57,7 +57,7 @@ void ImageLayer::ComputeEffectiveTransforms(const gfx::Matrix4x4& aTransformToSu mEffectiveTransformForBuffer = mEffectiveTransform; } - ComputeEffectiveTransformForMaskLayer(aTransformToSurface); + ComputeEffectiveTransformForMaskLayers(aTransformToSurface); } } diff --git a/gfx/layers/Layers.cpp b/gfx/layers/Layers.cpp index 80e9d9600ed..61a21a228cb 100644 --- a/gfx/layers/Layers.cpp +++ b/gfx/layers/Layers.cpp @@ -857,21 +857,31 @@ Layer::DeprecatedGetEffectiveMixBlendMode() } void -Layer::ComputeEffectiveTransformForMaskLayer(const Matrix4x4& aTransformToSurface) +Layer::ComputeEffectiveTransformForMaskLayers(const gfx::Matrix4x4& aTransformToSurface) { - if (mMaskLayer) { - mMaskLayer->mEffectiveTransform = aTransformToSurface; + if (GetMaskLayer()) { + ComputeEffectiveTransformForMaskLayer(GetMaskLayer(), aTransformToSurface); + } + for (size_t i = 0; i < GetAncestorMaskLayerCount(); i++) { + Layer* maskLayer = GetAncestorMaskLayerAt(i); + ComputeEffectiveTransformForMaskLayer(maskLayer, aTransformToSurface); + } +} + +/* static */ void +Layer::ComputeEffectiveTransformForMaskLayer(Layer* aMaskLayer, const gfx::Matrix4x4& aTransformToSurface) +{ + aMaskLayer->mEffectiveTransform = aTransformToSurface; #ifdef DEBUG - bool maskIs2D = mMaskLayer->GetTransform().CanDraw2D(); - NS_ASSERTION(maskIs2D, "How did we end up with a 3D transform here?!"); + bool maskIs2D = 
aMaskLayer->GetTransform().CanDraw2D(); + NS_ASSERTION(maskIs2D, "How did we end up with a 3D transform here?!"); #endif - // The mask layer can have an async transform applied to it in some - // situations, so be sure to use its GetLocalTransform() rather than - // its GetTransform(). - mMaskLayer->mEffectiveTransform = mMaskLayer->GetLocalTransform() * - mMaskLayer->mEffectiveTransform; - } + // The mask layer can have an async transform applied to it in some + // situations, so be sure to use its GetLocalTransform() rather than + // its GetTransform(). + aMaskLayer->mEffectiveTransform = aMaskLayer->GetLocalTransform() * + aMaskLayer->mEffectiveTransform; } RenderTargetRect @@ -1236,9 +1246,9 @@ ContainerLayer::DefaultComputeEffectiveTransforms(const Matrix4x4& aTransformToS } if (idealTransform.CanDraw2D()) { - ComputeEffectiveTransformForMaskLayer(aTransformToSurface); + ComputeEffectiveTransformForMaskLayers(aTransformToSurface); } else { - ComputeEffectiveTransformForMaskLayer(Matrix4x4()); + ComputeEffectiveTransformForMaskLayers(Matrix4x4()); } } diff --git a/gfx/layers/Layers.h b/gfx/layers/Layers.h index 6f63988f52a..1d7b9f276d6 100644 --- a/gfx/layers/Layers.h +++ b/gfx/layers/Layers.h @@ -1514,9 +1514,11 @@ public: virtual void ComputeEffectiveTransforms(const gfx::Matrix4x4& aTransformToSurface) = 0; /** - * computes the effective transform for a mask layer, if this layer has one + * Computes the effective transform for mask layers, if this layer has any. */ - void ComputeEffectiveTransformForMaskLayer(const gfx::Matrix4x4& aTransformToSurface); + void ComputeEffectiveTransformForMaskLayers(const gfx::Matrix4x4& aTransformToSurface); + static void ComputeEffectiveTransformForMaskLayer(Layer* aMaskLayer, + const gfx::Matrix4x4& aTransformToSurface); /** * Calculate the scissor rect required when rendering this layer. 
@@ -1832,7 +1834,7 @@ public: "Residual translation out of range"); mValidRegion.SetEmpty(); } - ComputeEffectiveTransformForMaskLayer(aTransformToSurface); + ComputeEffectiveTransformForMaskLayers(aTransformToSurface); } LayerManager::PaintedLayerCreationHint GetCreationHint() const { return mCreationHint; } @@ -2143,7 +2145,7 @@ public: { gfx::Matrix4x4 idealTransform = GetLocalTransform() * aTransformToSurface; mEffectiveTransform = SnapTransformTranslation(idealTransform, nullptr); - ComputeEffectiveTransformForMaskLayer(aTransformToSurface); + ComputeEffectiveTransformForMaskLayers(aTransformToSurface); } protected: @@ -2295,7 +2297,7 @@ public: SnapTransform(GetLocalTransform(), gfxRect(0, 0, mBounds.width, mBounds.height), nullptr)* SnapTransformTranslation(aTransformToSurface, nullptr); - ComputeEffectiveTransformForMaskLayer(aTransformToSurface); + ComputeEffectiveTransformForMaskLayers(aTransformToSurface); } protected: diff --git a/gfx/layers/basic/BasicContainerLayer.cpp b/gfx/layers/basic/BasicContainerLayer.cpp index 5e2b01fdb7c..e80e27291bd 100644 --- a/gfx/layers/basic/BasicContainerLayer.cpp +++ b/gfx/layers/basic/BasicContainerLayer.cpp @@ -44,7 +44,7 @@ BasicContainerLayer::ComputeEffectiveTransforms(const Matrix4x4& aTransformToSur if (!idealTransform.CanDraw2D()) { mEffectiveTransform = idealTransform; ComputeEffectiveTransformsForChildren(Matrix4x4()); - ComputeEffectiveTransformForMaskLayer(Matrix4x4()); + ComputeEffectiveTransformForMaskLayers(Matrix4x4()); mUseIntermediateSurface = true; return; } @@ -54,7 +54,7 @@ BasicContainerLayer::ComputeEffectiveTransforms(const Matrix4x4& aTransformToSur // need to apply any compensation using the residual from SnapTransformTranslation. 
ComputeEffectiveTransformsForChildren(idealTransform); - ComputeEffectiveTransformForMaskLayer(aTransformToSurface); + ComputeEffectiveTransformForMaskLayers(aTransformToSurface); Layer* child = GetFirstChild(); bool hasSingleBlendingChild = false; diff --git a/gfx/layers/basic/BasicPaintedLayer.h b/gfx/layers/basic/BasicPaintedLayer.h index 3139e604355..bacfba1a77c 100644 --- a/gfx/layers/basic/BasicPaintedLayer.h +++ b/gfx/layers/basic/BasicPaintedLayer.h @@ -86,7 +86,7 @@ public: mResidualTranslation = gfxPoint(0,0); mValidRegion.SetEmpty(); } - ComputeEffectiveTransformForMaskLayer(aTransformToSurface); + ComputeEffectiveTransformForMaskLayers(aTransformToSurface); return; } PaintedLayer::ComputeEffectiveTransforms(aTransformToSurface); diff --git a/gfx/layers/composite/AsyncCompositionManager.cpp b/gfx/layers/composite/AsyncCompositionManager.cpp index 84602b1541b..f1c2ebed0f4 100644 --- a/gfx/layers/composite/AsyncCompositionManager.cpp +++ b/gfx/layers/composite/AsyncCompositionManager.cpp @@ -614,6 +614,18 @@ AsyncCompositionManager::ApplyAsyncContentTransformToTree(Layer *aLayer) // The final clip for the layer is the intersection of these clips. Maybe asyncClip = aLayer->GetClipRect(); + // The transform of a mask layer is relative to the masked layer's parent + // layer. So whenever we apply an async transform to a layer, we need to + // apply that same transform to the layer's own mask layer. + // A layer can also have "ancestor" mask layers for any rounded clips from + // its ancestor scroll frames. A scroll frame mask layer only needs to be + // async transformed for async scrolls of this scroll frame's ancestor + // scroll frames, not for async scrolls of this scroll frame itself. + // In the loop below, we iterate over scroll frames from inside to outside. + // At each iteration, this array contains the layer's ancestor mask layers + // of all scroll frames inside the current one. 
+ nsTArray ancestorMaskLayers; + for (uint32_t i = 0; i < aLayer->GetFrameMetricsCount(); i++) { AsyncPanZoomController* controller = aLayer->GetAsyncPanZoomController(i); if (!controller) { @@ -676,6 +688,21 @@ AsyncCompositionManager::ApplyAsyncContentTransformToTree(Layer *aLayer) } } + // Do the same for the ancestor mask layers: ancestorMaskLayers contains + // the ancestor mask layers for scroll frames *inside* the current scroll + // frame, so these are the ones we need to shift by our async transform. + for (Layer* ancestorMaskLayer : ancestorMaskLayers) { + SetShadowTransform(ancestorMaskLayer, + ancestorMaskLayer->GetLocalTransform() * asyncTransform); + } + + // Append the ancestor mask layer for this scroll frame to ancestorMaskLayers. + if (metrics.GetMaskLayerIndex()) { + size_t maskLayerIndex = metrics.GetMaskLayerIndex().value(); + Layer* ancestorMaskLayer = aLayer->GetAncestorMaskLayerAt(maskLayerIndex); + ancestorMaskLayers.AppendElement(ancestorMaskLayer); + } + combinedAsyncTransformWithoutOverscroll *= asyncTransformWithoutOverscroll; combinedAsyncTransform *= asyncTransform; } @@ -692,6 +719,12 @@ AsyncCompositionManager::ApplyAsyncContentTransformToTree(Layer *aLayer) SetShadowTransform(aLayer, aLayer->GetLocalTransform() * AdjustForClip(combinedAsyncTransform, aLayer)); + // Do the same for the layer's own mask layer, if it has one. 
+ if (Layer* maskLayer = aLayer->GetMaskLayer()) { + SetShadowTransform(maskLayer, + maskLayer->GetLocalTransform() * combinedAsyncTransform); + } + const FrameMetrics& bottom = LayerMetricsWrapper::BottommostScrollableMetrics(aLayer); MOZ_ASSERT(bottom.IsScrollable()); // must be true because hasAsyncTransform is true diff --git a/gfx/layers/composite/ImageLayerComposite.cpp b/gfx/layers/composite/ImageLayerComposite.cpp index 025ba722ba3..6d6261b2a1e 100644 --- a/gfx/layers/composite/ImageLayerComposite.cpp +++ b/gfx/layers/composite/ImageLayerComposite.cpp @@ -143,7 +143,7 @@ ImageLayerComposite::ComputeEffectiveTransforms(const gfx::Matrix4x4& aTransform mEffectiveTransformForBuffer = mEffectiveTransform; } - ComputeEffectiveTransformForMaskLayer(aTransformToSurface); + ComputeEffectiveTransformForMaskLayers(aTransformToSurface); } CompositableHost* From c8fc96a6361673fd1fde35fa0699747b3fd85047 Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Mon, 22 Jun 2015 01:38:23 -0400 Subject: [PATCH 41/61] Bug 1148582 - Include the rounded clip of the async scrolled scroll frame in its mAncestorClip. --- layout/generic/nsGfxScrollFrame.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/layout/generic/nsGfxScrollFrame.cpp b/layout/generic/nsGfxScrollFrame.cpp index ce67362905c..2eceb64bced 100644 --- a/layout/generic/nsGfxScrollFrame.cpp +++ b/layout/generic/nsGfxScrollFrame.cpp @@ -2942,6 +2942,18 @@ ScrollFrameHelper::BuildDisplayList(nsDisplayListBuilder* aBuilder, ? nsLayoutUtils::FindOrCreateIDFor(mScrolledFrame->GetContent()) : aBuilder->GetCurrentScrollParentId()); DisplayListClipState::AutoSaveRestore clipState(aBuilder); + if (!mIsRoot || !usingDisplayport) { + nsRect clip = mScrollPort + aBuilder->ToReferenceFrame(mOuter); + nscoord radii[8]; + bool haveRadii = mOuter->GetPaddingBoxBorderRadii(radii); + // Our override of GetBorderRadii ensures we never have a radius at + // the corners where we have a scrollbar. 
+ if (mClipAllDescendants) { + clipState.ClipContentDescendants(clip, haveRadii ? radii : nullptr); + } else { + clipState.ClipContainingBlockDescendants(clip, haveRadii ? radii : nullptr); + } + } if (usingDisplayport) { // Capture the clip state of the parent scroll frame. This will be saved @@ -2962,17 +2974,6 @@ ScrollFrameHelper::BuildDisplayList(nsDisplayListBuilder* aBuilder, // the entire displayport, but it lets the compositor know to // clip to the scroll port after compositing. clipState.Clear(); - } else { - nsRect clip = mScrollPort + aBuilder->ToReferenceFrame(mOuter); - nscoord radii[8]; - bool haveRadii = mOuter->GetPaddingBoxBorderRadii(radii); - // Our override of GetBorderRadii ensures we never have a radius at - // the corners where we have a scrollbar. - if (mClipAllDescendants) { - clipState.ClipContentDescendants(clip, haveRadii ? radii : nullptr); - } else { - clipState.ClipContainingBlockDescendants(clip, haveRadii ? radii : nullptr); - } } aBuilder->StoreDirtyRectForScrolledContents(mOuter, dirtyRect); From c0d1e334f158e198a355f8428efa31090ba3ead2 Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Wed, 1 Jul 2015 00:52:09 -0400 Subject: [PATCH 42/61] Bug 1148582 - Support multiple mask layers per layer in LayerManagerComposite. 
--- gfx/layers/composite/CanvasLayerComposite.cpp | 18 ++- gfx/layers/composite/ColorLayerComposite.cpp | 29 ++-- .../composite/ContainerLayerComposite.cpp | 27 ++-- gfx/layers/composite/ImageLayerComposite.cpp | 20 ++- gfx/layers/composite/LayerManagerComposite.h | 133 ++++++++++++++++++ .../composite/PaintedLayerComposite.cpp | 28 ++-- 6 files changed, 182 insertions(+), 73 deletions(-) diff --git a/gfx/layers/composite/CanvasLayerComposite.cpp b/gfx/layers/composite/CanvasLayerComposite.cpp index 072f670f6b1..ca371305763 100644 --- a/gfx/layers/composite/CanvasLayerComposite.cpp +++ b/gfx/layers/composite/CanvasLayerComposite.cpp @@ -95,17 +95,15 @@ CanvasLayerComposite::RenderLayer(const IntRect& aClipRect) } #endif - EffectChain effectChain(this); - AddBlendModeEffect(effectChain); + RenderWithAllMasks(this, mCompositor, aClipRect, + [&](EffectChain& effectChain, const Rect& clipRect) { + mCompositableHost->Composite(effectChain, + GetEffectiveOpacity(), + GetEffectiveTransform(), + GetEffectFilter(), + clipRect); + }); - LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(mMaskLayer, effectChain); - gfx::Rect clipRect(aClipRect.x, aClipRect.y, aClipRect.width, aClipRect.height); - - mCompositableHost->Composite(effectChain, - GetEffectiveOpacity(), - GetEffectiveTransform(), - GetEffectFilter(), - clipRect); mCompositableHost->BumpFlashCounter(); } diff --git a/gfx/layers/composite/ColorLayerComposite.cpp b/gfx/layers/composite/ColorLayerComposite.cpp index cdc23320114..8ac5b8041a7 100644 --- a/gfx/layers/composite/ColorLayerComposite.cpp +++ b/gfx/layers/composite/ColorLayerComposite.cpp @@ -21,28 +21,17 @@ namespace layers { void ColorLayerComposite::RenderLayer(const gfx::IntRect& aClipRect) { - EffectChain effects(this); - - GenEffectChain(effects); - - gfx::IntRect boundRect = GetBounds(); - - LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(GetMaskLayer(), - effects); - - gfx::Rect rect(boundRect.x, boundRect.y, - boundRect.width, 
boundRect.height); - gfx::Rect clipRect(aClipRect.x, aClipRect.y, - aClipRect.width, aClipRect.height); - - float opacity = GetEffectiveOpacity(); - - AddBlendModeEffect(effects); - + gfx::Rect rect(GetBounds()); const gfx::Matrix4x4& transform = GetEffectiveTransform(); - mCompositor->DrawQuad(rect, clipRect, effects, opacity, transform); + + RenderWithAllMasks(this, mCompositor, aClipRect, + [&](EffectChain& effectChain, const Rect& clipRect) { + GenEffectChain(effectChain); + mCompositor->DrawQuad(rect, clipRect, effectChain, GetEffectiveOpacity(), transform); + }); + mCompositor->DrawDiagnostics(DiagnosticFlags::COLOR, - rect, clipRect, + rect, Rect(aClipRect), transform); } diff --git a/gfx/layers/composite/ContainerLayerComposite.cpp b/gfx/layers/composite/ContainerLayerComposite.cpp index bd3607ffa08..f384ff595f6 100755 --- a/gfx/layers/composite/ContainerLayerComposite.cpp +++ b/gfx/layers/composite/ContainerLayerComposite.cpp @@ -513,9 +513,7 @@ ContainerRender(ContainerT* aContainer, return; } - float opacity = aContainer->GetEffectiveOpacity(); - - gfx::IntRect visibleRect = aContainer->GetEffectiveVisibleRegion().GetBounds(); + gfx::Rect visibleRect(aContainer->GetEffectiveVisibleRegion().GetBounds()); #ifdef MOZ_DUMP_PAINTING if (gfxUtils::sDumpPainting) { RefPtr surf = surface->Dump(aManager->GetCompositor()); @@ -525,22 +523,13 @@ ContainerRender(ContainerT* aContainer, } #endif - EffectChain effectChain(aContainer); - LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(aContainer->GetMaskLayer(), - effectChain, - !aContainer->GetTransform().CanDraw2D()); - if (autoMaskEffect.Failed()) { - NS_WARNING("Failed to apply a mask effect."); - return; - } - - aContainer->AddBlendModeEffect(effectChain); - effectChain.mPrimaryEffect = new EffectRenderTarget(surface); - - gfx::Rect rect(visibleRect.x, visibleRect.y, visibleRect.width, visibleRect.height); - gfx::Rect clipRect(aClipRect.x, aClipRect.y, aClipRect.width, aClipRect.height); - 
aManager->GetCompositor()->DrawQuad(rect, clipRect, effectChain, opacity, - aContainer->GetEffectiveTransform()); + RenderWithAllMasks(aContainer, aManager->GetCompositor(), aClipRect, + [&](EffectChain& effectChain, const Rect& clipRect) { + effectChain.mPrimaryEffect = new EffectRenderTarget(surface); + aManager->GetCompositor()->DrawQuad(visibleRect, clipRect, effectChain, + aContainer->GetEffectiveOpacity(), + aContainer->GetEffectiveTransform()); + }); } else { RenderLayers(aContainer, aManager, RenderTargetPixel::FromUntyped(aClipRect)); } diff --git a/gfx/layers/composite/ImageLayerComposite.cpp b/gfx/layers/composite/ImageLayerComposite.cpp index 6d6261b2a1e..0c7a662d533 100644 --- a/gfx/layers/composite/ImageLayerComposite.cpp +++ b/gfx/layers/composite/ImageLayerComposite.cpp @@ -95,17 +95,15 @@ ImageLayerComposite::RenderLayer(const IntRect& aClipRect) mCompositor->MakeCurrent(); - EffectChain effectChain(this); - LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(mMaskLayer, effectChain); - AddBlendModeEffect(effectChain); - - gfx::Rect clipRect(aClipRect.x, aClipRect.y, aClipRect.width, aClipRect.height); - mImageHost->SetCompositor(mCompositor); - mImageHost->Composite(effectChain, - GetEffectiveOpacity(), - GetEffectiveTransformForBuffer(), - GetEffectFilter(), - clipRect); + RenderWithAllMasks(this, mCompositor, aClipRect, + [&](EffectChain& effectChain, const Rect& clipRect) { + mImageHost->SetCompositor(mCompositor); + mImageHost->Composite(effectChain, + GetEffectiveOpacity(), + GetEffectiveTransformForBuffer(), + GetEffectFilter(), + clipRect); + }); mImageHost->BumpFlashCounter(); } diff --git a/gfx/layers/composite/LayerManagerComposite.h b/gfx/layers/composite/LayerManagerComposite.h index e82a417805a..4f18d62da12 100644 --- a/gfx/layers/composite/LayerManagerComposite.h +++ b/gfx/layers/composite/LayerManagerComposite.h @@ -18,6 +18,7 @@ #include "mozilla/gfx/Rect.h" // for Rect #include "mozilla/gfx/Types.h" // for SurfaceFormat 
#include "mozilla/layers/CompositorTypes.h" +#include "mozilla/layers/Effects.h" // for EffectChain #include "mozilla/layers/LayersTypes.h" // for LayersBackend, etc #include "mozilla/Maybe.h" // for Maybe #include "mozilla/RefPtr.h" @@ -457,6 +458,138 @@ protected: gfx::IntRect mClearRect; }; +// Render aLayer using aCompositor and apply all mask layers of aLayer: The +// layer's own mask layer (aLayer->GetMaskLayer()), and any ancestor mask +// layers. +// If more than one mask layer needs to be applied, we use intermediate surfaces +// (CompositingRenderTargets) for rendering, applying one mask layer at a time. +// Callers need to provide a callback function aRenderCallback that does the +// actual rendering of the source. It needs to have the following form: +// void (EffectChain& effectChain, const Rect& clipRect) +// aRenderCallback is called exactly once, inside this function, unless aLayer's +// visible region is completely clipped out (in that case, aRenderCallback won't +// be called at all). +// This function calls aLayer->AsLayerComposite()->AddBlendModeEffect for the +// final rendering pass. +// +// (This function should really live in LayerManagerComposite.cpp, but we +// need to use templates for passing lambdas until bug 1164522 is resolved.) 
+template +void +RenderWithAllMasks(Layer* aLayer, Compositor* aCompositor, + const gfx::IntRect& aClipRect, + RenderCallbackType aRenderCallback) +{ + Layer* firstMask = nullptr; + size_t maskLayerCount = 0; + size_t nextAncestorMaskLayer = 0; + + size_t ancestorMaskLayerCount = aLayer->GetAncestorMaskLayerCount(); + if (Layer* ownMask = aLayer->GetMaskLayer()) { + firstMask = ownMask; + maskLayerCount = ancestorMaskLayerCount + 1; + nextAncestorMaskLayer = 0; + } else if (ancestorMaskLayerCount > 0) { + firstMask = aLayer->GetAncestorMaskLayerAt(0); + maskLayerCount = ancestorMaskLayerCount; + nextAncestorMaskLayer = 1; + } else { + // no mask layers at all + } + + bool firstMaskIs3D = false; + if (ContainerLayer* container = aLayer->AsContainerLayer()) { + firstMaskIs3D = !container->GetTransform().CanDraw2D(); + } + + if (maskLayerCount <= 1) { + // This is the common case. Render in one pass and return. + EffectChain effectChain(aLayer); + LayerManagerComposite::AutoAddMaskEffect + autoMaskEffect(firstMask, effectChain, firstMaskIs3D); + aLayer->AsLayerComposite()->AddBlendModeEffect(effectChain); + aRenderCallback(effectChain, gfx::Rect(aClipRect)); + return; + } + + // We have multiple mask layers. + // We split our list of mask layers into three parts: + // (1) The first mask + // (2) The list of intermediate masks (every mask except first and last) + // (3) The final mask. + // Part (2) can be empty. + // For parts (1) and (2) we need to allocate intermediate surfaces to render + // into. The final mask gets rendered into the original render target. + + // Calculate the size of the intermediate surfaces. 
+ gfx::Rect visibleRect(aLayer->GetEffectiveVisibleRegion().GetBounds()); + gfx::Matrix4x4 transform = aLayer->GetEffectiveTransform(); + // TODO: Use RenderTargetIntRect and TransformTo<...> here + gfx::IntRect surfaceRect = + RoundedOut(transform.TransformBounds(visibleRect)).Intersect(aClipRect); + if (surfaceRect.IsEmpty()) { + return; + } + + RefPtr originalTarget = + aCompositor->GetCurrentRenderTarget(); + + RefPtr firstTarget = + aCompositor->CreateRenderTarget(surfaceRect, INIT_MODE_CLEAR); + if (!firstTarget) { + return; + } + + // Render the source while applying the first mask. + aCompositor->SetRenderTarget(firstTarget); + { + EffectChain firstEffectChain(aLayer); + LayerManagerComposite::AutoAddMaskEffect + firstMaskEffect(firstMask, firstEffectChain, firstMaskIs3D); + aRenderCallback(firstEffectChain, gfx::Rect(aClipRect - surfaceRect.TopLeft())); + // firstTarget now contains the transformed source with the first mask and + // opacity already applied. + } + + // Apply the intermediate masks. 
+ gfx::Rect intermediateClip(surfaceRect - surfaceRect.TopLeft()); + RefPtr previousTarget = firstTarget; + for (size_t i = nextAncestorMaskLayer; i < ancestorMaskLayerCount - 1; i++) { + Layer* intermediateMask = aLayer->GetAncestorMaskLayerAt(i); + RefPtr intermediateTarget = + aCompositor->CreateRenderTarget(surfaceRect, INIT_MODE_CLEAR); + if (!intermediateTarget) { + break; + } + aCompositor->SetRenderTarget(intermediateTarget); + EffectChain intermediateEffectChain(aLayer); + LayerManagerComposite::AutoAddMaskEffect + intermediateMaskEffect(intermediateMask, intermediateEffectChain); + if (intermediateMaskEffect.Failed()) { + continue; + } + intermediateEffectChain.mPrimaryEffect = new EffectRenderTarget(previousTarget); + aCompositor->DrawQuad(gfx::Rect(surfaceRect), intermediateClip, + intermediateEffectChain, 1.0, gfx::Matrix4x4()); + previousTarget = intermediateTarget; + } + + aCompositor->SetRenderTarget(originalTarget); + + // Apply the final mask, rendering into originalTarget. + EffectChain finalEffectChain(aLayer); + finalEffectChain.mPrimaryEffect = new EffectRenderTarget(previousTarget); + Layer* finalMask = aLayer->GetAncestorMaskLayerAt(ancestorMaskLayerCount - 1); + + // The blend mode needs to be applied in this final step, because this is + // where we're blending with the actual background (which is in originalTarget). 
+ aLayer->AsLayerComposite()->AddBlendModeEffect(finalEffectChain); + LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(finalMask, finalEffectChain); + if (!autoMaskEffect.Failed()) { + aCompositor->DrawQuad(gfx::Rect(surfaceRect), gfx::Rect(aClipRect), + finalEffectChain, 1.0, gfx::Matrix4x4()); + } +} } /* layers */ } /* mozilla */ diff --git a/gfx/layers/composite/PaintedLayerComposite.cpp b/gfx/layers/composite/PaintedLayerComposite.cpp index 20f79e0bcb5..4a3fcf1b3e0 100644 --- a/gfx/layers/composite/PaintedLayerComposite.cpp +++ b/gfx/layers/composite/PaintedLayerComposite.cpp @@ -119,12 +119,13 @@ PaintedLayerComposite::RenderLayer(const gfx::IntRect& aClipRect) PROFILER_LABEL("PaintedLayerComposite", "RenderLayer", js::ProfileEntry::Category::GRAPHICS); - MOZ_ASSERT(mBuffer->GetCompositor() == mCompositeManager->GetCompositor() && + Compositor* compositor = mCompositeManager->GetCompositor(); + + MOZ_ASSERT(mBuffer->GetCompositor() == compositor && mBuffer->GetLayer() == this, "buffer is corrupted"); const nsIntRegion& visibleRegion = GetEffectiveVisibleRegion(); - gfx::Rect clipRect(aClipRect.x, aClipRect.y, aClipRect.width, aClipRect.height); #ifdef MOZ_DUMP_PAINTING if (gfxUtils::sDumpPainting) { @@ -135,21 +136,22 @@ PaintedLayerComposite::RenderLayer(const gfx::IntRect& aClipRect) } #endif - EffectChain effectChain(this); - LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(mMaskLayer, effectChain); - AddBlendModeEffect(effectChain); - mBuffer->SetPaintWillResample(MayResample()); + RenderWithAllMasks(this, compositor, aClipRect, + [&](EffectChain& effectChain, const Rect& clipRect) { + mBuffer->SetPaintWillResample(MayResample()); + + mBuffer->Composite(effectChain, + GetEffectiveOpacity(), + GetEffectiveTransform(), + GetEffectFilter(), + clipRect, + &visibleRegion); + }); - mBuffer->Composite(effectChain, - GetEffectiveOpacity(), - GetEffectiveTransform(), - GetEffectFilter(), - clipRect, - &visibleRegion); mBuffer->BumpFlashCounter(); - 
mCompositeManager->GetCompositor()->MakeCurrent(); + compositor->MakeCurrent(); } CompositableHost* From 1ff20a74cb9c6338b17c29c54ce81cbd4f29e41c Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Wed, 1 Jul 2015 01:01:50 -0400 Subject: [PATCH 43/61] Bug 1148582 - Don't capture refcounted LayerManager in the lambda; capture the compositor instead. --- gfx/layers/composite/ContainerLayerComposite.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gfx/layers/composite/ContainerLayerComposite.cpp b/gfx/layers/composite/ContainerLayerComposite.cpp index f384ff595f6..53cd1ee6609 100755 --- a/gfx/layers/composite/ContainerLayerComposite.cpp +++ b/gfx/layers/composite/ContainerLayerComposite.cpp @@ -514,21 +514,22 @@ ContainerRender(ContainerT* aContainer, } gfx::Rect visibleRect(aContainer->GetEffectiveVisibleRegion().GetBounds()); + Compositor* compositor = aManager->GetCompositor(); #ifdef MOZ_DUMP_PAINTING if (gfxUtils::sDumpPainting) { - RefPtr surf = surface->Dump(aManager->GetCompositor()); + RefPtr surf = surface->Dump(compositor); if (surf) { WriteSnapshotToDumpFile(aContainer, surf); } } #endif - RenderWithAllMasks(aContainer, aManager->GetCompositor(), aClipRect, + RenderWithAllMasks(aContainer, compositor, aClipRect, [&](EffectChain& effectChain, const Rect& clipRect) { effectChain.mPrimaryEffect = new EffectRenderTarget(surface); - aManager->GetCompositor()->DrawQuad(visibleRect, clipRect, effectChain, - aContainer->GetEffectiveOpacity(), - aContainer->GetEffectiveTransform()); + compositor->DrawQuad(visibleRect, clipRect, effectChain, + aContainer->GetEffectiveOpacity(), + aContainer->GetEffectiveTransform()); }); } else { RenderLayers(aContainer, aManager, RenderTargetPixel::FromUntyped(aClipRect)); From 26c466292ecdbbe47e93db336a7b7f39592993de Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Wed, 1 Jul 2015 01:20:59 -0400 Subject: [PATCH 44/61] Back out bug 1148582 for static analysis bustage. 
--- gfx/ipc/GfxMessageUtils.h | 2 - gfx/layers/FrameMetrics.h | 13 -- gfx/layers/ImageLayers.cpp | 2 +- gfx/layers/LayerTreeInvalidation.cpp | 35 +---- gfx/layers/Layers.cpp | 47 ++----- gfx/layers/Layers.h | 38 +---- gfx/layers/ReadbackProcessor.cpp | 4 +- gfx/layers/apz/src/AsyncPanZoomController.cpp | 1 - gfx/layers/basic/BasicContainerLayer.cpp | 4 +- gfx/layers/basic/BasicPaintedLayer.h | 2 +- gfx/layers/client/ClientCanvasLayer.cpp | 4 +- gfx/layers/client/ClientColorLayer.cpp | 4 +- gfx/layers/client/ClientContainerLayer.h | 4 +- gfx/layers/client/ClientImageLayer.cpp | 4 +- gfx/layers/client/ClientLayerManager.h | 10 -- gfx/layers/client/ClientPaintedLayer.cpp | 4 +- gfx/layers/client/ClientTiledPaintedLayer.cpp | 6 +- .../composite/AsyncCompositionManager.cpp | 33 ----- gfx/layers/composite/CanvasLayerComposite.cpp | 18 +-- gfx/layers/composite/ColorLayerComposite.cpp | 29 ++-- .../composite/ContainerLayerComposite.cpp | 30 ++-- gfx/layers/composite/ImageLayerComposite.cpp | 22 +-- .../composite/LayerManagerComposite.cpp | 2 +- gfx/layers/composite/LayerManagerComposite.h | 133 ------------------ .../composite/PaintedLayerComposite.cpp | 28 ++-- gfx/layers/ipc/CompositorParent.cpp | 3 - gfx/layers/ipc/LayerTransactionParent.cpp | 7 - gfx/layers/ipc/LayersMessages.ipdlh | 1 - gfx/layers/ipc/ShadowLayers.cpp | 4 - layout/base/FrameLayerBuilder.cpp | 128 +++-------------- layout/generic/nsGfxScrollFrame.cpp | 46 +++--- layout/generic/nsGfxScrollFrame.h | 26 ++-- layout/generic/nsIScrollableFrame.h | 18 +-- 33 files changed, 179 insertions(+), 533 deletions(-) diff --git a/gfx/ipc/GfxMessageUtils.h b/gfx/ipc/GfxMessageUtils.h index 9a5a0dbaaf1..d83dac7abda 100644 --- a/gfx/ipc/GfxMessageUtils.h +++ b/gfx/ipc/GfxMessageUtils.h @@ -733,7 +733,6 @@ struct ParamTraits WriteParam(aMsg, aParam.GetPageScrollAmount()); WriteParam(aMsg, aParam.AllowVerticalScrollWithWheel()); WriteParam(aMsg, aParam.mClipRect); - WriteParam(aMsg, aParam.mMaskLayerIndex); 
WriteParam(aMsg, aParam.mIsLayersIdRoot); WriteParam(aMsg, aParam.mUsesContainerScrolling); WriteParam(aMsg, aParam.GetContentDescription()); @@ -779,7 +778,6 @@ struct ParamTraits ReadParam(aMsg, aIter, &aResult->mPageScrollAmount) && ReadParam(aMsg, aIter, &aResult->mAllowVerticalScrollWithWheel) && ReadParam(aMsg, aIter, &aResult->mClipRect) && - ReadParam(aMsg, aIter, &aResult->mMaskLayerIndex) && ReadParam(aMsg, aIter, &aResult->mIsLayersIdRoot) && ReadParam(aMsg, aIter, &aResult->mUsesContainerScrolling) && ReadContentDescription(aMsg, aIter, aResult)); diff --git a/gfx/layers/FrameMetrics.h b/gfx/layers/FrameMetrics.h index 09ce3d3489f..ef335a4f4d0 100644 --- a/gfx/layers/FrameMetrics.h +++ b/gfx/layers/FrameMetrics.h @@ -104,7 +104,6 @@ public: mPageScrollAmount == aOther.mPageScrollAmount && mAllowVerticalScrollWithWheel == aOther.mAllowVerticalScrollWithWheel && mClipRect == aOther.mClipRect && - mMaskLayerIndex == aOther.mMaskLayerIndex && mIsLayersIdRoot == aOther.mIsLayersIdRoot && mUsesContainerScrolling == aOther.mUsesContainerScrolling; } @@ -526,13 +525,6 @@ public: return mClipRect.ref(); } - void SetMaskLayerIndex(const Maybe& aIndex) { - mMaskLayerIndex = aIndex; - } - const Maybe& GetMaskLayerIndex() const { - return mMaskLayerIndex; - } - void SetIsLayersIdRoot(bool aValue) { mIsLayersIdRoot = aValue; } @@ -722,11 +714,6 @@ private: // The clip rect to use when compositing a layer with this FrameMetrics. Maybe mClipRect; - // An extra clip mask layer to use when compositing a layer with this - // FrameMetrics. This is an index into the MetricsMaskLayers array on - // the Layer. - Maybe mMaskLayerIndex; - // Whether these framemetrics are for the root scroll frame (root element if // we don't have a root scroll frame) for its layers id. 
bool mIsLayersIdRoot; diff --git a/gfx/layers/ImageLayers.cpp b/gfx/layers/ImageLayers.cpp index 1fe0672a6b7..693c47c5e92 100644 --- a/gfx/layers/ImageLayers.cpp +++ b/gfx/layers/ImageLayers.cpp @@ -57,7 +57,7 @@ void ImageLayer::ComputeEffectiveTransforms(const gfx::Matrix4x4& aTransformToSu mEffectiveTransformForBuffer = mEffectiveTransform; } - ComputeEffectiveTransformForMaskLayers(aTransformToSurface); + ComputeEffectiveTransformForMaskLayer(aTransformToSurface); } } diff --git a/gfx/layers/LayerTreeInvalidation.cpp b/gfx/layers/LayerTreeInvalidation.cpp index da69db054cf..7154639943b 100644 --- a/gfx/layers/LayerTreeInvalidation.cpp +++ b/gfx/layers/LayerTreeInvalidation.cpp @@ -83,10 +83,6 @@ NotifySubdocumentInvalidationRecursive(Layer* aLayer, NotifySubDocInvalidationFu if (aLayer->GetMaskLayer()) { NotifySubdocumentInvalidationRecursive(aLayer->GetMaskLayer(), aCallback); } - for (size_t i = 0; i < aLayer->GetAncestorMaskLayerCount(); i++) { - Layer* maskLayer = aLayer->GetAncestorMaskLayerAt(i); - NotifySubdocumentInvalidationRecursive(maskLayer, aCallback); - } if (!container) { return; @@ -115,10 +111,6 @@ struct LayerPropertiesBase : public LayerProperties if (aLayer->GetMaskLayer()) { mMaskLayer = CloneLayerTreePropertiesInternal(aLayer->GetMaskLayer(), true); } - for (size_t i = 0; i < aLayer->GetAncestorMaskLayerCount(); i++) { - Layer* maskLayer = aLayer->GetAncestorMaskLayerAt(i); - mAncestorMaskLayers.AppendElement(CloneLayerTreePropertiesInternal(maskLayer, true)); - } if (mUseClipRect) { mClipRect = *aLayer->GetClipRect(); } @@ -147,22 +139,10 @@ struct LayerPropertiesBase : public LayerProperties bool transformChanged = !mTransform.FuzzyEqualsMultiplicative(mLayer->GetLocalTransform()) || mLayer->GetPostXScale() != mPostXScale || mLayer->GetPostYScale() != mPostYScale; + Layer* otherMask = mLayer->GetMaskLayer(); const Maybe& otherClip = mLayer->GetClipRect(); nsIntRegion result; - - bool ancestorMaskChanged = mAncestorMaskLayers.Length() != 
mLayer->GetAncestorMaskLayerCount(); - if (!ancestorMaskChanged) { - for (size_t i = 0; i < mAncestorMaskLayers.Length(); i++) { - if (mLayer->GetAncestorMaskLayerAt(i) != mAncestorMaskLayers[i]->mLayer) { - ancestorMaskChanged = true; - break; - } - } - } - - Layer* otherMask = mLayer->GetMaskLayer(); if ((mMaskLayer ? mMaskLayer->mLayer : nullptr) != otherMask || - ancestorMaskChanged || (mUseClipRect != !!otherClip) || mLayer->GetLocalOpacity() != mOpacity || transformChanged) @@ -182,15 +162,6 @@ struct LayerPropertiesBase : public LayerProperties mTransform); } - for (size_t i = 0; - i < std::min(mAncestorMaskLayers.Length(), mLayer->GetAncestorMaskLayerCount()); - i++) - { - AddTransformedRegion(result, - mAncestorMaskLayers[i]->ComputeChange(aCallback, aGeometryChanged), - mTransform); - } - if (mUseClipRect && otherClip) { if (!mClipRect.IsEqualInterior(*otherClip)) { aGeometryChanged = true; @@ -222,7 +193,6 @@ struct LayerPropertiesBase : public LayerProperties nsRefPtr mLayer; UniquePtr mMaskLayer; - nsTArray> mAncestorMaskLayers; nsIntRegion mVisibleRegion; nsIntRegion mInvalidRegion; Matrix4x4 mTransform; @@ -473,9 +443,6 @@ LayerProperties::ClearInvalidations(Layer *aLayer) if (aLayer->GetMaskLayer()) { ClearInvalidations(aLayer->GetMaskLayer()); } - for (size_t i = 0; i < aLayer->GetAncestorMaskLayerCount(); i++) { - ClearInvalidations(aLayer->GetAncestorMaskLayerAt(i)); - } ContainerLayer* container = aLayer->AsContainerLayer(); if (!container) { diff --git a/gfx/layers/Layers.cpp b/gfx/layers/Layers.cpp index 61a21a228cb..9e5a77fe30a 100644 --- a/gfx/layers/Layers.cpp +++ b/gfx/layers/Layers.cpp @@ -857,31 +857,21 @@ Layer::DeprecatedGetEffectiveMixBlendMode() } void -Layer::ComputeEffectiveTransformForMaskLayers(const gfx::Matrix4x4& aTransformToSurface) +Layer::ComputeEffectiveTransformForMaskLayer(const Matrix4x4& aTransformToSurface) { - if (GetMaskLayer()) { - ComputeEffectiveTransformForMaskLayer(GetMaskLayer(), aTransformToSurface); - } - 
for (size_t i = 0; i < GetAncestorMaskLayerCount(); i++) { - Layer* maskLayer = GetAncestorMaskLayerAt(i); - ComputeEffectiveTransformForMaskLayer(maskLayer, aTransformToSurface); - } -} - -/* static */ void -Layer::ComputeEffectiveTransformForMaskLayer(Layer* aMaskLayer, const gfx::Matrix4x4& aTransformToSurface) -{ - aMaskLayer->mEffectiveTransform = aTransformToSurface; + if (mMaskLayer) { + mMaskLayer->mEffectiveTransform = aTransformToSurface; #ifdef DEBUG - bool maskIs2D = aMaskLayer->GetTransform().CanDraw2D(); - NS_ASSERTION(maskIs2D, "How did we end up with a 3D transform here?!"); + bool maskIs2D = mMaskLayer->GetTransform().CanDraw2D(); + NS_ASSERTION(maskIs2D, "How did we end up with a 3D transform here?!"); #endif - // The mask layer can have an async transform applied to it in some - // situations, so be sure to use its GetLocalTransform() rather than - // its GetTransform(). - aMaskLayer->mEffectiveTransform = aMaskLayer->GetLocalTransform() * - aMaskLayer->mEffectiveTransform; + // The mask layer can have an async transform applied to it in some + // situations, so be sure to use its GetLocalTransform() rather than + // its GetTransform(). + mMaskLayer->mEffectiveTransform = mMaskLayer->GetLocalTransform() * + mMaskLayer->mEffectiveTransform; + } } RenderTargetRect @@ -1200,7 +1190,7 @@ ContainerLayer::DefaultComputeEffectiveTransforms(const Matrix4x4& aTransformToS mEffectiveTransform = SnapTransformTranslation(idealTransform, &residual); bool useIntermediateSurface; - if (HasMaskLayers() || + if (GetMaskLayer() || GetForceIsolatedGroup()) { useIntermediateSurface = true; #ifdef MOZ_DUMP_PAINTING @@ -1229,7 +1219,7 @@ ContainerLayer::DefaultComputeEffectiveTransforms(const Matrix4x4& aTransformToS * Nor for a child with a mask layer. 
*/ if ((clipRect && !clipRect->IsEmpty() && !child->GetVisibleRegion().IsEmpty()) || - child->HasMaskLayers()) { + child->GetMaskLayer()) { useIntermediateSurface = true; break; } @@ -1246,9 +1236,9 @@ ContainerLayer::DefaultComputeEffectiveTransforms(const Matrix4x4& aTransformToS } if (idealTransform.CanDraw2D()) { - ComputeEffectiveTransformForMaskLayers(aTransformToSurface); + ComputeEffectiveTransformForMaskLayer(aTransformToSurface); } else { - ComputeEffectiveTransformForMaskLayers(Matrix4x4()); + ComputeEffectiveTransformForMaskLayer(Matrix4x4()); } } @@ -1531,13 +1521,6 @@ Layer::Dump(std::stringstream& aStream, const char* aPrefix, bool aDumpHtml) mask->Dump(aStream, pfx.get(), aDumpHtml); } - for (size_t i = 0; i < GetAncestorMaskLayerCount(); i++) { - aStream << nsPrintfCString("%s Ancestor mask layer %d:\n", aPrefix, uint32_t(i)).get(); - nsAutoCString pfx(aPrefix); - pfx += " "; - GetAncestorMaskLayerAt(i)->Dump(aStream, pfx.get(), aDumpHtml); - } - #ifdef MOZ_DUMP_PAINTING for (size_t i = 0; i < mExtraDumpInfo.Length(); i++) { const nsCString& str = mExtraDumpInfo[i]; diff --git a/gfx/layers/Layers.h b/gfx/layers/Layers.h index 1d7b9f276d6..db72e254f55 100644 --- a/gfx/layers/Layers.h +++ b/gfx/layers/Layers.h @@ -1058,18 +1058,6 @@ public: } } - /** - * CONSTRUCTION PHASE ONLY - * Add a FrameMetrics-associated mask layer. - */ - void SetAncestorMaskLayers(const nsTArray>& aLayers) { - if (aLayers != mAncestorMaskLayers) { - MOZ_LAYERS_LOG_IF_SHADOWABLE(this, ("Layer::Mutated(%p) AncestorMaskLayers", this)); - mAncestorMaskLayers = aLayers; - Mutated(); - } - } - /** * CONSTRUCTION PHASE ONLY * Tell this layer what its transform should be. The transformation @@ -1283,19 +1271,6 @@ public: bool IsScrollbarContainer() { return mIsScrollbarContainer; } Layer* GetMaskLayer() const { return mMaskLayer; } - // Ancestor mask layers are associated with FrameMetrics, but for simplicity - // in maintaining the layer tree structure we attach them to the layer. 
- size_t GetAncestorMaskLayerCount() const { - return mAncestorMaskLayers.Length(); - } - Layer* GetAncestorMaskLayerAt(size_t aIndex) const { - return mAncestorMaskLayers.ElementAt(aIndex); - } - - bool HasMaskLayers() const { - return GetMaskLayer() || mAncestorMaskLayers.Length() > 0; - } - /* * Get the combined clip rect of the Layer clip and all clips on FrameMetrics. * This is intended for use in Layout. The compositor needs to apply async @@ -1514,11 +1489,9 @@ public: virtual void ComputeEffectiveTransforms(const gfx::Matrix4x4& aTransformToSurface) = 0; /** - * Computes the effective transform for mask layers, if this layer has any. + * computes the effective transform for a mask layer, if this layer has one */ - void ComputeEffectiveTransformForMaskLayers(const gfx::Matrix4x4& aTransformToSurface); - static void ComputeEffectiveTransformForMaskLayer(Layer* aMaskLayer, - const gfx::Matrix4x4& aTransformToSurface); + void ComputeEffectiveTransformForMaskLayer(const gfx::Matrix4x4& aTransformToSurface); /** * Calculate the scissor rect required when rendering this layer. 
@@ -1722,7 +1695,6 @@ protected: Layer* mPrevSibling; void* mImplData; nsRefPtr mMaskLayer; - nsTArray> mAncestorMaskLayers; gfx::UserData mUserData; gfx::IntRect mLayerBounds; nsIntRegion mVisibleRegion; @@ -1834,7 +1806,7 @@ public: "Residual translation out of range"); mValidRegion.SetEmpty(); } - ComputeEffectiveTransformForMaskLayers(aTransformToSurface); + ComputeEffectiveTransformForMaskLayer(aTransformToSurface); } LayerManager::PaintedLayerCreationHint GetCreationHint() const { return mCreationHint; } @@ -2145,7 +2117,7 @@ public: { gfx::Matrix4x4 idealTransform = GetLocalTransform() * aTransformToSurface; mEffectiveTransform = SnapTransformTranslation(idealTransform, nullptr); - ComputeEffectiveTransformForMaskLayers(aTransformToSurface); + ComputeEffectiveTransformForMaskLayer(aTransformToSurface); } protected: @@ -2297,7 +2269,7 @@ public: SnapTransform(GetLocalTransform(), gfxRect(0, 0, mBounds.width, mBounds.height), nullptr)* SnapTransformTranslation(aTransformToSurface, nullptr); - ComputeEffectiveTransformForMaskLayers(aTransformToSurface); + ComputeEffectiveTransformForMaskLayer(aTransformToSurface); } protected: diff --git a/gfx/layers/ReadbackProcessor.cpp b/gfx/layers/ReadbackProcessor.cpp index 732ab115526..263f54ce090 100644 --- a/gfx/layers/ReadbackProcessor.cpp +++ b/gfx/layers/ReadbackProcessor.cpp @@ -73,11 +73,9 @@ FindBackgroundLayer(ReadbackLayer* aLayer, nsIntPoint* aOffset) return nullptr; if (l->GetEffectiveOpacity() != 1.0 || - l->HasMaskLayers() || + l->GetMaskLayer() || !(l->GetContentFlags() & Layer::CONTENT_OPAQUE)) - { return nullptr; - } // cliprects are post-transform const Maybe& clipRect = l->GetEffectiveClipRect(); diff --git a/gfx/layers/apz/src/AsyncPanZoomController.cpp b/gfx/layers/apz/src/AsyncPanZoomController.cpp index ff07d7a4a4e..c60df57fe31 100644 --- a/gfx/layers/apz/src/AsyncPanZoomController.cpp +++ b/gfx/layers/apz/src/AsyncPanZoomController.cpp @@ -2917,7 +2917,6 @@ void 
AsyncPanZoomController::NotifyLayersUpdated(const FrameMetrics& aLayerMetri mFrameMetrics.SetLineScrollAmount(aLayerMetrics.GetLineScrollAmount()); mFrameMetrics.SetPageScrollAmount(aLayerMetrics.GetPageScrollAmount()); mFrameMetrics.SetClipRect(aLayerMetrics.GetClipRect()); - mFrameMetrics.SetMaskLayerIndex(aLayerMetrics.GetMaskLayerIndex()); mFrameMetrics.SetIsLayersIdRoot(aLayerMetrics.IsLayersIdRoot()); mFrameMetrics.SetUsesContainerScrolling(aLayerMetrics.UsesContainerScrolling()); diff --git a/gfx/layers/basic/BasicContainerLayer.cpp b/gfx/layers/basic/BasicContainerLayer.cpp index e80e27291bd..5e2b01fdb7c 100644 --- a/gfx/layers/basic/BasicContainerLayer.cpp +++ b/gfx/layers/basic/BasicContainerLayer.cpp @@ -44,7 +44,7 @@ BasicContainerLayer::ComputeEffectiveTransforms(const Matrix4x4& aTransformToSur if (!idealTransform.CanDraw2D()) { mEffectiveTransform = idealTransform; ComputeEffectiveTransformsForChildren(Matrix4x4()); - ComputeEffectiveTransformForMaskLayers(Matrix4x4()); + ComputeEffectiveTransformForMaskLayer(Matrix4x4()); mUseIntermediateSurface = true; return; } @@ -54,7 +54,7 @@ BasicContainerLayer::ComputeEffectiveTransforms(const Matrix4x4& aTransformToSur // need to apply any compensation using the residual from SnapTransformTranslation. 
ComputeEffectiveTransformsForChildren(idealTransform); - ComputeEffectiveTransformForMaskLayers(aTransformToSurface); + ComputeEffectiveTransformForMaskLayer(aTransformToSurface); Layer* child = GetFirstChild(); bool hasSingleBlendingChild = false; diff --git a/gfx/layers/basic/BasicPaintedLayer.h b/gfx/layers/basic/BasicPaintedLayer.h index bacfba1a77c..3139e604355 100644 --- a/gfx/layers/basic/BasicPaintedLayer.h +++ b/gfx/layers/basic/BasicPaintedLayer.h @@ -86,7 +86,7 @@ public: mResidualTranslation = gfxPoint(0,0); mValidRegion.SetEmpty(); } - ComputeEffectiveTransformForMaskLayers(aTransformToSurface); + ComputeEffectiveTransformForMaskLayer(aTransformToSurface); return; } PaintedLayer::ComputeEffectiveTransforms(aTransformToSurface); diff --git a/gfx/layers/client/ClientCanvasLayer.cpp b/gfx/layers/client/ClientCanvasLayer.cpp index 3ef5a9399f0..a80b9a5c360 100644 --- a/gfx/layers/client/ClientCanvasLayer.cpp +++ b/gfx/layers/client/ClientCanvasLayer.cpp @@ -133,7 +133,9 @@ ClientCanvasLayer::RenderLayer() PROFILER_LABEL("ClientCanvasLayer", "RenderLayer", js::ProfileEntry::Category::GRAPHICS); - RenderMaskLayers(this); + if (GetMaskLayer()) { + ToClientLayer(GetMaskLayer())->RenderLayer(); + } if (!IsDirty()) { return; diff --git a/gfx/layers/client/ClientColorLayer.cpp b/gfx/layers/client/ClientColorLayer.cpp index 708ee8a4026..0a21737524e 100644 --- a/gfx/layers/client/ClientColorLayer.cpp +++ b/gfx/layers/client/ClientColorLayer.cpp @@ -43,7 +43,9 @@ public: virtual void RenderLayer() { - RenderMaskLayers(this); + if (GetMaskLayer()) { + ToClientLayer(GetMaskLayer())->RenderLayer(); + } } virtual void FillSpecificAttributes(SpecificLayerAttributes& aAttrs) diff --git a/gfx/layers/client/ClientContainerLayer.h b/gfx/layers/client/ClientContainerLayer.h index 0b2f3a0d807..cb6cafc07ba 100644 --- a/gfx/layers/client/ClientContainerLayer.h +++ b/gfx/layers/client/ClientContainerLayer.h @@ -47,7 +47,9 @@ protected: public: virtual void RenderLayer() override { 
- RenderMaskLayers(this); + if (GetMaskLayer()) { + ToClientLayer(GetMaskLayer())->RenderLayer(); + } DefaultComputeSupportsComponentAlphaChildren(); diff --git a/gfx/layers/client/ClientImageLayer.cpp b/gfx/layers/client/ClientImageLayer.cpp index c18f9cfba52..a2c006cccd4 100644 --- a/gfx/layers/client/ClientImageLayer.cpp +++ b/gfx/layers/client/ClientImageLayer.cpp @@ -126,7 +126,9 @@ protected: void ClientImageLayer::RenderLayer() { - RenderMaskLayers(this); + if (GetMaskLayer()) { + ToClientLayer(GetMaskLayer())->RenderLayer(); + } if (!mContainer) { return; diff --git a/gfx/layers/client/ClientLayerManager.h b/gfx/layers/client/ClientLayerManager.h index 8efc3c5abef..482e2caf82f 100644 --- a/gfx/layers/client/ClientLayerManager.h +++ b/gfx/layers/client/ClientLayerManager.h @@ -390,16 +390,6 @@ public: { return static_cast(aLayer->ImplData()); } - - template - static inline void RenderMaskLayers(LayerType* aLayer) { - if (aLayer->GetMaskLayer()) { - ToClientLayer(aLayer->GetMaskLayer())->RenderLayer(); - } - for (size_t i = 0; i < aLayer->GetAncestorMaskLayerCount(); i++) { - ToClientLayer(aLayer->GetAncestorMaskLayerAt(i))->RenderLayer(); - } - } }; // Create a shadow layer (PLayerChild) for aLayer, if we're forwarding diff --git a/gfx/layers/client/ClientPaintedLayer.cpp b/gfx/layers/client/ClientPaintedLayer.cpp index 160e35e45a1..dff60c0cb91 100644 --- a/gfx/layers/client/ClientPaintedLayer.cpp +++ b/gfx/layers/client/ClientPaintedLayer.cpp @@ -117,7 +117,9 @@ ClientPaintedLayer::PaintThebes() void ClientPaintedLayer::RenderLayerWithReadback(ReadbackProcessor *aReadback) { - RenderMaskLayers(this); + if (GetMaskLayer()) { + ToClientLayer(GetMaskLayer())->RenderLayer(); + } if (!mContentClient) { mContentClient = ContentClient::CreateContentClient(ClientManager()->AsShadowForwarder()); diff --git a/gfx/layers/client/ClientTiledPaintedLayer.cpp b/gfx/layers/client/ClientTiledPaintedLayer.cpp index 0e0cd5dd530..ee46f0b6f1a 100644 --- 
a/gfx/layers/client/ClientTiledPaintedLayer.cpp +++ b/gfx/layers/client/ClientTiledPaintedLayer.cpp @@ -443,8 +443,10 @@ ClientTiledPaintedLayer::RenderLayer() } if (!ClientManager()->IsRepeatTransaction()) { - // Only paint the mask layers on the first transaction. - RenderMaskLayers(this); + // Only paint the mask layer on the first transaction. + if (GetMaskLayer()) { + ToClientLayer(GetMaskLayer())->RenderLayer(); + } // For more complex cases we need to calculate a bunch of metrics before we // can do the paint. diff --git a/gfx/layers/composite/AsyncCompositionManager.cpp b/gfx/layers/composite/AsyncCompositionManager.cpp index f1c2ebed0f4..84602b1541b 100644 --- a/gfx/layers/composite/AsyncCompositionManager.cpp +++ b/gfx/layers/composite/AsyncCompositionManager.cpp @@ -614,18 +614,6 @@ AsyncCompositionManager::ApplyAsyncContentTransformToTree(Layer *aLayer) // The final clip for the layer is the intersection of these clips. Maybe asyncClip = aLayer->GetClipRect(); - // The transform of a mask layer is relative to the masked layer's parent - // layer. So whenever we apply an async transform to a layer, we need to - // apply that same transform to the layer's own mask layer. - // A layer can also have "ancestor" mask layers for any rounded clips from - // its ancestor scroll frames. A scroll frame mask layer only needs to be - // async transformed for async scrolls of this scroll frame's ancestor - // scroll frames, not for async scrolls of this scroll frame itself. - // In the loop below, we iterate over scroll frames from inside to outside. - // At each iteration, this array contains the layer's ancestor mask layers - // of all scroll frames inside the current one. 
- nsTArray ancestorMaskLayers; - for (uint32_t i = 0; i < aLayer->GetFrameMetricsCount(); i++) { AsyncPanZoomController* controller = aLayer->GetAsyncPanZoomController(i); if (!controller) { @@ -688,21 +676,6 @@ AsyncCompositionManager::ApplyAsyncContentTransformToTree(Layer *aLayer) } } - // Do the same for the ancestor mask layers: ancestorMaskLayers contains - // the ancestor mask layers for scroll frames *inside* the current scroll - // frame, so these are the ones we need to shift by our async transform. - for (Layer* ancestorMaskLayer : ancestorMaskLayers) { - SetShadowTransform(ancestorMaskLayer, - ancestorMaskLayer->GetLocalTransform() * asyncTransform); - } - - // Append the ancestor mask layer for this scroll frame to ancestorMaskLayers. - if (metrics.GetMaskLayerIndex()) { - size_t maskLayerIndex = metrics.GetMaskLayerIndex().value(); - Layer* ancestorMaskLayer = aLayer->GetAncestorMaskLayerAt(maskLayerIndex); - ancestorMaskLayers.AppendElement(ancestorMaskLayer); - } - combinedAsyncTransformWithoutOverscroll *= asyncTransformWithoutOverscroll; combinedAsyncTransform *= asyncTransform; } @@ -719,12 +692,6 @@ AsyncCompositionManager::ApplyAsyncContentTransformToTree(Layer *aLayer) SetShadowTransform(aLayer, aLayer->GetLocalTransform() * AdjustForClip(combinedAsyncTransform, aLayer)); - // Do the same for the layer's own mask layer, if it has one. 
- if (Layer* maskLayer = aLayer->GetMaskLayer()) { - SetShadowTransform(maskLayer, - maskLayer->GetLocalTransform() * combinedAsyncTransform); - } - const FrameMetrics& bottom = LayerMetricsWrapper::BottommostScrollableMetrics(aLayer); MOZ_ASSERT(bottom.IsScrollable()); // must be true because hasAsyncTransform is true diff --git a/gfx/layers/composite/CanvasLayerComposite.cpp b/gfx/layers/composite/CanvasLayerComposite.cpp index ca371305763..072f670f6b1 100644 --- a/gfx/layers/composite/CanvasLayerComposite.cpp +++ b/gfx/layers/composite/CanvasLayerComposite.cpp @@ -95,15 +95,17 @@ CanvasLayerComposite::RenderLayer(const IntRect& aClipRect) } #endif - RenderWithAllMasks(this, mCompositor, aClipRect, - [&](EffectChain& effectChain, const Rect& clipRect) { - mCompositableHost->Composite(effectChain, - GetEffectiveOpacity(), - GetEffectiveTransform(), - GetEffectFilter(), - clipRect); - }); + EffectChain effectChain(this); + AddBlendModeEffect(effectChain); + LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(mMaskLayer, effectChain); + gfx::Rect clipRect(aClipRect.x, aClipRect.y, aClipRect.width, aClipRect.height); + + mCompositableHost->Composite(effectChain, + GetEffectiveOpacity(), + GetEffectiveTransform(), + GetEffectFilter(), + clipRect); mCompositableHost->BumpFlashCounter(); } diff --git a/gfx/layers/composite/ColorLayerComposite.cpp b/gfx/layers/composite/ColorLayerComposite.cpp index 8ac5b8041a7..cdc23320114 100644 --- a/gfx/layers/composite/ColorLayerComposite.cpp +++ b/gfx/layers/composite/ColorLayerComposite.cpp @@ -21,17 +21,28 @@ namespace layers { void ColorLayerComposite::RenderLayer(const gfx::IntRect& aClipRect) { - gfx::Rect rect(GetBounds()); + EffectChain effects(this); + + GenEffectChain(effects); + + gfx::IntRect boundRect = GetBounds(); + + LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(GetMaskLayer(), + effects); + + gfx::Rect rect(boundRect.x, boundRect.y, + boundRect.width, boundRect.height); + gfx::Rect 
clipRect(aClipRect.x, aClipRect.y, + aClipRect.width, aClipRect.height); + + float opacity = GetEffectiveOpacity(); + + AddBlendModeEffect(effects); + const gfx::Matrix4x4& transform = GetEffectiveTransform(); - - RenderWithAllMasks(this, mCompositor, aClipRect, - [&](EffectChain& effectChain, const Rect& clipRect) { - GenEffectChain(effectChain); - mCompositor->DrawQuad(rect, clipRect, effectChain, GetEffectiveOpacity(), transform); - }); - + mCompositor->DrawQuad(rect, clipRect, effects, opacity, transform); mCompositor->DrawDiagnostics(DiagnosticFlags::COLOR, - rect, Rect(aClipRect), + rect, clipRect, transform); } diff --git a/gfx/layers/composite/ContainerLayerComposite.cpp b/gfx/layers/composite/ContainerLayerComposite.cpp index 53cd1ee6609..bd3607ffa08 100755 --- a/gfx/layers/composite/ContainerLayerComposite.cpp +++ b/gfx/layers/composite/ContainerLayerComposite.cpp @@ -513,24 +513,34 @@ ContainerRender(ContainerT* aContainer, return; } - gfx::Rect visibleRect(aContainer->GetEffectiveVisibleRegion().GetBounds()); - Compositor* compositor = aManager->GetCompositor(); + float opacity = aContainer->GetEffectiveOpacity(); + + gfx::IntRect visibleRect = aContainer->GetEffectiveVisibleRegion().GetBounds(); #ifdef MOZ_DUMP_PAINTING if (gfxUtils::sDumpPainting) { - RefPtr surf = surface->Dump(compositor); + RefPtr surf = surface->Dump(aManager->GetCompositor()); if (surf) { WriteSnapshotToDumpFile(aContainer, surf); } } #endif - RenderWithAllMasks(aContainer, compositor, aClipRect, - [&](EffectChain& effectChain, const Rect& clipRect) { - effectChain.mPrimaryEffect = new EffectRenderTarget(surface); - compositor->DrawQuad(visibleRect, clipRect, effectChain, - aContainer->GetEffectiveOpacity(), - aContainer->GetEffectiveTransform()); - }); + EffectChain effectChain(aContainer); + LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(aContainer->GetMaskLayer(), + effectChain, + !aContainer->GetTransform().CanDraw2D()); + if (autoMaskEffect.Failed()) { + 
NS_WARNING("Failed to apply a mask effect."); + return; + } + + aContainer->AddBlendModeEffect(effectChain); + effectChain.mPrimaryEffect = new EffectRenderTarget(surface); + + gfx::Rect rect(visibleRect.x, visibleRect.y, visibleRect.width, visibleRect.height); + gfx::Rect clipRect(aClipRect.x, aClipRect.y, aClipRect.width, aClipRect.height); + aManager->GetCompositor()->DrawQuad(rect, clipRect, effectChain, opacity, + aContainer->GetEffectiveTransform()); } else { RenderLayers(aContainer, aManager, RenderTargetPixel::FromUntyped(aClipRect)); } diff --git a/gfx/layers/composite/ImageLayerComposite.cpp b/gfx/layers/composite/ImageLayerComposite.cpp index 0c7a662d533..025ba722ba3 100644 --- a/gfx/layers/composite/ImageLayerComposite.cpp +++ b/gfx/layers/composite/ImageLayerComposite.cpp @@ -95,15 +95,17 @@ ImageLayerComposite::RenderLayer(const IntRect& aClipRect) mCompositor->MakeCurrent(); - RenderWithAllMasks(this, mCompositor, aClipRect, - [&](EffectChain& effectChain, const Rect& clipRect) { - mImageHost->SetCompositor(mCompositor); - mImageHost->Composite(effectChain, - GetEffectiveOpacity(), - GetEffectiveTransformForBuffer(), - GetEffectFilter(), - clipRect); - }); + EffectChain effectChain(this); + LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(mMaskLayer, effectChain); + AddBlendModeEffect(effectChain); + + gfx::Rect clipRect(aClipRect.x, aClipRect.y, aClipRect.width, aClipRect.height); + mImageHost->SetCompositor(mCompositor); + mImageHost->Composite(effectChain, + GetEffectiveOpacity(), + GetEffectiveTransformForBuffer(), + GetEffectFilter(), + clipRect); mImageHost->BumpFlashCounter(); } @@ -141,7 +143,7 @@ ImageLayerComposite::ComputeEffectiveTransforms(const gfx::Matrix4x4& aTransform mEffectiveTransformForBuffer = mEffectiveTransform; } - ComputeEffectiveTransformForMaskLayers(aTransformToSurface); + ComputeEffectiveTransformForMaskLayer(aTransformToSurface); } CompositableHost* diff --git a/gfx/layers/composite/LayerManagerComposite.cpp 
b/gfx/layers/composite/LayerManagerComposite.cpp index 8224c97e30b..f713dd4a567 100644 --- a/gfx/layers/composite/LayerManagerComposite.cpp +++ b/gfx/layers/composite/LayerManagerComposite.cpp @@ -234,7 +234,7 @@ LayerManagerComposite::ApplyOcclusionCulling(Layer* aLayer, nsIntRegion& aOpaque // If we have a simple transform, then we can add our opaque area into // aOpaqueRegion. if (isTranslation && - !aLayer->HasMaskLayers() && + !aLayer->GetMaskLayer() && aLayer->GetLocalOpacity() == 1.0f) { if (aLayer->GetContentFlags() & Layer::CONTENT_OPAQUE) { localOpaque.Or(localOpaque, composite->GetFullyRenderedRegion()); diff --git a/gfx/layers/composite/LayerManagerComposite.h b/gfx/layers/composite/LayerManagerComposite.h index 4f18d62da12..e82a417805a 100644 --- a/gfx/layers/composite/LayerManagerComposite.h +++ b/gfx/layers/composite/LayerManagerComposite.h @@ -18,7 +18,6 @@ #include "mozilla/gfx/Rect.h" // for Rect #include "mozilla/gfx/Types.h" // for SurfaceFormat #include "mozilla/layers/CompositorTypes.h" -#include "mozilla/layers/Effects.h" // for EffectChain #include "mozilla/layers/LayersTypes.h" // for LayersBackend, etc #include "mozilla/Maybe.h" // for Maybe #include "mozilla/RefPtr.h" @@ -458,138 +457,6 @@ protected: gfx::IntRect mClearRect; }; -// Render aLayer using aCompositor and apply all mask layers of aLayer: The -// layer's own mask layer (aLayer->GetMaskLayer()), and any ancestor mask -// layers. -// If more than one mask layer needs to be applied, we use intermediate surfaces -// (CompositingRenderTargets) for rendering, applying one mask layer at a time. -// Callers need to provide a callback function aRenderCallback that does the -// actual rendering of the source. It needs to have the following form: -// void (EffectChain& effectChain, const Rect& clipRect) -// aRenderCallback is called exactly once, inside this function, unless aLayer's -// visible region is completely clipped out (in that case, aRenderCallback won't -// be called at all). 
-// This function calls aLayer->AsLayerComposite()->AddBlendModeEffect for the -// final rendering pass. -// -// (This function should really live in LayerManagerComposite.cpp, but we -// need to use templates for passing lambdas until bug 1164522 is resolved.) -template -void -RenderWithAllMasks(Layer* aLayer, Compositor* aCompositor, - const gfx::IntRect& aClipRect, - RenderCallbackType aRenderCallback) -{ - Layer* firstMask = nullptr; - size_t maskLayerCount = 0; - size_t nextAncestorMaskLayer = 0; - - size_t ancestorMaskLayerCount = aLayer->GetAncestorMaskLayerCount(); - if (Layer* ownMask = aLayer->GetMaskLayer()) { - firstMask = ownMask; - maskLayerCount = ancestorMaskLayerCount + 1; - nextAncestorMaskLayer = 0; - } else if (ancestorMaskLayerCount > 0) { - firstMask = aLayer->GetAncestorMaskLayerAt(0); - maskLayerCount = ancestorMaskLayerCount; - nextAncestorMaskLayer = 1; - } else { - // no mask layers at all - } - - bool firstMaskIs3D = false; - if (ContainerLayer* container = aLayer->AsContainerLayer()) { - firstMaskIs3D = !container->GetTransform().CanDraw2D(); - } - - if (maskLayerCount <= 1) { - // This is the common case. Render in one pass and return. - EffectChain effectChain(aLayer); - LayerManagerComposite::AutoAddMaskEffect - autoMaskEffect(firstMask, effectChain, firstMaskIs3D); - aLayer->AsLayerComposite()->AddBlendModeEffect(effectChain); - aRenderCallback(effectChain, gfx::Rect(aClipRect)); - return; - } - - // We have multiple mask layers. - // We split our list of mask layers into three parts: - // (1) The first mask - // (2) The list of intermediate masks (every mask except first and last) - // (3) The final mask. - // Part (2) can be empty. - // For parts (1) and (2) we need to allocate intermediate surfaces to render - // into. The final mask gets rendered into the original render target. - - // Calculate the size of the intermediate surfaces. 
- gfx::Rect visibleRect(aLayer->GetEffectiveVisibleRegion().GetBounds()); - gfx::Matrix4x4 transform = aLayer->GetEffectiveTransform(); - // TODO: Use RenderTargetIntRect and TransformTo<...> here - gfx::IntRect surfaceRect = - RoundedOut(transform.TransformBounds(visibleRect)).Intersect(aClipRect); - if (surfaceRect.IsEmpty()) { - return; - } - - RefPtr originalTarget = - aCompositor->GetCurrentRenderTarget(); - - RefPtr firstTarget = - aCompositor->CreateRenderTarget(surfaceRect, INIT_MODE_CLEAR); - if (!firstTarget) { - return; - } - - // Render the source while applying the first mask. - aCompositor->SetRenderTarget(firstTarget); - { - EffectChain firstEffectChain(aLayer); - LayerManagerComposite::AutoAddMaskEffect - firstMaskEffect(firstMask, firstEffectChain, firstMaskIs3D); - aRenderCallback(firstEffectChain, gfx::Rect(aClipRect - surfaceRect.TopLeft())); - // firstTarget now contains the transformed source with the first mask and - // opacity already applied. - } - - // Apply the intermediate masks. 
- gfx::Rect intermediateClip(surfaceRect - surfaceRect.TopLeft()); - RefPtr previousTarget = firstTarget; - for (size_t i = nextAncestorMaskLayer; i < ancestorMaskLayerCount - 1; i++) { - Layer* intermediateMask = aLayer->GetAncestorMaskLayerAt(i); - RefPtr intermediateTarget = - aCompositor->CreateRenderTarget(surfaceRect, INIT_MODE_CLEAR); - if (!intermediateTarget) { - break; - } - aCompositor->SetRenderTarget(intermediateTarget); - EffectChain intermediateEffectChain(aLayer); - LayerManagerComposite::AutoAddMaskEffect - intermediateMaskEffect(intermediateMask, intermediateEffectChain); - if (intermediateMaskEffect.Failed()) { - continue; - } - intermediateEffectChain.mPrimaryEffect = new EffectRenderTarget(previousTarget); - aCompositor->DrawQuad(gfx::Rect(surfaceRect), intermediateClip, - intermediateEffectChain, 1.0, gfx::Matrix4x4()); - previousTarget = intermediateTarget; - } - - aCompositor->SetRenderTarget(originalTarget); - - // Apply the final mask, rendering into originalTarget. - EffectChain finalEffectChain(aLayer); - finalEffectChain.mPrimaryEffect = new EffectRenderTarget(previousTarget); - Layer* finalMask = aLayer->GetAncestorMaskLayerAt(ancestorMaskLayerCount - 1); - - // The blend mode needs to be applied in this final step, because this is - // where we're blending with the actual background (which is in originalTarget). 
- aLayer->AsLayerComposite()->AddBlendModeEffect(finalEffectChain); - LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(finalMask, finalEffectChain); - if (!autoMaskEffect.Failed()) { - aCompositor->DrawQuad(gfx::Rect(surfaceRect), gfx::Rect(aClipRect), - finalEffectChain, 1.0, gfx::Matrix4x4()); - } -} } /* layers */ } /* mozilla */ diff --git a/gfx/layers/composite/PaintedLayerComposite.cpp b/gfx/layers/composite/PaintedLayerComposite.cpp index 4a3fcf1b3e0..20f79e0bcb5 100644 --- a/gfx/layers/composite/PaintedLayerComposite.cpp +++ b/gfx/layers/composite/PaintedLayerComposite.cpp @@ -119,13 +119,12 @@ PaintedLayerComposite::RenderLayer(const gfx::IntRect& aClipRect) PROFILER_LABEL("PaintedLayerComposite", "RenderLayer", js::ProfileEntry::Category::GRAPHICS); - Compositor* compositor = mCompositeManager->GetCompositor(); - - MOZ_ASSERT(mBuffer->GetCompositor() == compositor && + MOZ_ASSERT(mBuffer->GetCompositor() == mCompositeManager->GetCompositor() && mBuffer->GetLayer() == this, "buffer is corrupted"); const nsIntRegion& visibleRegion = GetEffectiveVisibleRegion(); + gfx::Rect clipRect(aClipRect.x, aClipRect.y, aClipRect.width, aClipRect.height); #ifdef MOZ_DUMP_PAINTING if (gfxUtils::sDumpPainting) { @@ -136,22 +135,21 @@ PaintedLayerComposite::RenderLayer(const gfx::IntRect& aClipRect) } #endif + EffectChain effectChain(this); + LayerManagerComposite::AutoAddMaskEffect autoMaskEffect(mMaskLayer, effectChain); + AddBlendModeEffect(effectChain); - RenderWithAllMasks(this, compositor, aClipRect, - [&](EffectChain& effectChain, const Rect& clipRect) { - mBuffer->SetPaintWillResample(MayResample()); - - mBuffer->Composite(effectChain, - GetEffectiveOpacity(), - GetEffectiveTransform(), - GetEffectFilter(), - clipRect, - &visibleRegion); - }); + mBuffer->SetPaintWillResample(MayResample()); + mBuffer->Composite(effectChain, + GetEffectiveOpacity(), + GetEffectiveTransform(), + GetEffectFilter(), + clipRect, + &visibleRegion); mBuffer->BumpFlashCounter(); - 
compositor->MakeCurrent(); + mCompositeManager->GetCompositor()->MakeCurrent(); } CompositableHost* diff --git a/gfx/layers/ipc/CompositorParent.cpp b/gfx/layers/ipc/CompositorParent.cpp index 47dcb0172cb..f98739a47c9 100644 --- a/gfx/layers/ipc/CompositorParent.cpp +++ b/gfx/layers/ipc/CompositorParent.cpp @@ -1071,9 +1071,6 @@ CompositorParent::SetShadowProperties(Layer* aLayer) if (Layer* maskLayer = aLayer->GetMaskLayer()) { SetShadowProperties(maskLayer); } - for (size_t i = 0; i < aLayer->GetAncestorMaskLayerCount(); i++) { - SetShadowProperties(aLayer->GetAncestorMaskLayerAt(i)); - } // FIXME: Bug 717688 -- Do these updates in LayerTransactionParent::RecvUpdate. LayerComposite* layerComposite = aLayer->AsLayerComposite(); diff --git a/gfx/layers/ipc/LayerTransactionParent.cpp b/gfx/layers/ipc/LayerTransactionParent.cpp index 6a910fa6b0a..24643b60f70 100644 --- a/gfx/layers/ipc/LayerTransactionParent.cpp +++ b/gfx/layers/ipc/LayerTransactionParent.cpp @@ -352,13 +352,6 @@ LayerTransactionParent::RecvUpdate(InfallibleTArray&& cset, layer->SetInvalidRegion(common.invalidRegion()); layer->SetFrameMetrics(common.metrics()); - nsTArray> maskLayers; - for (size_t i = 0; i < common.ancestorMaskLayersParent().Length(); i++) { - Layer* maskLayer = cast(common.ancestorMaskLayersParent().ElementAt(i))->AsLayer(); - maskLayers.AppendElement(maskLayer); - } - layer->SetAncestorMaskLayers(maskLayers); - typedef SpecificLayerAttributes Specific; const SpecificLayerAttributes& specific = attrs.specific(); switch (specific.type()) { diff --git a/gfx/layers/ipc/LayersMessages.ipdlh b/gfx/layers/ipc/LayersMessages.ipdlh index 857b155c43e..22fdd827bd3 100644 --- a/gfx/layers/ipc/LayersMessages.ipdlh +++ b/gfx/layers/ipc/LayersMessages.ipdlh @@ -225,7 +225,6 @@ struct CommonLayerAttributes { int8_t mixBlendMode; bool forceIsolatedGroup; nullable PLayer maskLayer; - PLayer[] ancestorMaskLayers; // Animated colors will only honored for ColorLayers. 
Animation[] animations; nsIntRegion invalidRegion; diff --git a/gfx/layers/ipc/ShadowLayers.cpp b/gfx/layers/ipc/ShadowLayers.cpp index 744c8d6fc37..5c932b755a9 100644 --- a/gfx/layers/ipc/ShadowLayers.cpp +++ b/gfx/layers/ipc/ShadowLayers.cpp @@ -607,10 +607,6 @@ ShadowLayerForwarder::EndTransaction(InfallibleTArray* aReplies, common.animations() = mutant->GetAnimations(); common.invalidRegion() = mutant->GetInvalidRegion(); common.metrics() = mutant->GetAllFrameMetrics(); - for (size_t i = 0; i < mutant->GetAncestorMaskLayerCount(); i++) { - auto layer = Shadow(mutant->GetAncestorMaskLayerAt(i)->AsShadowableLayer()); - common.ancestorMaskLayersChild().AppendElement(layer); - } attrs.specific() = null_t(); mutant->FillSpecificAttributes(attrs.specific()); diff --git a/layout/base/FrameLayerBuilder.cpp b/layout/base/FrameLayerBuilder.cpp index beaa60f5a76..9a08ee759d2 100644 --- a/layout/base/FrameLayerBuilder.cpp +++ b/layout/base/FrameLayerBuilder.cpp @@ -1088,14 +1088,8 @@ protected: * Grab a recyclable ImageLayer for use as a mask layer for aLayer (that is a * mask layer which has been used for aLayer before), or create one if such * a layer doesn't exist. - * - * Since mask layers can exist either on the layer directly, or as a side- - * attachment to FrameMetrics (for ancestor scrollframe clips), we key the - * recycle operation on both the originating layer and the mask layer's - * index in the layer, if any. */ - struct MaskLayerKey; - already_AddRefed CreateOrRecycleMaskImageLayerFor(const MaskLayerKey& aKey); + already_AddRefed CreateOrRecycleMaskImageLayerFor(Layer* aLayer); /** * Grabs all PaintedLayers and ColorLayers from the ContainerLayer and makes them * available for recycling. 
@@ -1197,12 +1191,6 @@ protected: const nsIntRegion& aLayerVisibleRegion, uint32_t aRoundedRectClipCount = UINT32_MAX); - already_AddRefed CreateMaskLayer( - Layer *aLayer, const DisplayItemClip& aClip, - const nsIntRegion& aLayerVisibleRegion, - const Maybe& aForAncestorMaskLayer, - uint32_t aRoundedRectClipCount = UINT32_MAX); - bool ChooseAnimatedGeometryRoot(const nsDisplayList& aList, const nsIFrame **aAnimatedGeometryRoot); @@ -1235,33 +1223,11 @@ protected: typedef nsAutoTArray AutoLayersArray; AutoLayersArray mNewChildLayers; nsTHashtable> mPaintedLayersAvailableForRecycling; + nsDataHashtable, nsRefPtr > + mRecycledMaskImageLayers; nscoord mAppUnitsPerDevPixel; bool mSnappingEnabled; bool mFlattenToSingleLayer; - - struct MaskLayerKey { - MaskLayerKey() : mLayer(nullptr) {} - MaskLayerKey(Layer* aLayer, const Maybe& aAncestorIndex) - : mLayer(aLayer), - mAncestorIndex(aAncestorIndex) - {} - - PLDHashNumber Hash() const { - // Hash the layer and add the layer index to the hash. - return (NS_PTR_TO_UINT32(mLayer) >> 2) - + (mAncestorIndex ? 
(*mAncestorIndex + 1) : 0); - } - bool operator ==(const MaskLayerKey& aOther) const { - return mLayer == aOther.mLayer && - mAncestorIndex == aOther.mAncestorIndex; - } - - Layer* mLayer; - Maybe mAncestorIndex; - }; - - nsDataHashtable, nsRefPtr> - mRecycledMaskImageLayers; }; class PaintedDisplayItemLayerUserData : public LayerUserData @@ -1961,12 +1927,12 @@ ContainerState::CreateOrRecycleImageLayer(PaintedLayer *aPainted) } already_AddRefed -ContainerState::CreateOrRecycleMaskImageLayerFor(const MaskLayerKey& aKey) +ContainerState::CreateOrRecycleMaskImageLayerFor(Layer* aLayer) { - nsRefPtr result = mRecycledMaskImageLayers.Get(aKey); + nsRefPtr result = mRecycledMaskImageLayers.Get(aLayer); if (result) { - mRecycledMaskImageLayers.Remove(aKey); - aKey.mLayer->ClearExtraDumpInfo(); + mRecycledMaskImageLayers.Remove(aLayer); + aLayer->ClearExtraDumpInfo(); // XXX if we use clip on mask layers, null it out here } else { // Create a new layer @@ -4354,14 +4320,7 @@ ContainerState::CollectOldLayers() if (Layer* maskLayer = layer->GetMaskLayer()) { NS_ASSERTION(maskLayer->GetType() == Layer::TYPE_IMAGE, "Could not recycle mask layer, unsupported layer type."); - mRecycledMaskImageLayers.Put(MaskLayerKey(layer, Nothing()), static_cast(maskLayer)); - } - for (size_t i = 0; i < layer->GetAncestorMaskLayerCount(); i++) { - Layer* maskLayer = layer->GetAncestorMaskLayerAt(i); - - NS_ASSERTION(maskLayer->GetType() == Layer::TYPE_IMAGE, - "Could not recycle mask layer, unsupported layer type."); - mRecycledMaskImageLayers.Put(MaskLayerKey(layer, Some(i)), static_cast(maskLayer)); + mRecycledMaskImageLayers.Put(layer, static_cast(maskLayer)); } } } @@ -4399,16 +4358,9 @@ ContainerState::SetupScrollingMetadata(NewLayerEntry* aEntry) nsAutoTArray metricsArray; if (aEntry->mBaseFrameMetrics) { metricsArray.AppendElement(*aEntry->mBaseFrameMetrics); - - // The base FrameMetrics was not computed by the nsIScrollableframe, so it - // should not have a mask layer. 
- MOZ_ASSERT(!aEntry->mBaseFrameMetrics->GetMaskLayerIndex()); } uint32_t baseLength = metricsArray.Length(); - // Any extra mask layers we need to attach to FrameMetrics. - nsTArray> maskLayers; - nsIFrame* fParent; for (const nsIFrame* f = aEntry->mAnimatedGeometryRoot; f != mContainerAnimatedGeometryRoot; @@ -4433,38 +4385,11 @@ ContainerState::SetupScrollingMetadata(NewLayerEntry* aEntry) continue; } - Maybe info = - scrollFrame->ComputeFrameMetrics(aEntry->mLayer, mContainerReferenceFrame, mParameters); - if (!info) { - continue; - } - - FrameMetrics& metrics = info->metrics; - const DisplayItemClip* clip = info->clip; - - if (clip && - clip->HasClip() && - clip->GetRoundedRectCount() > 0) - { - // The clip in between this scrollframe and its ancestor scrollframe - // requires a mask layer. Since this mask layer should not move with - // the APZC associated with this FrameMetrics, we attach the mask - // layer as an additional, separate clip. - Maybe nextIndex = Some(maskLayers.Length()); - nsRefPtr maskLayer = - CreateMaskLayer(aEntry->mLayer, *clip, aEntry->mVisibleRegion, nextIndex, clip->GetRoundedRectCount()); - if (maskLayer) { - metrics.SetMaskLayerIndex(nextIndex); - maskLayers.AppendElement(maskLayer); - } - } - - metricsArray.AppendElement(metrics); + scrollFrame->ComputeFrameMetrics(aEntry->mLayer, mContainerReferenceFrame, + mParameters, &metricsArray); } - // Watch out for FrameMetrics copies in profiles aEntry->mLayer->SetFrameMetrics(metricsArray); - aEntry->mLayer->SetAncestorMaskLayers(maskLayers); } static void @@ -5712,28 +5637,8 @@ ContainerState::SetupMaskLayer(Layer *aLayer, return; } - nsRefPtr maskLayer = - CreateMaskLayer(aLayer, aClip, aLayerVisibleRegion, Nothing(), aRoundedRectClipCount); - - if (!maskLayer) { - SetClipCount(paintedData, 0); - return; - } - - aLayer->SetMaskLayer(maskLayer); - SetClipCount(paintedData, aRoundedRectClipCount); -} - -already_AddRefed -ContainerState::CreateMaskLayer(Layer *aLayer, - const 
DisplayItemClip& aClip, - const nsIntRegion& aLayerVisibleRegion, - const Maybe& aForAncestorMaskLayer, - uint32_t aRoundedRectClipCount) -{ // check if we can re-use the mask layer - MaskLayerKey recycleKey(aLayer, aForAncestorMaskLayer); - nsRefPtr maskLayer = CreateOrRecycleMaskImageLayerFor(recycleKey); + nsRefPtr maskLayer = CreateOrRecycleMaskImageLayerFor(aLayer); MaskLayerUserData* userData = GetMaskLayerUserData(maskLayer); MaskLayerUserData newData; @@ -5744,7 +5649,9 @@ ContainerState::CreateMaskLayer(Layer *aLayer, newData.mAppUnitsPerDevPixel = mContainerFrame->PresContext()->AppUnitsPerDevPixel(); if (*userData == newData) { - return maskLayer.forget(); + aLayer->SetMaskLayer(maskLayer); + SetClipCount(paintedData, aRoundedRectClipCount); + return; } // calculate a more precise bounding rect @@ -5797,7 +5704,8 @@ ContainerState::CreateMaskLayer(Layer *aLayer, // fail if we can't get the right surface if (!dt) { NS_WARNING("Could not create DrawTarget for mask layer."); - return nullptr; + SetClipCount(paintedData, 0); + return; } nsRefPtr context = new gfxContext(dt); @@ -5842,7 +5750,9 @@ ContainerState::CreateMaskLayer(Layer *aLayer, userData->mRoundedClipRects.SwapElements(newData.mRoundedClipRects); userData->mImageKey = lookupKey; - return maskLayer.forget(); + aLayer->SetMaskLayer(maskLayer); + SetClipCount(paintedData, aRoundedRectClipCount); + return; } } // namespace mozilla diff --git a/layout/generic/nsGfxScrollFrame.cpp b/layout/generic/nsGfxScrollFrame.cpp index 2eceb64bced..efe9a5d7880 100644 --- a/layout/generic/nsGfxScrollFrame.cpp +++ b/layout/generic/nsGfxScrollFrame.cpp @@ -2942,18 +2942,6 @@ ScrollFrameHelper::BuildDisplayList(nsDisplayListBuilder* aBuilder, ? 
nsLayoutUtils::FindOrCreateIDFor(mScrolledFrame->GetContent()) : aBuilder->GetCurrentScrollParentId()); DisplayListClipState::AutoSaveRestore clipState(aBuilder); - if (!mIsRoot || !usingDisplayport) { - nsRect clip = mScrollPort + aBuilder->ToReferenceFrame(mOuter); - nscoord radii[8]; - bool haveRadii = mOuter->GetPaddingBoxBorderRadii(radii); - // Our override of GetBorderRadii ensures we never have a radius at - // the corners where we have a scrollbar. - if (mClipAllDescendants) { - clipState.ClipContentDescendants(clip, haveRadii ? radii : nullptr); - } else { - clipState.ClipContainingBlockDescendants(clip, haveRadii ? radii : nullptr); - } - } if (usingDisplayport) { // Capture the clip state of the parent scroll frame. This will be saved @@ -2974,6 +2962,17 @@ ScrollFrameHelper::BuildDisplayList(nsDisplayListBuilder* aBuilder, // the entire displayport, but it lets the compositor know to // clip to the scroll port after compositing. clipState.Clear(); + } else { + nsRect clip = mScrollPort + aBuilder->ToReferenceFrame(mOuter); + nscoord radii[8]; + bool haveRadii = mOuter->GetPaddingBoxBorderRadii(radii); + // Our override of GetBorderRadii ensures we never have a radius at + // the corners where we have a scrollbar. + if (mClipAllDescendants) { + clipState.ClipContentDescendants(clip, haveRadii ? radii : nullptr); + } else { + clipState.ClipContainingBlockDescendants(clip, haveRadii ? 
radii : nullptr); + } } aBuilder->StoreDirtyRectForScrolledContents(mOuter, dirtyRect); @@ -3053,13 +3052,14 @@ ScrollFrameHelper::BuildDisplayList(nsDisplayListBuilder* aBuilder, scrolledContent.MoveTo(aLists); } -Maybe +void ScrollFrameHelper::ComputeFrameMetrics(Layer* aLayer, nsIFrame* aContainerReferenceFrame, - const ContainerLayerParameters& aParameters) const + const ContainerLayerParameters& aParameters, + nsTArray* aOutput) const { if (!mShouldBuildScrollableLayer || mIsScrollableLayerInRootContainer) { - return Nothing(); + return; } bool needsParentLayerClip = true; @@ -3117,21 +3117,17 @@ ScrollFrameHelper::ComputeFrameMetrics(Layer* aLayer, } // Return early, since if we don't use APZ we don't need FrameMetrics. - return Nothing(); + return; } MOZ_ASSERT(mScrolledFrame->GetContent()); - FrameMetricsAndClip result; - nsRect scrollport = mScrollPort + toReferenceFrame; - result.metrics = nsLayoutUtils::ComputeFrameMetrics( - mScrolledFrame, mOuter, mOuter->GetContent(), - aContainerReferenceFrame, aLayer, mScrollParentID, - scrollport, parentLayerClip, isRootContent, aParameters); - result.clip = mAncestorClip; - - return Some(result); + *aOutput->AppendElement() = + nsLayoutUtils::ComputeFrameMetrics( + mScrolledFrame, mOuter, mOuter->GetContent(), + aContainerReferenceFrame, aLayer, mScrollParentID, + scrollport, parentLayerClip, isRootContent, aParameters); } bool diff --git a/layout/generic/nsGfxScrollFrame.h b/layout/generic/nsGfxScrollFrame.h index fe2f79caac8..55d94e344b6 100644 --- a/layout/generic/nsGfxScrollFrame.h +++ b/layout/generic/nsGfxScrollFrame.h @@ -377,9 +377,9 @@ public: } } bool WantAsyncScroll() const; - Maybe ComputeFrameMetrics( - Layer* aLayer, nsIFrame* aContainerReferenceFrame, - const ContainerLayerParameters& aParameters) const; + void ComputeFrameMetrics(Layer* aLayer, nsIFrame* aContainerReferenceFrame, + const ContainerLayerParameters& aParameters, + nsTArray* aOutput) const; // nsIScrollbarMediator void 
ScrollByPage(nsScrollbarFrame* aScrollbar, int32_t aDirection, @@ -834,11 +834,11 @@ public: virtual bool WantAsyncScroll() const override { return mHelper.WantAsyncScroll(); } - virtual mozilla::Maybe ComputeFrameMetrics( - Layer* aLayer, nsIFrame* aContainerReferenceFrame, - const ContainerLayerParameters& aParameters) const override - { - return mHelper.ComputeFrameMetrics(aLayer, aContainerReferenceFrame, aParameters); + virtual void ComputeFrameMetrics(Layer* aLayer, nsIFrame* aContainerReferenceFrame, + const ContainerLayerParameters& aParameters, + nsTArray* aOutput) const override { + mHelper.ComputeFrameMetrics(aLayer, aContainerReferenceFrame, + aParameters, aOutput); } virtual bool IsIgnoringViewportClipping() const override { return mHelper.IsIgnoringViewportClipping(); @@ -1229,11 +1229,11 @@ public: virtual bool WantAsyncScroll() const override { return mHelper.WantAsyncScroll(); } - virtual mozilla::Maybe ComputeFrameMetrics( - Layer* aLayer, nsIFrame* aContainerReferenceFrame, - const ContainerLayerParameters& aParameters) const override - { - return mHelper.ComputeFrameMetrics(aLayer, aContainerReferenceFrame, aParameters); + virtual void ComputeFrameMetrics(Layer* aLayer, nsIFrame* aContainerReferenceFrame, + const ContainerLayerParameters& aParameters, + nsTArray* aOutput) const override { + mHelper.ComputeFrameMetrics(aLayer, aContainerReferenceFrame, + aParameters, aOutput); } virtual bool IsIgnoringViewportClipping() const override { return mHelper.IsIgnoringViewportClipping(); diff --git a/layout/generic/nsIScrollableFrame.h b/layout/generic/nsIScrollableFrame.h index e35e985901e..523175be067 100644 --- a/layout/generic/nsIScrollableFrame.h +++ b/layout/generic/nsIScrollableFrame.h @@ -32,18 +32,10 @@ class nsDisplayListBuilder; namespace mozilla { struct ContainerLayerParameters; -class DisplayItemClip; namespace layers { class Layer; } - -struct FrameMetricsAndClip -{ - layers::FrameMetrics metrics; - const DisplayItemClip* clip; -}; - -} 
// namespace mozilla +} /** * Interface for frames that are scrollable. This interface exposes @@ -423,10 +415,10 @@ public: * aLayer's animated geometry root is this frame. If there needs to be a * FrameMetrics contributed by this frame, append it to aOutput. */ - virtual mozilla::Maybe ComputeFrameMetrics( - mozilla::layers::Layer* aLayer, - nsIFrame* aContainerReferenceFrame, - const ContainerLayerParameters& aParameters) const = 0; + virtual void ComputeFrameMetrics(mozilla::layers::Layer* aLayer, + nsIFrame* aContainerReferenceFrame, + const ContainerLayerParameters& aParameters, + nsTArray* aOutput) const = 0; /** * If this scroll frame is ignoring viewporting clipping From c589f29790e16d88aefb0d17eae1c8f3a3bb0581 Mon Sep 17 00:00:00 2001 From: Jan Gerber Date: Mon, 29 Jun 2015 23:06:04 +0200 Subject: [PATCH 45/61] Bug 1178215 - update update.py for new libvpx r=rillian libvpx dropped vpx_mem_set_functions, only use it if an external libvpx is used and still has it. update update.py add vpx_dsp_rtcd.h rebase disable_pthread_on_mingw.patch add vp9_filter_restore_aligment.patch drop msvc2015.patch --- configure.in | 3 ++ layout/media/symbols.def.in | 1 - media/libvpx/README_MOZILLA | 2 +- media/libvpx/disable_pthread_on_mingw.patch | 28 ++-------- media/libvpx/msvc2015.patch | 15 ------ media/libvpx/update.py | 54 +++++++++++++++---- .../libvpx/vp9_filter_restore_aligment.patch | 27 ++++++++++ media/libvpx/vpx_dsp_rtcd.h | 48 +++++++++++++++++ 8 files changed, 127 insertions(+), 51 deletions(-) delete mode 100644 media/libvpx/msvc2015.patch create mode 100644 media/libvpx/vp9_filter_restore_aligment.patch create mode 100644 media/libvpx/vpx_dsp_rtcd.h diff --git a/configure.in b/configure.in index 7abca25cea8..4c1cc9e033d 100644 --- a/configure.in +++ b/configure.in @@ -5591,6 +5591,9 @@ if test -n "$MOZ_VPX" -a -z "$MOZ_NATIVE_LIBVPX"; then else AC_MSG_WARN([No assembler or assembly support for libvpx. 
Using unoptimized C routines.]) fi + + dnl native libvpx no longer has vpx_mem_set_functions + AC_DEFINE(MOZ_VPX_NO_MEM_REPORTING) fi dnl ======================================================== diff --git a/layout/media/symbols.def.in b/layout/media/symbols.def.in index c7589cc171e..1a2cd720a20 100644 --- a/layout/media/symbols.def.in +++ b/layout/media/symbols.def.in @@ -62,7 +62,6 @@ vpx_codec_get_cx_data vpx_codec_enc_config_default vpx_img_alloc vpx_codec_encode -vpx_mem_set_functions #endif #endif #ifdef MOZ_VORBIS diff --git a/media/libvpx/README_MOZILLA b/media/libvpx/README_MOZILLA index cd22278afe8..523a148d29f 100644 --- a/media/libvpx/README_MOZILLA +++ b/media/libvpx/README_MOZILLA @@ -6,6 +6,6 @@ Mozilla build system. The libvpx git repository is: - https://gerrit.chromium.org/gerrit/webm/libvpx + https://chromium.googlesource.com/webm/libvpx The git commit ID used was c74bf6d889992c3cabe017ec353ca85c323107cd diff --git a/media/libvpx/disable_pthread_on_mingw.patch b/media/libvpx/disable_pthread_on_mingw.patch index a7e99c94fb6..b458fd35737 100644 --- a/media/libvpx/disable_pthread_on_mingw.patch +++ b/media/libvpx/disable_pthread_on_mingw.patch @@ -3,50 +3,30 @@ Don't use pthread for libvpx in mingw builds. 
diff --git a/media/libvpx/vpx_config_x86-win32-gcc.h b/media/libvpx/vpx_config_x86-win32-gcc.h -index c5452c0..12b5c84 100644 +index 5bc3efb..e60f84d 100644 --- a/media/libvpx/vpx_config_x86-win32-gcc.h +++ b/media/libvpx/vpx_config_x86-win32-gcc.h -@@ -30,17 +30,18 @@ - #define HAVE_SSSE3 1 - #define HAVE_SSE4_1 1 - #define HAVE_AVX 1 +@@ -32,7 +32,8 @@ #define HAVE_AVX2 1 - #define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 - #define HAVE_ALT_TREE_LAYOUT 0 -#define HAVE_PTHREAD_H 1 +#undef HAVE_PTHREAD_H +#define HAVE_PTHREAD_H 0 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 - #define CONFIG_EXTERNAL_BUILD 0 - #define CONFIG_INSTALL_DOCS 0 - #define CONFIG_INSTALL_BINS 1 - #define CONFIG_INSTALL_LIBS 1 - #define CONFIG_INSTALL_SRCS 0 diff --git a/media/libvpx/vpx_config_x86_64-win64-gcc.h b/media/libvpx/vpx_config_x86_64-win64-gcc.h -index 04f382b..4d75a4b 100644 +index 4ff4339..b056a0e 100644 --- a/media/libvpx/vpx_config_x86_64-win64-gcc.h +++ b/media/libvpx/vpx_config_x86_64-win64-gcc.h -@@ -30,17 +30,18 @@ - #define HAVE_SSSE3 1 - #define HAVE_SSE4_1 1 - #define HAVE_AVX 1 +@@ -32,7 +32,8 @@ #define HAVE_AVX2 1 - #define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 - #define HAVE_ALT_TREE_LAYOUT 0 -#define HAVE_PTHREAD_H 1 +#undef HAVE_PTHREAD_H +#define HAVE_PTHREAD_H 0 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 - #define CONFIG_EXTERNAL_BUILD 0 - #define CONFIG_INSTALL_DOCS 0 - #define CONFIG_INSTALL_BINS 1 - #define CONFIG_INSTALL_LIBS 1 - #define CONFIG_INSTALL_SRCS 0 diff --git a/media/libvpx/msvc2015.patch b/media/libvpx/msvc2015.patch deleted file mode 100644 index 5fff11b3dd5..00000000000 --- a/media/libvpx/msvc2015.patch +++ /dev/null @@ -1,15 +0,0 @@ -diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h -index e971158..19d445d 100644 ---- a/vp9/common/vp9_systemdependent.h -+++ 
b/vp9/common/vp9_systemdependent.h -@@ -17,7 +17,9 @@ - # include - # define USE_MSC_INTRIN - # endif --# define snprintf _snprintf -+# if _MSC_VER < 1900 -+# define snprintf _snprintf -+# endif - #endif - - #ifdef __cplusplus diff --git a/media/libvpx/update.py b/media/libvpx/update.py index acea0bbda61..9fbc6d17ee5 100755 --- a/media/libvpx/update.py +++ b/media/libvpx/update.py @@ -27,12 +27,13 @@ PLATFORMS= [ mk_files = [ 'vp8/vp8_common.mk', - 'vp8/vp8cx_arm.mk', 'vp8/vp8cx.mk', 'vp8/vp8dx.mk', + 'vp8/vp8cx_arm.mk', 'vp9/vp9_common.mk', 'vp9/vp9cx.mk', 'vp9/vp9dx.mk', + 'vpx_dsp/vpx_dsp.mk', 'vpx_mem/vpx_mem.mk', 'vpx_ports/vpx_ports.mk', 'vpx_scale/vpx_scale.mk', @@ -48,6 +49,8 @@ MODULES = { 'API_EXPORTS', 'API_SRCS-$(CONFIG_VP8_DECODER)', 'API_SRCS-yes', + 'DSP_SRCS-yes', + 'DSP_SRCS-yes+$(CONFIG_ENCODERS)', 'MEM_SRCS-yes', 'PORTS_SRCS-yes', 'SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING)', @@ -86,8 +89,16 @@ MODULES = { 'VP9_CX_SRCS-yes', ], 'X86_ASM': [ + 'DSP_SRCS-$(HAVE_MMX)', + 'DSP_SRCS-$(HAVE_MMX)+$(CONFIG_ENCODERS)', + 'DSP_SRCS-$(HAVE_SSE2)', + 'DSP_SRCS-$(HAVE_SSE2)+$(CONFIG_ENCODERS)', + 'DSP_SRCS-$(HAVE_SSE3)+$(CONFIG_ENCODERS)', + 'DSP_SRCS-$(HAVE_SSE4_1)+$(CONFIG_ENCODERS)', + 'DSP_SRCS-$(HAVE_SSSE3)+$(CONFIG_ENCODERS)', 'PORTS_SRCS-$(BUILD_LIBVPX)', 'PORTS_SRCS-$(BUILD_LIBVPX)+$(ARCH_X86)$(ARCH_X86_64)', + 'PORTS_SRCS-yes+$(ARCH_X86)$(ARCH_X86_64)', 'VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64)', 'VP8_COMMON_SRCS-$(HAVE_MMX)', 'VP8_COMMON_SRCS-$(HAVE_MMX)+$(CONFIG_POSTPROC)', @@ -127,6 +138,10 @@ MODULES = { 'VP9_CX_SRCS-$(HAVE_SSSE3)+$(ARCH_X86_64)', ], 'ARM_ASM': [ + 'DSP_SRCS-$(HAVE_MEDIA)', + 'DSP_SRCS-$(HAVE_MEDIA)+$(CONFIG_ENCODERS)', + 'DSP_SRCS-$(HAVE_NEON)', + 'DSP_SRCS-$(HAVE_NEON)+$(CONFIG_ENCODERS)', 'PORTS_SRCS-$(ARCH_ARM)', 'SCALE_SRCS-$(HAVE_NEON)', 'VP8_COMMON_SRCS-$(ARCH_ARM)', @@ -146,6 +161,8 @@ MODULES = { 'VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT)', ], 'AVX2': [ + 'DSP_SRCS-$(HAVE_AVX2)', + 'DSP_SRCS-$(HAVE_AVX2)+$(CONFIG_ENCODERS)', 
'VP9_COMMON_SRCS-$(HAVE_AVX2)', 'VP9_CX_SRCS-$(HAVE_AVX2)', ], @@ -170,6 +187,9 @@ DISABLED_MODULES = [ 'VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS)+$(CONFIG_VP9_POSTPROC)', 'VP9_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING)', 'VP9_CX_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_TEMPORAL_DENOISING)', + + # VP9_HIGHBITDEPTH + 'DSP_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_HIGHBITDEPTH)', 'VP9_COMMON_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_HIGHBITDEPTH)', 'VP9_CX_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_HIGHBITDEPTH)', @@ -183,6 +203,8 @@ DISABLED_MODULES = [ 'VP8_COMMON_SRCS-$(HAVE_DSPR2)', 'VP9_COMMON_SRCS-$(HAVE_DSPR2)', 'VP8_CX_SRCS_REMOVE-$(HAVE_EDSP)', + 'VP9_COMMON_SRCS-$(HAVE_MSA)', + 'VP9_CX_SRCS-$(HAVE_MSA)', ] libvpx_files = [ @@ -221,8 +243,12 @@ ignore_folders = [ ] rename_files = { + #avoid clash between vpx_dsp/x86 and vp8/common/x86 + 'vp8/common/x86/variance_mmx.c': 'vp8/common/x86/vp8_variance_mmx.c', + 'vp8/common/x86/variance_sse2.c': 'vp8/common/x86/vp8_variance_sse2.c', + 'vp8/common/x86/variance_impl_mmx.asm': 'vp8/common/x86/vp8_variance_impl_mmx.asm', #avoid clash with common/arm/neon/vp9_avg_neon.c - 'vp9/encoder/arm/neon/vp9_avg_neon.c': 'vp9/encoder/arm/neon/vp9enc_avg_neon.c' + 'vp9/encoder/arm/neon/vp9_avg_neon.c': 'vp9/encoder/arm/neon/vp9enc_avg_neon.c', } files = { @@ -250,7 +276,6 @@ files = { ], 'SOURCES': [ 'vp8/common/rtcd.c', - 'vp8/common/sad_c.c', 'vp8/encoder/bitstream.c', 'vp8/encoder/onyx_if.c', 'vp8/vp8_dx_iface.c', @@ -300,7 +325,6 @@ files = { 'vp9/encoder/vp9_ratectrl.c', 'vp9/encoder/vp9_rdopt.c', 'vp9/encoder/vp9_resize.c', - 'vp9/encoder/vp9_sad.c', 'vp9/encoder/vp9_segmentation.c', 'vp9/encoder/vp9_subexp.c', 'vp9/encoder/vp9_temporal_filter.c', @@ -348,14 +372,14 @@ platform_files = [ 'vpx_config.asm', 'vpx_config.h', 'vpx_scale_rtcd.h', + 'vpx_dsp_rtcd.h', ] def prepare_upstream(prefix, commit=None): upstream_url = 'https://chromium.googlesource.com/webm/libvpx' if os.path.exists(prefix): - print "Using existing repo in '%s'" % prefix os.chdir(prefix) - 
subprocess.call(['git', 'fetch', upstream_url, prefix]) + subprocess.call(['git', 'fetch', 'origin']) else: subprocess.call(['git', 'clone', upstream_url, prefix]) os.chdir(prefix) @@ -368,7 +392,8 @@ def prepare_upstream(prefix, commit=None): for target in PLATFORMS: target_objdir = os.path.join(prefix, 'objdir', target) - os.makedirs(target_objdir) + if not os.path.exists(target_objdir): + os.makedirs(target_objdir) os.chdir(target_objdir) configure = ['../../configure', '--target=%s' % target, '--disable-examples', '--disable-install-docs', @@ -386,10 +411,14 @@ def prepare_upstream(prefix, commit=None): if target == 'armv7-android-gcc': configure += ['--sdk-path=%s' % ndk_path] - + print "\n" + target_objdir + print " ".join(configure) + sys.stdout.flush() subprocess.call(configure) make_targets = [f for f in platform_files if not os.path.exists(f)] if make_targets: + print " ".join(['make'] + make_targets) + sys.stdout.flush() subprocess.call(['make'] + make_targets) for f in make_targets: if not os.path.exists(f): @@ -543,6 +572,10 @@ def update_and_remove_files(prefix, libvpx_files, files): # Remove unknown files from tree removed_files = [f for f in current_files if f not in libvpx_files and f not in rename_files.values()] + for f in rename_files: + if os.path.exists(f) and os.path.exists(rename_files[f]) and not f in removed_files: + removed_files.append(f) + if removed_files: print "Remove files:" for f in removed_files: @@ -552,12 +585,13 @@ def update_and_remove_files(prefix, libvpx_files, files): def apply_patches(): # Patch to permit vpx users to specify their own types. os.system("patch -p0 < stdint.patch") - # Patch to allow MSVC 2015 to compile libvpx - os.system("patch -p1 < msvc2015.patch") # Patch to fix a crash caused by MSVC 2013 os.system("patch -p3 < bug1137614.patch") # Bug 1176730 - Don't use pthread for libvpx in mingw builds. 
os.system("patch -p3 < disable_pthread_on_mingw.patch") + # Cherry pick https://chromium-review.googlesource.com/#/c/276889/ + # to fix crash on 32bit + os.system("patch -p1 < vp9_filter_restore_aligment.patch") def update_readme(commit): with open('README_MOZILLA') as f: diff --git a/media/libvpx/vp9_filter_restore_aligment.patch b/media/libvpx/vp9_filter_restore_aligment.patch new file mode 100644 index 00000000000..03fe58de3ce --- /dev/null +++ b/media/libvpx/vp9_filter_restore_aligment.patch @@ -0,0 +1,27 @@ +commit 33b3953c548a20c0aee705657df0440a740c28b7 +Author: James Zern +Date: Thu Jun 11 15:12:22 2015 -0700 + + vp9_filter: restore vp9_bilinear_filters alignment + + the declaration containing the alignment in vp9_filter.h was removed in: + eb88b17 Make vp9 subpixel match vp8 + + fixes a crash in 32-bit builds + + Change-Id: I9a97e6b4e8e94698e43ff79d0d8bb85043b73c61 + +diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c +index afcdf22..b256d4a 100644 +--- a/vp9/common/vp9_filter.c ++++ b/vp9/common/vp9_filter.c +@@ -12,7 +12,8 @@ + + #include "vp9/common/vp9_filter.h" + +-const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS] = { ++DECLARE_ALIGNED(256, const InterpKernel, ++ vp9_bilinear_filters[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, + { 0, 0, 0, 120, 8, 0, 0, 0 }, + { 0, 0, 0, 112, 16, 0, 0, 0 }, diff --git a/media/libvpx/vpx_dsp_rtcd.h b/media/libvpx/vpx_dsp_rtcd.h new file mode 100644 index 00000000000..616fe5f3d4a --- /dev/null +++ b/media/libvpx/vpx_dsp_rtcd.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2013 Mozilla Foundation. All Rights Reserved. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
+ */ + +#if defined(_WIN64) +/* 64 bit Windows */ +#ifdef _MSC_VER +#include "vpx_dsp_rtcd_x86_64-win64-vs12.h" +#else +#include "vpx_dsp_rtcd_x86_64-win64-gcc.h" +#endif + +#elif defined(_WIN32) +/* 32 bit Windows, MSVC. */ +#ifdef _MSC_VER +#include "vpx_dsp_rtcd_x86-win32-vs12.h" +#else +#include "vpx_dsp_rtcd_x86-win32-gcc.h" +#endif + +#elif defined(__APPLE__) && defined(__x86_64__) +/* 64 bit MacOS. */ +#include "vpx_dsp_rtcd_x86_64-darwin9-gcc.h" + +#elif defined(__APPLE__) && defined(__i386__) +/* 32 bit MacOS. */ +#include "vpx_dsp_rtcd_x86-darwin9-gcc.h" + +#elif defined(__ELF__) && (defined(__i386) || defined(__i386__)) +/* 32 bit ELF platforms. */ +#include "vpx_dsp_rtcd_x86-linux-gcc.h" + +#elif defined(__ELF__) && (defined(__x86_64) || defined(__x86_64__)) +/* 64 bit ELF platforms. */ +#include "vpx_dsp_rtcd_x86_64-linux-gcc.h" + +#elif defined(VPX_ARM_ASM) +/* Android */ +#include "vpx_dsp_rtcd_armv7-android-gcc.h" + +#else +/* Assume generic GNU/GCC configuration. */ +#include "vpx_dsp_rtcd_generic-gnu.h" +#endif From 74e7eaa1936c4d02d0055af7398785b7d2d51c3d Mon Sep 17 00:00:00 2001 From: Jan Gerber Date: Mon, 29 Jun 2015 23:07:20 +0200 Subject: [PATCH 46/61] Bug 1178215 - Update libvpx. r=rillian Update libvpx to e67d45d4ce92468ba193288b59093fef0a502662 * * * Bug 1178215 - Bump clobber for libvpx file renames. 
r=me --- CLOBBER | 2 +- media/libvpx/PATENTS | 2 +- media/libvpx/README_MOZILLA | 2 +- media/libvpx/sources.mozbuild | 63 +- media/libvpx/third_party/x86inc/x86inc.asm | 2 + media/libvpx/vp8/common/alloccommon.c | 9 +- .../vp8/common/arm/armv6/dequant_idct_v6.asm | 2 +- .../arm/armv6/vp8_variance16x16_armv6.asm | 154 -- .../arm/armv6/vp8_variance8x8_armv6.asm | 101 -- media/libvpx/vp8/common/arm/filter_arm.c | 6 +- media/libvpx/vp8/common/arm/neon/sad_neon.c | 184 --- .../vp8/common/arm/neon/variance_neon.c | 320 ---- .../arm/neon/vp8_subpixelvariance_neon.c | 34 +- media/libvpx/vp8/common/arm/variance_arm.c | 19 +- media/libvpx/vp8/common/common.h | 8 +- media/libvpx/vp8/common/copy_c.c | 32 + media/libvpx/vp8/common/debugmodes.c | 2 - media/libvpx/vp8/common/dequantize.c | 2 +- media/libvpx/vp8/common/entropy.c | 3 +- media/libvpx/vp8/common/entropymode.c | 8 +- media/libvpx/vp8/common/extend.c | 10 +- media/libvpx/vp8/common/filter.c | 1 + .../vp8/common/generic/systemdependent.c | 1 + media/libvpx/vp8/common/idct_blk.c | 6 +- media/libvpx/vp8/common/idctllm.c | 1 + media/libvpx/vp8/common/loopfilter.c | 13 +- media/libvpx/vp8/common/mfqe.c | 47 +- media/libvpx/vp8/common/postproc.c | 12 +- media/libvpx/vp8/common/reconinter.c | 57 +- media/libvpx/vp8/common/reconintra.c | 22 +- media/libvpx/vp8/common/rtcd.c | 6 +- media/libvpx/vp8/common/sad_c.c | 302 ---- media/libvpx/vp8/common/setupintrarecon.c | 12 +- media/libvpx/vp8/common/variance.h | 71 +- media/libvpx/vp8/common/variance_c.c | 167 +- media/libvpx/vp8/common/x86/copy_sse2.asm | 93 ++ media/libvpx/vp8/common/x86/copy_sse3.asm | 146 ++ media/libvpx/vp8/common/x86/idct_blk_mmx.c | 16 +- media/libvpx/vp8/common/x86/sad_sse2.asm | 410 ----- media/libvpx/vp8/common/x86/sad_sse3.asm | 960 ------------ media/libvpx/vp8/common/x86/sad_sse4.asm | 353 ----- .../vp8/common/x86/variance_impl_sse2.asm | 387 ----- media/libvpx/vp8/common/x86/variance_ssse3.c | 10 +- media/libvpx/vp8/common/x86/vp8_asm_stubs.c | 22 +- 
.../vp8/common/x86/vp8_variance_impl_mmx.asm | 353 +++++ .../{variance_mmx.c => vp8_variance_mmx.c} | 155 +- .../{variance_sse2.c => vp8_variance_sse2.c} | 156 +- media/libvpx/vp8/decoder/decodeframe.c | 63 +- media/libvpx/vp8/decoder/detokenize.c | 4 +- media/libvpx/vp8/decoder/error_concealment.c | 2 +- media/libvpx/vp8/decoder/onyxd_if.c | 4 +- media/libvpx/vp8/decoder/threading.c | 52 +- .../encoder/arm/armv6/vp8_mse16x16_armv6.asm | 138 -- .../vp8/encoder/arm/neon/vp8_mse16x16_neon.c | 131 -- media/libvpx/vp8/encoder/bitstream.c | 4 +- media/libvpx/vp8/encoder/dct.c | 2 + media/libvpx/vp8/encoder/denoising.c | 14 +- media/libvpx/vp8/encoder/encodeframe.c | 14 +- media/libvpx/vp8/encoder/encodeintra.c | 3 +- media/libvpx/vp8/encoder/encodemb.c | 12 +- media/libvpx/vp8/encoder/ethreading.c | 26 +- media/libvpx/vp8/encoder/firstpass.c | 44 +- media/libvpx/vp8/encoder/mcomp.c | 56 +- media/libvpx/vp8/encoder/modecosts.c | 1 + media/libvpx/vp8/encoder/modecosts.h | 4 +- media/libvpx/vp8/encoder/onyx_if.c | 129 +- media/libvpx/vp8/encoder/onyx_int.h | 7 + media/libvpx/vp8/encoder/pickinter.c | 56 +- media/libvpx/vp8/encoder/picklpf.c | 12 +- media/libvpx/vp8/encoder/quantize.c | 4 +- media/libvpx/vp8/encoder/ratectrl.c | 56 +- media/libvpx/vp8/encoder/ratectrl.h | 2 + media/libvpx/vp8/encoder/rdopt.c | 81 +- media/libvpx/vp8/encoder/rdopt.h | 3 + media/libvpx/vp8/encoder/segmentation.c | 2 +- media/libvpx/vp8/encoder/temporal_filter.c | 12 +- media/libvpx/vp8/encoder/tokenize.c | 10 +- media/libvpx/vp8/encoder/x86/quantize_sse2.c | 8 +- media/libvpx/vp8/vp8_cx_iface.c | 19 +- media/libvpx/vp8/vp8_dx_iface.c | 10 +- media/libvpx/vp8_rtcd_armv7-android-gcc.h | 134 +- media/libvpx/vp8_rtcd_generic-gnu.h | 87 - media/libvpx/vp8_rtcd_x86-darwin9-gcc.h | 214 +-- media/libvpx/vp8_rtcd_x86-linux-gcc.h | 214 +-- media/libvpx/vp8_rtcd_x86-win32-gcc.h | 214 +-- media/libvpx/vp8_rtcd_x86-win32-vs12.h | 214 +-- media/libvpx/vp8_rtcd_x86_64-darwin9-gcc.h | 174 +- 
media/libvpx/vp8_rtcd_x86_64-linux-gcc.h | 174 +- media/libvpx/vp8_rtcd_x86_64-win64-gcc.h | 174 +- media/libvpx/vp8_rtcd_x86_64-win64-vs12.h | 174 +- ...vp9_avg_neon.c => vp9_convolve_avg_neon.c} | 0 ..._asm.asm => vp9_convolve_avg_neon_asm.asm} | 0 .../vp9/common/arm/neon/vp9_convolve_neon.c | 4 +- .../arm/neon/vp9_idct16x16_1_add_neon.c | 2 + .../arm/neon/vp9_idct32x32_1_add_neon.c | 4 +- .../common/arm/neon/vp9_idct4x4_1_add_neon.c | 2 + .../common/arm/neon/vp9_idct8x8_1_add_neon.c | 2 + .../vp9/common/arm/neon/vp9_reconintra_neon.c | 965 +++++++----- .../arm/neon/vp9_reconintra_neon_asm.asm | 34 +- media/libvpx/vp9/common/vp9_alloccommon.c | 51 +- media/libvpx/vp9/common/vp9_alloccommon.h | 4 +- media/libvpx/vp9/common/vp9_blockd.c | 6 +- media/libvpx/vp9/common/vp9_blockd.h | 94 +- media/libvpx/vp9/common/vp9_common.h | 17 +- media/libvpx/vp9/common/vp9_convolve.c | 8 +- media/libvpx/vp9/common/vp9_debugmodes.c | 14 +- media/libvpx/vp9/common/vp9_entropy.c | 18 +- media/libvpx/vp9/common/vp9_entropy.h | 34 +- media/libvpx/vp9/common/vp9_entropymode.c | 8 +- media/libvpx/vp9/common/vp9_entropymode.h | 15 +- media/libvpx/vp9/common/vp9_enums.h | 38 + media/libvpx/vp9/common/vp9_filter.c | 3 +- media/libvpx/vp9/common/vp9_filter.h | 8 - media/libvpx/vp9/common/vp9_frame_buffers.c | 2 +- media/libvpx/vp9/common/vp9_idct.c | 11 +- media/libvpx/vp9/common/vp9_idct.h | 1 + media/libvpx/vp9/common/vp9_loopfilter.c | 509 +++--- media/libvpx/vp9/common/vp9_loopfilter.h | 23 +- .../vp9/common/vp9_loopfilter_filters.c | 1 + media/libvpx/vp9/common/vp9_mfqe.c | 15 +- media/libvpx/vp9/common/vp9_mvref_common.c | 12 +- media/libvpx/vp9/common/vp9_onyxc_int.h | 52 +- media/libvpx/vp9/common/vp9_postproc.c | 67 +- media/libvpx/vp9/common/vp9_pred_common.c | 2 +- media/libvpx/vp9/common/vp9_reconinter.c | 21 +- media/libvpx/vp9/common/vp9_reconintra.c | 489 ++++-- media/libvpx/vp9/common/vp9_reconintra.h | 2 +- media/libvpx/vp9/common/vp9_rtcd.c | 3 - 
media/libvpx/vp9/common/vp9_scan.h | 12 + media/libvpx/vp9/common/vp9_systemdependent.h | 11 +- media/libvpx/vp9/common/vp9_thread_common.c | 41 +- media/libvpx/vp9/common/x86/convolve.h | 296 ++++ media/libvpx/vp9/common/x86/vp9_asm_stubs.c | 416 +---- .../x86/vp9_high_loopfilter_intrin_sse2.c | 25 +- .../vp9/common/x86/vp9_idct_intrin_sse2.c | 1393 +++++++---------- .../vp9/common/x86/vp9_idct_intrin_sse2.h | 33 +- .../vp9/common/x86/vp9_idct_intrin_ssse3.c | 762 --------- .../vp9/common/x86/vp9_intrapred_sse2.asm | 310 ++++ .../common/x86/vp9_loopfilter_intrin_avx2.c | 2 + .../common/x86/vp9_loopfilter_intrin_sse2.c | 36 +- .../common/x86/vp9_subpixel_8t_intrin_avx2.c | 122 +- .../common/x86/vp9_subpixel_8t_intrin_ssse3.c | 201 ++- media/libvpx/vp9/decoder/vp9_decodeframe.c | 609 +++---- media/libvpx/vp9/decoder/vp9_decodemv.c | 128 +- media/libvpx/vp9/decoder/vp9_decodemv.h | 1 - media/libvpx/vp9/decoder/vp9_decoder.c | 23 +- media/libvpx/vp9/decoder/vp9_detokenize.c | 55 +- media/libvpx/vp9/decoder/vp9_detokenize.h | 6 +- media/libvpx/vp9/decoder/vp9_dthread.c | 4 + .../vp9/encoder/arm/neon/vp9_quantize_neon.c | 4 +- .../vp9/encoder/arm/neon/vp9_variance_neon.c | 250 +-- media/libvpx/vp9/encoder/vp9_aq_complexity.c | 4 +- media/libvpx/vp9/encoder/vp9_aq_complexity.h | 2 + .../libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c | 65 +- media/libvpx/vp9/encoder/vp9_aq_variance.c | 71 +- media/libvpx/vp9/encoder/vp9_avg.c | 119 ++ media/libvpx/vp9/encoder/vp9_bitstream.c | 23 +- media/libvpx/vp9/encoder/vp9_blockiness.c | 138 ++ media/libvpx/vp9/encoder/vp9_dct.c | 5 +- media/libvpx/vp9/encoder/vp9_denoiser.c | 4 +- media/libvpx/vp9/encoder/vp9_encodeframe.c | 760 +++++---- media/libvpx/vp9/encoder/vp9_encodeframe.h | 4 +- media/libvpx/vp9/encoder/vp9_encodemb.c | 18 +- media/libvpx/vp9/encoder/vp9_encodemv.c | 4 +- media/libvpx/vp9/encoder/vp9_encodemv.h | 2 +- media/libvpx/vp9/encoder/vp9_encoder.c | 1214 ++++++++------ media/libvpx/vp9/encoder/vp9_encoder.h | 69 +- 
media/libvpx/vp9/encoder/vp9_ethread.c | 4 +- media/libvpx/vp9/encoder/vp9_extend.c | 17 +- media/libvpx/vp9/encoder/vp9_fastssim.c | 465 ++++++ media/libvpx/vp9/encoder/vp9_firstpass.c | 228 ++- media/libvpx/vp9/encoder/vp9_firstpass.h | 2 + media/libvpx/vp9/encoder/vp9_mbgraph.c | 26 +- media/libvpx/vp9/encoder/vp9_mcomp.c | 80 +- media/libvpx/vp9/encoder/vp9_mcomp.h | 3 +- media/libvpx/vp9/encoder/vp9_picklpf.c | 3 +- media/libvpx/vp9/encoder/vp9_pickmode.c | 679 ++++++-- media/libvpx/vp9/encoder/vp9_psnrhvs.c | 223 +++ media/libvpx/vp9/encoder/vp9_quantize.c | 51 +- media/libvpx/vp9/encoder/vp9_ratectrl.c | 141 +- media/libvpx/vp9/encoder/vp9_ratectrl.h | 8 +- media/libvpx/vp9/encoder/vp9_rd.c | 33 +- media/libvpx/vp9/encoder/vp9_rd.h | 2 +- media/libvpx/vp9/encoder/vp9_rdopt.c | 335 ++-- media/libvpx/vp9/encoder/vp9_rdopt.h | 9 + media/libvpx/vp9/encoder/vp9_resize.c | 3 +- media/libvpx/vp9/encoder/vp9_segmentation.c | 34 +- media/libvpx/vp9/encoder/vp9_skin_detection.c | 4 +- media/libvpx/vp9/encoder/vp9_speed_features.c | 52 +- media/libvpx/vp9/encoder/vp9_speed_features.h | 10 +- media/libvpx/vp9/encoder/vp9_ssim.h | 52 + media/libvpx/vp9/encoder/vp9_subexp.c | 3 +- media/libvpx/vp9/encoder/vp9_subexp.h | 10 +- .../libvpx/vp9/encoder/vp9_svc_layercontext.c | 477 ++++-- .../libvpx/vp9/encoder/vp9_svc_layercontext.h | 14 +- .../libvpx/vp9/encoder/vp9_temporal_filter.c | 36 +- .../libvpx/vp9/encoder/vp9_temporal_filter.h | 2 +- media/libvpx/vp9/encoder/vp9_tokenize.c | 21 +- media/libvpx/vp9/encoder/vp9_variance.c | 365 +---- media/libvpx/vp9/encoder/vp9_variance.h | 32 +- media/libvpx/vp9/encoder/vp9_writer.h | 2 +- .../vp9/encoder/x86/vp9_avg_intrin_sse2.c | 252 +++ ...t32x32_avx2.c => vp9_dct32x32_avx2_impl.h} | 2 + ...t32x32_sse2.c => vp9_dct32x32_sse2_impl.h} | 2 + media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c | 4 +- media/libvpx/vp9/encoder/x86/vp9_dct_sse2.c | 36 +- ...p9_dct_impl_sse2.c => vp9_dct_sse2_impl.h} | 4 +- 
media/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c | 49 +- .../vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm | 73 + .../vp9/encoder/x86/vp9_denoiser_sse2.c | 61 +- .../vp9/encoder/x86/vp9_error_intrin_avx2.c | 3 +- .../libvpx/vp9/encoder/x86/vp9_error_sse2.asm | 46 + .../x86/vp9_highbd_quantize_intrin_sse2.c | 9 +- .../x86/vp9_highbd_subpel_variance.asm | 16 - .../encoder/x86/vp9_highbd_variance_sse2.c | 231 --- .../vp9/encoder/x86/vp9_quantize_sse2.c | 48 +- .../encoder/x86/vp9_quantize_ssse3_x86_64.asm | 8 +- .../libvpx/vp9/encoder/x86/vp9_sad_ssse3.asm | 370 ----- .../vp9/encoder/x86/vp9_subpel_variance.asm | 24 - .../vp9_subpel_variance_impl_intrin_avx2.c | 18 +- .../vp9/encoder/x86/vp9_variance_avx2.c | 88 +- .../vp9/encoder/x86/vp9_variance_sse2.c | 300 +--- media/libvpx/vp9/vp9_cx_iface.c | 158 +- media/libvpx/vp9/vp9_dx_iface.c | 34 +- media/libvpx/vp9/vp9_iface_common.h | 2 + media/libvpx/vp9_rtcd_armv7-android-gcc.h | 321 +--- media/libvpx/vp9_rtcd_generic-gnu.h | 236 +-- media/libvpx/vp9_rtcd_x86-darwin9-gcc.h | 549 +------ media/libvpx/vp9_rtcd_x86-linux-gcc.h | 313 +--- media/libvpx/vp9_rtcd_x86-win32-gcc.h | 549 +------ media/libvpx/vp9_rtcd_x86-win32-vs12.h | 549 +------ media/libvpx/vp9_rtcd_x86_64-darwin9-gcc.h | 425 +---- media/libvpx/vp9_rtcd_x86_64-linux-gcc.h | 368 +---- media/libvpx/vp9_rtcd_x86_64-win64-gcc.h | 425 +---- media/libvpx/vp9_rtcd_x86_64-win64-vs12.h | 425 +---- .../libvpx/vpx/internal/vpx_codec_internal.h | 10 +- media/libvpx/vpx/src/svc_encodeframe.c | 122 +- media/libvpx/vpx/svc_context.h | 5 +- media/libvpx/vpx/vp8cx.h | 68 +- media/libvpx/vpx/vp8dx.h | 7 + media/libvpx/vpx/vpx_encoder.h | 38 +- media/libvpx/vpx_config_armv7-android-gcc.asm | 9 +- media/libvpx/vpx_config_armv7-android-gcc.h | 9 +- media/libvpx/vpx_config_generic-gnu.asm | 9 +- media/libvpx/vpx_config_generic-gnu.h | 9 +- media/libvpx/vpx_config_x86-darwin9-gcc.asm | 9 +- media/libvpx/vpx_config_x86-darwin9-gcc.h | 9 +- media/libvpx/vpx_config_x86-linux-gcc.asm | 9 +- 
media/libvpx/vpx_config_x86-linux-gcc.h | 9 +- media/libvpx/vpx_config_x86-win32-gcc.asm | 9 +- media/libvpx/vpx_config_x86-win32-gcc.h | 9 +- media/libvpx/vpx_config_x86-win32-vs12.asm | 9 +- media/libvpx/vpx_config_x86-win32-vs12.h | 9 +- .../libvpx/vpx_config_x86_64-darwin9-gcc.asm | 9 +- media/libvpx/vpx_config_x86_64-darwin9-gcc.h | 9 +- media/libvpx/vpx_config_x86_64-linux-gcc.asm | 9 +- media/libvpx/vpx_config_x86_64-linux-gcc.h | 9 +- media/libvpx/vpx_config_x86_64-win64-gcc.asm | 9 +- media/libvpx/vpx_config_x86_64-win64-gcc.h | 9 +- media/libvpx/vpx_config_x86_64-win64-vs12.asm | 9 +- media/libvpx/vpx_config_x86_64-win64-vs12.h | 9 +- .../arm/sad4d_neon.c} | 16 +- .../arm/sad_media.asm} | 5 +- .../vp9_sad_neon.c => vpx_dsp/arm/sad_neon.c} | 112 +- media/libvpx/vpx_dsp/arm/variance_media.asm | 358 +++++ media/libvpx/vpx_dsp/arm/variance_neon.c | 418 +++++ .../{vp9/encoder/vp9_sad.c => vpx_dsp/sad.c} | 104 +- media/libvpx/vpx_dsp/variance.c | 306 ++++ media/libvpx/vpx_dsp/vpx_dsp_rtcd.c | 17 + .../x86/highbd_sad4d_sse2.asm} | 6 +- .../x86/highbd_sad_sse2.asm} | 10 +- .../x86/highbd_variance_impl_sse2.asm} | 12 +- .../libvpx/vpx_dsp/x86/highbd_variance_sse2.c | 245 +++ .../x86/sad4d_avx2.c} | 47 +- .../x86/sad4d_sse2.asm} | 6 +- .../x86/sad_avx2.c} | 9 +- .../{vp8/common => vpx_dsp}/x86/sad_mmx.asm | 30 +- .../x86/sad_sse2.asm} | 12 +- .../x86/sad_sse3.asm} | 34 +- .../x86/sad_sse4.asm} | 30 +- .../{vp8/common => vpx_dsp}/x86/sad_ssse3.asm | 172 +- media/libvpx/vpx_dsp/x86/variance_avx2.c | 93 ++ .../x86/variance_impl_avx2.c} | 6 +- .../x86/variance_impl_mmx.asm | 447 +----- media/libvpx/vpx_dsp/x86/variance_mmx.c | 107 ++ media/libvpx/vpx_dsp/x86/variance_sse2.c | 309 ++++ media/libvpx/vpx_dsp_rtcd_armv7-android-gcc.h | 341 ++++ media/libvpx/vpx_dsp_rtcd_generic-gnu.h | 266 ++++ media/libvpx/vpx_dsp_rtcd_x86-darwin9-gcc.h | 544 +++++++ media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h | 394 +++++ media/libvpx/vpx_dsp_rtcd_x86-win32-gcc.h | 544 +++++++ 
media/libvpx/vpx_dsp_rtcd_x86-win32-vs12.h | 544 +++++++ .../libvpx/vpx_dsp_rtcd_x86_64-darwin9-gcc.h | 432 +++++ media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h | 375 +++++ media/libvpx/vpx_dsp_rtcd_x86_64-win64-gcc.h | 432 +++++ media/libvpx/vpx_dsp_rtcd_x86_64-win64-vs12.h | 432 +++++ media/libvpx/vpx_mem/include/vpx_mem_intrnl.h | 64 - .../libvpx/vpx_mem/include/vpx_mem_tracker.h | 179 --- media/libvpx/vpx_mem/vpx_mem.c | 586 +------ media/libvpx/vpx_mem/vpx_mem.h | 131 +- media/libvpx/vpx_ports/mem.h | 23 +- media/libvpx/vpx_ports/msvc.h | 22 + media/libvpx/vpx_ports/vpx_once.h | 2 +- media/libvpx/vpx_ports/x86.h | 14 +- media/libvpx/vpx_scale/generic/gen_scalers.c | 4 +- media/libvpx/vpx_scale/generic/vpx_scale.c | 19 +- media/libvpx/vpx_scale/generic/yv12config.c | 10 +- media/libvpx/vpx_scale/generic/yv12extend.c | 26 +- media/libvpx/vpx_scale/vpx_scale_rtcd.c | 4 +- media/libvpx/vpx_version.h | 6 +- 310 files changed, 16687 insertions(+), 18757 deletions(-) delete mode 100644 media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm delete mode 100644 media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm delete mode 100644 media/libvpx/vp8/common/arm/neon/sad_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/variance_neon.c create mode 100644 media/libvpx/vp8/common/copy_c.c delete mode 100644 media/libvpx/vp8/common/sad_c.c create mode 100644 media/libvpx/vp8/common/x86/copy_sse2.asm create mode 100644 media/libvpx/vp8/common/x86/copy_sse3.asm delete mode 100644 media/libvpx/vp8/common/x86/sad_sse2.asm delete mode 100644 media/libvpx/vp8/common/x86/sad_sse3.asm delete mode 100644 media/libvpx/vp8/common/x86/sad_sse4.asm create mode 100644 media/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm rename media/libvpx/vp8/common/x86/{variance_mmx.c => vp8_variance_mmx.c} (59%) rename media/libvpx/vp8/common/x86/{variance_sse2.c => vp8_variance_sse2.c} (75%) delete mode 100644 media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm delete mode 
100644 media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c rename media/libvpx/vp9/common/arm/neon/{vp9_avg_neon.c => vp9_convolve_avg_neon.c} (100%) rename media/libvpx/vp9/common/arm/neon/{vp9_avg_neon_asm.asm => vp9_convolve_avg_neon_asm.asm} (100%) create mode 100644 media/libvpx/vp9/common/x86/convolve.h delete mode 100644 media/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c create mode 100644 media/libvpx/vp9/encoder/vp9_blockiness.c create mode 100644 media/libvpx/vp9/encoder/vp9_fastssim.c create mode 100644 media/libvpx/vp9/encoder/vp9_psnrhvs.c rename media/libvpx/vp9/encoder/x86/{vp9_dct32x32_avx2.c => vp9_dct32x32_avx2_impl.h} (99%) rename media/libvpx/vp9/encoder/x86/{vp9_dct32x32_sse2.c => vp9_dct32x32_sse2_impl.h} (99%) rename media/libvpx/vp9/encoder/x86/{vp9_dct_impl_sse2.c => vp9_dct_sse2_impl.h} (99%) delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_sad_ssse3.asm rename media/libvpx/{vp9/encoder/arm/neon/vp9_sad4d_neon.c => vpx_dsp/arm/sad4d_neon.c} (96%) rename media/libvpx/{vp8/common/arm/armv6/vp8_sad16x16_armv6.asm => vpx_dsp/arm/sad_media.asm} (97%) rename media/libvpx/{vp9/encoder/arm/neon/vp9_sad_neon.c => vpx_dsp/arm/sad_neon.c} (65%) create mode 100644 media/libvpx/vpx_dsp/arm/variance_media.asm create mode 100644 media/libvpx/vpx_dsp/arm/variance_neon.c rename media/libvpx/{vp9/encoder/vp9_sad.c => vpx_dsp/sad.c} (59%) create mode 100644 media/libvpx/vpx_dsp/variance.c create mode 100644 media/libvpx/vpx_dsp/vpx_dsp_rtcd.c rename media/libvpx/{vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm => vpx_dsp/x86/highbd_sad4d_sse2.asm} (98%) rename media/libvpx/{vp9/encoder/x86/vp9_highbd_sad_sse2.asm => vpx_dsp/x86/highbd_sad_sse2.asm} (97%) rename media/libvpx/{vp9/encoder/x86/vp9_highbd_variance_impl_sse2.asm => vpx_dsp/x86/highbd_variance_impl_sse2.asm} (97%) create mode 100644 media/libvpx/vpx_dsp/x86/highbd_variance_sse2.c rename media/libvpx/{vp9/encoder/x86/vp9_sad4d_intrin_avx2.c => vpx_dsp/x86/sad4d_avx2.c} (79%) rename 
media/libvpx/{vp9/encoder/x86/vp9_sad4d_sse2.asm => vpx_dsp/x86/sad4d_sse2.asm} (98%) rename media/libvpx/{vp9/encoder/x86/vp9_sad_intrin_avx2.c => vpx_dsp/x86/sad_avx2.c} (95%) rename media/libvpx/{vp8/common => vpx_dsp}/x86/sad_mmx.asm (95%) rename media/libvpx/{vp9/encoder/x86/vp9_sad_sse2.asm => vpx_dsp/x86/sad_sse2.asm} (95%) rename media/libvpx/{vp9/encoder/x86/vp9_sad_sse3.asm => vpx_dsp/x86/sad_sse3.asm} (94%) rename media/libvpx/{vp9/encoder/x86/vp9_sad_sse4.asm => vpx_dsp/x86/sad_sse4.asm} (95%) rename media/libvpx/{vp8/common => vpx_dsp}/x86/sad_ssse3.asm (64%) create mode 100644 media/libvpx/vpx_dsp/x86/variance_avx2.c rename media/libvpx/{vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c => vpx_dsp/x86/variance_impl_avx2.c} (98%) rename media/libvpx/{vp8/common => vpx_dsp}/x86/variance_impl_mmx.asm (52%) create mode 100644 media/libvpx/vpx_dsp/x86/variance_mmx.c create mode 100644 media/libvpx/vpx_dsp/x86/variance_sse2.c create mode 100644 media/libvpx/vpx_dsp_rtcd_armv7-android-gcc.h create mode 100644 media/libvpx/vpx_dsp_rtcd_generic-gnu.h create mode 100644 media/libvpx/vpx_dsp_rtcd_x86-darwin9-gcc.h create mode 100644 media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h create mode 100644 media/libvpx/vpx_dsp_rtcd_x86-win32-gcc.h create mode 100644 media/libvpx/vpx_dsp_rtcd_x86-win32-vs12.h create mode 100644 media/libvpx/vpx_dsp_rtcd_x86_64-darwin9-gcc.h create mode 100644 media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h create mode 100644 media/libvpx/vpx_dsp_rtcd_x86_64-win64-gcc.h create mode 100644 media/libvpx/vpx_dsp_rtcd_x86_64-win64-vs12.h delete mode 100644 media/libvpx/vpx_mem/include/vpx_mem_tracker.h create mode 100644 media/libvpx/vpx_ports/msvc.h diff --git a/CLOBBER b/CLOBBER index a7910bb33dd..53392c0aeaa 100644 --- a/CLOBBER +++ b/CLOBBER @@ -22,4 +22,4 @@ # changes to stick? As of bug 928195, this shouldn't be necessary! Please # don't change CLOBBER for WebIDL changes any more. -Bug 1075758 (updating ICU to 55) requires a clobber. 
+Bug 1178215 requires clobber for libvpx file moves. diff --git a/media/libvpx/PATENTS b/media/libvpx/PATENTS index 79d17d7d6a9..caedf607e95 100644 --- a/media/libvpx/PATENTS +++ b/media/libvpx/PATENTS @@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent enforcement activity against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that any of these implementations of WebM or any code incorporated within any of these implementations of WebM -constitutes direct or contributory patent infringement, or inducement of +constitute direct or contributory patent infringement, or inducement of patent infringement, then any patent rights granted to you under this License for these implementations of WebM shall terminate as of the date such litigation is filed. diff --git a/media/libvpx/README_MOZILLA b/media/libvpx/README_MOZILLA index 523a148d29f..c47c6ba573c 100644 --- a/media/libvpx/README_MOZILLA +++ b/media/libvpx/README_MOZILLA @@ -8,4 +8,4 @@ The libvpx git repository is: https://chromium.googlesource.com/webm/libvpx -The git commit ID used was c74bf6d889992c3cabe017ec353ca85c323107cd +The git commit ID used was e67d45d4ce92468ba193288b59093fef0a502662 diff --git a/media/libvpx/sources.mozbuild b/media/libvpx/sources.mozbuild index 5f59124794c..f68f8a3a983 100644 --- a/media/libvpx/sources.mozbuild +++ b/media/libvpx/sources.mozbuild @@ -14,9 +14,6 @@ files = { 'vp8/common/arm/armv6/loopfilter_v6.asm', 'vp8/common/arm/armv6/simpleloopfilter_v6.asm', 'vp8/common/arm/armv6/sixtappredict8x4_v6.asm', - 'vp8/common/arm/armv6/vp8_sad16x16_armv6.asm', - 'vp8/common/arm/armv6/vp8_variance16x16_armv6.asm', - 'vp8/common/arm/armv6/vp8_variance8x8_armv6.asm', 'vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm', 'vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm', 'vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm', @@ -38,13 +35,10 @@ files = { 
'vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c', 'vp8/common/arm/neon/mbloopfilter_neon.c', 'vp8/common/arm/neon/reconintra_neon.c', - 'vp8/common/arm/neon/sad_neon.c', 'vp8/common/arm/neon/shortidct4x4llm_neon.c', 'vp8/common/arm/neon/sixtappredict_neon.c', - 'vp8/common/arm/neon/variance_neon.c', 'vp8/common/arm/neon/vp8_subpixelvariance_neon.c', 'vp8/common/arm/variance_arm.c', - 'vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm', 'vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm', 'vp8/encoder/arm/armv6/walsh_v6.asm', 'vp8/encoder/arm/dct_arm.c', @@ -52,11 +46,10 @@ files = { 'vp8/encoder/arm/neon/fastquantizeb_neon.c', 'vp8/encoder/arm/neon/shortfdct_neon.c', 'vp8/encoder/arm/neon/subtract_neon.c', - 'vp8/encoder/arm/neon/vp8_mse16x16_neon.c', 'vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c', - 'vp9/common/arm/neon/vp9_avg_neon_asm.asm', 'vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm', 'vp9/common/arm/neon/vp9_convolve8_neon_asm.asm', + 'vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm', 'vp9/common/arm/neon/vp9_convolve_neon.c', 'vp9/common/arm/neon/vp9_copy_neon_asm.asm', 'vp9/common/arm/neon/vp9_idct16x16_1_add_neon_asm.asm', @@ -75,26 +68,30 @@ files = { 'vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm', 'vp9/common/arm/neon/vp9_loopfilter_neon.c', 'vp9/common/arm/neon/vp9_mb_lpf_neon.asm', + 'vp9/common/arm/neon/vp9_reconintra_neon.c', 'vp9/common/arm/neon/vp9_reconintra_neon_asm.asm', 'vp9/common/arm/neon/vp9_save_reg_neon.asm', 'vp9/encoder/arm/neon/vp9_dct_neon.c', 'vp9/encoder/arm/neon/vp9_quantize_neon.c', - 'vp9/encoder/arm/neon/vp9_sad4d_neon.c', - 'vp9/encoder/arm/neon/vp9_sad_neon.c', 'vp9/encoder/arm/neon/vp9_subtract_neon.c', 'vp9/encoder/arm/neon/vp9_variance_neon.c', 'vp9/encoder/arm/neon/vp9enc_avg_neon.c', + 'vpx_dsp/arm/sad4d_neon.c', + 'vpx_dsp/arm/sad_media.asm', + 'vpx_dsp/arm/sad_neon.c', + 'vpx_dsp/arm/variance_media.asm', + 'vpx_dsp/arm/variance_neon.c', 'vpx_ports/arm_cpudetect.c'], 'AVX2': 
['vp9/common/x86/vp9_loopfilter_intrin_avx2.c', 'vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c', - 'vp9/encoder/x86/vp9_dct32x32_avx2.c', 'vp9/encoder/x86/vp9_dct_avx2.c', 'vp9/encoder/x86/vp9_error_intrin_avx2.c', - 'vp9/encoder/x86/vp9_sad4d_intrin_avx2.c', - 'vp9/encoder/x86/vp9_sad_intrin_avx2.c', 'vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c', 'vp9/encoder/x86/vp9_variance_avx2.c', - 'vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c'], + 'vpx_dsp/x86/sad4d_avx2.c', + 'vpx_dsp/x86/sad_avx2.c', + 'vpx_dsp/x86/variance_avx2.c', + 'vpx_dsp/x86/variance_impl_avx2.c'], 'ERROR_CONCEALMENT': ['vp8/decoder/error_concealment.c'], 'EXPORTS': ['vpx/vp8.h', 'vpx/vp8cx.h', @@ -115,6 +112,7 @@ files = { 'vpx_scale/yv12config.h'], 'SOURCES': ['vp8/common/alloccommon.c', 'vp8/common/blockd.c', + 'vp8/common/copy_c.c', 'vp8/common/debugmodes.c', 'vp8/common/dequantize.c', 'vp8/common/entropy.c', @@ -135,7 +133,6 @@ files = { 'vp8/common/reconintra.c', 'vp8/common/reconintra4x4.c', 'vp8/common/rtcd.c', - 'vp8/common/sad_c.c', 'vp8/common/setupintrarecon.c', 'vp8/common/swapyv12buffer.c', 'vp8/common/treecoder.c', @@ -230,7 +227,6 @@ files = { 'vp9/encoder/vp9_rd.c', 'vp9/encoder/vp9_rdopt.c', 'vp9/encoder/vp9_resize.c', - 'vp9/encoder/vp9_sad.c', 'vp9/encoder/vp9_segmentation.c', 'vp9/encoder/vp9_skin_detection.c', 'vp9/encoder/vp9_speed_features.c', @@ -249,6 +245,9 @@ files = { 'vpx/src/vpx_encoder.c', 'vpx/src/vpx_image.c', 'vpx/src/vpx_psnr.c', + 'vpx_dsp/sad.c', + 'vpx_dsp/variance.c', + 'vpx_dsp/vpx_dsp_rtcd.c', 'vpx_mem/vpx_mem.c', 'vpx_scale/generic/gen_scalers.c', 'vpx_scale/generic/vpx_scale.c', @@ -266,7 +265,9 @@ files = { 'vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm', 'vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm', 'vp9/encoder/x86/vp9_ssim_opt_x86_64.asm'], - 'X86_ASM': ['vp8/common/x86/dequantize_mmx.asm', + 'X86_ASM': ['vp8/common/x86/copy_sse2.asm', + 'vp8/common/x86/copy_sse3.asm', + 'vp8/common/x86/dequantize_mmx.asm', 'vp8/common/x86/filter_x86.c', 
'vp8/common/x86/idct_blk_mmx.c', 'vp8/common/x86/idct_blk_sse2.c', @@ -283,21 +284,16 @@ files = { 'vp8/common/x86/recon_mmx.asm', 'vp8/common/x86/recon_sse2.asm', 'vp8/common/x86/recon_wrapper_sse2.c', - 'vp8/common/x86/sad_mmx.asm', - 'vp8/common/x86/sad_sse2.asm', - 'vp8/common/x86/sad_sse3.asm', - 'vp8/common/x86/sad_sse4.asm', - 'vp8/common/x86/sad_ssse3.asm', 'vp8/common/x86/subpixel_mmx.asm', 'vp8/common/x86/subpixel_sse2.asm', 'vp8/common/x86/subpixel_ssse3.asm', - 'vp8/common/x86/variance_impl_mmx.asm', 'vp8/common/x86/variance_impl_sse2.asm', 'vp8/common/x86/variance_impl_ssse3.asm', - 'vp8/common/x86/variance_mmx.c', - 'vp8/common/x86/variance_sse2.c', 'vp8/common/x86/variance_ssse3.c', 'vp8/common/x86/vp8_asm_stubs.c', + 'vp8/common/x86/vp8_variance_impl_mmx.asm', + 'vp8/common/x86/vp8_variance_mmx.c', + 'vp8/common/x86/vp8_variance_sse2.c', 'vp8/encoder/x86/dct_mmx.asm', 'vp8/encoder/x86/dct_sse2.asm', 'vp8/encoder/x86/denoising_sse2.c', @@ -315,7 +311,6 @@ files = { 'vp9/common/x86/vp9_asm_stubs.c', 'vp9/common/x86/vp9_copy_sse2.asm', 'vp9/common/x86/vp9_idct_intrin_sse2.c', - 'vp9/common/x86/vp9_idct_intrin_ssse3.c', 'vp9/common/x86/vp9_intrapred_sse2.asm', 'vp9/common/x86/vp9_intrapred_ssse3.asm', 'vp9/common/x86/vp9_loopfilter_intrin_sse2.c', @@ -326,21 +321,23 @@ files = { 'vp9/common/x86/vp9_subpixel_bilinear_sse2.asm', 'vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm', 'vp9/encoder/x86/vp9_avg_intrin_sse2.c', - 'vp9/encoder/x86/vp9_dct32x32_sse2.c', - 'vp9/encoder/x86/vp9_dct_impl_sse2.c', 'vp9/encoder/x86/vp9_dct_mmx.asm', 'vp9/encoder/x86/vp9_dct_sse2.c', 'vp9/encoder/x86/vp9_dct_ssse3.c', 'vp9/encoder/x86/vp9_error_sse2.asm', 'vp9/encoder/x86/vp9_quantize_sse2.c', - 'vp9/encoder/x86/vp9_sad4d_sse2.asm', - 'vp9/encoder/x86/vp9_sad_sse2.asm', - 'vp9/encoder/x86/vp9_sad_sse3.asm', - 'vp9/encoder/x86/vp9_sad_sse4.asm', - 'vp9/encoder/x86/vp9_sad_ssse3.asm', 'vp9/encoder/x86/vp9_subpel_variance.asm', 'vp9/encoder/x86/vp9_subtract_sse2.asm', 
'vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm', 'vp9/encoder/x86/vp9_variance_sse2.c', + 'vpx_dsp/x86/sad4d_sse2.asm', + 'vpx_dsp/x86/sad_mmx.asm', + 'vpx_dsp/x86/sad_sse2.asm', + 'vpx_dsp/x86/sad_sse3.asm', + 'vpx_dsp/x86/sad_sse4.asm', + 'vpx_dsp/x86/sad_ssse3.asm', + 'vpx_dsp/x86/variance_impl_mmx.asm', + 'vpx_dsp/x86/variance_mmx.c', + 'vpx_dsp/x86/variance_sse2.c', 'vpx_ports/emms.asm'] } diff --git a/media/libvpx/third_party/x86inc/x86inc.asm b/media/libvpx/third_party/x86inc/x86inc.asm index 9273fc97e65..bc8116995dd 100644 --- a/media/libvpx/third_party/x86inc/x86inc.asm +++ b/media/libvpx/third_party/x86inc/x86inc.asm @@ -36,7 +36,9 @@ %include "vpx_config.asm" +%ifndef program_name %define program_name vp9 +%endif %define UNIX64 0 diff --git a/media/libvpx/vp8/common/alloccommon.c b/media/libvpx/vp8/common/alloccommon.c index 54afc13355a..8dfd4ce203e 100644 --- a/media/libvpx/vp8/common/alloccommon.c +++ b/media/libvpx/vp8/common/alloccommon.c @@ -10,6 +10,7 @@ #include "vpx_config.h" +#include "alloccommon.h" #include "blockd.h" #include "vpx_mem/vpx_mem.h" #include "onyxc_int.h" @@ -103,9 +104,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) goto allocation_fail; oci->post_proc_buffer_int_used = 0; - vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); - vpx_memset(oci->post_proc_buffer.buffer_alloc, 128, - oci->post_proc_buffer.frame_size); + memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); + memset(oci->post_proc_buffer.buffer_alloc, 128, + oci->post_proc_buffer.frame_size); /* Allocate buffer to store post-processing filter coefficients. 
* @@ -176,7 +177,7 @@ void vp8_create_common(VP8_COMMON *oci) oci->clamp_type = RECON_CLAMP_REQUIRED; /* Initialize reference frame sign bias structure to defaults */ - vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias)); + memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias)); /* Default disable buffer to buffer copying */ oci->copy_buffer_to_gf = 0; diff --git a/media/libvpx/vp8/common/arm/armv6/dequant_idct_v6.asm b/media/libvpx/vp8/common/arm/armv6/dequant_idct_v6.asm index 2510ad83835..db48ded5827 100644 --- a/media/libvpx/vp8/common/arm/armv6/dequant_idct_v6.asm +++ b/media/libvpx/vp8/common/arm/armv6/dequant_idct_v6.asm @@ -165,7 +165,7 @@ vp8_dequant_idct_loop2_v6 str r1, [r2], r12 ; store output to dst bne vp8_dequant_idct_loop2_v6 -; vpx_memset +; memset sub r0, r0, #32 add sp, sp, #4 diff --git a/media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm b/media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm deleted file mode 100644 index 39919579f80..00000000000 --- a/media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm +++ /dev/null @@ -1,154 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_variance16x16_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp8_variance16x16_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r8, #0 ; initialize sum = 0 - mov r11, #0 ; initialize sse = 0 - mov r12, #16 ; set loop counter to 16 (=block height) - -loop - ; 1st 4 pixels - ldr r4, [r0, #0] ; load 4 src pixels - ldr r5, [r2, #0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r6, r4, r5 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - ; calculate total sum - adds r8, r8, r4 ; add positive differences to sum - subs r8, r8, r5 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r4, [r0, #4] ; load 4 src pixels - ldr r5, [r2, #4] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 
pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r4, [r0, #8] ; load 4 src pixels - ldr r5, [r2, #8] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r4, [r0, #12] ; load 4 src pixels - ldr r5, [r2, #12] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive 
differences to sum - sub r8, r8, r5 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - - subs r12, r12, #1 - - bne loop - - ; return stuff - ldr r6, [sp, #40] ; get address of sse - mul r0, r8, r8 ; sum * sum - str r11, [r6] ; store sse - sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8)) - - ldmfd sp!, {r4-r12, pc} - - ENDP - - END - diff --git a/media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm b/media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm deleted file mode 100644 index 915ee499309..00000000000 --- a/media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm +++ /dev/null @@ -1,101 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_variance8x8_armv6| - - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp8_variance8x8_armv6| PROC - - push {r4-r10, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r12, #8 ; set loop counter to 8 (=block height) - mov r4, #0 ; initialize sum = 0 - mov r5, #0 ; initialize sse = 0 - -loop - ; 1st 4 pixels - ldr r6, [r0, #0x0] ; load 4 src pixels - ldr r7, [r2, #0x0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r8, r6, r7 ; calculate difference - pld [r0, r1, lsl #1] - sel r10, r8, lr ; select bytes with positive difference - usub8 r9, r7, r6 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r6, r10, lr ; calculate sum of positive differences - usad8 r7, r8, lr ; calculate sum of negative differences - orr r8, r8, r10 ; differences of all 4 pixels - ; calculate total sum - add r4, r4, r6 ; add positive differences to sum - sub r4, r4, r7 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r7, r8 ; byte (two pixels) to halfwords - uxtb16 r10, r8, ror #8 ; another two pixels to halfwords - smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r6, [r0, #0x4] ; load 4 src pixels - ldr r7, [r2, #0x4] ; load 4 ref pixels - smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r6, r7 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r10, r8, lr ; select bytes with positive difference - usub8 r9, r7, r6 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r6, r10, lr ; calculate sum of positive differences - usad8 r7, r8, lr ; calculate sum of 
negative differences - orr r8, r8, r10 ; differences of all 4 pixels - - ; calculate total sum - add r4, r4, r6 ; add positive differences to sum - sub r4, r4, r7 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r7, r8 ; byte (two pixels) to halfwords - uxtb16 r10, r8, ror #8 ; another two pixels to halfwords - smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) - subs r12, r12, #1 ; next row - smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) - - bne loop - - ; return stuff - ldr r8, [sp, #32] ; get address of sse - mul r1, r4, r4 ; sum * sum - str r5, [r8] ; store sse - sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6)) - - pop {r4-r10, pc} - - ENDP - - END diff --git a/media/libvpx/vp8/common/arm/filter_arm.c b/media/libvpx/vp8/common/arm/filter_arm.c index 7fe39674eb6..d6a6781d862 100644 --- a/media/libvpx/vp8/common/arm/filter_arm.c +++ b/media/libvpx/vp8/common/arm/filter_arm.c @@ -99,7 +99,7 @@ void vp8_sixtap_predict4x4_armv6 { const short *HFilter; const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */ + DECLARE_ALIGNED(4, short, FData[12*4]); /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ @@ -147,7 +147,7 @@ void vp8_sixtap_predict8x8_armv6 { const short *HFilter; const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */ + DECLARE_ALIGNED(4, short, FData[16*8]); /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ @@ -189,7 +189,7 @@ void vp8_sixtap_predict16x16_armv6 { const short *HFilter; const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */ + DECLARE_ALIGNED(4, short, FData[24*16]); /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = 
vp8_sub_pel_filters[yoffset]; /* 6 tap */ diff --git a/media/libvpx/vp8/common/arm/neon/sad_neon.c b/media/libvpx/vp8/common/arm/neon/sad_neon.c deleted file mode 100644 index 6595ac0519b..00000000000 --- a/media/libvpx/vp8/common/arm/neon/sad_neon.c +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -unsigned int vp8_sad8x8_neon( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride) { - uint8x8_t d0, d8; - uint16x8_t q12; - uint32x4_t q1; - uint64x2_t q3; - uint32x2_t d5; - int i; - - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabdl_u8(d0, d8); - - for (i = 0; i < 7; i++) { - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabal_u8(q12, d0, d8); - } - - q1 = vpaddlq_u16(q12); - q3 = vpaddlq_u32(q1); - d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), - vreinterpret_u32_u64(vget_high_u64(q3))); - - return vget_lane_u32(d5, 0); -} - -unsigned int vp8_sad8x16_neon( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride) { - uint8x8_t d0, d8; - uint16x8_t q12; - uint32x4_t q1; - uint64x2_t q3; - uint32x2_t d5; - int i; - - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabdl_u8(d0, d8); - - for (i = 0; i < 15; i++) { - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabal_u8(q12, d0, d8); - } - - q1 = vpaddlq_u16(q12); - q3 = vpaddlq_u32(q1); - d5 = 
vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), - vreinterpret_u32_u64(vget_high_u64(q3))); - - return vget_lane_u32(d5, 0); -} - -unsigned int vp8_sad4x4_neon( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride) { - uint8x8_t d0, d8; - uint16x8_t q12; - uint32x2_t d1; - uint64x1_t d3; - int i; - - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabdl_u8(d0, d8); - - for (i = 0; i < 3; i++) { - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabal_u8(q12, d0, d8); - } - - d1 = vpaddl_u16(vget_low_u16(q12)); - d3 = vpaddl_u32(d1); - - return vget_lane_u32(vreinterpret_u32_u64(d3), 0); -} - -unsigned int vp8_sad16x16_neon( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride) { - uint8x16_t q0, q4; - uint16x8_t q12, q13; - uint32x4_t q1; - uint64x2_t q3; - uint32x2_t d5; - int i; - - q0 = vld1q_u8(src_ptr); - src_ptr += src_stride; - q4 = vld1q_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4)); - q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4)); - - for (i = 0; i < 15; i++) { - q0 = vld1q_u8(src_ptr); - src_ptr += src_stride; - q4 = vld1q_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4)); - q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4)); - } - - q12 = vaddq_u16(q12, q13); - q1 = vpaddlq_u16(q12); - q3 = vpaddlq_u32(q1); - d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), - vreinterpret_u32_u64(vget_high_u64(q3))); - - return vget_lane_u32(d5, 0); -} - -unsigned int vp8_sad16x8_neon( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride) { - uint8x16_t q0, q4; - uint16x8_t q12, q13; - uint32x4_t q1; - uint64x2_t q3; - uint32x2_t d5; - int i; - - q0 = vld1q_u8(src_ptr); - src_ptr += src_stride; - q4 = vld1q_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = 
vabdl_u8(vget_low_u8(q0), vget_low_u8(q4)); - q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4)); - - for (i = 0; i < 7; i++) { - q0 = vld1q_u8(src_ptr); - src_ptr += src_stride; - q4 = vld1q_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4)); - q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4)); - } - - q12 = vaddq_u16(q12, q13); - q1 = vpaddlq_u16(q12); - q3 = vpaddlq_u32(q1); - d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), - vreinterpret_u32_u64(vget_high_u64(q3))); - - return vget_lane_u32(d5, 0); -} diff --git a/media/libvpx/vp8/common/arm/neon/variance_neon.c b/media/libvpx/vp8/common/arm/neon/variance_neon.c deleted file mode 100644 index 1b1979073e5..00000000000 --- a/media/libvpx/vp8/common/arm/neon/variance_neon.c +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include "vpx_ports/mem.h" - -unsigned int vp8_variance16x16_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 8; i++) { - q0u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q1u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - __builtin_prefetch(src_ptr); - - q2u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - __builtin_prefetch(ref_ptr); - - q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); - q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); - q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); - q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = 
vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance16x8_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 4; i++) { // variance16x8_neon_loop - q0u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q1u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - __builtin_prefetch(src_ptr); - - q2u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - __builtin_prefetch(ref_ptr); - - q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); - q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); - q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); - q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = 
vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance8x16_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - uint8x8_t d0u8, d2u8, d4u8, d6u8; - int16x4_t d22s16, d23s16, d24s16, d25s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint16x8_t q11u16, q12u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 8; i++) { 
// variance8x16_neon_loop - d0u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - __builtin_prefetch(src_ptr); - - d4u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d6u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - __builtin_prefetch(ref_ptr); - - q11u16 = vsubl_u8(d0u8, d4u8); - q12u16 = vsubl_u8(d2u8, d6u8); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance8x8_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = 
vdupq_n_s32(0); - - for (i = 0; i < 2; i++) { // variance8x8_neon_loop - d0u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d1u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d3u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - - d4u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d5u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d6u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d7u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - - q11u16 = vsubl_u8(d0u8, d4u8); - q12u16 = vsubl_u8(d1u8, d5u8); - q13u16 = vsubl_u8(d2u8, d6u8); - q14u16 = vsubl_u8(d3u8, d7u8); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - 
vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} diff --git a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c index f2bb162068f..974d3b6532b 100644 --- a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c +++ b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c @@ -32,7 +32,7 @@ unsigned int vp8_sub_pixel_variance16x16_neon_func( int dst_pixels_per_line, unsigned int *sse) { int i; - DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 528); + DECLARE_ALIGNED(16, unsigned char, tmp[528]); unsigned char *tmpp; unsigned char *tmpp2; uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; @@ -911,12 +911,6 @@ unsigned int vp8_variance_halfpixvar16x16_hv_neon( return vget_lane_u32(d0u32, 0); } -enum { kWidth8 = 8 }; -enum { kHeight8 = 8 }; -enum { kHeight8PlusOne = 9 }; -enum { kPixelStepOne = 1 }; -enum { kAlign16 = 16 }; - #define FILTER_BITS 7 static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) { @@ -968,8 +962,8 @@ static unsigned int variance8x8_neon(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, unsigned int *sse) { int sum; - variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum); - return *sse - (((int64_t)sum * sum) / (kWidth8 * kHeight8)); + variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum); + return *sse - (((int64_t)sum * sum) / (8 * 8)); } static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, @@ -1003,21 +997,21 @@ unsigned int vp8_sub_pixel_variance8x8_neon( const unsigned char *dst, int dst_stride, unsigned int *sse) { - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight8PlusOne * kWidth8); - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight8PlusOne * kWidth8); + 
DECLARE_ALIGNED(16, uint8_t, temp2[9 * 8]); + DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]); if (xoffset == 0) { - var_filter_block2d_bil_w8(src, temp2, src_stride, kWidth8, kHeight8, - kWidth8, bilinear_taps_coeff[yoffset]); + var_filter_block2d_bil_w8(src, temp2, src_stride, 8, 8, + 8, bilinear_taps_coeff[yoffset]); } else if (yoffset == 0) { - var_filter_block2d_bil_w8(src, temp2, src_stride, kPixelStepOne, - kHeight8PlusOne, kWidth8, + var_filter_block2d_bil_w8(src, temp2, src_stride, 1, + 9, 8, bilinear_taps_coeff[xoffset]); } else { - var_filter_block2d_bil_w8(src, fdata3, src_stride, kPixelStepOne, - kHeight8PlusOne, kWidth8, + var_filter_block2d_bil_w8(src, fdata3, src_stride, 1, + 9, 8, bilinear_taps_coeff[xoffset]); - var_filter_block2d_bil_w8(fdata3, temp2, kWidth8, kWidth8, kHeight8, - kWidth8, bilinear_taps_coeff[yoffset]); + var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8, + 8, bilinear_taps_coeff[yoffset]); } - return variance8x8_neon(temp2, kWidth8, dst, dst_stride, sse); + return variance8x8_neon(temp2, 8, dst, dst_stride, sse); } diff --git a/media/libvpx/vp8/common/arm/variance_arm.c b/media/libvpx/vp8/common/arm/variance_arm.c index 467a509420e..0f293f03d94 100644 --- a/media/libvpx/vp8/common/arm/variance_arm.c +++ b/media/libvpx/vp8/common/arm/variance_arm.c @@ -9,10 +9,14 @@ */ #include "vpx_config.h" -#include "vp8_rtcd.h" +#include "./vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vp8/common/variance.h" #include "vp8/common/filter.h" +// TODO(johannkoenig): Move this to vpx_dsp or vp8/encoder +#if CONFIG_VP8_ENCODER + #if HAVE_MEDIA #include "vp8/common/arm/bilinearfilter_arm.h" @@ -40,8 +44,8 @@ unsigned int vp8_sub_pixel_variance8x8_armv6 vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, 8, 8, 8, VFilter); - return vp8_variance8x8_armv6(second_pass, 8, dst_ptr, - dst_pixels_per_line, sse); + return vpx_variance8x8_media(second_pass, 8, dst_ptr, + dst_pixels_per_line, sse); } unsigned int 
vp8_sub_pixel_variance16x16_armv6 @@ -86,13 +90,13 @@ unsigned int vp8_sub_pixel_variance16x16_armv6 vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, 16, 16, 16, VFilter); - var = vp8_variance16x16_armv6(second_pass, 16, dst_ptr, - dst_pixels_per_line, sse); + var = vpx_variance16x16_media(second_pass, 16, dst_ptr, + dst_pixels_per_line, sse); } return var; } -#endif /* HAVE_MEDIA */ +#endif // HAVE_MEDIA #if HAVE_NEON @@ -129,4 +133,5 @@ unsigned int vp8_sub_pixel_variance16x16_neon return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); } -#endif +#endif // HAVE_NEON +#endif // CONFIG_VP8_ENCODER diff --git a/media/libvpx/vp8/common/common.h b/media/libvpx/vp8/common/common.h index 17262d6983c..ba3d9f54d1e 100644 --- a/media/libvpx/vp8/common/common.h +++ b/media/libvpx/vp8/common/common.h @@ -29,19 +29,19 @@ extern "C" { #define vp8_copy( Dest, Src) { \ assert( sizeof( Dest) == sizeof( Src)); \ - vpx_memcpy( Dest, Src, sizeof( Src)); \ + memcpy( Dest, Src, sizeof( Src)); \ } /* Use this for variably-sized arrays. */ #define vp8_copy_array( Dest, Src, N) { \ assert( sizeof( *Dest) == sizeof( *Src)); \ - vpx_memcpy( Dest, Src, N * sizeof( *Src)); \ + memcpy( Dest, Src, N * sizeof( *Src)); \ } -#define vp8_zero( Dest) vpx_memset( &Dest, 0, sizeof( Dest)); +#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest)); -#define vp8_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest)); +#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest)); #ifdef __cplusplus diff --git a/media/libvpx/vp8/common/copy_c.c b/media/libvpx/vp8/common/copy_c.c new file mode 100644 index 00000000000..e3392913f63 --- /dev/null +++ b/media/libvpx/vp8/common/copy_c.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include + +#include "./vp8_rtcd.h" +#include "vpx/vpx_integer.h" + +/* Copy 2 macroblocks to a buffer */ +void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride, + unsigned char *dst_ptr, int dst_stride, + int height) +{ + int r; + + for (r = 0; r < height; r++) + { + memcpy(dst_ptr, src_ptr, 32); + + src_ptr += src_stride; + dst_ptr += dst_stride; + + } +} diff --git a/media/libvpx/vp8/common/debugmodes.c b/media/libvpx/vp8/common/debugmodes.c index 46064e61d53..159fddc6a76 100644 --- a/media/libvpx/vp8/common/debugmodes.c +++ b/media/libvpx/vp8/common/debugmodes.c @@ -81,7 +81,6 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f fprintf(mvs, "\n"); /* print out the block modes */ - mb_index = 0; fprintf(mvs, "Mbs for Frame %d\n", frame); { int b_row; @@ -129,7 +128,6 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f /* print out the block modes */ - mb_index = 0; fprintf(mvs, "MVs for Frame %d\n", frame); { int b_row; diff --git a/media/libvpx/vp8/common/dequantize.c b/media/libvpx/vp8/common/dequantize.c index 6e2f69a773e..f8b04fa4ee5 100644 --- a/media/libvpx/vp8/common/dequantize.c +++ b/media/libvpx/vp8/common/dequantize.c @@ -38,6 +38,6 @@ void vp8_dequant_idct_add_c(short *input, short *dq, vp8_short_idct4x4llm_c(input, dest, stride, dest, stride); - vpx_memset(input, 0, 32); + memset(input, 0, 32); } diff --git a/media/libvpx/vp8/common/entropy.c b/media/libvpx/vp8/common/entropy.c index 8c046a4f57c..c00e565f063 100644 --- a/media/libvpx/vp8/common/entropy.c +++ b/media/libvpx/vp8/common/entropy.c @@ -183,7 +183,6 @@ const 
vp8_extra_bit_struct vp8_extra_bits[12] = void vp8_default_coef_probs(VP8_COMMON *pc) { - vpx_memcpy(pc->fc.coef_probs, default_coef_probs, - sizeof(default_coef_probs)); + memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs)); } diff --git a/media/libvpx/vp8/common/entropymode.c b/media/libvpx/vp8/common/entropymode.c index 091e4c732b0..8981a8d3c2a 100644 --- a/media/libvpx/vp8/common/entropymode.c +++ b/media/libvpx/vp8/common/entropymode.c @@ -159,13 +159,13 @@ const vp8_tree_index vp8_small_mvtree [14] = void vp8_init_mbmode_probs(VP8_COMMON *x) { - vpx_memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob)); - vpx_memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob)); - vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); + memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob)); + memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob)); + memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); } void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1]) { - vpx_memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob)); + memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob)); } diff --git a/media/libvpx/vp8/common/extend.c b/media/libvpx/vp8/common/extend.c index c9bdd21897d..2d938ad7825 100644 --- a/media/libvpx/vp8/common/extend.c +++ b/media/libvpx/vp8/common/extend.c @@ -40,9 +40,9 @@ static void copy_and_extend_plane for (i = 0; i < h; i++) { - vpx_memset(dest_ptr1, src_ptr1[0], el); - vpx_memcpy(dest_ptr1 + el, src_ptr1, w); - vpx_memset(dest_ptr2, src_ptr2[0], er); + memset(dest_ptr1, src_ptr1[0], el); + memcpy(dest_ptr1 + el, src_ptr1, w); + memset(dest_ptr2, src_ptr2[0], er); src_ptr1 += sp; src_ptr2 += sp; dest_ptr1 += dp; @@ -60,13 +60,13 @@ static void copy_and_extend_plane for (i = 0; i < et; i++) { - vpx_memcpy(dest_ptr1, src_ptr1, linesize); + memcpy(dest_ptr1, src_ptr1, linesize); dest_ptr1 += dp; } for (i = 0; i < eb; i++) { - 
vpx_memcpy(dest_ptr2, src_ptr2, linesize); + memcpy(dest_ptr2, src_ptr2, linesize); dest_ptr2 += dp; } } diff --git a/media/libvpx/vp8/common/filter.c b/media/libvpx/vp8/common/filter.c index 25266f86827..84c608effaa 100644 --- a/media/libvpx/vp8/common/filter.c +++ b/media/libvpx/vp8/common/filter.c @@ -10,6 +10,7 @@ #include "filter.h" +#include "./vp8_rtcd.h" DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = { diff --git a/media/libvpx/vp8/common/generic/systemdependent.c b/media/libvpx/vp8/common/generic/systemdependent.c index d84df334810..4393ced48c8 100644 --- a/media/libvpx/vp8/common/generic/systemdependent.c +++ b/media/libvpx/vp8/common/generic/systemdependent.c @@ -17,6 +17,7 @@ #include "vpx_ports/x86.h" #endif #include "vp8/common/onyxc_int.h" +#include "vp8/common/systemdependent.h" #if CONFIG_MULTITHREAD #if HAVE_UNISTD_H && !defined(__OS2__) diff --git a/media/libvpx/vp8/common/idct_blk.c b/media/libvpx/vp8/common/idct_blk.c index 65d5002c8bc..8aa7d9bf0ff 100644 --- a/media/libvpx/vp8/common/idct_blk.c +++ b/media/libvpx/vp8/common/idct_blk.c @@ -33,7 +33,7 @@ void vp8_dequant_idct_add_y_block_c else { vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride); - vpx_memset(q, 0, 2 * sizeof(q[0])); + memset(q, 0, 2 * sizeof(q[0])); } q += 16; @@ -59,7 +59,7 @@ void vp8_dequant_idct_add_uv_block_c else { vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride); - vpx_memset(q, 0, 2 * sizeof(q[0])); + memset(q, 0, 2 * sizeof(q[0])); } q += 16; @@ -78,7 +78,7 @@ void vp8_dequant_idct_add_uv_block_c else { vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride); - vpx_memset(q, 0, 2 * sizeof(q[0])); + memset(q, 0, 2 * sizeof(q[0])); } q += 16; diff --git a/media/libvpx/vp8/common/idctllm.c b/media/libvpx/vp8/common/idctllm.c index 47af52f04e7..f5403c5aaf7 100644 --- a/media/libvpx/vp8/common/idctllm.c +++ b/media/libvpx/vp8/common/idctllm.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp8_rtcd.h" /**************************************************************************** * Notes: diff --git a/media/libvpx/vp8/common/loopfilter.c b/media/libvpx/vp8/common/loopfilter.c index da2275ee237..8b55dff92bf 100644 --- a/media/libvpx/vp8/common/loopfilter.c +++ b/media/libvpx/vp8/common/loopfilter.c @@ -82,11 +82,10 @@ void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, if (block_inside_limit < 1) block_inside_limit = 1; - vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH); - vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), - SIMD_WIDTH); - vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit), - SIMD_WIDTH); + memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH); + memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH); + memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit), + SIMD_WIDTH); } } @@ -105,7 +104,7 @@ void vp8_loop_filter_init(VP8_COMMON *cm) /* init hev threshold const vectors */ for(i = 0; i < 4 ; i++) { - vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH); + memset(lfi->hev_thr[i], i, SIMD_WIDTH); } } @@ -151,7 +150,7 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm, /* we could get rid of this if we assume that deltas are set to * zero when not in use; encoder always uses deltas */ - vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 ); + memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 ); continue; } diff --git a/media/libvpx/vp8/common/mfqe.c b/media/libvpx/vp8/common/mfqe.c index 069332660e3..5c0680f42d4 100644 --- a/media/libvpx/vp8/common/mfqe.c +++ b/media/libvpx/vp8/common/mfqe.c @@ -17,10 +17,11 @@ * higher quality. 
*/ -#include "postproc.h" -#include "variance.h" +#include "./vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "vp8/common/postproc.h" +#include "vp8/common/variance.h" #include "vpx_mem/vpx_mem.h" -#include "vp8_rtcd.h" #include "vpx_scale/yv12config.h" #include @@ -150,36 +151,36 @@ static void multiframe_quality_enhance_block if (blksize == 16) { - actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8; - act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8; + actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8; + act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8; #ifdef USE_SSD - sad = (vp8_variance16x16(y, y_stride, yd, yd_stride, &sse)); + vpx_variance16x16(y, y_stride, yd, yd_stride, &sse); sad = (sse + 128)>>8; - usad = (vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse)); + vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse); usad = (sse + 32)>>6; - vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse)); + vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse); vsad = (sse + 32)>>6; #else - sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8; - usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6; - vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6; + sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8; + usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6; + vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride)+ 32) >> 6; #endif } else /* if (blksize == 8) */ { - actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6; - act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6; + actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6; + act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6; #ifdef USE_SSD - sad = (vp8_variance8x8(y, y_stride, yd, yd_stride, &sse)); + vpx_variance8x8(y, y_stride, yd, yd_stride, &sse); sad = (sse + 32)>>6; - usad = (vp8_variance4x4(u, uv_stride, ud, 
uvd_stride, &sse)); + vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse); usad = (sse + 8)>>4; - vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse)); + vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse); vsad = (sse + 8)>>4; #else - sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6; - usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4; - vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4; + sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6; + usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4; + vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4; #endif } @@ -231,9 +232,9 @@ static void multiframe_quality_enhance_block { vp8_copy_mem8x8(y, y_stride, yd, yd_stride); for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride) - vpx_memcpy(udp, up, uvblksize); + memcpy(udp, up, uvblksize); for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride) - vpx_memcpy(vdp, vp, uvblksize); + memcpy(vdp, vp, uvblksize); } } } @@ -341,8 +342,8 @@ void vp8_multiframe_quality_enhance for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride, vp += show->uv_stride, vdp += dest->uv_stride) { - vpx_memcpy(udp, up, 4); - vpx_memcpy(vdp, vp, 4); + memcpy(udp, up, 4); + memcpy(vdp, vp, 4); } } } diff --git a/media/libvpx/vp8/common/postproc.c b/media/libvpx/vp8/common/postproc.c index b112663006c..a4e6ae170c9 100644 --- a/media/libvpx/vp8/common/postproc.c +++ b/media/libvpx/vp8/common/postproc.c @@ -355,8 +355,8 @@ void vp8_deblock(VP8_COMMON *cm, else mb_ppl = (unsigned char)ppl; - vpx_memset(ylptr, mb_ppl, 16); - vpx_memset(uvlptr, mb_ppl, 8); + memset(ylptr, mb_ppl, 16); + memset(uvlptr, mb_ppl, 8); ylptr += 16; uvlptr += 8; @@ -403,7 +403,7 @@ void vp8_de_noise(VP8_COMMON *cm, (void) low_var_thresh; (void) flag; - vpx_memset(limits, (unsigned char)ppl, 16 * mb_cols); + memset(limits, (unsigned char)ppl, 16 * mb_cols); /* TODO: The 
original code don't filter the 2 outer rows and columns. */ for (mbr = 0; mbr < mb_rows; mbr++) @@ -427,7 +427,7 @@ void vp8_de_noise(VP8_COMMON *cm, } } -double vp8_gaussian(double sigma, double mu, double x) +static double gaussian(double sigma, double mu, double x) { return 1 / (sigma * sqrt(2.0 * 3.14159265)) * (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma))); @@ -455,7 +455,7 @@ static void fillrd(struct postproc_state *state, int q, int a) for (i = -32; i < 32; i++) { - const int v = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i)); + const int v = (int)(.5 + 256 * gaussian(sigma, 0, i)); if (v) { @@ -763,7 +763,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t /* insure that postproc is set to all 0's so that post proc * doesn't pull random data in from edge */ - vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); + memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); } } diff --git a/media/libvpx/vp8/common/reconinter.c b/media/libvpx/vp8/common/reconinter.c index bac3c9474ee..e3025955871 100644 --- a/media/libvpx/vp8/common/reconinter.c +++ b/media/libvpx/vp8/common/reconinter.c @@ -10,6 +10,8 @@ #include +#include + #include "vpx_config.h" #include "vp8_rtcd.h" #include "vpx/vpx_integer.h" @@ -30,31 +32,8 @@ void vp8_copy_mem16x16_c( for (r = 0; r < 16; r++) { -#if !(CONFIG_FAST_UNALIGNED) - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - dst[4] = src[4]; - dst[5] = src[5]; - dst[6] = src[6]; - dst[7] = src[7]; - dst[8] = src[8]; - dst[9] = src[9]; - dst[10] = src[10]; - dst[11] = src[11]; - dst[12] = src[12]; - dst[13] = src[13]; - dst[14] = src[14]; - dst[15] = src[15]; + memcpy(dst, src, 16); -#else - ((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ; - ((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ; - ((uint32_t *)dst)[2] = ((uint32_t *)src)[2] ; - ((uint32_t *)dst)[3] = ((uint32_t *)src)[3] ; - -#endif src += 
src_stride; dst += dst_stride; @@ -72,19 +51,8 @@ void vp8_copy_mem8x8_c( for (r = 0; r < 8; r++) { -#if !(CONFIG_FAST_UNALIGNED) - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - dst[4] = src[4]; - dst[5] = src[5]; - dst[6] = src[6]; - dst[7] = src[7]; -#else - ((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ; - ((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ; -#endif + memcpy(dst, src, 8); + src += src_stride; dst += dst_stride; @@ -102,19 +70,8 @@ void vp8_copy_mem8x4_c( for (r = 0; r < 4; r++) { -#if !(CONFIG_FAST_UNALIGNED) - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - dst[4] = src[4]; - dst[5] = src[5]; - dst[6] = src[6]; - dst[7] = src[7]; -#else - ((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ; - ((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ; -#endif + memcpy(dst, src, 8); + src += src_stride; dst += dst_stride; diff --git a/media/libvpx/vp8/common/reconintra.c b/media/libvpx/vp8/common/reconintra.c index ec51ffe40d9..0a6c51b3531 100644 --- a/media/libvpx/vp8/common/reconintra.c +++ b/media/libvpx/vp8/common/reconintra.c @@ -70,10 +70,10 @@ void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x, expected_dc = 128; } - /*vpx_memset(ypred_ptr, expected_dc, 256);*/ + /*memset(ypred_ptr, expected_dc, 256);*/ for (r = 0; r < 16; r++) { - vpx_memset(ypred_ptr, expected_dc, 16); + memset(ypred_ptr, expected_dc, 16); ypred_ptr += y_stride; } } @@ -98,7 +98,7 @@ void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x, for (r = 0; r < 16; r++) { - vpx_memset(ypred_ptr, yleft_col[r], 16); + memset(ypred_ptr, yleft_col[r], 16); ypred_ptr += y_stride; } @@ -202,12 +202,12 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x, } - /*vpx_memset(upred_ptr,expected_udc,64);*/ - /*vpx_memset(vpred_ptr,expected_vdc,64);*/ + /*memset(upred_ptr,expected_udc,64);*/ + /*memset(vpred_ptr,expected_vdc,64);*/ for (i = 0; i < 8; i++) { - vpx_memset(upred_ptr, expected_udc, 8); - vpx_memset(vpred_ptr, expected_vdc, 8); + 
memset(upred_ptr, expected_udc, 8); + memset(vpred_ptr, expected_vdc, 8); upred_ptr += pred_stride; vpred_ptr += pred_stride; } @@ -217,8 +217,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x, { for (i = 0; i < 8; i++) { - vpx_memcpy(upred_ptr, uabove_row, 8); - vpx_memcpy(vpred_ptr, vabove_row, 8); + memcpy(upred_ptr, uabove_row, 8); + memcpy(vpred_ptr, vabove_row, 8); upred_ptr += pred_stride; vpred_ptr += pred_stride; } @@ -229,8 +229,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x, { for (i = 0; i < 8; i++) { - vpx_memset(upred_ptr, uleft_col[i], 8); - vpx_memset(vpred_ptr, vleft_col[i], 8); + memset(upred_ptr, uleft_col[i], 8); + memset(vpred_ptr, vleft_col[i], 8); upred_ptr += pred_stride; vpred_ptr += pred_stride; } diff --git a/media/libvpx/vp8/common/rtcd.c b/media/libvpx/vp8/common/rtcd.c index 0b371b094aa..ab0e9b47fe8 100644 --- a/media/libvpx/vp8/common/rtcd.c +++ b/media/libvpx/vp8/common/rtcd.c @@ -7,15 +7,13 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#include "vpx_config.h" +#include "./vpx_config.h" #define RTCD_C -#include "vp8_rtcd.h" +#include "./vp8_rtcd.h" #include "vpx_ports/vpx_once.h" -extern void vpx_scale_rtcd(void); void vp8_rtcd() { - vpx_scale_rtcd(); once(setup_rtcd_internal); } diff --git a/media/libvpx/vp8/common/sad_c.c b/media/libvpx/vp8/common/sad_c.c deleted file mode 100644 index 5f36fc96e86..00000000000 --- a/media/libvpx/vp8/common/sad_c.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include -#include -#include "vpx_config.h" -#include "vpx/vpx_integer.h" - -static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int max_sad, int m, int n) -{ - int r, c; - unsigned int sad = 0; - - for (r = 0; r < n; r++) - { - for (c = 0; c < m; c++) - { - sad += abs(src_ptr[c] - ref_ptr[c]); - } - - if (sad > max_sad) - break; - - src_ptr += src_stride; - ref_ptr += ref_stride; - } - - return sad; -} - -/* max_sad is provided as an optional optimization point. Alternative - * implementations of these functions are not required to check it. - */ - -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int max_sad) -{ - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 16); -} - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int max_sad) -{ - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 8); -} - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int max_sad) -{ - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 8); - -} - -unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int max_sad) -{ - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 16); -} - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int max_sad) -{ - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 4, 4); -} - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int *sad_array) -{ - sad_array[0] = 
vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); - sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); - sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -} - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned short *sad_array) -{ - sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); - sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); - sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); - sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); - sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); - sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); - sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); -} - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int *sad_array) -{ - sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); - sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); - sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -} - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned short *sad_array) -{ - sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); - sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, 
ref_stride, UINT_MAX); - sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); - sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); - sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); - sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); - sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); -} - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int *sad_array) -{ - sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); - sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); - sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -} - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned short *sad_array) -{ - sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); - sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); - sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); - sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); - sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); - sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); - sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, 
ref_ptr + 7, ref_stride, UINT_MAX); -} - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int *sad_array) -{ - sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); - sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); - sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -} - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned short *sad_array) -{ - sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); - sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); - sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); - sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); - sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); - sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); - sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); -} - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned int *sad_array) -{ - sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); - sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); - sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -} - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, - const unsigned char *ref_ptr, int ref_stride, - unsigned short *sad_array) -{ 
- sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); - sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); - sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); - sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); - sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); - sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); - sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); -} - -void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, - const unsigned char * const ref_ptr[], int ref_stride, - unsigned int *sad_array) -{ - sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); - sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); - sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); - sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); -} - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, - const unsigned char * const ref_ptr[], int ref_stride, - unsigned int *sad_array) -{ - sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); - sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); - sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); - sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); -} - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, - const unsigned char * const ref_ptr[], int ref_stride, - unsigned int 
*sad_array) -{ - sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); - sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); - sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); - sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); -} - -void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, - const unsigned char * const ref_ptr[], int ref_stride, - unsigned int *sad_array) -{ - sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); - sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); - sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); - sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); -} - -void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, - const unsigned char * const ref_ptr[], int ref_stride, - unsigned int *sad_array) -{ - sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); - sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); - sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); - sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); -} - -/* Copy 2 macroblocks to a buffer */ -void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride, - unsigned char *dst_ptr, int dst_stride, - int height) -{ - int r; - - for (r = 0; r < height; r++) - { -#if !(CONFIG_FAST_UNALIGNED) - dst_ptr[0] = src_ptr[0]; - dst_ptr[1] = src_ptr[1]; - dst_ptr[2] = src_ptr[2]; - dst_ptr[3] = src_ptr[3]; - dst_ptr[4] = src_ptr[4]; - dst_ptr[5] = src_ptr[5]; - dst_ptr[6] = src_ptr[6]; - dst_ptr[7] = src_ptr[7]; - dst_ptr[8] = src_ptr[8]; - dst_ptr[9] = src_ptr[9]; - dst_ptr[10] = src_ptr[10]; - dst_ptr[11] = src_ptr[11]; - dst_ptr[12] = src_ptr[12]; - dst_ptr[13] = src_ptr[13]; - 
dst_ptr[14] = src_ptr[14]; - dst_ptr[15] = src_ptr[15]; - dst_ptr[16] = src_ptr[16]; - dst_ptr[17] = src_ptr[17]; - dst_ptr[18] = src_ptr[18]; - dst_ptr[19] = src_ptr[19]; - dst_ptr[20] = src_ptr[20]; - dst_ptr[21] = src_ptr[21]; - dst_ptr[22] = src_ptr[22]; - dst_ptr[23] = src_ptr[23]; - dst_ptr[24] = src_ptr[24]; - dst_ptr[25] = src_ptr[25]; - dst_ptr[26] = src_ptr[26]; - dst_ptr[27] = src_ptr[27]; - dst_ptr[28] = src_ptr[28]; - dst_ptr[29] = src_ptr[29]; - dst_ptr[30] = src_ptr[30]; - dst_ptr[31] = src_ptr[31]; -#else - ((uint32_t *)dst_ptr)[0] = ((uint32_t *)src_ptr)[0] ; - ((uint32_t *)dst_ptr)[1] = ((uint32_t *)src_ptr)[1] ; - ((uint32_t *)dst_ptr)[2] = ((uint32_t *)src_ptr)[2] ; - ((uint32_t *)dst_ptr)[3] = ((uint32_t *)src_ptr)[3] ; - ((uint32_t *)dst_ptr)[4] = ((uint32_t *)src_ptr)[4] ; - ((uint32_t *)dst_ptr)[5] = ((uint32_t *)src_ptr)[5] ; - ((uint32_t *)dst_ptr)[6] = ((uint32_t *)src_ptr)[6] ; - ((uint32_t *)dst_ptr)[7] = ((uint32_t *)src_ptr)[7] ; -#endif - src_ptr += src_stride; - dst_ptr += dst_stride; - - } -} diff --git a/media/libvpx/vp8/common/setupintrarecon.c b/media/libvpx/vp8/common/setupintrarecon.c index 60afe519f56..669564db42b 100644 --- a/media/libvpx/vp8/common/setupintrarecon.c +++ b/media/libvpx/vp8/common/setupintrarecon.c @@ -17,15 +17,15 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) int i; /* set up frame new frame for intra coded blocks */ - vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); + memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); for (i = 0; i < ybf->y_height; i++) ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129; - vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); + memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); for (i = 0; i < ybf->uv_height; i++) ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; - vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); + memset(ybf->v_buffer - 1 - 
ybf->uv_stride, 127, ybf->uv_width + 5); for (i = 0; i < ybf->uv_height; i++) ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; @@ -33,7 +33,7 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) { - vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); - vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); - vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); + memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); + memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); + memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); } diff --git a/media/libvpx/vp8/common/variance.h b/media/libvpx/vp8/common/variance.h index 89a32a72268..c6c9f41bf6a 100644 --- a/media/libvpx/vp8/common/variance.h +++ b/media/libvpx/vp8/common/variance.h @@ -14,50 +14,42 @@ #include "vpx_config.h" +#include "vpx/vpx_integer.h" + #ifdef __cplusplus extern "C" { #endif -typedef unsigned int(*vp8_sad_fn_t)( - const unsigned char *src_ptr, +typedef unsigned int(*vpx_sad_fn_t)( + const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int max_sad); + const uint8_t *ref_ptr, + int ref_stride); typedef void (*vp8_copy32xn_fn_t)( const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, + unsigned char *ref_ptr, int ref_stride, int n); -typedef void (*vp8_sad_multi_fn_t)( +typedef void (*vpx_sad_multi_fn_t)( const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const unsigned char *ref_array, int ref_stride, unsigned int *sad_array); -typedef void (*vp8_sad_multi1_fn_t) +typedef void (*vpx_sad_multi_d_fn_t) ( const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array - ); - -typedef void (*vp8_sad_multi_d_fn_t) - ( - const unsigned char *src_ptr, - int source_stride, - 
const unsigned char * const ref_ptr[], + const unsigned char * const ref_array[], int ref_stride, unsigned int *sad_array ); -typedef unsigned int (*vp8_variance_fn_t) +typedef unsigned int (*vpx_variance_fn_t) ( const unsigned char *src_ptr, int source_stride, @@ -77,40 +69,17 @@ typedef unsigned int (*vp8_subpixvariance_fn_t) unsigned int *sse ); -typedef void (*vp8_ssimpf_fn_t) - ( - unsigned char *s, - int sp, - unsigned char *r, - int rp, - unsigned long *sum_s, - unsigned long *sum_r, - unsigned long *sum_sq_s, - unsigned long *sum_sq_r, - unsigned long *sum_sxr - ); - -typedef unsigned int (*vp8_getmbss_fn_t)(const short *); - -typedef unsigned int (*vp8_get16x16prederror_fn_t) - ( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int ref_stride - ); - typedef struct variance_vtable { - vp8_sad_fn_t sdf; - vp8_variance_fn_t vf; + vpx_sad_fn_t sdf; + vpx_variance_fn_t vf; vp8_subpixvariance_fn_t svf; - vp8_variance_fn_t svf_halfpix_h; - vp8_variance_fn_t svf_halfpix_v; - vp8_variance_fn_t svf_halfpix_hv; - vp8_sad_multi_fn_t sdx3f; - vp8_sad_multi1_fn_t sdx8f; - vp8_sad_multi_d_fn_t sdx4df; + vpx_variance_fn_t svf_halfpix_h; + vpx_variance_fn_t svf_halfpix_v; + vpx_variance_fn_t svf_halfpix_hv; + vpx_sad_multi_fn_t sdx3f; + vpx_sad_multi_fn_t sdx8f; + vpx_sad_multi_d_fn_t sdx4df; #if ARCH_X86 || ARCH_X86_64 vp8_copy32xn_fn_t copymem; #endif diff --git a/media/libvpx/vp8/common/variance_c.c b/media/libvpx/vp8/common/variance_c.c index 773b655efc5..02915a4defd 100644 --- a/media/libvpx/vp8/common/variance_c.c +++ b/media/libvpx/vp8/common/variance_c.c @@ -8,43 +8,34 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - -#include "variance.h" +#include "./vp8_rtcd.h" #include "filter.h" +#include "variance.h" - -unsigned int vp8_get_mb_ss_c -( - const short *src_ptr -) -{ - unsigned int i = 0, sum = 0; - - do - { - sum += (src_ptr[i] * src_ptr[i]); - i++; - } - while (i < 256); - - return sum; +/* This is a bad idea. + * ctz = count trailing zeros */ +static int ctz(int a) { + int b = 0; + while (a != 1) { + a >>= 1; + b++; + } + return b; } - -static void variance( +static unsigned int variance( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, int w, int h, - unsigned int *sse, - int *sum) + unsigned int *sse) { int i, j; - int diff; + int diff, sum; - *sum = 0; + sum = 0; *sse = 0; for (i = 0; i < h; i++) @@ -52,114 +43,17 @@ static void variance( for (j = 0; j < w; j++) { diff = src_ptr[j] - ref_ptr[j]; - *sum += diff; + sum += diff; *sse += diff * diff; } src_ptr += source_stride; ref_ptr += recon_stride; } + + return (*sse - (((unsigned int)sum * sum) >> (int)((ctz(w) + ctz(h))))); } - -unsigned int vp8_variance16x16_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 8)); -} - -unsigned int vp8_variance8x16_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); -} - -unsigned int vp8_variance16x8_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, 
&var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); -} - - -unsigned int vp8_variance8x8_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 6)); -} - -unsigned int vp8_variance4x4_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); -} - - -unsigned int vp8_mse16x16_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); - *sse = var; - return var; -} - - /**************************************************************************** * * ROUTINE : filter_block2d_bil_first_pass @@ -303,7 +197,7 @@ unsigned int vp8_sub_pixel_variance4x4_c /* Now filter Verticaly */ var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); - return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); + return variance(temp2, 4, dst_ptr, dst_pixels_per_line, 4, 4, sse); } @@ -328,7 +222,7 @@ unsigned int vp8_sub_pixel_variance8x8_c var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter); var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter); - return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); + return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 8, sse); } unsigned int vp8_sub_pixel_variance16x16_c @@ -352,7 +246,7 @@ unsigned int vp8_sub_pixel_variance16x16_c 
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter); var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter); - return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); + return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 16, sse); } @@ -392,21 +286,6 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c( } -unsigned int vp8_sub_pixel_mse16x16_c -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) -{ - vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); - return *sse; -} - unsigned int vp8_sub_pixel_variance16x8_c ( const unsigned char *src_ptr, @@ -428,7 +307,7 @@ unsigned int vp8_sub_pixel_variance16x8_c var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter); var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter); - return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); + return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 8, sse); } unsigned int vp8_sub_pixel_variance8x16_c @@ -454,5 +333,5 @@ unsigned int vp8_sub_pixel_variance8x16_c var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter); var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter); - return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); + return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 16, sse); } diff --git a/media/libvpx/vp8/common/x86/copy_sse2.asm b/media/libvpx/vp8/common/x86/copy_sse2.asm new file mode 100644 index 00000000000..86fae269563 --- /dev/null +++ b/media/libvpx/vp8/common/x86/copy_sse2.asm @@ -0,0 +1,93 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + + +;void vp8_copy32xn_sse2( +; unsigned char *src_ptr, +; int src_stride, +; unsigned char *dst_ptr, +; int dst_stride, +; int height); +global sym(vp8_copy32xn_sse2) PRIVATE +sym(vp8_copy32xn_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;dst_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;dst_stride + movsxd rcx, dword ptr arg(4) ;height + +.block_copy_sse2_loopx4: + movdqu xmm0, XMMWORD PTR [rsi] + movdqu xmm1, XMMWORD PTR [rsi + 16] + movdqu xmm2, XMMWORD PTR [rsi + rax] + movdqu xmm3, XMMWORD PTR [rsi + rax + 16] + + lea rsi, [rsi+rax*2] + + movdqu xmm4, XMMWORD PTR [rsi] + movdqu xmm5, XMMWORD PTR [rsi + 16] + movdqu xmm6, XMMWORD PTR [rsi + rax] + movdqu xmm7, XMMWORD PTR [rsi + rax + 16] + + lea rsi, [rsi+rax*2] + + movdqa XMMWORD PTR [rdi], xmm0 + movdqa XMMWORD PTR [rdi + 16], xmm1 + movdqa XMMWORD PTR [rdi + rdx], xmm2 + movdqa XMMWORD PTR [rdi + rdx + 16], xmm3 + + lea rdi, [rdi+rdx*2] + + movdqa XMMWORD PTR [rdi], xmm4 + movdqa XMMWORD PTR [rdi + 16], xmm5 + movdqa XMMWORD PTR [rdi + rdx], xmm6 + movdqa XMMWORD PTR [rdi + rdx + 16], xmm7 + + lea rdi, [rdi+rdx*2] + + sub rcx, 4 + cmp rcx, 4 + jge .block_copy_sse2_loopx4 + + cmp rcx, 0 + je .copy_is_done + +.block_copy_sse2_loop: + movdqu xmm0, XMMWORD PTR [rsi] + movdqu xmm1, XMMWORD PTR [rsi + 16] + lea rsi, [rsi+rax] + + movdqa XMMWORD PTR [rdi], xmm0 + movdqa XMMWORD PTR [rdi + 16], xmm1 + lea rdi, [rdi+rdx] + + sub rcx, 1 + jne .block_copy_sse2_loop + +.copy_is_done: + ; begin epilog + 
pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret diff --git a/media/libvpx/vp8/common/x86/copy_sse3.asm b/media/libvpx/vp8/common/x86/copy_sse3.asm new file mode 100644 index 00000000000..d789a40ccf7 --- /dev/null +++ b/media/libvpx/vp8/common/x86/copy_sse3.asm @@ -0,0 +1,146 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "vpx_ports/x86_abi_support.asm" + +%macro STACK_FRAME_CREATE_X3 0 +%if ABI_IS_32BIT + %define src_ptr rsi + %define src_stride rax + %define ref_ptr rdi + %define ref_stride rdx + %define end_ptr rcx + %define ret_var rbx + %define result_ptr arg(4) + %define max_sad arg(4) + %define height dword ptr arg(4) + push rbp + mov rbp, rsp + push rsi + push rdi + push rbx + + mov rsi, arg(0) ; src_ptr + mov rdi, arg(2) ; ref_ptr + + movsxd rax, dword ptr arg(1) ; src_stride + movsxd rdx, dword ptr arg(3) ; ref_stride +%else + %if LIBVPX_YASM_WIN64 + SAVE_XMM 7, u + %define src_ptr rcx + %define src_stride rdx + %define ref_ptr r8 + %define ref_stride r9 + %define end_ptr r10 + %define ret_var r11 + %define result_ptr [rsp+xmm_stack_space+8+4*8] + %define max_sad [rsp+xmm_stack_space+8+4*8] + %define height dword ptr [rsp+xmm_stack_space+8+4*8] + %else + %define src_ptr rdi + %define src_stride rsi + %define ref_ptr rdx + %define ref_stride rcx + %define end_ptr r9 + %define ret_var r10 + %define result_ptr r8 + %define max_sad r8 + %define height r8 + %endif +%endif + +%endmacro + +%macro STACK_FRAME_DESTROY_X3 0 + %define src_ptr + %define src_stride + %define ref_ptr + %define ref_stride + %define end_ptr + %define ret_var + %define result_ptr + %define max_sad + %define 
height + +%if ABI_IS_32BIT + pop rbx + pop rdi + pop rsi + pop rbp +%else + %if LIBVPX_YASM_WIN64 + RESTORE_XMM + %endif +%endif + ret +%endmacro + + +;void vp8_copy32xn_sse3( +; unsigned char *src_ptr, +; int src_stride, +; unsigned char *dst_ptr, +; int dst_stride, +; int height); +global sym(vp8_copy32xn_sse3) PRIVATE +sym(vp8_copy32xn_sse3): + + STACK_FRAME_CREATE_X3 + +.block_copy_sse3_loopx4: + lea end_ptr, [src_ptr+src_stride*2] + + movdqu xmm0, XMMWORD PTR [src_ptr] + movdqu xmm1, XMMWORD PTR [src_ptr + 16] + movdqu xmm2, XMMWORD PTR [src_ptr + src_stride] + movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16] + movdqu xmm4, XMMWORD PTR [end_ptr] + movdqu xmm5, XMMWORD PTR [end_ptr + 16] + movdqu xmm6, XMMWORD PTR [end_ptr + src_stride] + movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16] + + lea src_ptr, [src_ptr+src_stride*4] + + lea end_ptr, [ref_ptr+ref_stride*2] + + movdqa XMMWORD PTR [ref_ptr], xmm0 + movdqa XMMWORD PTR [ref_ptr + 16], xmm1 + movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2 + movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3 + movdqa XMMWORD PTR [end_ptr], xmm4 + movdqa XMMWORD PTR [end_ptr + 16], xmm5 + movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6 + movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7 + + lea ref_ptr, [ref_ptr+ref_stride*4] + + sub height, 4 + cmp height, 4 + jge .block_copy_sse3_loopx4 + + ;Check to see if there is more rows need to be copied. 
+ cmp height, 0 + je .copy_is_done + +.block_copy_sse3_loop: + movdqu xmm0, XMMWORD PTR [src_ptr] + movdqu xmm1, XMMWORD PTR [src_ptr + 16] + lea src_ptr, [src_ptr+src_stride] + + movdqa XMMWORD PTR [ref_ptr], xmm0 + movdqa XMMWORD PTR [ref_ptr + 16], xmm1 + lea ref_ptr, [ref_ptr+ref_stride] + + sub height, 1 + jne .block_copy_sse3_loop + +.copy_is_done: + STACK_FRAME_DESTROY_X3 diff --git a/media/libvpx/vp8/common/x86/idct_blk_mmx.c b/media/libvpx/vp8/common/x86/idct_blk_mmx.c index a1e4ce6b329..f2532b34da2 100644 --- a/media/libvpx/vp8/common/x86/idct_blk_mmx.c +++ b/media/libvpx/vp8/common/x86/idct_blk_mmx.c @@ -36,7 +36,7 @@ void vp8_dequant_idct_add_y_block_mmx else if (eobs[0] == 1) { vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride); - vpx_memset(q, 0, 2 * sizeof(q[0])); + memset(q, 0, 2 * sizeof(q[0])); } if (eobs[1] > 1) @@ -45,7 +45,7 @@ void vp8_dequant_idct_add_y_block_mmx { vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride, dst+4, stride); - vpx_memset(q + 16, 0, 2 * sizeof(q[0])); + memset(q + 16, 0, 2 * sizeof(q[0])); } if (eobs[2] > 1) @@ -54,7 +54,7 @@ void vp8_dequant_idct_add_y_block_mmx { vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride, dst+8, stride); - vpx_memset(q + 32, 0, 2 * sizeof(q[0])); + memset(q + 32, 0, 2 * sizeof(q[0])); } if (eobs[3] > 1) @@ -63,7 +63,7 @@ void vp8_dequant_idct_add_y_block_mmx { vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride, dst+12, stride); - vpx_memset(q + 48, 0, 2 * sizeof(q[0])); + memset(q + 48, 0, 2 * sizeof(q[0])); } q += 64; @@ -85,7 +85,7 @@ void vp8_dequant_idct_add_uv_block_mmx else if (eobs[0] == 1) { vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride); - vpx_memset(q, 0, 2 * sizeof(q[0])); + memset(q, 0, 2 * sizeof(q[0])); } if (eobs[1] > 1) @@ -94,7 +94,7 @@ void vp8_dequant_idct_add_uv_block_mmx { vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride, dstu+4, stride); - vpx_memset(q + 16, 0, 2 * sizeof(q[0])); + memset(q + 16, 0, 2 * sizeof(q[0])); } q += 
32; @@ -109,7 +109,7 @@ void vp8_dequant_idct_add_uv_block_mmx else if (eobs[0] == 1) { vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride); - vpx_memset(q, 0, 2 * sizeof(q[0])); + memset(q, 0, 2 * sizeof(q[0])); } if (eobs[1] > 1) @@ -118,7 +118,7 @@ void vp8_dequant_idct_add_uv_block_mmx { vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride, dstv+4, stride); - vpx_memset(q + 16, 0, 2 * sizeof(q[0])); + memset(q + 16, 0, 2 * sizeof(q[0])); } q += 32; diff --git a/media/libvpx/vp8/common/x86/sad_sse2.asm b/media/libvpx/vp8/common/x86/sad_sse2.asm deleted file mode 100644 index 8d86abc0758..00000000000 --- a/media/libvpx/vp8/common/x86/sad_sse2.asm +++ /dev/null @@ -1,410 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;unsigned int vp8_sad16x16_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -global sym(vp8_sad16x16_wmt) PRIVATE -sym(vp8_sad16x16_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - SAVE_XMM 6 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - - lea rcx, [rcx+rax*8] - pxor xmm6, xmm6 - -.x16x16sad_wmt_loop: - - movq xmm0, QWORD PTR [rsi] - movq xmm2, QWORD PTR [rsi+8] - - movq xmm1, QWORD PTR [rdi] - movq xmm3, QWORD PTR [rdi+8] - - movq xmm4, QWORD PTR [rsi+rax] - movq xmm5, QWORD PTR [rdi+rdx] - - - punpcklbw xmm0, xmm2 - punpcklbw xmm1, xmm3 - - psadbw xmm0, xmm1 - movq xmm2, QWORD PTR [rsi+rax+8] - - movq xmm3, QWORD PTR [rdi+rdx+8] - lea rsi, [rsi+rax*2] - - lea rdi, [rdi+rdx*2] - punpcklbw xmm4, xmm2 - - punpcklbw xmm5, xmm3 - psadbw xmm4, xmm5 - - paddw xmm6, xmm0 - paddw xmm6, xmm4 - - cmp rsi, rcx - jne .x16x16sad_wmt_loop - - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movq rax, xmm0 - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;unsigned int vp8_sad8x16_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int max_sad) -global sym(vp8_sad8x16_wmt) PRIVATE -sym(vp8_sad8x16_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rbx*8] - - lea rcx, [rcx+rbx*8] - pxor mm7, mm7 - -.x8x16sad_wmt_loop: - - movq rax, mm7 - cmp eax, arg(4) - ja .x8x16sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - movq mm2, QWORD PTR [rsi+rbx] - movq mm3, QWORD PTR [rdi+rdx] - - 
psadbw mm0, mm1 - psadbw mm2, mm3 - - lea rsi, [rsi+rbx*2] - lea rdi, [rdi+rdx*2] - - paddw mm7, mm0 - paddw mm7, mm2 - - cmp rsi, rcx - jne .x8x16sad_wmt_loop - - movq rax, mm7 - -.x8x16sad_wmt_early_exit: - - ; begin epilog - pop rdi - pop rsi - pop rbx - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp8_sad8x8_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -global sym(vp8_sad8x8_wmt) PRIVATE -sym(vp8_sad8x8_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rbx*8] - pxor mm7, mm7 - -.x8x8sad_wmt_loop: - - movq rax, mm7 - cmp eax, arg(4) - ja .x8x8sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - psadbw mm0, mm1 - lea rsi, [rsi+rbx] - - add rdi, rdx - paddw mm7, mm0 - - cmp rsi, rcx - jne .x8x8sad_wmt_loop - - movq rax, mm7 -.x8x8sad_wmt_early_exit: - - ; begin epilog - pop rdi - pop rsi - pop rbx - UNSHADOW_ARGS - pop rbp - ret - -;unsigned int vp8_sad4x4_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -global sym(vp8_sad4x4_wmt) PRIVATE -sym(vp8_sad4x4_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - movd mm0, DWORD PTR [rsi] - movd mm1, DWORD PTR [rdi] - - movd mm2, DWORD PTR [rsi+rax] - movd mm3, DWORD PTR [rdi+rdx] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - psadbw mm0, mm1 - lea rsi, [rsi+rax*2] - - lea rdi, [rdi+rdx*2] - movd mm4, DWORD PTR [rsi] - - movd mm5, DWORD PTR [rdi] - movd mm6, DWORD PTR [rsi+rax] - - movd mm7, DWORD PTR [rdi+rdx] - punpcklbw mm4, mm6 - - punpcklbw mm5, mm7 - psadbw mm4, mm5 - - paddw mm0, mm4 
- movq rax, mm0 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp8_sad16x8_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -global sym(vp8_sad16x8_wmt) PRIVATE -sym(vp8_sad16x8_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rbx - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rbx*8] - pxor mm7, mm7 - -.x16x8sad_wmt_loop: - - movq rax, mm7 - cmp eax, arg(4) - ja .x16x8sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm2, QWORD PTR [rsi+8] - - movq mm1, QWORD PTR [rdi] - movq mm3, QWORD PTR [rdi+8] - - movq mm4, QWORD PTR [rsi+rbx] - movq mm5, QWORD PTR [rdi+rdx] - - psadbw mm0, mm1 - psadbw mm2, mm3 - - movq mm1, QWORD PTR [rsi+rbx+8] - movq mm3, QWORD PTR [rdi+rdx+8] - - psadbw mm4, mm5 - psadbw mm1, mm3 - - lea rsi, [rsi+rbx*2] - lea rdi, [rdi+rdx*2] - - paddw mm0, mm2 - paddw mm4, mm1 - - paddw mm7, mm0 - paddw mm7, mm4 - - cmp rsi, rcx - jne .x16x8sad_wmt_loop - - movq rax, mm7 - -.x16x8sad_wmt_early_exit: - - ; begin epilog - pop rdi - pop rsi - pop rbx - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_copy32xn_sse2( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *dst_ptr, -; int dst_stride, -; int height); -global sym(vp8_copy32xn_sse2) PRIVATE -sym(vp8_copy32xn_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;dst_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;dst_stride - movsxd rcx, dword ptr arg(4) ;height - -.block_copy_sse2_loopx4: - movdqu xmm0, XMMWORD PTR [rsi] - movdqu xmm1, XMMWORD PTR [rsi + 16] - movdqu xmm2, XMMWORD PTR [rsi + rax] - movdqu xmm3, XMMWORD PTR [rsi + rax + 16] - - lea rsi, [rsi+rax*2] - - movdqu xmm4, XMMWORD PTR [rsi] - 
movdqu xmm5, XMMWORD PTR [rsi + 16] - movdqu xmm6, XMMWORD PTR [rsi + rax] - movdqu xmm7, XMMWORD PTR [rsi + rax + 16] - - lea rsi, [rsi+rax*2] - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi + 16], xmm1 - movdqa XMMWORD PTR [rdi + rdx], xmm2 - movdqa XMMWORD PTR [rdi + rdx + 16], xmm3 - - lea rdi, [rdi+rdx*2] - - movdqa XMMWORD PTR [rdi], xmm4 - movdqa XMMWORD PTR [rdi + 16], xmm5 - movdqa XMMWORD PTR [rdi + rdx], xmm6 - movdqa XMMWORD PTR [rdi + rdx + 16], xmm7 - - lea rdi, [rdi+rdx*2] - - sub rcx, 4 - cmp rcx, 4 - jge .block_copy_sse2_loopx4 - - cmp rcx, 0 - je .copy_is_done - -.block_copy_sse2_loop: - movdqu xmm0, XMMWORD PTR [rsi] - movdqu xmm1, XMMWORD PTR [rsi + 16] - lea rsi, [rsi+rax] - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi + 16], xmm1 - lea rdi, [rdi+rdx] - - sub rcx, 1 - jne .block_copy_sse2_loop - -.copy_is_done: - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/media/libvpx/vp8/common/x86/sad_sse3.asm b/media/libvpx/vp8/common/x86/sad_sse3.asm deleted file mode 100644 index 69c8d376973..00000000000 --- a/media/libvpx/vp8/common/x86/sad_sse3.asm +++ /dev/null @@ -1,960 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - -%include "vpx_ports/x86_abi_support.asm" - -%macro STACK_FRAME_CREATE_X3 0 -%if ABI_IS_32BIT - %define src_ptr rsi - %define src_stride rax - %define ref_ptr rdi - %define ref_stride rdx - %define end_ptr rcx - %define ret_var rbx - %define result_ptr arg(4) - %define max_sad arg(4) - %define height dword ptr arg(4) - push rbp - mov rbp, rsp - push rsi - push rdi - push rbx - - mov rsi, arg(0) ; src_ptr - mov rdi, arg(2) ; ref_ptr - - movsxd rax, dword ptr arg(1) ; src_stride - movsxd rdx, dword ptr arg(3) ; ref_stride -%else - %if LIBVPX_YASM_WIN64 - SAVE_XMM 7, u - %define src_ptr rcx - %define src_stride rdx - %define ref_ptr r8 - %define ref_stride r9 - %define end_ptr r10 - %define ret_var r11 - %define result_ptr [rsp+xmm_stack_space+8+4*8] - %define max_sad [rsp+xmm_stack_space+8+4*8] - %define height dword ptr [rsp+xmm_stack_space+8+4*8] - %else - %define src_ptr rdi - %define src_stride rsi - %define ref_ptr rdx - %define ref_stride rcx - %define end_ptr r9 - %define ret_var r10 - %define result_ptr r8 - %define max_sad r8 - %define height r8 - %endif -%endif - -%endmacro - -%macro STACK_FRAME_DESTROY_X3 0 - %define src_ptr - %define src_stride - %define ref_ptr - %define ref_stride - %define end_ptr - %define ret_var - %define result_ptr - %define max_sad - %define height - -%if ABI_IS_32BIT - pop rbx - pop rdi - pop rsi - pop rbp -%else - %if LIBVPX_YASM_WIN64 - RESTORE_XMM - %endif -%endif - ret -%endmacro - -%macro STACK_FRAME_CREATE_X4 0 -%if ABI_IS_32BIT - %define src_ptr rsi - %define src_stride rax - %define r0_ptr rcx - %define r1_ptr rdx - %define r2_ptr rbx - %define r3_ptr rdi - %define ref_stride rbp - %define result_ptr arg(4) - push rbp - mov rbp, rsp - push rsi - push rdi - push rbx - - push rbp - mov rdi, arg(2) ; ref_ptr_base - - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - - mov rsi, arg(0) ; src_ptr - - movsxd rbx, dword ptr arg(1) ; src_stride - movsxd rbp, dword ptr arg(3) ; ref_stride - - xchg rbx, rax -%else - %if 
LIBVPX_YASM_WIN64 - SAVE_XMM 7, u - %define src_ptr rcx - %define src_stride rdx - %define r0_ptr rsi - %define r1_ptr r10 - %define r2_ptr r11 - %define r3_ptr r8 - %define ref_stride r9 - %define result_ptr [rsp+xmm_stack_space+16+4*8] - push rsi - - LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr - %else - %define src_ptr rdi - %define src_stride rsi - %define r0_ptr r9 - %define r1_ptr r10 - %define r2_ptr r11 - %define r3_ptr rdx - %define ref_stride rcx - %define result_ptr r8 - - LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr - - %endif -%endif -%endmacro - -%macro STACK_FRAME_DESTROY_X4 0 - %define src_ptr - %define src_stride - %define r0_ptr - %define r1_ptr - %define r2_ptr - %define r3_ptr - %define ref_stride - %define result_ptr - -%if ABI_IS_32BIT - pop rbx - pop rdi - pop rsi - pop rbp -%else - %if LIBVPX_YASM_WIN64 - pop rsi - RESTORE_XMM - %endif -%endif - ret -%endmacro - -%macro PROCESS_16X2X3 5 -%if %1==0 - movdqa xmm0, XMMWORD PTR [%2] - lddqu xmm5, XMMWORD PTR [%3] - lddqu xmm6, XMMWORD PTR [%3+1] - lddqu xmm7, XMMWORD PTR [%3+2] - - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [%2] - lddqu xmm1, XMMWORD PTR [%3] - lddqu xmm2, XMMWORD PTR [%3+1] - lddqu xmm3, XMMWORD PTR [%3+2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [%2+%4] - lddqu xmm1, XMMWORD PTR [%3+%5] - lddqu xmm2, XMMWORD PTR [%3+%5+1] - lddqu xmm3, XMMWORD PTR [%3+%5+2] - -%if %1==0 || %1==1 - lea %2, [%2+%4*2] - lea %3, [%3+%5*2] -%endif - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endmacro - -%macro PROCESS_8X2X3 5 -%if %1==0 - movq mm0, QWORD PTR [%2] - movq mm5, QWORD PTR [%3] - movq mm6, QWORD PTR [%3+1] - movq mm7, QWORD PTR [%3+2] - - psadbw mm5, mm0 - psadbw mm6, mm0 - psadbw mm7, mm0 -%else - movq mm0, QWORD PTR [%2] - movq 
mm1, QWORD PTR [%3] - movq mm2, QWORD PTR [%3+1] - movq mm3, QWORD PTR [%3+2] - - psadbw mm1, mm0 - psadbw mm2, mm0 - psadbw mm3, mm0 - - paddw mm5, mm1 - paddw mm6, mm2 - paddw mm7, mm3 -%endif - movq mm0, QWORD PTR [%2+%4] - movq mm1, QWORD PTR [%3+%5] - movq mm2, QWORD PTR [%3+%5+1] - movq mm3, QWORD PTR [%3+%5+2] - -%if %1==0 || %1==1 - lea %2, [%2+%4*2] - lea %3, [%3+%5*2] -%endif - - psadbw mm1, mm0 - psadbw mm2, mm0 - psadbw mm3, mm0 - - paddw mm5, mm1 - paddw mm6, mm2 - paddw mm7, mm3 -%endmacro - -%macro LOAD_X4_ADDRESSES 5 - mov %2, [%1+REG_SZ_BYTES*0] - mov %3, [%1+REG_SZ_BYTES*1] - - mov %4, [%1+REG_SZ_BYTES*2] - mov %5, [%1+REG_SZ_BYTES*3] -%endmacro - -%macro PROCESS_16X2X4 8 -%if %1==0 - movdqa xmm0, XMMWORD PTR [%2] - lddqu xmm4, XMMWORD PTR [%3] - lddqu xmm5, XMMWORD PTR [%4] - lddqu xmm6, XMMWORD PTR [%5] - lddqu xmm7, XMMWORD PTR [%6] - - psadbw xmm4, xmm0 - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [%2] - lddqu xmm1, XMMWORD PTR [%3] - lddqu xmm2, XMMWORD PTR [%4] - lddqu xmm3, XMMWORD PTR [%5] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm4, xmm1 - lddqu xmm1, XMMWORD PTR [%6] - paddw xmm5, xmm2 - paddw xmm6, xmm3 - - psadbw xmm1, xmm0 - paddw xmm7, xmm1 -%endif - movdqa xmm0, XMMWORD PTR [%2+%7] - lddqu xmm1, XMMWORD PTR [%3+%8] - lddqu xmm2, XMMWORD PTR [%4+%8] - lddqu xmm3, XMMWORD PTR [%5+%8] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm4, xmm1 - lddqu xmm1, XMMWORD PTR [%6+%8] - paddw xmm5, xmm2 - paddw xmm6, xmm3 - -%if %1==0 || %1==1 - lea %2, [%2+%7*2] - lea %3, [%3+%8*2] - - lea %4, [%4+%8*2] - lea %5, [%5+%8*2] - - lea %6, [%6+%8*2] -%endif - psadbw xmm1, xmm0 - paddw xmm7, xmm1 - -%endmacro - -%macro PROCESS_8X2X4 8 -%if %1==0 - movq mm0, QWORD PTR [%2] - movq mm4, QWORD PTR [%3] - movq mm5, QWORD PTR [%4] - movq mm6, QWORD PTR [%5] - movq mm7, QWORD PTR [%6] - - psadbw mm4, mm0 - psadbw mm5, mm0 - psadbw mm6, mm0 - psadbw 
mm7, mm0 -%else - movq mm0, QWORD PTR [%2] - movq mm1, QWORD PTR [%3] - movq mm2, QWORD PTR [%4] - movq mm3, QWORD PTR [%5] - - psadbw mm1, mm0 - psadbw mm2, mm0 - psadbw mm3, mm0 - - paddw mm4, mm1 - movq mm1, QWORD PTR [%6] - paddw mm5, mm2 - paddw mm6, mm3 - - psadbw mm1, mm0 - paddw mm7, mm1 -%endif - movq mm0, QWORD PTR [%2+%7] - movq mm1, QWORD PTR [%3+%8] - movq mm2, QWORD PTR [%4+%8] - movq mm3, QWORD PTR [%5+%8] - - psadbw mm1, mm0 - psadbw mm2, mm0 - psadbw mm3, mm0 - - paddw mm4, mm1 - movq mm1, QWORD PTR [%6+%8] - paddw mm5, mm2 - paddw mm6, mm3 - -%if %1==0 || %1==1 - lea %2, [%2+%7*2] - lea %3, [%3+%8*2] - - lea %4, [%4+%8*2] - lea %5, [%5+%8*2] - - lea %6, [%6+%8*2] -%endif - psadbw mm1, mm0 - paddw mm7, mm1 - -%endmacro - -;void int vp8_sad16x16x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp8_sad16x16x3_sse3) PRIVATE -sym(vp8_sad16x16x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, result_ptr - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rcx], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rcx+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rcx+8], xmm0 - - STACK_FRAME_DESTROY_X3 - -;void int vp8_sad16x8x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp8_sad16x8x3_sse3) PRIVATE -sym(vp8_sad16x8x3_sse3): - - STACK_FRAME_CREATE_X3 - 
- PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, result_ptr - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rcx], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rcx+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rcx+8], xmm0 - - STACK_FRAME_DESTROY_X3 - -;void int vp8_sad8x16x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp8_sad8x16x3_sse3) PRIVATE -sym(vp8_sad8x16x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, result_ptr - - punpckldq mm5, mm6 - - movq [rcx], mm5 - movd [rcx+8], mm7 - - STACK_FRAME_DESTROY_X3 - -;void int vp8_sad8x8x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp8_sad8x8x3_sse3) PRIVATE -sym(vp8_sad8x8x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, result_ptr - - punpckldq mm5, mm6 - - movq [rcx], mm5 - movd [rcx+8], mm7 - - STACK_FRAME_DESTROY_X3 - -;void int vp8_sad4x4x3_sse3( -; unsigned char 
*src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp8_sad4x4x3_sse3) PRIVATE -sym(vp8_sad4x4x3_sse3): - - STACK_FRAME_CREATE_X3 - - movd mm0, DWORD PTR [src_ptr] - movd mm1, DWORD PTR [ref_ptr] - - movd mm2, DWORD PTR [src_ptr+src_stride] - movd mm3, DWORD PTR [ref_ptr+ref_stride] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - movd mm4, DWORD PTR [ref_ptr+1] - movd mm5, DWORD PTR [ref_ptr+2] - - movd mm2, DWORD PTR [ref_ptr+ref_stride+1] - movd mm3, DWORD PTR [ref_ptr+ref_stride+2] - - psadbw mm1, mm0 - - punpcklbw mm4, mm2 - punpcklbw mm5, mm3 - - psadbw mm4, mm0 - psadbw mm5, mm0 - - lea src_ptr, [src_ptr+src_stride*2] - lea ref_ptr, [ref_ptr+ref_stride*2] - - movd mm0, DWORD PTR [src_ptr] - movd mm2, DWORD PTR [ref_ptr] - - movd mm3, DWORD PTR [src_ptr+src_stride] - movd mm6, DWORD PTR [ref_ptr+ref_stride] - - punpcklbw mm0, mm3 - punpcklbw mm2, mm6 - - movd mm3, DWORD PTR [ref_ptr+1] - movd mm7, DWORD PTR [ref_ptr+2] - - psadbw mm2, mm0 - - paddw mm1, mm2 - - movd mm2, DWORD PTR [ref_ptr+ref_stride+1] - movd mm6, DWORD PTR [ref_ptr+ref_stride+2] - - punpcklbw mm3, mm2 - punpcklbw mm7, mm6 - - psadbw mm3, mm0 - psadbw mm7, mm0 - - paddw mm3, mm4 - paddw mm7, mm5 - - mov rcx, result_ptr - - punpckldq mm1, mm3 - - movq [rcx], mm1 - movd [rcx+8], mm7 - - STACK_FRAME_DESTROY_X3 - -;unsigned int vp8_sad16x16_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int max_sad) -;%define lddqu movdqu -global sym(vp8_sad16x16_sse3) PRIVATE -sym(vp8_sad16x16_sse3): - - STACK_FRAME_CREATE_X3 - - mov end_ptr, 4 - pxor xmm7, xmm7 - -.vp8_sad16x16_sse3_loop: - movdqa xmm0, XMMWORD PTR [src_ptr] - movdqu xmm1, XMMWORD PTR [ref_ptr] - movdqa xmm2, XMMWORD PTR [src_ptr+src_stride] - movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride] - - lea src_ptr, [src_ptr+src_stride*2] - lea ref_ptr, [ref_ptr+ref_stride*2] - - movdqa xmm4, XMMWORD PTR [src_ptr] - movdqu xmm5, XMMWORD PTR [ref_ptr] 
- movdqa xmm6, XMMWORD PTR [src_ptr+src_stride] - - psadbw xmm0, xmm1 - - movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride] - - psadbw xmm2, xmm3 - psadbw xmm4, xmm5 - psadbw xmm6, xmm1 - - lea src_ptr, [src_ptr+src_stride*2] - lea ref_ptr, [ref_ptr+ref_stride*2] - - paddw xmm7, xmm0 - paddw xmm7, xmm2 - paddw xmm7, xmm4 - paddw xmm7, xmm6 - - sub end_ptr, 1 - jne .vp8_sad16x16_sse3_loop - - movq xmm0, xmm7 - psrldq xmm7, 8 - paddw xmm0, xmm7 - movq rax, xmm0 - - STACK_FRAME_DESTROY_X3 - -;void vp8_copy32xn_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *dst_ptr, -; int dst_stride, -; int height); -global sym(vp8_copy32xn_sse3) PRIVATE -sym(vp8_copy32xn_sse3): - - STACK_FRAME_CREATE_X3 - -.block_copy_sse3_loopx4: - lea end_ptr, [src_ptr+src_stride*2] - - movdqu xmm0, XMMWORD PTR [src_ptr] - movdqu xmm1, XMMWORD PTR [src_ptr + 16] - movdqu xmm2, XMMWORD PTR [src_ptr + src_stride] - movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16] - movdqu xmm4, XMMWORD PTR [end_ptr] - movdqu xmm5, XMMWORD PTR [end_ptr + 16] - movdqu xmm6, XMMWORD PTR [end_ptr + src_stride] - movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16] - - lea src_ptr, [src_ptr+src_stride*4] - - lea end_ptr, [ref_ptr+ref_stride*2] - - movdqa XMMWORD PTR [ref_ptr], xmm0 - movdqa XMMWORD PTR [ref_ptr + 16], xmm1 - movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2 - movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3 - movdqa XMMWORD PTR [end_ptr], xmm4 - movdqa XMMWORD PTR [end_ptr + 16], xmm5 - movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6 - movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7 - - lea ref_ptr, [ref_ptr+ref_stride*4] - - sub height, 4 - cmp height, 4 - jge .block_copy_sse3_loopx4 - - ;Check to see if there is more rows need to be copied. 
- cmp height, 0 - je .copy_is_done - -.block_copy_sse3_loop: - movdqu xmm0, XMMWORD PTR [src_ptr] - movdqu xmm1, XMMWORD PTR [src_ptr + 16] - lea src_ptr, [src_ptr+src_stride] - - movdqa XMMWORD PTR [ref_ptr], xmm0 - movdqa XMMWORD PTR [ref_ptr + 16], xmm1 - lea ref_ptr, [ref_ptr+ref_stride] - - sub height, 1 - jne .block_copy_sse3_loop - -.copy_is_done: - STACK_FRAME_DESTROY_X3 - -;void vp8_sad16x16x4d_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr_base, -; int ref_stride, -; int *results) -global sym(vp8_sad16x16x4d_sse3) PRIVATE -sym(vp8_sad16x16x4d_sse3): - - STACK_FRAME_CREATE_X4 - - PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - -%if ABI_IS_32BIT - pop rbp -%endif - mov rcx, result_ptr - - movq xmm0, xmm4 - psrldq xmm4, 8 - - paddw xmm0, xmm4 - movd [rcx], xmm0 -;- - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rcx+4], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rcx+8], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rcx+12], xmm0 - - STACK_FRAME_DESTROY_X4 - -;void vp8_sad16x8x4d_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr_base, -; int ref_stride, -; int *results) -global sym(vp8_sad16x8x4d_sse3) PRIVATE -sym(vp8_sad16x8x4d_sse3): - - STACK_FRAME_CREATE_X4 - - PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, 
r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - -%if ABI_IS_32BIT - pop rbp -%endif - mov rcx, result_ptr - - movq xmm0, xmm4 - psrldq xmm4, 8 - - paddw xmm0, xmm4 - movd [rcx], xmm0 -;- - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rcx+4], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rcx+8], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rcx+12], xmm0 - - STACK_FRAME_DESTROY_X4 - -;void int vp8_sad8x16x4d_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp8_sad8x16x4d_sse3) PRIVATE -sym(vp8_sad8x16x4d_sse3): - - STACK_FRAME_CREATE_X4 - - PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - -%if ABI_IS_32BIT - pop rbp -%endif - mov rcx, result_ptr - - punpckldq mm4, mm5 - punpckldq mm6, mm7 - - movq [rcx], mm4 - movq [rcx+8], mm6 - - STACK_FRAME_DESTROY_X4 - -;void int vp8_sad8x8x4d_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp8_sad8x8x4d_sse3) PRIVATE -sym(vp8_sad8x8x4d_sse3): - - STACK_FRAME_CREATE_X4 
- - PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - -%if ABI_IS_32BIT - pop rbp -%endif - mov rcx, result_ptr - - punpckldq mm4, mm5 - punpckldq mm6, mm7 - - movq [rcx], mm4 - movq [rcx+8], mm6 - - STACK_FRAME_DESTROY_X4 - -;void int vp8_sad4x4x4d_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp8_sad4x4x4d_sse3) PRIVATE -sym(vp8_sad4x4x4d_sse3): - - STACK_FRAME_CREATE_X4 - - movd mm0, DWORD PTR [src_ptr] - movd mm1, DWORD PTR [r0_ptr] - - movd mm2, DWORD PTR [src_ptr+src_stride] - movd mm3, DWORD PTR [r0_ptr+ref_stride] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - movd mm4, DWORD PTR [r1_ptr] - movd mm5, DWORD PTR [r2_ptr] - - movd mm6, DWORD PTR [r3_ptr] - movd mm2, DWORD PTR [r1_ptr+ref_stride] - - movd mm3, DWORD PTR [r2_ptr+ref_stride] - movd mm7, DWORD PTR [r3_ptr+ref_stride] - - psadbw mm1, mm0 - - punpcklbw mm4, mm2 - punpcklbw mm5, mm3 - - punpcklbw mm6, mm7 - psadbw mm4, mm0 - - psadbw mm5, mm0 - psadbw mm6, mm0 - - - - lea src_ptr, [src_ptr+src_stride*2] - lea r0_ptr, [r0_ptr+ref_stride*2] - - lea r1_ptr, [r1_ptr+ref_stride*2] - lea r2_ptr, [r2_ptr+ref_stride*2] - - lea r3_ptr, [r3_ptr+ref_stride*2] - - movd mm0, DWORD PTR [src_ptr] - movd mm2, DWORD PTR [r0_ptr] - - movd mm3, DWORD PTR [src_ptr+src_stride] - movd mm7, DWORD PTR [r0_ptr+ref_stride] - - punpcklbw mm0, mm3 - punpcklbw mm2, mm7 - - movd mm3, DWORD PTR [r1_ptr] - movd mm7, DWORD PTR [r2_ptr] - - psadbw mm2, mm0 -%if ABI_IS_32BIT - mov rax, rbp - - pop rbp -%define ref_stride rax -%endif - mov rsi, result_ptr - - paddw mm1, mm2 - movd [rsi], mm1 - - movd mm2, DWORD PTR [r1_ptr+ref_stride] - movd mm1, DWORD PTR [r2_ptr+ref_stride] - 
- punpcklbw mm3, mm2 - punpcklbw mm7, mm1 - - psadbw mm3, mm0 - psadbw mm7, mm0 - - movd mm2, DWORD PTR [r3_ptr] - movd mm1, DWORD PTR [r3_ptr+ref_stride] - - paddw mm3, mm4 - paddw mm7, mm5 - - movd [rsi+4], mm3 - punpcklbw mm2, mm1 - - movd [rsi+8], mm7 - psadbw mm2, mm0 - - paddw mm2, mm6 - movd [rsi+12], mm2 - - - STACK_FRAME_DESTROY_X4 - diff --git a/media/libvpx/vp8/common/x86/sad_sse4.asm b/media/libvpx/vp8/common/x86/sad_sse4.asm deleted file mode 100644 index f7fccd77c58..00000000000 --- a/media/libvpx/vp8/common/x86/sad_sse4.asm +++ /dev/null @@ -1,353 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -%macro PROCESS_16X2X8 1 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - movq xmm1, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - movq xmm2, MMWORD PTR [rdi+16] - punpcklqdq xmm1, xmm3 - punpcklqdq xmm3, xmm2 - - movdqa xmm2, xmm1 - mpsadbw xmm1, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - - psrldq xmm0, 8 - - movdqa xmm4, xmm3 - mpsadbw xmm3, xmm0, 0x0 - mpsadbw xmm4, xmm0, 0x5 - - paddw xmm1, xmm2 - paddw xmm1, xmm3 - paddw xmm1, xmm4 -%else - movdqa xmm0, XMMWORD PTR [rsi] - movq xmm5, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - movq xmm2, MMWORD PTR [rdi+16] - punpcklqdq xmm5, xmm3 - punpcklqdq xmm3, xmm2 - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - - psrldq xmm0, 8 - - movdqa xmm4, xmm3 - mpsadbw xmm3, xmm0, 0x0 - mpsadbw xmm4, xmm0, 0x5 - - paddw xmm5, xmm2 - paddw xmm5, xmm3 - paddw xmm5, xmm4 - - paddw xmm1, xmm5 -%endif - movdqa xmm0, XMMWORD PTR [rsi + rax] - movq xmm5, MMWORD PTR [rdi+ rdx] - movq xmm3, MMWORD PTR 
[rdi+ rdx+8] - movq xmm2, MMWORD PTR [rdi+ rdx+16] - punpcklqdq xmm5, xmm3 - punpcklqdq xmm3, xmm2 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - - psrldq xmm0, 8 - movdqa xmm4, xmm3 - mpsadbw xmm3, xmm0, 0x0 - mpsadbw xmm4, xmm0, 0x5 - - paddw xmm5, xmm2 - paddw xmm5, xmm3 - paddw xmm5, xmm4 - - paddw xmm1, xmm5 -%endmacro - -%macro PROCESS_8X2X8 1 -%if %1 - movq xmm0, MMWORD PTR [rsi] - movq xmm1, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm1, xmm3 - - movdqa xmm2, xmm1 - mpsadbw xmm1, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - paddw xmm1, xmm2 -%else - movq xmm0, MMWORD PTR [rsi] - movq xmm5, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm5, xmm3 - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - paddw xmm5, xmm2 - - paddw xmm1, xmm5 -%endif - movq xmm0, MMWORD PTR [rsi + rax] - movq xmm5, MMWORD PTR [rdi+ rdx] - movq xmm3, MMWORD PTR [rdi+ rdx+8] - punpcklqdq xmm5, xmm3 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - paddw xmm5, xmm2 - - paddw xmm1, xmm5 -%endmacro - -%macro PROCESS_4X2X8 1 -%if %1 - movd xmm0, [rsi] - movq xmm1, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm1, xmm3 - - mpsadbw xmm1, xmm0, 0x0 -%else - movd xmm0, [rsi] - movq xmm5, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm5, xmm3 - - mpsadbw xmm5, xmm0, 0x0 - - paddw xmm1, xmm5 -%endif - movd xmm0, [rsi + rax] - movq xmm5, MMWORD PTR [rdi+ rdx] - movq xmm3, MMWORD PTR [rdi+ rdx+8] - punpcklqdq xmm5, xmm3 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - mpsadbw xmm5, xmm0, 0x0 - - paddw xmm1, xmm5 -%endmacro - - -;void vp8_sad16x16x8_sse4( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array); -global sym(vp8_sad16x16x8_sse4) PRIVATE -sym(vp8_sad16x16x8_sse4): - 
push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_16X2X8 1 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - - mov rdi, arg(4) ;Results - movdqa XMMWORD PTR [rdi], xmm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_sad16x8x8_sse4( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(vp8_sad16x8x8_sse4) PRIVATE -sym(vp8_sad16x8x8_sse4): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_16X2X8 1 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - - mov rdi, arg(4) ;Results - movdqa XMMWORD PTR [rdi], xmm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_sad8x8x8_sse4( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(vp8_sad8x8x8_sse4) PRIVATE -sym(vp8_sad8x8x8_sse4): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_8X2X8 1 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - - mov rdi, arg(4) ;Results - movdqa XMMWORD PTR [rdi], xmm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_sad8x16x8_sse4( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int 
ref_stride, -; unsigned short *sad_array -;); -global sym(vp8_sad8x16x8_sse4) PRIVATE -sym(vp8_sad8x16x8_sse4): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_8X2X8 1 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - mov rdi, arg(4) ;Results - movdqa XMMWORD PTR [rdi], xmm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_sad4x4x8_c( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(vp8_sad4x4x8_sse4) PRIVATE -sym(vp8_sad4x4x8_sse4): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_4X2X8 1 - PROCESS_4X2X8 0 - - mov rdi, arg(4) ;Results - movdqa XMMWORD PTR [rdi], xmm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - - - diff --git a/media/libvpx/vp8/common/x86/variance_impl_sse2.asm b/media/libvpx/vp8/common/x86/variance_impl_sse2.asm index 761433c11ea..26de5e86097 100644 --- a/media/libvpx/vp8/common/x86/variance_impl_sse2.asm +++ b/media/libvpx/vp8/common/x86/variance_impl_sse2.asm @@ -13,393 +13,6 @@ %define xmm_filter_shift 7 -;unsigned int vp8_get_mb_ss_sse2 -;( -; short *src_ptr -;) -global sym(vp8_get_mb_ss_sse2) PRIVATE -sym(vp8_get_mb_ss_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 1 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - - mov rax, arg(0) ;[src_ptr] - mov rcx, 8 - pxor xmm4, xmm4 - -.NEXTROW: - movdqa xmm0, [rax] - movdqa xmm1, [rax+16] - movdqa xmm2, [rax+32] - movdqa xmm3, [rax+48] - pmaddwd 
xmm0, xmm0 - pmaddwd xmm1, xmm1 - pmaddwd xmm2, xmm2 - pmaddwd xmm3, xmm3 - - paddd xmm0, xmm1 - paddd xmm2, xmm3 - paddd xmm4, xmm0 - paddd xmm4, xmm2 - - add rax, 0x40 - dec rcx - ja .NEXTROW - - movdqa xmm3,xmm4 - psrldq xmm4,8 - paddd xmm4,xmm3 - movdqa xmm3,xmm4 - psrldq xmm4,4 - paddd xmm4,xmm3 - movq rax,xmm4 - - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp8_get16x16var_sse2 -;( -; unsigned char * src_ptr, -; int source_stride, -; unsigned char * ref_ptr, -; int recon_stride, -; unsigned int * SSE, -; int * Sum -;) -global sym(vp8_get16x16var_sse2) PRIVATE -sym(vp8_get16x16var_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;[src_ptr] - mov rdi, arg(2) ;[ref_ptr] - - movsxd rax, DWORD PTR arg(1) ;[source_stride] - movsxd rdx, DWORD PTR arg(3) ;[recon_stride] - - ; Prefetch data - lea rcx, [rax+rax*2] - prefetcht0 [rsi] - prefetcht0 [rsi+rax] - prefetcht0 [rsi+rax*2] - prefetcht0 [rsi+rcx] - lea rbx, [rsi+rax*4] - prefetcht0 [rbx] - prefetcht0 [rbx+rax] - prefetcht0 [rbx+rax*2] - prefetcht0 [rbx+rcx] - - lea rcx, [rdx+rdx*2] - prefetcht0 [rdi] - prefetcht0 [rdi+rdx] - prefetcht0 [rdi+rdx*2] - prefetcht0 [rdi+rcx] - lea rbx, [rdi+rdx*4] - prefetcht0 [rbx] - prefetcht0 [rbx+rdx] - prefetcht0 [rbx+rdx*2] - prefetcht0 [rbx+rcx] - - pxor xmm0, xmm0 ; clear xmm0 for unpack - pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs - - pxor xmm6, xmm6 ; clear xmm6 for accumulating sse - mov rcx, 16 - -.var16loop: - movdqu xmm1, XMMWORD PTR [rsi] - movdqu xmm2, XMMWORD PTR [rdi] - - prefetcht0 [rsi+rax*8] - prefetcht0 [rdi+rdx*8] - - movdqa xmm3, xmm1 - movdqa xmm4, xmm2 - - - punpcklbw xmm1, xmm0 - punpckhbw xmm3, xmm0 - - punpcklbw xmm2, xmm0 - punpckhbw xmm4, xmm0 - - - psubw xmm1, xmm2 - psubw xmm3, xmm4 - - paddw xmm7, xmm1 - pmaddwd xmm1, xmm1 - - paddw xmm7, xmm3 - pmaddwd xmm3, xmm3 - - paddd xmm6, 
xmm1 - paddd xmm6, xmm3 - - add rsi, rax - add rdi, rdx - - sub rcx, 1 - jnz .var16loop - - - movdqa xmm1, xmm6 - pxor xmm6, xmm6 - - pxor xmm5, xmm5 - punpcklwd xmm6, xmm7 - - punpckhwd xmm5, xmm7 - psrad xmm5, 16 - - psrad xmm6, 16 - paddd xmm6, xmm5 - - movdqa xmm2, xmm1 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - movdqa xmm7, xmm6 - - paddd xmm1, xmm2 - punpckldq xmm6, xmm0 - - punpckhdq xmm7, xmm0 - paddd xmm6, xmm7 - - movdqa xmm2, xmm1 - movdqa xmm7, xmm6 - - psrldq xmm1, 8 - psrldq xmm6, 8 - - paddd xmm7, xmm6 - paddd xmm1, xmm2 - - mov rax, arg(5) ;[Sum] - mov rdi, arg(4) ;[SSE] - - movd DWORD PTR [rax], xmm7 - movd DWORD PTR [rdi], xmm1 - - - ; begin epilog - pop rdi - pop rsi - pop rbx - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - - - -;unsigned int vp8_get8x8var_sse2 -;( -; unsigned char * src_ptr, -; int source_stride, -; unsigned char * ref_ptr, -; int recon_stride, -; unsigned int * SSE, -; int * Sum -;) -global sym(vp8_get8x8var_sse2) PRIVATE -sym(vp8_get8x8var_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - mov rsi, arg(0) ;[src_ptr] - mov rdi, arg(2) ;[ref_ptr] - - movsxd rax, DWORD PTR arg(1) ;[source_stride] - movsxd rdx, DWORD PTR arg(3) ;[recon_stride] - - pxor xmm0, xmm0 ; clear xmm0 for unpack - pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs - - movq xmm1, QWORD PTR [rsi] - movq xmm2, QWORD PTR [rdi] - - punpcklbw xmm1, xmm0 - punpcklbw xmm2, xmm0 - - psubsw xmm1, xmm2 - paddw xmm7, xmm1 - - pmaddwd xmm1, xmm1 - - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - movq xmm2, QWORD PTR[rsi + rax * 2] - movq xmm3, QWORD PTR[rdi + rdx * 2] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - lea rsi, [rsi + 
rax * 2] - lea rdi, [rdi + rdx * 2] - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - movq xmm2, QWORD PTR[rsi + rax *2] - movq xmm3, QWORD PTR[rdi + rdx *2] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - lea rsi, [rsi + rax * 2] - lea rdi, [rdi + rdx * 2] - - - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - movq xmm2, QWORD PTR[rsi + rax *2] - movq xmm3, QWORD PTR[rdi + rdx *2] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - lea rsi, [rsi + rax * 2] - lea rdi, [rdi + rdx * 2] - - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - movdqa xmm6, xmm7 - punpcklwd xmm6, xmm0 - - punpckhwd xmm7, xmm0 - movdqa xmm2, xmm1 - - paddw xmm6, xmm7 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - movdqa xmm7, xmm6 - - paddd xmm1, xmm2 - punpckldq xmm6, xmm0 - - punpckhdq xmm7, xmm0 - paddw xmm6, xmm7 - - movdqa xmm2, xmm1 - movdqa xmm7, xmm6 - - psrldq xmm1, 8 - psrldq xmm6, 8 - - paddw xmm7, xmm6 - paddd xmm1, xmm2 - - mov rax, arg(5) ;[Sum] - mov rdi, arg(4) ;[SSE] - - movq rdx, xmm7 - movsx rcx, dx - - mov dword ptr [rax], ecx - movd DWORD PTR [rdi], xmm1 - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - ;void vp8_filter_block2d_bil_var_sse2 ;( ; unsigned char *ref_ptr, diff --git a/media/libvpx/vp8/common/x86/variance_ssse3.c b/media/libvpx/vp8/common/x86/variance_ssse3.c index 
73eb90df61f..2a0df640a90 100644 --- a/media/libvpx/vp8/common/x86/variance_ssse3.c +++ b/media/libvpx/vp8/common/x86/variance_ssse3.c @@ -8,19 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp8_rtcd.h" #include "vpx_config.h" #include "vp8/common/variance.h" #include "vpx_ports/mem.h" -extern unsigned int vp8_get16x16var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); extern void vp8_half_horiz_vert_variance16x_h_sse2 ( const unsigned char *ref_ptr, diff --git a/media/libvpx/vp8/common/x86/vp8_asm_stubs.c b/media/libvpx/vp8/common/x86/vp8_asm_stubs.c index b4092938161..fb0b57eb1c1 100644 --- a/media/libvpx/vp8/common/x86/vp8_asm_stubs.c +++ b/media/libvpx/vp8/common/x86/vp8_asm_stubs.c @@ -127,7 +127,7 @@ void vp8_sixtap_predict4x4_mmx int dst_pitch ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED(16, unsigned short, FData2[16*16]); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; HFilter = vp8_six_tap_mmx[xoffset]; vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter); @@ -148,7 +148,7 @@ void vp8_sixtap_predict16x16_mmx ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; @@ -180,7 +180,7 @@ void vp8_sixtap_predict8x8_mmx ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; @@ -206,7 +206,7 @@ void vp8_sixtap_predict8x4_mmx ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in 
filtering */ + DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; @@ -252,7 +252,7 @@ void vp8_sixtap_predict16x16_sse2 ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; @@ -292,7 +292,7 @@ void vp8_sixtap_predict8x8_sse2 int dst_pitch ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; if (xoffset) @@ -330,7 +330,7 @@ void vp8_sixtap_predict8x4_sse2 int dst_pitch ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; if (xoffset) @@ -432,7 +432,7 @@ void vp8_sixtap_predict16x16_ssse3 ) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24); + DECLARE_ALIGNED(16, unsigned char, FData2[24*24]); if (xoffset) { @@ -480,7 +480,7 @@ void vp8_sixtap_predict8x8_ssse3 int dst_pitch ) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256); + DECLARE_ALIGNED(16, unsigned char, FData2[256]); if (xoffset) { @@ -528,7 +528,7 @@ void vp8_sixtap_predict8x4_ssse3 int dst_pitch ) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256); + DECLARE_ALIGNED(16, unsigned char, FData2[256]); if (xoffset) { @@ -576,7 +576,7 @@ void vp8_sixtap_predict4x4_ssse3 int dst_pitch ) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9); + DECLARE_ALIGNED(16, unsigned char, FData2[4*9]); if (xoffset) { diff --git a/media/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm b/media/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm new file mode 100644 index 00000000000..97f25275df2 --- 
/dev/null +++ b/media/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm @@ -0,0 +1,353 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +%define mmx_filter_shift 7 + +;void vp8_filter_block2d_bil4x4_var_mmx +;( +; unsigned char *ref_ptr, +; int ref_pixels_per_line, +; unsigned char *src_ptr, +; int src_pixels_per_line, +; unsigned short *HFilter, +; unsigned short *VFilter, +; int *sum, +; unsigned int *sumsquared +;) +global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE +sym(vp8_filter_block2d_bil4x4_var_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 8 + GET_GOT rbx + push rsi + push rdi + sub rsp, 16 + ; end prolog + + + pxor mm6, mm6 ; + pxor mm7, mm7 ; + + mov rax, arg(4) ;HFilter ; + mov rdx, arg(5) ;VFilter ; + + mov rsi, arg(0) ;ref_ptr ; + mov rdi, arg(2) ;src_ptr ; + + mov rcx, 4 ; + pxor mm0, mm0 ; + + movd mm1, [rsi] ; + movd mm3, [rsi+1] ; + + punpcklbw mm1, mm0 ; + pmullw mm1, [rax] ; + + punpcklbw mm3, mm0 ; + pmullw mm3, [rax+8] ; + + paddw mm1, mm3 ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + + psraw mm1, mmx_filter_shift ; + movq mm5, mm1 + +%if ABI_IS_32BIT + add rsi, dword ptr arg(1) ;ref_pixels_per_line ; +%else + movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; + add rsi, r8 +%endif + +.filter_block2d_bil4x4_var_mmx_loop: + + movd mm1, [rsi] ; + movd mm3, [rsi+1] ; + + punpcklbw mm1, mm0 ; + pmullw mm1, [rax] ; + + punpcklbw mm3, mm0 ; + pmullw mm3, [rax+8] ; + + paddw mm1, mm3 ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + + psraw mm1, mmx_filter_shift ; + movq mm3, mm5 ; + + movq mm5, mm1 ; + pmullw mm3, [rdx] ; + + pmullw mm1, [rdx+8] ; + paddw mm1, 
mm3 ; + + + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + psraw mm1, mmx_filter_shift ; + + movd mm3, [rdi] ; + punpcklbw mm3, mm0 ; + + psubw mm1, mm3 ; + paddw mm6, mm1 ; + + pmaddwd mm1, mm1 ; + paddd mm7, mm1 ; + +%if ABI_IS_32BIT + add rsi, dword ptr arg(1) ;ref_pixels_per_line ; + add rdi, dword ptr arg(3) ;src_pixels_per_line ; +%else + movsxd r8, dword ptr arg(1) ;ref_pixels_per_line + movsxd r9, dword ptr arg(3) ;src_pixels_per_line + add rsi, r8 + add rdi, r9 +%endif + sub rcx, 1 ; + jnz .filter_block2d_bil4x4_var_mmx_loop ; + + + pxor mm3, mm3 ; + pxor mm2, mm2 ; + + punpcklwd mm2, mm6 ; + punpckhwd mm3, mm6 ; + + paddd mm2, mm3 ; + movq mm6, mm2 ; + + psrlq mm6, 32 ; + paddd mm2, mm6 ; + + psrad mm2, 16 ; + movq mm4, mm7 ; + + psrlq mm4, 32 ; + paddd mm4, mm7 ; + + mov rdi, arg(6) ;sum + mov rsi, arg(7) ;sumsquared + + movd dword ptr [rdi], mm2 ; + movd dword ptr [rsi], mm4 ; + + + + ; begin epilog + add rsp, 16 + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + + + +;void vp8_filter_block2d_bil_var_mmx +;( +; unsigned char *ref_ptr, +; int ref_pixels_per_line, +; unsigned char *src_ptr, +; int src_pixels_per_line, +; unsigned int Height, +; unsigned short *HFilter, +; unsigned short *VFilter, +; int *sum, +; unsigned int *sumsquared +;) +global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE +sym(vp8_filter_block2d_bil_var_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 9 + GET_GOT rbx + push rsi + push rdi + sub rsp, 16 + ; end prolog + + pxor mm6, mm6 ; + pxor mm7, mm7 ; + mov rax, arg(5) ;HFilter ; + + mov rdx, arg(6) ;VFilter ; + mov rsi, arg(0) ;ref_ptr ; + + mov rdi, arg(2) ;src_ptr ; + movsxd rcx, dword ptr arg(4) ;Height ; + + pxor mm0, mm0 ; + movq mm1, [rsi] ; + + movq mm3, [rsi+1] ; + movq mm2, mm1 ; + + movq mm4, mm3 ; + punpcklbw mm1, mm0 ; + + punpckhbw mm2, mm0 ; + pmullw mm1, [rax] ; + + pmullw mm2, [rax] ; + punpcklbw mm3, mm0 ; + + punpckhbw mm4, mm0 ; + pmullw mm3, [rax+8] ; + + pmullw mm4, [rax+8] ; + paddw mm1, mm3 ; + + 
paddw mm2, mm4 ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + + psraw mm1, mmx_filter_shift ; + paddw mm2, [GLOBAL(mmx_bi_rd)] ; + + psraw mm2, mmx_filter_shift ; + movq mm5, mm1 + + packuswb mm5, mm2 ; +%if ABI_IS_32BIT + add rsi, dword ptr arg(1) ;ref_pixels_per_line +%else + movsxd r8, dword ptr arg(1) ;ref_pixels_per_line + add rsi, r8 +%endif + +.filter_block2d_bil_var_mmx_loop: + + movq mm1, [rsi] ; + movq mm3, [rsi+1] ; + + movq mm2, mm1 ; + movq mm4, mm3 ; + + punpcklbw mm1, mm0 ; + punpckhbw mm2, mm0 ; + + pmullw mm1, [rax] ; + pmullw mm2, [rax] ; + + punpcklbw mm3, mm0 ; + punpckhbw mm4, mm0 ; + + pmullw mm3, [rax+8] ; + pmullw mm4, [rax+8] ; + + paddw mm1, mm3 ; + paddw mm2, mm4 ; + + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + psraw mm1, mmx_filter_shift ; + + paddw mm2, [GLOBAL(mmx_bi_rd)] ; + psraw mm2, mmx_filter_shift ; + + movq mm3, mm5 ; + movq mm4, mm5 ; + + punpcklbw mm3, mm0 ; + punpckhbw mm4, mm0 ; + + movq mm5, mm1 ; + packuswb mm5, mm2 ; + + pmullw mm3, [rdx] ; + pmullw mm4, [rdx] ; + + pmullw mm1, [rdx+8] ; + pmullw mm2, [rdx+8] ; + + paddw mm1, mm3 ; + paddw mm2, mm4 ; + + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + paddw mm2, [GLOBAL(mmx_bi_rd)] ; + + psraw mm1, mmx_filter_shift ; + psraw mm2, mmx_filter_shift ; + + movq mm3, [rdi] ; + movq mm4, mm3 ; + + punpcklbw mm3, mm0 ; + punpckhbw mm4, mm0 ; + + psubw mm1, mm3 ; + psubw mm2, mm4 ; + + paddw mm6, mm1 ; + pmaddwd mm1, mm1 ; + + paddw mm6, mm2 ; + pmaddwd mm2, mm2 ; + + paddd mm7, mm1 ; + paddd mm7, mm2 ; + +%if ABI_IS_32BIT + add rsi, dword ptr arg(1) ;ref_pixels_per_line ; + add rdi, dword ptr arg(3) ;src_pixels_per_line ; +%else + movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; + movsxd r9, dword ptr arg(3) ;src_pixels_per_line ; + add rsi, r8 + add rdi, r9 +%endif + sub rcx, 1 ; + jnz .filter_block2d_bil_var_mmx_loop ; + + + pxor mm3, mm3 ; + pxor mm2, mm2 ; + + punpcklwd mm2, mm6 ; + punpckhwd mm3, mm6 ; + + paddd mm2, mm3 ; + movq mm6, mm2 ; + + psrlq mm6, 32 ; + paddd mm2, mm6 ; + + psrad mm2, 16 ; + 
movq mm4, mm7 ; + + psrlq mm4, 32 ; + paddd mm4, mm7 ; + + mov rdi, arg(7) ;sum + mov rsi, arg(8) ;sumsquared + + movd dword ptr [rdi], mm2 ; + movd dword ptr [rsi], mm4 ; + + ; begin epilog + add rsp, 16 + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +SECTION_RODATA +;short mmx_bi_rd[4] = { 64, 64, 64, 64}; +align 16 +mmx_bi_rd: + times 4 dw 64 diff --git a/media/libvpx/vp8/common/x86/variance_mmx.c b/media/libvpx/vp8/common/x86/vp8_variance_mmx.c similarity index 59% rename from media/libvpx/vp8/common/x86/variance_mmx.c rename to media/libvpx/vp8/common/x86/vp8_variance_mmx.c index 02e02420f46..e594b1e65ee 100644 --- a/media/libvpx/vp8/common/x86/variance_mmx.c +++ b/media/libvpx/vp8/common/x86/vp8_variance_mmx.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp8_rtcd.h" #include "vpx_config.h" #include "vp8/common/variance.h" #include "vpx_ports/mem.h" @@ -34,25 +35,6 @@ extern void filter_block1d_v6_mmx short *filter ); -extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr); -extern unsigned int vp8_get8x8var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -extern unsigned int vp8_get4x4var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); extern void vp8_filter_block2d_bil4x4_var_mmx ( const unsigned char *ref_ptr, @@ -77,127 +59,6 @@ extern void vp8_filter_block2d_bil_var_mmx unsigned int *sumsquared ); - -unsigned int vp8_variance4x4_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); - -} - -unsigned int vp8_variance8x8_mmx( - 
const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; - *sse = var; - - return (var - (((unsigned int)avg * avg) >> 6)); - -} - -unsigned int vp8_mse16x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3; - - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; - vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - *sse = var; - return var; -} - - -unsigned int vp8_variance16x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3, avg; - - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; - vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - avg = sum0 + sum1 + sum2 + sum3; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 8)); -} - -unsigned int vp8_variance16x8_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int 
recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); - -} - - -unsigned int vp8_variance8x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ; - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - - return (var - (((unsigned int)avg * avg) >> 7)); - -} - - unsigned int vp8_sub_pixel_variance4x4_mmx ( const unsigned char *src_ptr, @@ -286,20 +147,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx } -unsigned int vp8_sub_pixel_mse16x16_mmx( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) -{ - vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); - return *sse; -} - unsigned int vp8_sub_pixel_variance16x8_mmx ( const unsigned char *src_ptr, diff --git a/media/libvpx/vp8/common/x86/variance_sse2.c b/media/libvpx/vp8/common/x86/vp8_variance_sse2.c similarity index 75% rename from media/libvpx/vp8/common/x86/variance_sse2.c rename to media/libvpx/vp8/common/x86/vp8_variance_sse2.c index 1fe127bf2c6..1c15ed88097 100644 --- a/media/libvpx/vp8/common/x86/variance_sse2.c +++ b/media/libvpx/vp8/common/x86/vp8_variance_sse2.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp8_rtcd.h" #include "vpx_config.h" #include "vp8/common/variance.h" #include "vpx_ports/mem.h" @@ -30,38 +31,6 @@ extern void vp8_filter_block2d_bil4x4_var_mmx unsigned int *sumsquared ); -extern unsigned int vp8_get4x4var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); - -unsigned int vp8_get_mb_ss_sse2 -( - const short *src_ptr -); -unsigned int vp8_get16x16var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -unsigned int vp8_get8x8var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); void vp8_filter_block2d_bil_var_sse2 ( const unsigned char *ref_ptr, @@ -135,115 +104,6 @@ void vp8_half_vert_variance16x_h_sse2 unsigned int *sumsquared ); -unsigned int vp8_variance4x4_wmt( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); - -} - -unsigned int vp8_variance8x8_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 6)); - -} - - -unsigned int vp8_variance16x16_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0; - int sum0; - - - vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - *sse = sse0; - return (sse0 - 
(((unsigned int)sum0 * sum0) >> 8)); -} -unsigned int vp8_mse16x16_wmt( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - - unsigned int sse0; - int sum0; - vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - *sse = sse0; - return sse0; - -} - - -unsigned int vp8_variance16x8_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); - -} - -unsigned int vp8_variance8x16_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ; - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); - -} - unsigned int vp8_sub_pixel_variance4x4_wmt ( const unsigned char *src_ptr, @@ -378,20 +238,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); } -unsigned int vp8_sub_pixel_mse16x16_wmt( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) -{ - vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); - return *sse; -} - unsigned int 
vp8_sub_pixel_variance16x8_wmt ( const unsigned char *src_ptr, diff --git a/media/libvpx/vp8/decoder/decodeframe.c b/media/libvpx/vp8/decoder/decodeframe.c index ab03c909d6d..fb300fe8827 100644 --- a/media/libvpx/vp8/decoder/decodeframe.c +++ b/media/libvpx/vp8/decoder/decodeframe.c @@ -142,7 +142,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, * Better to use the predictor as reconstruction. */ pbi->frame_corrupt_residual = 1; - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); + memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); vp8_conceal_corrupt_mb(xd); @@ -151,7 +151,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, /* force idct to be skipped for B_PRED and use the * prediction only for reconstruction * */ - vpx_memset(xd->eobs, 0, 25); + memset(xd->eobs, 0, 25); } } #endif @@ -184,7 +184,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, /* clear out residual eob info */ if(xd->mode_info_context->mbmi.mb_skip_coeff) - vpx_memset(xd->eobs, 0, 25); + memset(xd->eobs, 0, 25); intra_prediction_down_copy(xd, xd->recon_above[0] + 16); @@ -214,7 +214,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, (b->qcoeff[0] * DQC[0], dst, dst_stride, dst, dst_stride); - vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); + memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } } } @@ -251,14 +251,14 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff); - vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); + memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); } else { b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff); - vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); + memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } /* override the dc dequant constant in order to preserve the @@ -323,7 +323,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) for (i = 0; i < (int)Border; i++) { - 
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + memcpy(dest_ptr1, src_ptr1, plane_stride); dest_ptr1 += plane_stride; } @@ -338,7 +338,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) for (i = 0; i < (int)(Border); i++) { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + memcpy(dest_ptr1, src_ptr1, plane_stride); dest_ptr1 += plane_stride; } @@ -351,7 +351,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) for (i = 0; i < (int)(Border); i++) { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + memcpy(dest_ptr1, src_ptr1, plane_stride); dest_ptr1 += plane_stride; } } @@ -379,7 +379,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) for (i = 0; i < (int)Border; i++) { - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + memcpy(dest_ptr2, src_ptr2, plane_stride); dest_ptr2 += plane_stride; } @@ -397,7 +397,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) for (i = 0; i < (int)(Border); i++) { - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + memcpy(dest_ptr2, src_ptr2, plane_stride); dest_ptr2 += plane_stride; } @@ -411,7 +411,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) for (i = 0; i < (int)(Border); i++) { - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + memcpy(dest_ptr2, src_ptr2, plane_stride); dest_ptr2 += plane_stride; } } @@ -446,8 +446,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, for (i = 0; i < plane_height; i++) { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); + memset(dest_ptr1, src_ptr1[0], Border); + memset(dest_ptr2, src_ptr2[0], Border); src_ptr1 += plane_stride; src_ptr2 += plane_stride; dest_ptr1 += plane_stride; @@ -470,8 +470,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, for (i = 0; i < plane_height; i++) { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); + memset(dest_ptr1, src_ptr1[0], Border); + 
memset(dest_ptr2, src_ptr2[0], Border); src_ptr1 += plane_stride; src_ptr2 += plane_stride; dest_ptr1 += plane_stride; @@ -490,8 +490,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, for (i = 0; i < plane_height; i++) { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); + memset(dest_ptr1, src_ptr1[0], Border); + memset(dest_ptr2, src_ptr2[0], Border); src_ptr1 += plane_stride; src_ptr2 += plane_stride; dest_ptr1 += plane_stride; @@ -568,7 +568,7 @@ static void decode_mb_rows(VP8D_COMP *pbi) /* reset contexts */ xd->above_context = pc->above_context; - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); xd->left_available = 0; @@ -918,19 +918,19 @@ static void init_frame(VP8D_COMP *pbi) if (pc->frame_type == KEY_FRAME) { /* Various keyframe initializations */ - vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); + memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); vp8_init_mbmode_probs(pc); vp8_default_coef_probs(pc); /* reset the segment feature data to 0 with delta coding (Default state). */ - vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); + memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); xd->mb_segement_abs_delta = SEGMENT_DELTADATA; /* reset the mode ref deltasa for loop filter */ - vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); - vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); + memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); + memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); /* All buffers are implicitly updated on key frames. 
*/ pc->refresh_golden_frame = 1; @@ -1069,12 +1069,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) pc->vert_scale = clear[6] >> 6; } data += 7; - clear += 7; } else { - vpx_memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); - vpx_memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); + memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); + memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); } } if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME)) @@ -1106,7 +1105,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) { xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc); - vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); + memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); /* For each segmentation feature (Quant and loop filter level) */ for (i = 0; i < MB_LVL_MAX; i++) @@ -1130,7 +1129,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) if (xd->update_mb_segmentation_map) { /* Which macro block level features are enabled */ - vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); + memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); /* Read the probs used to decode the segment id for each macro block. 
*/ for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) @@ -1279,7 +1278,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) #endif if (pc->refresh_entropy_probs == 0) { - vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc)); + memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc)); } pc->refresh_last_frame = pc->frame_type == KEY_FRAME || vp8_read_bit(bc); @@ -1328,7 +1327,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) } /* clear out the coeff buffer */ - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); + memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); vp8_decode_mode_mvs(pbi); @@ -1342,7 +1341,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) } #endif - vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); + memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); pbi->frame_corrupt_residual = 0; #if CONFIG_MULTITHREAD @@ -1381,7 +1380,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) if (pc->refresh_entropy_probs == 0) { - vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); + memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); pbi->independent_partitions = prev_independent_partitions; } diff --git a/media/libvpx/vp8/decoder/detokenize.c b/media/libvpx/vp8/decoder/detokenize.c index 452ff6cba3a..fcc7533c50f 100644 --- a/media/libvpx/vp8/decoder/detokenize.c +++ b/media/libvpx/vp8/decoder/detokenize.c @@ -20,8 +20,8 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x) ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context); ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context); - vpx_memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); - vpx_memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); + memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); + memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); /* Clear entropy contexts for Y2 blocks */ if (!x->mode_info_context->mbmi.is_4x4) diff --git a/media/libvpx/vp8/decoder/error_concealment.c b/media/libvpx/vp8/decoder/error_concealment.c index 4b304c83c78..bb6d443c475 100644 --- a/media/libvpx/vp8/decoder/error_concealment.c +++ 
b/media/libvpx/vp8/decoder/error_concealment.c @@ -350,7 +350,7 @@ static void estimate_missing_mvs(MB_OVERLAP *overlaps, unsigned int first_corrupt) { int mb_row, mb_col; - vpx_memset(overlaps, 0, sizeof(MB_OVERLAP) * mb_rows * mb_cols); + memset(overlaps, 0, sizeof(MB_OVERLAP) * mb_rows * mb_cols); /* First calculate the overlaps for all blocks */ for (mb_row = 0; mb_row < mb_rows; ++mb_row) { diff --git a/media/libvpx/vp8/decoder/onyxd_if.c b/media/libvpx/vp8/decoder/onyxd_if.c index 3ec532783a3..9015fcbb496 100644 --- a/media/libvpx/vp8/decoder/onyxd_if.c +++ b/media/libvpx/vp8/decoder/onyxd_if.c @@ -58,7 +58,7 @@ static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf) if (!pbi) return NULL; - vpx_memset(pbi, 0, sizeof(VP8D_COMP)); + memset(pbi, 0, sizeof(VP8D_COMP)); if (setjmp(pbi->common.error.jmp)) { @@ -259,7 +259,7 @@ static int swap_frame_buffers (VP8_COMMON *cm) return err; } -int check_fragments_for_errors(VP8D_COMP *pbi) +static int check_fragments_for_errors(VP8D_COMP *pbi) { if (!pbi->ec_active && pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0) diff --git a/media/libvpx/vp8/decoder/threading.c b/media/libvpx/vp8/decoder/threading.c index dfc75703c6c..6801532f118 100644 --- a/media/libvpx/vp8/decoder/threading.c +++ b/media/libvpx/vp8/decoder/threading.c @@ -60,12 +60,12 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D mbd->segmentation_enabled = xd->segmentation_enabled; mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); + memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/ - vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas)); + memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas)); /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/ - 
vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas)); + memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas)); /*unsigned char mode_ref_lf_delta_enabled; unsigned char mode_ref_lf_delta_update;*/ mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; @@ -73,10 +73,10 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D mbd->current_bc = &pbi->mbc[0]; - vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); - vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); - vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); - vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); + memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); + memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); + memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); + memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); mbd->fullpixel_mask = 0xffffffff; @@ -137,7 +137,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, * Better to use the predictor as reconstruction. 
*/ pbi->frame_corrupt_residual = 1; - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); + memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); vp8_conceal_corrupt_mb(xd); @@ -146,7 +146,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, /* force idct to be skipped for B_PRED and use the * prediction only for reconstruction * */ - vpx_memset(xd->eobs, 0, 25); + memset(xd->eobs, 0, 25); } } #endif @@ -179,7 +179,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, /* clear out residual eob info */ if(xd->mode_info_context->mbmi.mb_skip_coeff) - vpx_memset(xd->eobs, 0, 25); + memset(xd->eobs, 0, 25); intra_prediction_down_copy(xd, xd->recon_above[0] + 16); @@ -229,7 +229,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, { vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], dst, dst_stride, dst, dst_stride); - vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); + memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } } } @@ -266,14 +266,14 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff); - vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); + memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); } else { b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff); - vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); + memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } /* override the dc dequant constant in order to preserve the @@ -360,7 +360,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) /* reset contexts */ xd->above_context = pc->above_context; - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); xd->left_available = 0; @@ -499,9 +499,9 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) if( mb_row != pc->mb_rows-1 ) { /* Save decoded MB last row data for next-row decoding */ - 
vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16); - vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8); - vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8); + memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16); + memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8); + memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8); } /* save left_col for next MB decoding */ @@ -876,23 +876,23 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) if (filter_level) { /* Set above_row buffer to 127 for decoding first MB row */ - vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5); - vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); - vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); + memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5); + memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); + memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); for (j=1; jmb_rows; j++) { - vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); - vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); - vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); + memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); + memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); + memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); } /* Set left_col to 129 initially */ for (j=0; jmb_rows; 
j++) { - vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16); - vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8); - vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8); + memset(pbi->mt_yleft_col[j], (unsigned char)129, 16); + memset(pbi->mt_uleft_col[j], (unsigned char)129, 8); + memset(pbi->mt_vleft_col[j], (unsigned char)129, 8); } /* Initialize the loop filter for this frame. */ diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm deleted file mode 100644 index 000805d4fed..00000000000 --- a/media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm +++ /dev/null @@ -1,138 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_mse16x16_armv6| - - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -; -;note: Based on vp8_variance16x16_armv6. In this function, sum is never used. -; So, we can remove this part of calculation. 
- -|vp8_mse16x16_armv6| PROC - - push {r4-r9, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r12, #16 ; set loop counter to 16 (=block height) - mov r4, #0 ; initialize sse = 0 - -loop - ; 1st 4 pixels - ldr r5, [r0, #0x0] ; load 4 src pixels - ldr r6, [r2, #0x0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r8, r5, r6 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - ldr r5, [r0, #0x4] ; load 4 src pixels - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r6, [r2, #0x4] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - ldr r5, [r0, #0x8] ; load 4 src pixels - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r6, [r2, #0x8] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - sel r7, 
r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - ldr r5, [r0, #0xc] ; load 4 src pixels - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r6, [r2, #0xc] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - subs r12, r12, #1 ; next row - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - bne loop - - ; return stuff - ldr r1, [sp, #28] ; get address of sse - mov r0, r4 ; return sse - str r4, [r1] ; store sse - - pop {r4-r9, pc} - - ENDP - - END diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c b/media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c deleted file mode 100644 index f806809df5b..00000000000 --- a/media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2014 
The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -unsigned int vp8_mse16x16_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - int64x1_t d0s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - int32x4_t q7s32, q8s32, q9s32, q10s32; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int64x2_t q1s64; - - q7s32 = vdupq_n_s32(0); - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 8; i++) { // mse16x16_neon_loop - q0u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q1u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q2u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - - q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); - q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); - q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); - q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q7s32 = vmlal_s16(q7s32, d22s16, d22s16); - q8s32 = vmlal_s16(q8s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q7s32 = vmlal_s16(q7s32, d26s16, d26s16); - q8s32 
= vmlal_s16(q8s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q7s32 = vaddq_s32(q7s32, q8s32); - q9s32 = vaddq_s32(q9s32, q10s32); - q10s32 = vaddq_s32(q7s32, q9s32); - - q1s64 = vpaddlq_s32(q10s32); - d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0); - return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); -} - -unsigned int vp8_get4x4sse_cs_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride) { - int16x4_t d22s16, d24s16, d26s16, d28s16; - int64x1_t d0s64; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - int32x4_t q7s32, q8s32, q9s32, q10s32; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int64x2_t q1s64; - - d0u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d4u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d1u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d5u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d6u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d3u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d7u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - - q11u16 = vsubl_u8(d0u8, d4u8); - q12u16 = vsubl_u8(d1u8, d5u8); - q13u16 = vsubl_u8(d2u8, d6u8); - q14u16 = vsubl_u8(d3u8, d7u8); - - d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16)); - d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16)); - d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16)); - d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16)); - - q7s32 = vmull_s16(d22s16, d22s16); - q8s32 = vmull_s16(d24s16, d24s16); - q9s32 = vmull_s16(d26s16, d26s16); - q10s32 = vmull_s16(d28s16, d28s16); - - q7s32 = vaddq_s32(q7s32, q8s32); - q9s32 = vaddq_s32(q9s32, q10s32); - q9s32 = vaddq_s32(q7s32, 
q9s32); - - q1s64 = vpaddlq_s32(q9s32); - d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); -} diff --git a/media/libvpx/vp8/encoder/bitstream.c b/media/libvpx/vp8/encoder/bitstream.c index f9096f9f01f..ea279b32181 100644 --- a/media/libvpx/vp8/encoder/bitstream.c +++ b/media/libvpx/vp8/encoder/bitstream.c @@ -1543,7 +1543,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest if (pc->refresh_entropy_probs == 0) { /* save a copy for later refresh */ - vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc)); + memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc)); } vp8_update_coef_probs(cpi); @@ -1620,7 +1620,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest /* concatenate partition buffers */ for(i = 0; i < num_part; i++) { - vpx_memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]); + memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]); cpi->partition_d[i+1] = dp; dp += cpi->partition_sz[i+1]; } diff --git a/media/libvpx/vp8/encoder/dct.c b/media/libvpx/vp8/encoder/dct.c index 091554a5d50..0c7198d5d3a 100644 --- a/media/libvpx/vp8/encoder/dct.c +++ b/media/libvpx/vp8/encoder/dct.c @@ -11,6 +11,8 @@ #include +#include "./vp8_rtcd.h" + void vp8_short_fdct4x4_c(short *input, short *output, int pitch) { int i; diff --git a/media/libvpx/vp8/encoder/denoising.c b/media/libvpx/vp8/encoder/denoising.c index b9fbf061ee9..d197f8f8166 100644 --- a/media/libvpx/vp8/encoder/denoising.c +++ b/media/libvpx/vp8/encoder/denoising.c @@ -415,8 +415,8 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height, vp8_denoiser_free(denoiser); return 1; } - vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0, - denoiser->yv12_running_avg[i].frame_size); + memset(denoiser->yv12_running_avg[i].buffer_alloc, 0, + denoiser->yv12_running_avg[i].frame_size); } denoiser->yv12_mc_running_avg.flags = 0; 
@@ -428,19 +428,19 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height, return 1; } - vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, - denoiser->yv12_mc_running_avg.frame_size); + memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, + denoiser->yv12_mc_running_avg.frame_size); if (vp8_yv12_alloc_frame_buffer(&denoiser->yv12_last_source, width, height, VP8BORDERINPIXELS) < 0) { vp8_denoiser_free(denoiser); return 1; } - vpx_memset(denoiser->yv12_last_source.buffer_alloc, 0, - denoiser->yv12_last_source.frame_size); + memset(denoiser->yv12_last_source.buffer_alloc, 0, + denoiser->yv12_last_source.frame_size); denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1); - vpx_memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols)); + memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols)); vp8_denoiser_set_parameters(denoiser, mode); denoiser->nmse_source_diff = 0; denoiser->nmse_source_diff_count = 0; diff --git a/media/libvpx/vp8/encoder/encodeframe.c b/media/libvpx/vp8/encoder/encodeframe.c index 62d5d2d4eb6..d381d8ddf45 100644 --- a/media/libvpx/vp8/encoder/encodeframe.c +++ b/media/libvpx/vp8/encoder/encodeframe.c @@ -11,6 +11,7 @@ #include "vpx_config.h" #include "vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "encodemb.h" #include "encodemv.h" #include "vp8/common/common.h" @@ -90,7 +91,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) * lambda using a non-linear combination (e.g., the smallest, or second * smallest, etc.). 
*/ - act = vp8_variance16x16(x->src.y_buffer, + act = vpx_variance16x16(x->src.y_buffer, x->src.y_stride, VP8_VAR_OFFS, 0, &sse); act = act<<4; @@ -155,8 +156,8 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) cpi->common.MBs)); /* Copy map to sort list */ - vpx_memcpy( sortlist, cpi->mb_activity_map, - sizeof(unsigned int) * cpi->common.MBs ); + memcpy( sortlist, cpi->mb_activity_map, + sizeof(unsigned int) * cpi->common.MBs ); /* Ripple each value down to its correct position */ @@ -665,8 +666,7 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) x->mvc = cm->fc.mvc; - vpx_memset(cm->above_context, 0, - sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); + memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); /* Special case treatment when GF and ARF are not sensible options * for reference @@ -744,7 +744,7 @@ void vp8_encode_frame(VP8_COMP *cpi) const int num_part = (1 << cm->multi_token_partition); #endif - vpx_memset(segment_counts, 0, sizeof(segment_counts)); + memset(segment_counts, 0, sizeof(segment_counts)); totalrate = 0; if (cpi->compressor_speed == 2) @@ -974,7 +974,7 @@ void vp8_encode_frame(VP8_COMP *cpi) int i; /* Set to defaults */ - vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs)); + memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs)); tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3]; diff --git a/media/libvpx/vp8/encoder/encodeintra.c b/media/libvpx/vp8/encoder/encodeintra.c index cfa4cb927f6..e2de5eecbc4 100644 --- a/media/libvpx/vp8/encoder/encodeintra.c +++ b/media/libvpx/vp8/encoder/encodeintra.c @@ -11,6 +11,7 @@ #include "vpx_config.h" #include "vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "quantize.h" #include "vp8/common/reconintra4x4.h" #include "encodemb.h" @@ -44,7 +45,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred) } } - intra_pred_var = vp8_get_mb_ss(x->src_diff); + 
intra_pred_var = vpx_get_mb_ss(x->src_diff); return intra_pred_var; } diff --git a/media/libvpx/vp8/encoder/encodemb.c b/media/libvpx/vp8/encoder/encodemb.c index eb0619d9597..dfd0a237a5f 100644 --- a/media/libvpx/vp8/encoder/encodemb.c +++ b/media/libvpx/vp8/encoder/encodemb.c @@ -506,8 +506,8 @@ static void optimize_mb(MACROBLOCK *x) ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; @@ -555,8 +555,8 @@ void vp8_optimize_mby(MACROBLOCK *x) if (!x->e_mbd.left_context) return; - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; @@ -595,8 +595,8 @@ void vp8_optimize_mbuv(MACROBLOCK *x) if (!x->e_mbd.left_context) return; - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; diff --git a/media/libvpx/vp8/encoder/ethreading.c b/media/libvpx/vp8/encoder/ethreading.c index a6b30a6bcaf..4e234ccd58b 100644 --- a/media/libvpx/vp8/encoder/ethreading.c +++ b/media/libvpx/vp8/encoder/ethreading.c @@ -19,8 +19,6 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int 
ok_to_skip); -extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); - static THREAD_FUNCTION thread_loopfilter(void *p_data) { VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1); @@ -416,14 +414,13 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) zd->subpixel_predict16x16 = xd->subpixel_predict16x16; zd->segmentation_enabled = xd->segmentation_enabled; zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, - sizeof(xd->segment_feature_data)); + memcpy(zd->segment_feature_data, xd->segment_feature_data, + sizeof(xd->segment_feature_data)); - vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, - sizeof(xd->dequant_y1_dc)); - vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); - vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); - vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); + memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); + memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); + memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); + memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); #if 1 /*TODO: Remove dequant from BLOCKD. 
This is a temporary solution until @@ -438,15 +435,14 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) #endif - vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes)); - vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult, - sizeof(x->rd_thresh_mult)); + memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes)); + memcpy(z->rd_thresh_mult, x->rd_thresh_mult, sizeof(x->rd_thresh_mult)); z->zbin_over_quant = x->zbin_over_quant; z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; z->zbin_mode_boost = x->zbin_mode_boost; - vpx_memset(z->error_bins, 0, sizeof(z->error_bins)); + memset(z->error_bins, 0, sizeof(z->error_bins)); } } @@ -472,7 +468,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; mb->gf_active_ptr = x->gf_active_ptr; - vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts)); + memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts)); mbr_ei[i].totalrate = 0; mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1); @@ -547,7 +543,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) vpx_malloc(sizeof(sem_t) * th_count)); CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); - vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); + memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); CHECK_MEM_ERROR(cpi->en_thread_data, vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count)); diff --git a/media/libvpx/vp8/encoder/firstpass.c b/media/libvpx/vp8/encoder/firstpass.c index 85767ef0d2a..3deb4abb337 100644 --- a/media/libvpx/vp8/encoder/firstpass.c +++ b/media/libvpx/vp8/encoder/firstpass.c @@ -12,6 +12,7 @@ #include #include +#include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "block.h" #include "onyx_int.h" @@ -34,8 +35,6 @@ /* #define OUTPUT_FPF 1 */ extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi); -extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, 
int_mv *mv); -extern void vp8_alloc_compressor_data(VP8_COMP *cpi); #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] extern int vp8_kf_boost_qadjustment[QINDEX_RANGE]; @@ -424,14 +423,14 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, /* Set up pointers for this macro block raw buffer */ raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset + d->offset); - vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride, - (unsigned int *)(raw_motion_err)); + vpx_mse16x16(src_ptr, src_stride, raw_ptr, raw_stride, + (unsigned int *)(raw_motion_err)); /* Set up pointers for this macro block recon buffer */ xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset ); - vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride, - (unsigned int *)(best_motion_err)); + vpx_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride, + (unsigned int *)(best_motion_err)); } static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, @@ -455,7 +454,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, int new_mv_mode_penalty = 256; /* override the default variance function to use MSE */ - v_fn_ptr.vf = vp8_mse16x16; + v_fn_ptr.vf = vpx_mse16x16; /* Set up pointers for this macro block recon buffer */ xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; @@ -573,7 +572,7 @@ void vp8_first_pass(VP8_COMP *cpi) { int flag[2] = {1, 1}; vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); - vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); + memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); } @@ -1329,8 +1328,6 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta return Q; } -extern void vp8_new_framerate(VP8_COMP *cpi, double framerate); - void vp8_init_second_pass(VP8_COMP *cpi) { 
FIRSTPASS_STATS this_frame; @@ -1779,7 +1776,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) start_pos = cpi->twopass.stats_in; - vpx_memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */ + memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */ /* Load stats for the current frame. */ mod_frame_err = calculate_modified_err(cpi, this_frame); @@ -1875,7 +1872,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) break; } - vpx_memcpy(this_frame, &next_frame, sizeof(*this_frame)); + memcpy(this_frame, &next_frame, sizeof(*this_frame)); old_boost_score = boost_score; } @@ -2445,7 +2442,7 @@ void vp8_second_pass(VP8_COMP *cpi) if (cpi->twopass.frames_to_key == 0) { /* Define next KF group and assign bits to it */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); + memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); find_next_key_frame(cpi, &this_frame_copy); /* Special case: Error error_resilient_mode mode does not make much @@ -2471,7 +2468,7 @@ void vp8_second_pass(VP8_COMP *cpi) if (cpi->frames_till_gf_update_due == 0) { /* Define next gf group and assign bits to it */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); + memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); define_gf_group(cpi, &this_frame_copy); /* If we are going to code an altref frame at the end of the group @@ -2487,7 +2484,7 @@ void vp8_second_pass(VP8_COMP *cpi) * to the GF group */ int bak = cpi->per_frame_bandwidth; - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); + memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); assign_std_frame_bits(cpi, &this_frame_copy); cpi->per_frame_bandwidth = bak; } @@ -2510,14 +2507,14 @@ void vp8_second_pass(VP8_COMP *cpi) if (cpi->common.frame_type != KEY_FRAME) { /* Assign bits from those allocated to the GF group */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); + memcpy(&this_frame_copy, &this_frame, 
sizeof(this_frame)); assign_std_frame_bits(cpi, &this_frame_copy); } } else { /* Assign bits from those allocated to the GF group */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); + memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); assign_std_frame_bits(cpi, &this_frame_copy); } } @@ -2658,7 +2655,7 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP double decay_accumulator = 1.0; double next_iiratio; - vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame)); + memcpy(&local_next_frame, next_frame, sizeof(*next_frame)); /* Note the starting file position so we can reset to it */ start_pos = cpi->twopass.stats_in; @@ -2735,7 +2732,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) double kf_group_coded_err = 0.0; double recent_loop_decay[8] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0}; - vpx_memset(&next_frame, 0, sizeof(next_frame)); + memset(&next_frame, 0, sizeof(next_frame)); vp8_clear_system_state(); start_position = cpi->twopass.stats_in; @@ -2756,7 +2753,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.frames_to_key = 1; /* Take a copy of the initial frame details */ - vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame)); + memcpy(&first_frame, this_frame, sizeof(*this_frame)); cpi->twopass.kf_group_bits = 0; cpi->twopass.kf_group_error_left = 0; @@ -2779,7 +2776,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) kf_group_coded_err += this_frame->coded_error; /* Load the next frame's stats. */ - vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame)); + memcpy(&last_frame, this_frame, sizeof(*this_frame)); input_stats(cpi, this_frame); /* Provided that we are not at the end of the file... 
*/ @@ -2847,7 +2844,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.frames_to_key /= 2; /* Copy first frame details */ - vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame)); + memcpy(&tmp_frame, &first_frame, sizeof(first_frame)); /* Reset to the start of the group */ reset_fpf_position(cpi, start_position); @@ -2969,7 +2966,6 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) */ decay_accumulator = 1.0; boost_score = 0.0; - loop_decay_rate = 1.00; /* Starting decay rate */ for (i = 0 ; i < cpi->twopass.frames_to_key ; i++) { @@ -3213,7 +3209,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) int new_width = cpi->oxcf.Width; int new_height = cpi->oxcf.Height; - int projected_buffer_level = (int)cpi->buffer_level; + int projected_buffer_level; int tmp_q; double projected_bits_perframe; diff --git a/media/libvpx/vp8/encoder/mcomp.c b/media/libvpx/vp8/encoder/mcomp.c index 237c8ebd170..f848e8fb571 100644 --- a/media/libvpx/vp8/encoder/mcomp.c +++ b/media/libvpx/vp8/encoder/mcomp.c @@ -9,6 +9,8 @@ */ +#include "./vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "onyx_int.h" #include "mcomp.h" #include "vpx_mem/vpx_mem.h" @@ -900,7 +902,7 @@ int vp8_hex_search this_offset = base_offset + (br * (pre_stride)) + bc; this_mv.as_mv.row = br; this_mv.as_mv.col = bc; - bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX) + bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); #if CONFIG_MULTI_RES_ENCODING @@ -927,7 +929,7 @@ int vp8_hex_search this_mv.as_mv.row = br + hex[i].row; this_mv.as_mv.col = bc + hex[i].col; this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } }else 
@@ -938,7 +940,7 @@ int vp8_hex_search this_mv.as_mv.col = bc + hex[i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } } @@ -964,7 +966,7 @@ int vp8_hex_search this_mv.as_mv.row = br + next_chkpts[k][i].row; this_mv.as_mv.col = bc + next_chkpts[k][i].col; this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } }else @@ -975,7 +977,7 @@ int vp8_hex_search this_mv.as_mv.col = bc + next_chkpts[k][i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } } @@ -1006,7 +1008,7 @@ cal_neighbors: this_mv.as_mv.row = br + neighbors[i].row; this_mv.as_mv.col = bc + neighbors[i].col; this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } }else @@ -1017,7 +1019,7 @@ cal_neighbors: this_mv.as_mv.col = bc + neighbors[i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } } @@ -1101,7 +1103,7 @@ int vp8_diamond_search_sad_c best_address = in_what; /* Check the starting position */ - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) 
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); /* search_param determines the length of the initial step and hence @@ -1126,7 +1128,7 @@ int vp8_diamond_search_sad_c { check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { @@ -1225,7 +1227,7 @@ int vp8_diamond_search_sadx4 best_address = in_what; /* Check the starting position */ - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); /* search_param determines the length of the initial step and hence the @@ -1293,7 +1295,7 @@ int vp8_diamond_search_sadx4 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { @@ -1376,8 +1378,7 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, best_mv->as_mv.col = ref_col; /* Baseline value at the centre */ - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, - in_what_stride, UINT_MAX) + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); /* Apply further limits to prevent us looking using vectors that @@ -1402,7 +1403,7 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, for (c = col_min; c < col_max; c++) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); this_mv.as_mv.col = c; thissad += 
mvsad_err_cost(&this_mv, &fcenter_mv, @@ -1474,8 +1475,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, best_mv->as_mv.col = ref_col; /* Baseline value at the centre */ - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, - in_what_stride, UINT_MAX) + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); /* Apply further limits to prevent us looking using vectors that stretch @@ -1531,7 +1531,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { @@ -1590,7 +1590,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int col_min = ref_col - distance; int col_max = ref_col + distance; - DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); + // TODO(johannkoenig): check if this alignment is necessary. 
+ DECLARE_ALIGNED(16, unsigned int, sad_array8[8]); unsigned int sad_array[3]; int *mvsadcost[2]; @@ -1609,8 +1610,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, best_mv->as_mv.col = ref_col; /* Baseline value at the centre */ - bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, UINT_MAX) + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); /* Apply further limits to prevent us looking using vectors that stretch @@ -1696,7 +1696,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride); if (thissad < bestsad) { @@ -1754,8 +1754,7 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, UINT_MAX) + bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); for (i=0; i x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride); if (thissad < bestsad) { @@ -1834,8 +1833,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, UINT_MAX) + bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) + mvsad_err_cost(ref_mv, &fcenter_mv, 
mvsadcost, error_per_bit); for (i=0; i x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride); if (thissad < bestsad) { @@ -1978,8 +1976,8 @@ void print_mode_context(void) #ifdef VP8_ENTROPY_STATS void init_mv_ref_counts() { - vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); - vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts)); + memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); + memset(mv_mode_cts, 0, sizeof(mv_mode_cts)); } void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) diff --git a/media/libvpx/vp8/encoder/modecosts.c b/media/libvpx/vp8/encoder/modecosts.c index c61563c56f7..ad0e9308dc1 100644 --- a/media/libvpx/vp8/encoder/modecosts.c +++ b/media/libvpx/vp8/encoder/modecosts.c @@ -10,6 +10,7 @@ #include "vp8/common/blockd.h" +#include "modecosts.h" #include "onyx_int.h" #include "treewriter.h" #include "vp8/common/entropymode.h" diff --git a/media/libvpx/vp8/encoder/modecosts.h b/media/libvpx/vp8/encoder/modecosts.h index 9281551c8d5..9871bfffdf9 100644 --- a/media/libvpx/vp8/encoder/modecosts.h +++ b/media/libvpx/vp8/encoder/modecosts.h @@ -16,7 +16,9 @@ extern "C" { #endif -void vp8_init_mode_costs(VP8_COMP *x); +struct VP8_COMP; + +void vp8_init_mode_costs(struct VP8_COMP *x); #ifdef __cplusplus } // extern "C" diff --git a/media/libvpx/vp8/encoder/onyx_if.c b/media/libvpx/vp8/encoder/onyx_if.c index becc07c1a89..40e29e191af 100644 --- a/media/libvpx/vp8/encoder/onyx_if.c +++ b/media/libvpx/vp8/encoder/onyx_if.c @@ -11,6 +11,7 @@ #include "vpx_config.h" #include "./vpx_scale_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "./vp8_rtcd.h" #include "vp8/common/onyxc_int.h" #include "vp8/common/blockd.h" @@ -428,10 +429,10 @@ static void setup_features(VP8_COMP *cpi) cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0; 
cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; - vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); - vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); - vpx_memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); - vpx_memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); + memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); + memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); + memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); + memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); set_default_lf_deltas(cpi); @@ -508,7 +509,7 @@ static void disable_segmentation(VP8_COMP *cpi) static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map) { /* Copy in the new segmentation map */ - vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols)); + memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols)); /* Signal that the map should be updated. */ cpi->mb.e_mbd.update_mb_segmentation_map = 1; @@ -530,7 +531,7 @@ static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map) static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta) { cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta; - vpx_memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data)); + memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data)); } @@ -586,7 +587,8 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) // Turn-off under certain conditions (i.e., away from key frame, and if // we are at good quality (low Q) and most of the blocks were skipped-encoded // in previous frame. - if (Q >= 100) { + int qp_thresh = (cpi->oxcf.screen_content_mode == 2) ? 
80 : 100; + if (Q >= qp_thresh) { cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 10; } else if (cpi->frames_since_key > 250 && @@ -602,7 +604,7 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) // Set every macroblock to be eligible for update. // For key frame this will reset seg map to 0. - vpx_memset(cpi->segmentation_map, 0, mbs_in_frame); + memset(cpi->segmentation_map, 0, mbs_in_frame); if (cpi->common.frame_type != KEY_FRAME && block_count > 0) { @@ -686,8 +688,8 @@ static void set_default_lf_deltas(VP8_COMP *cpi) cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1; cpi->mb.e_mbd.mode_ref_lf_delta_update = 1; - vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); - vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); + memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); + memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); /* Test of ref frame deltas */ cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2; @@ -1087,7 +1089,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) if (Speed >= 15) sf->half_pixel_search = 0; - vpx_memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins)); + memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins)); }; /* switch */ @@ -1298,7 +1300,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->active_map))); - vpx_memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols)); + memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols)); #if CONFIG_MULTITHREAD if (width < 640) @@ -1891,7 +1893,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cm = &cpi->common; - vpx_memset(cpi, 0, sizeof(VP8_COMP)); + memset(cpi, 0, sizeof(VP8_COMP)); if (setjmp(cm->error.jmp)) { @@ -2010,6 +2012,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->source_alt_ref_active = 0; 
cpi->common.refresh_alt_ref_frame = 0; + cpi->force_maxqp = 0; + cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; #if CONFIG_INTERNAL_STATS cpi->b_calculate_ssimg = 0; @@ -2126,55 +2130,55 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) } #endif - cpi->fn_ptr[BLOCK_16X16].sdf = vp8_sad16x16; - cpi->fn_ptr[BLOCK_16X16].vf = vp8_variance16x16; + cpi->fn_ptr[BLOCK_16X16].sdf = vpx_sad16x16; + cpi->fn_ptr[BLOCK_16X16].vf = vpx_variance16x16; cpi->fn_ptr[BLOCK_16X16].svf = vp8_sub_pixel_variance16x16; cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = vp8_variance_halfpixvar16x16_h; cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = vp8_variance_halfpixvar16x16_v; cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = vp8_variance_halfpixvar16x16_hv; - cpi->fn_ptr[BLOCK_16X16].sdx3f = vp8_sad16x16x3; - cpi->fn_ptr[BLOCK_16X16].sdx8f = vp8_sad16x16x8; - cpi->fn_ptr[BLOCK_16X16].sdx4df = vp8_sad16x16x4d; + cpi->fn_ptr[BLOCK_16X16].sdx3f = vpx_sad16x16x3; + cpi->fn_ptr[BLOCK_16X16].sdx8f = vpx_sad16x16x8; + cpi->fn_ptr[BLOCK_16X16].sdx4df = vpx_sad16x16x4d; - cpi->fn_ptr[BLOCK_16X8].sdf = vp8_sad16x8; - cpi->fn_ptr[BLOCK_16X8].vf = vp8_variance16x8; + cpi->fn_ptr[BLOCK_16X8].sdf = vpx_sad16x8; + cpi->fn_ptr[BLOCK_16X8].vf = vpx_variance16x8; cpi->fn_ptr[BLOCK_16X8].svf = vp8_sub_pixel_variance16x8; cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL; cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_16X8].sdx3f = vp8_sad16x8x3; - cpi->fn_ptr[BLOCK_16X8].sdx8f = vp8_sad16x8x8; - cpi->fn_ptr[BLOCK_16X8].sdx4df = vp8_sad16x8x4d; + cpi->fn_ptr[BLOCK_16X8].sdx3f = vpx_sad16x8x3; + cpi->fn_ptr[BLOCK_16X8].sdx8f = vpx_sad16x8x8; + cpi->fn_ptr[BLOCK_16X8].sdx4df = vpx_sad16x8x4d; - cpi->fn_ptr[BLOCK_8X16].sdf = vp8_sad8x16; - cpi->fn_ptr[BLOCK_8X16].vf = vp8_variance8x16; + cpi->fn_ptr[BLOCK_8X16].sdf = vpx_sad8x16; + cpi->fn_ptr[BLOCK_8X16].vf = vpx_variance8x16; cpi->fn_ptr[BLOCK_8X16].svf = vp8_sub_pixel_variance8x16; 
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL; cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_8X16].sdx3f = vp8_sad8x16x3; - cpi->fn_ptr[BLOCK_8X16].sdx8f = vp8_sad8x16x8; - cpi->fn_ptr[BLOCK_8X16].sdx4df = vp8_sad8x16x4d; + cpi->fn_ptr[BLOCK_8X16].sdx3f = vpx_sad8x16x3; + cpi->fn_ptr[BLOCK_8X16].sdx8f = vpx_sad8x16x8; + cpi->fn_ptr[BLOCK_8X16].sdx4df = vpx_sad8x16x4d; - cpi->fn_ptr[BLOCK_8X8].sdf = vp8_sad8x8; - cpi->fn_ptr[BLOCK_8X8].vf = vp8_variance8x8; + cpi->fn_ptr[BLOCK_8X8].sdf = vpx_sad8x8; + cpi->fn_ptr[BLOCK_8X8].vf = vpx_variance8x8; cpi->fn_ptr[BLOCK_8X8].svf = vp8_sub_pixel_variance8x8; cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL; cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_8X8].sdx3f = vp8_sad8x8x3; - cpi->fn_ptr[BLOCK_8X8].sdx8f = vp8_sad8x8x8; - cpi->fn_ptr[BLOCK_8X8].sdx4df = vp8_sad8x8x4d; + cpi->fn_ptr[BLOCK_8X8].sdx3f = vpx_sad8x8x3; + cpi->fn_ptr[BLOCK_8X8].sdx8f = vpx_sad8x8x8; + cpi->fn_ptr[BLOCK_8X8].sdx4df = vpx_sad8x8x4d; - cpi->fn_ptr[BLOCK_4X4].sdf = vp8_sad4x4; - cpi->fn_ptr[BLOCK_4X4].vf = vp8_variance4x4; + cpi->fn_ptr[BLOCK_4X4].sdf = vpx_sad4x4; + cpi->fn_ptr[BLOCK_4X4].vf = vpx_variance4x4; cpi->fn_ptr[BLOCK_4X4].svf = vp8_sub_pixel_variance4x4; cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL; cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_4X4].sdx3f = vp8_sad4x4x3; - cpi->fn_ptr[BLOCK_4X4].sdx8f = vp8_sad4x4x8; - cpi->fn_ptr[BLOCK_4X4].sdx4df = vp8_sad4x4x4d; + cpi->fn_ptr[BLOCK_4X4].sdx3f = vpx_sad4x4x3; + cpi->fn_ptr[BLOCK_4X4].sdx8f = vpx_sad4x4x8; + cpi->fn_ptr[BLOCK_4X4].sdx4df = vpx_sad4x4x4d; #if ARCH_X86 || ARCH_X86_64 cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn; @@ -2554,7 +2558,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride, { unsigned int sse; - vp8_mse16x16(orig + col, orig_stride, + 
vpx_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse); total_sse += sse; @@ -2867,7 +2871,7 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi) } /* Update data structure that monitors level of reference to last GF */ - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); + memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; /* this frame refreshes means next frames don't unless specified by user */ @@ -2916,7 +2920,7 @@ static void update_golden_frame_stats(VP8_COMP *cpi) } /* Update data structure that monitors level of reference to last GF */ - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); + memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; /* this frame refreshes means next frames don't unless specified by @@ -3380,7 +3384,7 @@ static int measure_square_diff_partial(YV12_BUFFER_CONFIG *source, int index = block_index_row + (j >> 4); if (cpi->consec_zero_last[index] >= min_consec_zero_last) { unsigned int sse; - Total += vp8_mse16x16(src + j, + Total += vpx_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse); @@ -3444,7 +3448,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) { int index = block_index_row + (j >> 4); if (cpi->consec_zero_last[index] >= min_consec_zero_last) { unsigned int sse; - const unsigned int var = vp8_variance16x16(src + j, + const unsigned int var = vpx_variance16x16(src + j, ystride, dst + j, ystride, @@ -3454,7 +3458,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) { // is small (to avoid effects from lighting change). if ((sse - var) < 128) { unsigned int sse2; - const unsigned int act = vp8_variance16x16(src + j, + const unsigned int act = vpx_variance16x16(src + j, ystride, const_source, 0, @@ -3830,9 +3834,9 @@ static void encode_frame_to_data_rate } // Reset the zero_last counter to 0 on key frame. 
- vpx_memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols); - vpx_memset(cpi->consec_zero_last_mvbias, 0, - (cpi->common.mb_rows * cpi->common.mb_cols)); + memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols); + memset(cpi->consec_zero_last_mvbias, 0, + (cpi->common.mb_rows * cpi->common.mb_cols)); } #if 0 @@ -4183,7 +4187,10 @@ static void encode_frame_to_data_rate */ if (cpi->cyclic_refresh_mode_enabled) { - if (cpi->current_layer==0) + // Special case for screen_content_mode with golden frame updates. + int disable_cr_gf = (cpi->oxcf.screen_content_mode == 2 && + cm->refresh_golden_frame); + if (cpi->current_layer == 0 && cpi->force_maxqp == 0 && !disable_cr_gf) cyclic_background_refresh(cpi, Q, 0); else disable_segmentation(cpi); @@ -4362,9 +4369,9 @@ static void encode_frame_to_data_rate disable_segmentation(cpi); } // Reset the zero_last counter to 0 on key frame. - vpx_memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols); - vpx_memset(cpi->consec_zero_last_mvbias, 0, - (cpi->common.mb_rows * cpi->common.mb_cols)); + memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols); + memset(cpi->consec_zero_last_mvbias, 0, + (cpi->common.mb_rows * cpi->common.mb_cols)); vp8_set_quantizer(cpi, Q); } @@ -4387,7 +4394,7 @@ static void encode_frame_to_data_rate if (cm->refresh_entropy_probs == 0) { /* save a copy for later refresh */ - vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc)); + memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc)); } vp8_update_coef_context(cpi); @@ -4405,6 +4412,11 @@ static void encode_frame_to_data_rate /* transform / motion compensation build reconstruction frame */ vp8_encode_frame(cpi); + if (cpi->oxcf.screen_content_mode == 2) { + if (vp8_drop_encodedframe_overshoot(cpi, Q)) + return; + } + cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? 
cpi->projected_frame_size : 0; #endif @@ -5613,19 +5625,19 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (cm->refresh_entropy_probs == 0) { - vpx_memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc)); + memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc)); } /* Save the contexts separately for alt ref, gold and last. */ /* (TODO jbb -> Optimize this with pointers to avoid extra copies. ) */ if(cm->refresh_alt_ref_frame) - vpx_memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc)); + memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc)); if(cm->refresh_golden_frame) - vpx_memcpy(&cpi->lfc_g, &cm->fc, sizeof(cm->fc)); + memcpy(&cpi->lfc_g, &cm->fc, sizeof(cm->fc)); if(cm->refresh_last_frame) - vpx_memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc)); + memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc)); /* if its a dropped frame honor the requests on subsequent frames */ if (*size > 0) @@ -5934,7 +5946,7 @@ int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, uns { if (map) { - vpx_memcpy(cpi->active_map, map, rows * cols); + memcpy(cpi->active_map, map, rows * cols); cpi->active_map_enabled = 1; } else @@ -5981,7 +5993,8 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) for (j = 0; j < source->y_width; j += 16) { unsigned int sse; - Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse); + Total += vpx_mse16x16(src + j, source->y_stride, + dst + j, dest->y_stride, &sse); } src += 16 * source->y_stride; diff --git a/media/libvpx/vp8/encoder/onyx_int.h b/media/libvpx/vp8/encoder/onyx_int.h index 82d7453902c..c48e2f4478b 100644 --- a/media/libvpx/vp8/encoder/onyx_int.h +++ b/media/libvpx/vp8/encoder/onyx_int.h @@ -526,6 +526,8 @@ typedef struct VP8_COMP // Measure of average squared difference between source and denoised signal. 
int mse_source_denoised; + int force_maxqp; + #if CONFIG_MULTITHREAD /* multithread data */ int * mt_current_mb_col; @@ -714,6 +716,11 @@ typedef struct VP8_COMP } rd_costs; } VP8_COMP; +void vp8_alloc_compressor_data(VP8_COMP *cpi); +int vp8_reverse_trans(int x); +void vp8_new_framerate(VP8_COMP *cpi, double framerate); +void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); + void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char *dest_end, unsigned long *size); diff --git a/media/libvpx/vp8/encoder/pickinter.c b/media/libvpx/vp8/encoder/pickinter.c index d02cd30b9a1..053bf119aa9 100644 --- a/media/libvpx/vp8/encoder/pickinter.c +++ b/media/libvpx/vp8/encoder/pickinter.c @@ -11,6 +11,7 @@ #include #include "vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "onyx_int.h" #include "modecosts.h" #include "encodeintra.h" @@ -29,8 +30,6 @@ #include "denoising.h" #endif -extern int VP8_UVSSE(MACROBLOCK *x); - #ifdef SPEEDSTATS extern unsigned int cnt_pm; #endif @@ -38,8 +37,6 @@ extern unsigned int cnt_pm; extern const int vp8_ref_frame_order[MAX_MODES]; extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES]; -extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]); - // Fixed point implementation of a skin color classifier. Skin color // is model by a Gaussian distribution in the CbCr color space. 
// See ../../test/skin_color_detector_test.cc where the reference @@ -219,33 +216,6 @@ int vp8_get_inter_mbpred_error(MACROBLOCK *mb, } - -unsigned int vp8_get4x4sse_cs_c -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride -) -{ - int distortion = 0; - int r, c; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - int diff = src_ptr[c] - ref_ptr[c]; - distortion += diff * diff; - } - - src_ptr += source_stride; - ref_ptr += recon_stride; - } - - return distortion; -} - static int get_prediction_error(BLOCK *be, BLOCKD *b) { unsigned char *sptr; @@ -253,7 +223,7 @@ static int get_prediction_error(BLOCK *be, BLOCKD *b) sptr = (*(be->base_src) + be->src); dptr = b->predictor; - return vp8_get4x4sse_cs(sptr, be->src_stride, dptr, 16); + return vpx_get4x4sse_cs(sptr, be->src_stride, dptr, 16); } @@ -862,8 +832,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, mode_mv = mode_mv_sb[sign_bias]; best_ref_mv.as_int = 0; - vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb)); - vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); + memset(mode_mv_sb, 0, sizeof(mode_mv_sb)); + memset(&best_mbmode, 0, sizeof(best_mbmode)); /* Setup search priorities */ #if CONFIG_MULTI_RES_ENCODING @@ -1041,7 +1011,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, else { rate2 += rate; - distortion2 = vp8_variance16x16( + distortion2 = vpx_variance16x16( *(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); @@ -1070,7 +1040,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, xd->dst.y_stride, xd->predictor, 16); - distortion2 = vp8_variance16x16 + distortion2 = vpx_variance16x16 (*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; @@ -1348,8 +1318,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, 
MACROBLOCK *x, int recon_yoffset, *returndistortion = distortion2; best_rd_sse = sse; best_rd = this_rd; - vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, - sizeof(MB_MODE_INFO)); + memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, + sizeof(MB_MODE_INFO)); /* Testing this mode gave rise to an improvement in best error * score. Lower threshold a bit for next time @@ -1487,8 +1457,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, if (this_rd < best_rd) { - vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, - sizeof(MB_MODE_INFO)); + memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, + sizeof(MB_MODE_INFO)); } } @@ -1512,8 +1482,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, /* set to the best mb mode, this copy can be skip if x->skip since it * already has the right content */ if (!x->skip) - vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, - sizeof(MB_MODE_INFO)); + memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, + sizeof(MB_MODE_INFO)); if (best_mbmode.mode <= B_PRED) { @@ -1551,7 +1521,7 @@ void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_) xd->dst.y_stride, xd->predictor, 16); - distortion = vp8_variance16x16 + distortion = vpx_variance16x16 (*(b->base_src), b->src_stride, xd->predictor, 16, &sse); rate = x->mbmode_cost[xd->frame_type][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); diff --git a/media/libvpx/vp8/encoder/picklpf.c b/media/libvpx/vp8/encoder/picklpf.c index f0c8f28fc96..875b37f6841 100644 --- a/media/libvpx/vp8/encoder/picklpf.c +++ b/media/libvpx/vp8/encoder/picklpf.c @@ -9,6 +9,7 @@ */ +#include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vp8/common/onyxc_int.h" #include "onyx_int.h" @@ -49,7 +50,7 @@ static void yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, src_y = src_ybc->y_buffer + yoffset; dst_y = dst_ybc->y_buffer + yoffset; - vpx_memcpy(dst_y, src_y, ystride * linestocopy); + memcpy(dst_y, 
src_y, ystride * linestocopy); } static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, @@ -83,7 +84,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, for (j = 0; j < source->y_width; j += 16) { unsigned int sse; - Total += vp8_mse16x16(src + j, source->y_stride, + Total += vpx_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse); } @@ -142,7 +143,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); int filt_val; - int best_filt_val = cm->filter_level; + int best_filt_val; YV12_BUFFER_CONFIG * saved_frame = cm->frame_to_show; /* Replace unfiltered frame buffer with a new one */ @@ -274,8 +275,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) int filter_step; int filt_high = 0; - /* Start search at previous frame filter level */ - int filt_mid = cm->filter_level; + int filt_mid; int filt_low = 0; int filt_best; int filt_direction = 0; @@ -287,7 +287,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) YV12_BUFFER_CONFIG * saved_frame = cm->frame_to_show; - vpx_memset(ss_err, 0, sizeof(ss_err)); + memset(ss_err, 0, sizeof(ss_err)); /* Replace unfiltered frame buffer with a new one */ cm->frame_to_show = &cpi->pick_lf_lvl_frame; diff --git a/media/libvpx/vp8/encoder/quantize.c b/media/libvpx/vp8/encoder/quantize.c index bd47823dd5f..c5a7bc67039 100644 --- a/media/libvpx/vp8/encoder/quantize.c +++ b/media/libvpx/vp8/encoder/quantize.c @@ -65,8 +65,8 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) short *dequant_ptr = d->dequant; short zbin_oq_value = b->zbin_extra; - vpx_memset(qcoeff_ptr, 0, 32); - vpx_memset(dqcoeff_ptr, 0, 32); + memset(qcoeff_ptr, 0, 32); + memset(dqcoeff_ptr, 0, 32); eob = -1; diff --git a/media/libvpx/vp8/encoder/ratectrl.c b/media/libvpx/vp8/encoder/ratectrl.c index e30ad9e2838..e8796a1fcfb 100644 --- 
a/media/libvpx/vp8/encoder/ratectrl.c +++ b/media/libvpx/vp8/encoder/ratectrl.c @@ -296,7 +296,7 @@ void vp8_setup_key_frame(VP8_COMP *cpi) vp8_default_coef_probs(& cpi->common); - vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); + memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); { int flag[2] = {1, 1}; vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag); @@ -305,9 +305,9 @@ void vp8_setup_key_frame(VP8_COMP *cpi) /* Make sure we initialize separate contexts for altref,gold, and normal. * TODO shouldn't need 3 different copies of structure to do this! */ - vpx_memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc)); - vpx_memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc)); - vpx_memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc)); + memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc)); + memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc)); + memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc)); cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ; @@ -1215,6 +1215,11 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) { int Q = cpi->active_worst_quality; + if (cpi->force_maxqp == 1) { + cpi->active_worst_quality = cpi->worst_quality; + return cpi->worst_quality; + } + /* Reset Zbin OQ value */ cpi->mb.zbin_over_quant = 0; @@ -1559,3 +1564,46 @@ int vp8_pick_frame_size(VP8_COMP *cpi) } return 1; } +// If this just encoded frame (mcomp/transform/quant, but before loopfilter and +// pack_bitstream) has large overshoot, and was not being encoded close to the +// max QP, then drop this frame and force next frame to be encoded at max QP. +// Condition this on 1 pass CBR with screen content mode and frame dropper off. +// TODO(marpan): Should do this exit condition during the encode_frame +// (i.e., halfway during the encoding of the frame) to save cycles. 
+int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) { + if (cpi->pass == 0 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER && + cpi->drop_frames_allowed == 0 && + cpi->common.frame_type != KEY_FRAME) { + // Note: the "projected_frame_size" from encode_frame() only gives estimate + // of mode/motion vector rate (in non-rd mode): so below we only require + // that projected_frame_size is somewhat greater than per-frame-bandwidth, + // but add additional condition with high threshold on prediction residual. + + // QP threshold: only allow dropping if we are not close to qp_max. + int thresh_qp = 3 * cpi->worst_quality >> 2; + // Rate threshold, in bytes. + int thresh_rate = 2 * (cpi->av_per_frame_bandwidth >> 3); + // Threshold for the average (over all macroblocks) of the pixel-sum + // residual error over 16x16 block. Should add QP dependence on threshold? + int thresh_pred_err_mb = (256 << 4); + int pred_err_mb = (int)(cpi->mb.prediction_error / cpi->common.MBs); + if (Q < thresh_qp && + cpi->projected_frame_size > thresh_rate && + pred_err_mb > thresh_pred_err_mb) { + // Drop this frame: advance frame counters, and set force_maxqp flag. + cpi->common.current_video_frame++; + cpi->frames_since_key++; + // Flag to indicate we will force next frame to be encoded at max QP. 
+ cpi->force_maxqp = 1; + return 1; + } else { + cpi->force_maxqp = 0; + return 0; + } + cpi->force_maxqp = 0; + return 0; + } + cpi->force_maxqp = 0; + return 0; +} diff --git a/media/libvpx/vp8/encoder/ratectrl.h b/media/libvpx/vp8/encoder/ratectrl.h index 829697f391f..703de9ff550 100644 --- a/media/libvpx/vp8/encoder/ratectrl.h +++ b/media/libvpx/vp8/encoder/ratectrl.h @@ -30,6 +30,8 @@ extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_ /* return of 0 means drop frame */ extern int vp8_pick_frame_size(VP8_COMP *cpi); +extern int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q); + #ifdef __cplusplus } // extern "C" #endif diff --git a/media/libvpx/vp8/encoder/rdopt.c b/media/libvpx/vp8/encoder/rdopt.c index 29da926cebf..17194f0d449 100644 --- a/media/libvpx/vp8/encoder/rdopt.c +++ b/media/libvpx/vp8/encoder/rdopt.c @@ -15,6 +15,7 @@ #include #include "vpx_config.h" #include "vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "tokenize.h" #include "treewriter.h" #include "onyx_int.h" @@ -507,9 +508,9 @@ int VP8_UVSSE(MACROBLOCK *x) } else { - vp8_variance8x8(uptr, pre_stride, + vpx_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2); - vp8_variance8x8(vptr, pre_stride, + vpx_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1); sse2 += sse1; } @@ -555,8 +556,8 @@ static int vp8_rdcost_mby(MACROBLOCK *mb) ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; - vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; @@ -650,8 +651,8 @@ static int rd_pick_intra4x4block( * a temp buffer that meets the stride requirements, but we are only * interested in the left 4x4 block * */ - DECLARE_ALIGNED_ARRAY(16, unsigned char, 
best_predictor, 16*4); - DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16); + DECLARE_ALIGNED(16, unsigned char, best_predictor[16*4]); + DECLARE_ALIGNED(16, short, best_dqcoeff[16]); int dst_stride = x->e_mbd.dst.y_stride; unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; @@ -691,7 +692,7 @@ static int rd_pick_intra4x4block( *a = tempa; *l = templ; copy_predictor(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); + memcpy(best_dqcoeff, b->dqcoeff, 32); } } b->bmi.as_mode = *best_mode; @@ -715,8 +716,8 @@ static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, ENTROPY_CONTEXT *tl; const int *bmode_costs; - vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; @@ -820,8 +821,8 @@ static int rd_cost_mbuv(MACROBLOCK *mb) ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; - vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; @@ -1128,8 +1129,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, ENTROPY_CONTEXT *ta_b; ENTROPY_CONTEXT *tl_b; - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); ta = (ENTROPY_CONTEXT *)&t_above; tl = 
(ENTROPY_CONTEXT *)&t_left; @@ -1172,8 +1173,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, ENTROPY_CONTEXT *ta_s; ENTROPY_CONTEXT *tl_s; - vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES)); ta_s = (ENTROPY_CONTEXT *)&t_above_s; tl_s = (ENTROPY_CONTEXT *)&t_left_s; @@ -1329,14 +1330,14 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, mode_selected = this_mode; best_label_rd = this_rd; - vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES)); } } /*for each 4x4 mode*/ - vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES)); + memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES)); labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], bsi->ref_mv, x->mvcost); @@ -1392,7 +1393,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, int i; BEST_SEG_INFO bsi; - vpx_memset(&bsi, 0, sizeof(bsi)); + memset(&bsi, 0, sizeof(bsi)); bsi.segment_rd = best_rd; bsi.ref_mv = best_ref_mv; @@ -1661,7 +1662,6 @@ void vp8_mv_pred mv.as_mv.row = mvx[vcnt/2]; mv.as_mv.col = mvy[vcnt/2]; - find = 1; /* sr is set to 0 to allow calling function to decide the search * range. */ @@ -1691,16 +1691,16 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse }else if(xd->mb_to_top_edge==0) { /* only has left MB for sad calculation. 
*/ near_sad[0] = near_sad[2] = INT_MAX; - near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX); + near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride); }else if(xd->mb_to_left_edge ==0) { /* only has left MB for sad calculation. */ near_sad[1] = near_sad[2] = INT_MAX; - near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX); + near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride); }else { - near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX); - near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX); - near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX); + near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride); + near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride); + near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride); } if(cpi->common.last_frame_type != KEY_FRAME) @@ -1715,14 +1715,14 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX; if(near_sad[4] != INT_MAX) - near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX); + near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride); if(near_sad[5] != INT_MAX) - near_sad[5] = 
cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX); - near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX); + near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride); + near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride); if(near_sad[6] != INT_MAX) - near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX); + near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride); if(near_sad[7] != INT_MAX) - near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX); + near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride); } if(cpi->common.last_frame_type != KEY_FRAME) @@ -1784,7 +1784,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4], if(threshold < x->encode_breakout) threshold = x->encode_breakout; - var = vp8_variance16x16 + var = vpx_variance16x16 (*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); @@ -1926,8 +1926,8 @@ static void update_best_mode(BEST_MODE* best_mode, int this_rd, (rd->distortion2-rd->distortion_uv)); best_mode->rd = this_rd; - vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO)); - vpx_memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO)); + memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO)); + memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO)); if ((this_mode == B_PRED) || (this_mode == SPLITMV)) { @@ -1989,9 +1989,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, best_mode.rd = INT_MAX; best_mode.yrd = INT_MAX; best_mode.intra_rd = INT_MAX; - vpx_memset(mode_mv_sb, 0, 
sizeof(mode_mv_sb)); - vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode)); - vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes)); + memset(mode_mv_sb, 0, sizeof(mode_mv_sb)); + memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode)); + memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes)); /* Setup search priorities */ get_reference_search_order(cpi, ref_frame_map); @@ -2293,7 +2293,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, mode_mv[NEWMV].as_int = d->bmi.mv.as_int; /* Further step/diamond searches as necessary */ - n = 0; further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; n = num00; @@ -2560,8 +2559,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, intra_rd_penalty, cpi, x); if (this_rd < best_mode.rd || x->skip) { - /* Note index of best mode so far */ - best_mode_index = mode_index; *returnrate = rd.rate2; *returndistortion = rd.distortion2; update_best_mode(&best_mode, this_rd, &rd, other_cost, x); @@ -2586,7 +2583,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, /* macroblock modes */ - vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO)); + memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO)); if (best_mode.mbmode.mode == B_PRED) { @@ -2599,7 +2596,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, for (i = 0; i < 16; i++) xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int; - vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO)); + memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO)); x->e_mbd.mode_info_context->mbmi.mv.as_int = x->partition_info->bmi[15].mv.as_int; diff --git a/media/libvpx/vp8/encoder/rdopt.h b/media/libvpx/vp8/encoder/rdopt.h index e0da35e203c..b4fcd10b61e 100644 --- a/media/libvpx/vp8/encoder/rdopt.h +++ b/media/libvpx/vp8/encoder/rdopt.h @@ -136,6 
+136,9 @@ extern void vp8_mv_pred int near_sadidx[] ); void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]); +int VP8_UVSSE(MACROBLOCK *x); +int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]); +void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); #ifdef __cplusplus } // extern "C" diff --git a/media/libvpx/vp8/encoder/segmentation.c b/media/libvpx/vp8/encoder/segmentation.c index 37972e219a0..fdd22fceb6e 100644 --- a/media/libvpx/vp8/encoder/segmentation.c +++ b/media/libvpx/vp8/encoder/segmentation.c @@ -23,7 +23,7 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x) if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) { /* Reset Gf useage monitors */ - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); + memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; } else diff --git a/media/libvpx/vp8/encoder/temporal_filter.c b/media/libvpx/vp8/encoder/temporal_filter.c index 04501ac9bdf..ba8b0097710 100644 --- a/media/libvpx/vp8/encoder/temporal_filter.c +++ b/media/libvpx/vp8/encoder/temporal_filter.c @@ -238,12 +238,12 @@ static void vp8_temporal_filter_iterate_c int mb_rows = cpi->common.mb_rows; int mb_y_offset = 0; int mb_uv_offset = 0; - DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8); - DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8); + DECLARE_ALIGNED(16, unsigned int, accumulator[16*16 + 8*8 + 8*8]); + DECLARE_ALIGNED(16, unsigned short, count[16*16 + 8*8 + 8*8]); MACROBLOCKD *mbd = &cpi->mb.e_mbd; YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index]; unsigned char *dst1, *dst2; - DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8); + DECLARE_ALIGNED(16, unsigned char, predictor[16*16 + 8*8 + 8*8]); /* Save input state */ unsigned char *y_buffer = mbd->pre.y_buffer; @@ -274,8 +274,8 @@ static void 
vp8_temporal_filter_iterate_c int i, j, k; int stride; - vpx_memset(accumulator, 0, 384*sizeof(unsigned int)); - vpx_memset(count, 0, 384*sizeof(unsigned short)); + memset(accumulator, 0, 384*sizeof(unsigned int)); + memset(count, 0, 384*sizeof(unsigned short)); #if ALT_REF_MC_ENABLED cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5)); @@ -502,7 +502,7 @@ void vp8_temporal_filter_prepare_c start_frame = distance + frames_to_blur_forward; /* Setup frame pointers, NULL indicates frame not included in filter */ - vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *)); + memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *)); for (frame = 0; frame < frames_to_blur; frame++) { int which_buffer = start_frame - frame; diff --git a/media/libvpx/vp8/encoder/tokenize.c b/media/libvpx/vp8/encoder/tokenize.c index 2dc8205278b..afd46fb2197 100644 --- a/media/libvpx/vp8/encoder/tokenize.c +++ b/media/libvpx/vp8/encoder/tokenize.c @@ -421,7 +421,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) void init_context_counters(void) { - vpx_memset(context_counters, 0, sizeof(context_counters)); + memset(context_counters, 0, sizeof(context_counters)); } void print_context_counters() @@ -596,13 +596,13 @@ void vp8_fix_contexts(MACROBLOCKD *x) /* Clear entropy contexts for Y2 blocks */ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) { - vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } else { - vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); - vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); + memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); + memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); } } diff --git 
a/media/libvpx/vp8/encoder/x86/quantize_sse2.c b/media/libvpx/vp8/encoder/x86/quantize_sse2.c index 291d21992fe..b4e92e04b22 100644 --- a/media/libvpx/vp8/encoder/x86/quantize_sse2.c +++ b/media/libvpx/vp8/encoder/x86/quantize_sse2.c @@ -35,10 +35,10 @@ void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d) { char eob = 0; - short *zbin_boost_ptr = b->zrun_zbin_boost; + short *zbin_boost_ptr; short *qcoeff_ptr = d->qcoeff; - DECLARE_ALIGNED_ARRAY(16, short, x, 16); - DECLARE_ALIGNED_ARRAY(16, short, y, 16); + DECLARE_ALIGNED(16, short, x[16]); + DECLARE_ALIGNED(16, short, y[16]); __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1; __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); @@ -55,7 +55,7 @@ void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d) __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); - vpx_memset(qcoeff_ptr, 0, 32); + memset(qcoeff_ptr, 0, 32); /* Duplicate to all lanes. 
*/ zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); diff --git a/media/libvpx/vp8/vp8_cx_iface.c b/media/libvpx/vp8/vp8_cx_iface.c index 6768ffdc3b3..8697377892e 100644 --- a/media/libvpx/vp8/vp8_cx_iface.c +++ b/media/libvpx/vp8/vp8_cx_iface.c @@ -10,7 +10,9 @@ #include "./vpx_config.h" -#include "vp8_rtcd.h" +#include "./vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" #include "vpx/vpx_codec.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" @@ -133,7 +135,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(cfg, g_w, 1, 16383); /* 14 bits available */ RANGE_CHECK(cfg, g_h, 1, 16383); /* 14 bits available */ RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); - RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den); + RANGE_CHECK(cfg, g_timebase.num, 1, 1000000000); RANGE_CHECK_HI(cfg, g_profile, 3); RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); @@ -197,7 +199,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); RANGE_CHECK(vp8_cfg, arnr_type, 1, 3); RANGE_CHECK(vp8_cfg, cq_level, 0, 63); - RANGE_CHECK_BOOL(vp8_cfg, screen_content_mode); + RANGE_CHECK_HI(vp8_cfg, screen_content_mode, 2); if (finalize && (cfg->rc_end_usage == VPX_CQ || cfg->rc_end_usage == VPX_Q)) RANGE_CHECK(vp8_cfg, cq_level, cfg->rc_min_quantizer, cfg->rc_max_quantizer); @@ -365,9 +367,9 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, if (oxcf->number_of_layers > 1) { memcpy (oxcf->target_bitrate, cfg.ts_target_bitrate, - sizeof(cfg.ts_target_bitrate)); + sizeof(cfg.ts_target_bitrate)); memcpy (oxcf->rate_decimator, cfg.ts_rate_decimator, - sizeof(cfg.ts_rate_decimator)); + sizeof(cfg.ts_rate_decimator)); memcpy (oxcf->layer_id, cfg.ts_layer_id, sizeof(cfg.ts_layer_id)); } @@ -476,8 +478,6 @@ static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx, return res; } -int 
vp8_reverse_trans(int); - static vpx_codec_err_t get_quantizer(vpx_codec_alg_priv_t *ctx, va_list args) { int *const arg = va_arg(args, int *); @@ -649,6 +649,8 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, vp8_rtcd(); + vpx_dsp_rtcd(); + vpx_scale_rtcd(); if (!ctx->priv) { @@ -858,9 +860,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, { vpx_codec_err_t res = VPX_CODEC_OK; - if (!ctx->cfg.rc_target_bitrate) - return res; - if (!ctx->cfg.rc_target_bitrate) return res; diff --git a/media/libvpx/vp8/vp8_dx_iface.c b/media/libvpx/vp8/vp8_dx_iface.c index 67a0fef64af..72e4770c008 100644 --- a/media/libvpx/vp8/vp8_dx_iface.c +++ b/media/libvpx/vp8/vp8_dx_iface.c @@ -11,7 +11,9 @@ #include #include -#include "vp8_rtcd.h" +#include "./vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" #include "vpx/vpx_decoder.h" #include "vpx/vp8dx.h" #include "vpx/internal/vpx_codec_internal.h" @@ -106,6 +108,8 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, (void) data; vp8_rtcd(); + vpx_dsp_rtcd(); + vpx_scale_rtcd(); /* This function only allocates space for the vpx_codec_alg_priv_t * structure. 
More memory may be required at the time the stream @@ -286,8 +290,8 @@ update_fragments(vpx_codec_alg_priv_t *ctx, if (ctx->fragments.count == 0) { /* New frame, reset fragment pointers and sizes */ - vpx_memset((void*)ctx->fragments.ptrs, 0, sizeof(ctx->fragments.ptrs)); - vpx_memset(ctx->fragments.sizes, 0, sizeof(ctx->fragments.sizes)); + memset((void*)ctx->fragments.ptrs, 0, sizeof(ctx->fragments.ptrs)); + memset(ctx->fragments.sizes, 0, sizeof(ctx->fragments.sizes)); } if (ctx->fragments.enabled && !(data == NULL && data_sz == 0)) { diff --git a/media/libvpx/vp8_rtcd_armv7-android-gcc.h b/media/libvpx/vp8_rtcd_armv7-android-gcc.h index 41f749b00d4..39aa0f3d02d 100644 --- a/media/libvpx/vp8_rtcd_armv7-android-gcc.h +++ b/media/libvpx/vp8_rtcd_armv7-android-gcc.h @@ -33,8 +33,7 @@ RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); void vp8_bilinear_predict4x4_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_armv6 void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); void vp8_bilinear_predict8x4_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); @@ -118,13 +117,6 @@ RTCD_EXTERN void (*vp8_fast_quantize_b)(struct block *, struct blockd *); int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union 
int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); - -unsigned int vp8_get_mb_ss_c(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_c - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_armv6 @@ -175,83 +167,12 @@ int vp8_mbblock_error_c(struct macroblock *mb, int dc); int vp8_mbuverror_c(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_c -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - int vp8_refining_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_refining_search_sad vp8_refining_search_sad_c void 
vp8_regular_quantize_b_c(struct block *, struct blockd *); #define vp8_regular_quantize_b vp8_regular_quantize_b_c -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_armv6(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp8_sad16x16x3 vp8_sad16x16x3_c - -void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp8_sad16x16x4d vp8_sad16x16x4d_c - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -#define vp8_sad16x16x8 vp8_sad16x16x8_c - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp8_sad16x8x3 vp8_sad16x8x3_c - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * 
const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp8_sad16x8x4d vp8_sad16x8x4d_c - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -#define vp8_sad16x8x8 vp8_sad16x8x8_c - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad4x4)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp8_sad4x4x3 vp8_sad4x4x3_c - -void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp8_sad4x4x4d vp8_sad4x4x4d_c - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -#define vp8_sad4x4x8 vp8_sad4x4x8_c - -unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad8x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp8_sad8x16x3 vp8_sad8x16x3_c - -void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * 
const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp8_sad8x16x4d vp8_sad8x16x4d_c - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -#define vp8_sad8x16x8 vp8_sad8x16x8_c - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp8_sad8x8x3 vp8_sad8x8x3_c - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp8_sad8x8x4d vp8_sad8x8x4d_c - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -#define vp8_sad8x8x8 vp8_sad8x8x8_c - void vp8_short_fdct4x4_c(short *input, short *output, int pitch); void vp8_short_fdct4x4_armv6(short *input, short *output, int pitch); void vp8_short_fdct4x4_neon(short *input, short *output, int pitch); @@ -299,9 +220,6 @@ void vp8_sixtap_predict8x8_armv6(unsigned char *src, int src_pitch, int xofst, i void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int 
Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_neon(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -318,8 +236,7 @@ unsigned int vp8_sub_pixel_variance8x16_c(const unsigned char *src_ptr, int so unsigned int vp8_sub_pixel_variance8x8_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_variance8x8_neon(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_sub_pixel_variance8x8)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); +#define vp8_sub_pixel_variance8x8 vp8_sub_pixel_variance8x8_armv6 void vp8_subtract_b_c(struct block *be, struct blockd *bd, int pitch); void vp8_subtract_b_neon(struct block *be, struct blockd *bd, int pitch); @@ -333,27 +250,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); RTCD_EXTERN void (*vp8_subtract_mby)(short *diff, unsigned char *src, int 
src_stride, unsigned char *pred, int pred_stride); -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_c - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_armv6(const unsigned char *src_ptr, 
int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -383,8 +279,6 @@ static void setup_rtcd_internal(void) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_armv6; if (flags & HAS_NEON) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_armv6; - if (flags & HAS_NEON) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_neon; vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_armv6; if (flags & HAS_NEON) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon; vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_armv6; @@ -415,8 +309,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp8_dequantize_b = vp8_dequantize_b_neon; vp8_fast_quantize_b = vp8_fast_quantize_b_c; if (flags & HAS_NEON) vp8_fast_quantize_b = vp8_fast_quantize_b_neon; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; - if (flags & HAS_NEON) vp8_get4x4sse_cs = vp8_get4x4sse_cs_neon; vp8_loop_filter_bh = vp8_loop_filter_bh_armv6; if (flags & HAS_NEON) vp8_loop_filter_bh = vp8_loop_filter_bh_neon; vp8_loop_filter_bv = vp8_loop_filter_bv_armv6; @@ -433,18 +325,6 @@ static void setup_rtcd_internal(void) if (flags & 
HAS_NEON) vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon; vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_armv6; if (flags & HAS_NEON) vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon; - vp8_mse16x16 = vp8_mse16x16_armv6; - if (flags & HAS_NEON) vp8_mse16x16 = vp8_mse16x16_neon; - vp8_sad16x16 = vp8_sad16x16_armv6; - if (flags & HAS_NEON) vp8_sad16x16 = vp8_sad16x16_neon; - vp8_sad16x8 = vp8_sad16x8_c; - if (flags & HAS_NEON) vp8_sad16x8 = vp8_sad16x8_neon; - vp8_sad4x4 = vp8_sad4x4_c; - if (flags & HAS_NEON) vp8_sad4x4 = vp8_sad4x4_neon; - vp8_sad8x16 = vp8_sad8x16_c; - if (flags & HAS_NEON) vp8_sad8x16 = vp8_sad8x16_neon; - vp8_sad8x8 = vp8_sad8x8_c; - if (flags & HAS_NEON) vp8_sad8x8 = vp8_sad8x8_neon; vp8_short_fdct4x4 = vp8_short_fdct4x4_armv6; if (flags & HAS_NEON) vp8_short_fdct4x4 = vp8_short_fdct4x4_neon; vp8_short_fdct8x4 = vp8_short_fdct8x4_armv6; @@ -463,22 +343,12 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon; vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_armv6; if (flags & HAS_NEON) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_neon; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_armv6; - if (flags & HAS_NEON) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_neon; vp8_subtract_b = vp8_subtract_b_c; if (flags & HAS_NEON) vp8_subtract_b = vp8_subtract_b_neon; vp8_subtract_mbuv = vp8_subtract_mbuv_c; if (flags & HAS_NEON) vp8_subtract_mbuv = vp8_subtract_mbuv_neon; vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_NEON) vp8_subtract_mby = vp8_subtract_mby_neon; - vp8_variance16x16 = vp8_variance16x16_armv6; - if (flags & HAS_NEON) vp8_variance16x16 = vp8_variance16x16_neon; - vp8_variance16x8 = vp8_variance16x8_c; - if (flags & HAS_NEON) vp8_variance16x8 = vp8_variance16x8_neon; - vp8_variance8x16 = vp8_variance8x16_c; - if (flags & HAS_NEON) vp8_variance8x16 = vp8_variance8x16_neon; - vp8_variance8x8 = 
vp8_variance8x8_armv6; - if (flags & HAS_NEON) vp8_variance8x8 = vp8_variance8x8_neon; vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6; if (flags & HAS_NEON) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon; vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6; diff --git a/media/libvpx/vp8_rtcd_generic-gnu.h b/media/libvpx/vp8_rtcd_generic-gnu.h index fbcc3662eb9..a2d8cb36744 100644 --- a/media/libvpx/vp8_rtcd_generic-gnu.h +++ b/media/libvpx/vp8_rtcd_generic-gnu.h @@ -89,12 +89,6 @@ void vp8_fast_quantize_b_c(struct block *, struct blockd *); int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c - -unsigned int vp8_get_mb_ss_c(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_c - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -128,75 +122,12 @@ int vp8_mbblock_error_c(struct macroblock *mb, int dc); int vp8_mbuverror_c(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_c -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_c - int vp8_refining_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_refining_search_sad vp8_refining_search_sad_c void 
vp8_regular_quantize_b_c(struct block *, struct blockd *); #define vp8_regular_quantize_b vp8_regular_quantize_b_c -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad16x16 vp8_sad16x16_c - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp8_sad16x16x3 vp8_sad16x16x3_c - -void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp8_sad16x16x4d vp8_sad16x16x4d_c - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -#define vp8_sad16x16x8 vp8_sad16x16x8_c - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad16x8 vp8_sad16x8_c - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp8_sad16x8x3 vp8_sad16x8x3_c - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp8_sad16x8x4d vp8_sad16x8x4d_c - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -#define vp8_sad16x8x8 vp8_sad16x8x8_c - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad4x4 vp8_sad4x4_c - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp8_sad4x4x3 vp8_sad4x4x3_c - -void vp8_sad4x4x4d_c(const unsigned char 
*src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp8_sad4x4x4d vp8_sad4x4x4d_c - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -#define vp8_sad4x4x8 vp8_sad4x4x8_c - -unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad8x16 vp8_sad8x16_c - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp8_sad8x16x3 vp8_sad8x16x3_c - -void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp8_sad8x16x4d vp8_sad8x16x4d_c - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -#define vp8_sad8x16x8 vp8_sad8x16x8_c - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad8x8 vp8_sad8x8_c - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp8_sad8x8x3 vp8_sad8x8x3_c - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp8_sad8x8x4d vp8_sad8x8x4d_c - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -#define vp8_sad8x8x8 vp8_sad8x8x8_c - void vp8_short_fdct4x4_c(short *input, short *output, int pitch); #define vp8_short_fdct4x4 vp8_short_fdct4x4_c @@ -227,9 +158,6 @@ void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, 
int y void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); #define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); #define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c @@ -257,21 +185,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); #define vp8_temporal_filter_apply vp8_temporal_filter_apply_c -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_c - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_c - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_c - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_c - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 
vp8_variance8x8_c - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c diff --git a/media/libvpx/vp8_rtcd_x86-darwin9-gcc.h b/media/libvpx/vp8_rtcd_x86-darwin9-gcc.h index a13b9e27162..5a0bf372ade 100644 --- a/media/libvpx/vp8_rtcd_x86-darwin9-gcc.h +++ b/media/libvpx/vp8_rtcd_x86-darwin9-gcc.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); RTCD_EXTERN void (*vp8_clear_system_state)(); -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union 
int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -RTCD_EXTERN unsigned int (*vp8_get_mb_ss)(const short *); - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb); -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char 
blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -241,94 +227,6 @@ void vp8_regular_quantize_b_sse2(struct block *, struct blockd *); void vp8_regular_quantize_b_sse4_1(struct block *, struct blockd *); RTCD_EXTERN void (*vp8_regular_quantize_b)(struct block *, struct blockd *); -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - 
-void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x3)(const unsigned char 
*src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad4x4)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int 
ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad4x4x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad8x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void 
vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, 
const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - void vp8_short_fdct4x4_c(short *input, short *output, int pitch); void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch); void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch); @@ -378,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -RTCD_EXTERN unsigned int 
(*vp8_sub_pixel_mse16x16)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -429,31 +322,6 @@ void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, uns void vp8_temporal_filter_apply_sse2(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); RTCD_EXTERN void (*vp8_temporal_filter_apply)(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, 
unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance4x4)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); 
-unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -540,11 +408,6 @@ static void setup_rtcd_internal(void) vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; - if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; - if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; - if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; @@ -580,9 +443,6 @@ static void setup_rtcd_internal(void) vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; - if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = 
vp8_plane_add_noise_wmt; @@ -593,54 +453,6 @@ static void setup_rtcd_internal(void) vp8_regular_quantize_b = vp8_regular_quantize_b_c; if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1; - vp8_sad16x16 = vp8_sad16x16_c; - if (flags & HAS_MMX) vp8_sad16x16 = vp8_sad16x16_mmx; - if (flags & HAS_SSE2) vp8_sad16x16 = vp8_sad16x16_wmt; - if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; - if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; - if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8 = vp8_sad16x8_c; - if (flags & HAS_MMX) vp8_sad16x8 = vp8_sad16x8_mmx; - if (flags & HAS_SSE2) vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad16x8x3 = vp8_sad16x8x3_c; - if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; - if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4 = vp8_sad4x4_c; - if (flags & HAS_MMX) vp8_sad4x4 = vp8_sad4x4_mmx; - if (flags & HAS_SSE2) vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_sad4x4x3 = vp8_sad4x4x3_c; - if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; - if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; - if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16 = vp8_sad8x16_c; - if (flags & HAS_MMX) vp8_sad8x16 = vp8_sad8x16_mmx; - if (flags & HAS_SSE2) vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x16x3 = vp8_sad8x16x3_c; - if (flags & HAS_SSE3) vp8_sad8x16x3 = 
vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; - if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8 = vp8_sad8x8_c; - if (flags & HAS_MMX) vp8_sad8x8 = vp8_sad8x8_mmx; - if (flags & HAS_SSE2) vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad8x8x3 = vp8_sad8x8x3_c; - if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; - if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MMX) vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2; @@ -669,9 +481,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; - if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; @@ -700,21 +509,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; vp8_temporal_filter_apply = vp8_temporal_filter_apply_c; if (flags & HAS_SSE2) vp8_temporal_filter_apply = vp8_temporal_filter_apply_sse2; - vp8_variance16x16 = vp8_variance16x16_c; - if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; - if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; - if 
(flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; - if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; - if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; - if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; - if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; - if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; - if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; - if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; diff --git a/media/libvpx/vp8_rtcd_x86-linux-gcc.h b/media/libvpx/vp8_rtcd_x86-linux-gcc.h index a13b9e27162..5a0bf372ade 100644 --- a/media/libvpx/vp8_rtcd_x86-linux-gcc.h +++ b/media/libvpx/vp8_rtcd_x86-linux-gcc.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); RTCD_EXTERN void (*vp8_clear_system_state)(); -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const 
unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -RTCD_EXTERN unsigned int (*vp8_get_mb_ss)(const short *); - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb); -unsigned int vp8_mse16x16_c(const unsigned char 
*src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -241,94 +227,6 @@ void vp8_regular_quantize_b_sse2(struct block *, struct blockd *); void vp8_regular_quantize_b_sse4_1(struct block *, struct blockd *); RTCD_EXTERN void (*vp8_regular_quantize_b)(struct block *, struct blockd *); -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, 
unsigned int max_sad); - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int 
(*vp8_sad16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_wmt(const unsigned char *src_ptr, int src_stride, const 
unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad4x4)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad4x4x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); 
-RTCD_EXTERN unsigned int (*vp8_sad8x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad8x8)(const unsigned char 
*src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - void vp8_short_fdct4x4_c(short *input, short *output, int pitch); void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch); void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch); @@ -378,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int 
dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_sub_pixel_mse16x16)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -429,31 +322,6 @@ void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, uns void vp8_temporal_filter_apply_sse2(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); RTCD_EXTERN void (*vp8_temporal_filter_apply)(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int 
vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance4x4)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int 
vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -540,11 +408,6 @@ static void setup_rtcd_internal(void) vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; - if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; - if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; - if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; 
if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; @@ -580,9 +443,6 @@ static void setup_rtcd_internal(void) vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; - if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; @@ -593,54 +453,6 @@ static void setup_rtcd_internal(void) vp8_regular_quantize_b = vp8_regular_quantize_b_c; if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1; - vp8_sad16x16 = vp8_sad16x16_c; - if (flags & HAS_MMX) vp8_sad16x16 = vp8_sad16x16_mmx; - if (flags & HAS_SSE2) vp8_sad16x16 = vp8_sad16x16_wmt; - if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; - if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; - if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8 = vp8_sad16x8_c; - if (flags & HAS_MMX) vp8_sad16x8 = vp8_sad16x8_mmx; - if (flags & HAS_SSE2) vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad16x8x3 = vp8_sad16x8x3_c; - if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; - if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4 = vp8_sad4x4_c; - if (flags & HAS_MMX) vp8_sad4x4 = 
vp8_sad4x4_mmx; - if (flags & HAS_SSE2) vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_sad4x4x3 = vp8_sad4x4x3_c; - if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; - if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; - if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16 = vp8_sad8x16_c; - if (flags & HAS_MMX) vp8_sad8x16 = vp8_sad8x16_mmx; - if (flags & HAS_SSE2) vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x16x3 = vp8_sad8x16x3_c; - if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; - if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8 = vp8_sad8x8_c; - if (flags & HAS_MMX) vp8_sad8x8 = vp8_sad8x8_mmx; - if (flags & HAS_SSE2) vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad8x8x3 = vp8_sad8x8x3_c; - if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; - if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MMX) vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2; @@ -669,9 +481,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; - if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if 
(flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; @@ -700,21 +509,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; vp8_temporal_filter_apply = vp8_temporal_filter_apply_c; if (flags & HAS_SSE2) vp8_temporal_filter_apply = vp8_temporal_filter_apply_sse2; - vp8_variance16x16 = vp8_variance16x16_c; - if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; - if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; - if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; - if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; - if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; - if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; - if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; - if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; - if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; - if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; diff --git a/media/libvpx/vp8_rtcd_x86-win32-gcc.h b/media/libvpx/vp8_rtcd_x86-win32-gcc.h index a13b9e27162..5a0bf372ade 100644 --- a/media/libvpx/vp8_rtcd_x86-win32-gcc.h +++ b/media/libvpx/vp8_rtcd_x86-win32-gcc.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); RTCD_EXTERN void (*vp8_clear_system_state)(); -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char 
*ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int 
vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -RTCD_EXTERN unsigned int (*vp8_get_mb_ss)(const short *); - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb); -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -241,94 +227,6 @@ void vp8_regular_quantize_b_sse2(struct block *, struct blockd *); void vp8_regular_quantize_b_sse4_1(struct block *, struct blockd *); RTCD_EXTERN void (*vp8_regular_quantize_b)(struct block *, struct blockd *); -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); 
-unsigned int vp8_sad16x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x8)(const unsigned char 
*src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int 
ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad4x4)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad4x4x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void 
(*vp8_sad4x4x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad8x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x8)(const unsigned char *src_ptr, int 
src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned 
short *sad_array); - void vp8_short_fdct4x4_c(short *input, short *output, int pitch); void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch); void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch); @@ -378,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_sub_pixel_mse16x16)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -429,31 +322,6 @@ void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, uns void 
vp8_temporal_filter_apply_sse2(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); RTCD_EXTERN void (*vp8_temporal_filter_apply)(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned 
char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance4x4)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -540,11 +408,6 @@ static void 
setup_rtcd_internal(void) vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; - if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; - if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; - if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; @@ -580,9 +443,6 @@ static void setup_rtcd_internal(void) vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; - if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; @@ -593,54 +453,6 @@ static void setup_rtcd_internal(void) vp8_regular_quantize_b = vp8_regular_quantize_b_c; if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1; - vp8_sad16x16 = vp8_sad16x16_c; - if (flags & HAS_MMX) vp8_sad16x16 = vp8_sad16x16_mmx; - if (flags & HAS_SSE2) vp8_sad16x16 = vp8_sad16x16_wmt; - if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; - if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; - if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x16x8 = 
vp8_sad16x16x8_sse4; - vp8_sad16x8 = vp8_sad16x8_c; - if (flags & HAS_MMX) vp8_sad16x8 = vp8_sad16x8_mmx; - if (flags & HAS_SSE2) vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad16x8x3 = vp8_sad16x8x3_c; - if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; - if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4 = vp8_sad4x4_c; - if (flags & HAS_MMX) vp8_sad4x4 = vp8_sad4x4_mmx; - if (flags & HAS_SSE2) vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_sad4x4x3 = vp8_sad4x4x3_c; - if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; - if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; - if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16 = vp8_sad8x16_c; - if (flags & HAS_MMX) vp8_sad8x16 = vp8_sad8x16_mmx; - if (flags & HAS_SSE2) vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x16x3 = vp8_sad8x16x3_c; - if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; - if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8 = vp8_sad8x8_c; - if (flags & HAS_MMX) vp8_sad8x8 = vp8_sad8x8_mmx; - if (flags & HAS_SSE2) vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad8x8x3 = vp8_sad8x8x3_c; - if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; - if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MMX) vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2; @@ -669,9 +481,6 @@ static void setup_rtcd_internal(void) 
if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; - if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; @@ -700,21 +509,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; vp8_temporal_filter_apply = vp8_temporal_filter_apply_c; if (flags & HAS_SSE2) vp8_temporal_filter_apply = vp8_temporal_filter_apply_sse2; - vp8_variance16x16 = vp8_variance16x16_c; - if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; - if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; - if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; - if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; - if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; - if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; - if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; - if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; - if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; - if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = 
vp8_variance_halfpixvar16x16_h_wmt; diff --git a/media/libvpx/vp8_rtcd_x86-win32-vs12.h b/media/libvpx/vp8_rtcd_x86-win32-vs12.h index a13b9e27162..5a0bf372ade 100644 --- a/media/libvpx/vp8_rtcd_x86-win32-vs12.h +++ b/media/libvpx/vp8_rtcd_x86-win32-vs12.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); RTCD_EXTERN void (*vp8_clear_system_state)(); -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union 
int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -RTCD_EXTERN unsigned int (*vp8_get_mb_ss)(const short *); - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb); -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], 
unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -241,94 +227,6 @@ void vp8_regular_quantize_b_sse2(struct block *, struct blockd *); void vp8_regular_quantize_b_sse4_1(struct block *, struct blockd *); RTCD_EXTERN void (*vp8_regular_quantize_b)(struct block *, struct blockd *); -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x4d_sse3(const unsigned char *src_ptr, int src_stride, 
const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned 
int *sad_array); -void vp8_sad16x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad4x4)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x4d_sse3(const unsigned char 
*src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad4x4x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad8x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const 
ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN 
void (*vp8_sad8x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - void vp8_short_fdct4x4_c(short *input, short *output, int pitch); void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch); void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch); @@ -378,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_sub_pixel_mse16x16)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, 
int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -429,31 +322,6 @@ void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, uns void vp8_temporal_filter_apply_sse2(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); RTCD_EXTERN void (*vp8_temporal_filter_apply)(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned 
char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance4x4)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char 
*ref_ptr, int ref_stride, unsigned int *sse); - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -540,11 +408,6 @@ static void setup_rtcd_internal(void) vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; - if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; - if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; - if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; @@ -580,9 +443,6 @@ static void setup_rtcd_internal(void) vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; - if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; @@ -593,54 +453,6 @@ static void setup_rtcd_internal(void) vp8_regular_quantize_b = vp8_regular_quantize_b_c; if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; if (flags & HAS_SSE4_1) vp8_regular_quantize_b = 
vp8_regular_quantize_b_sse4_1; - vp8_sad16x16 = vp8_sad16x16_c; - if (flags & HAS_MMX) vp8_sad16x16 = vp8_sad16x16_mmx; - if (flags & HAS_SSE2) vp8_sad16x16 = vp8_sad16x16_wmt; - if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; - if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; - if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8 = vp8_sad16x8_c; - if (flags & HAS_MMX) vp8_sad16x8 = vp8_sad16x8_mmx; - if (flags & HAS_SSE2) vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad16x8x3 = vp8_sad16x8x3_c; - if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; - if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4 = vp8_sad4x4_c; - if (flags & HAS_MMX) vp8_sad4x4 = vp8_sad4x4_mmx; - if (flags & HAS_SSE2) vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_sad4x4x3 = vp8_sad4x4x3_c; - if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; - if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; - if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16 = vp8_sad8x16_c; - if (flags & HAS_MMX) vp8_sad8x16 = vp8_sad8x16_mmx; - if (flags & HAS_SSE2) vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x16x3 = vp8_sad8x16x3_c; - if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; - if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8 = vp8_sad8x8_c; - if (flags & HAS_MMX) 
vp8_sad8x8 = vp8_sad8x8_mmx; - if (flags & HAS_SSE2) vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad8x8x3 = vp8_sad8x8x3_c; - if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; - if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MMX) vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2; @@ -669,9 +481,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; - if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; @@ -700,21 +509,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; vp8_temporal_filter_apply = vp8_temporal_filter_apply_c; if (flags & HAS_SSE2) vp8_temporal_filter_apply = vp8_temporal_filter_apply_sse2; - vp8_variance16x16 = vp8_variance16x16_c; - if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; - if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; - if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; - if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; - if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; - if (flags & HAS_SSE2) vp8_variance4x4 = 
vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; - if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; - if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; - if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; - if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; diff --git a/media/libvpx/vp8_rtcd_x86_64-darwin9-gcc.h b/media/libvpx/vp8_rtcd_x86_64-darwin9-gcc.h index 6add5de8f39..985115bf1c5 100644 --- a/media/libvpx/vp8_rtcd_x86_64-darwin9-gcc.h +++ b/media/libvpx/vp8_rtcd_x86_64-darwin9-gcc.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); #define vp8_clear_system_state vpx_reset_mmx_state -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void 
vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_mmx - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_sse2 - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_xmm -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int 
ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_wmt - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -241,94 +227,6 @@ void vp8_regular_quantize_b_sse2(struct block *, struct blockd *); void vp8_regular_quantize_b_sse4_1(struct block *, struct blockd *); RTCD_EXTERN void (*vp8_regular_quantize_b)(struct block *, struct blockd *); -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void 
(*vp8_sad16x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad16x8 vp8_sad16x8_wmt - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x3)(const 
unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad4x4 vp8_sad4x4_wmt - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, 
const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad4x4x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad8x16 vp8_sad8x16_wmt - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const 
ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad8x8 vp8_sad8x8_wmt - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int 
ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - void vp8_short_fdct4x4_c(short *input, short *output, int pitch); void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch); void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch); @@ -378,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_wmt - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, 
unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -429,31 +322,6 @@ void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, uns void vp8_temporal_filter_apply_sse2(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); #define vp8_temporal_filter_apply vp8_temporal_filter_apply_sse2 -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_wmt - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_wmt - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int 
ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_wmt - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_wmt - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_wmt - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -500,40 +368,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1; - vp8_sad16x16 = vp8_sad16x16_wmt; - if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; - if (flags & HAS_SSE3) vp8_sad16x16x3 = 
vp8_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; - if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8x3 = vp8_sad16x8x3_c; - if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; - if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4x3 = vp8_sad4x4x3_c; - if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; - if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; - if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16x3 = vp8_sad8x16x3_c; - if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; - if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8x3 = vp8_sad8x8x3_c; - if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; - if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; diff --git a/media/libvpx/vp8_rtcd_x86_64-linux-gcc.h b/media/libvpx/vp8_rtcd_x86_64-linux-gcc.h index 6add5de8f39..985115bf1c5 100644 --- a/media/libvpx/vp8_rtcd_x86_64-linux-gcc.h +++ b/media/libvpx/vp8_rtcd_x86_64-linux-gcc.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); #define 
vp8_clear_system_state vpx_reset_mmx_state -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const 
unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_mmx - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_sse2 - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_xmm -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_wmt - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -241,94 +227,6 @@ void vp8_regular_quantize_b_sse2(struct block *, struct blockd *); void vp8_regular_quantize_b_sse4_1(struct block *, struct blockd *); RTCD_EXTERN void (*vp8_regular_quantize_b)(struct block *, struct blockd *); -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); 
-unsigned int vp8_sad16x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x8)(const unsigned char 
*src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad16x8 vp8_sad16x8_wmt - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x8)(const unsigned char *src_ptr, int src_stride, const 
unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad4x4 vp8_sad4x4_wmt - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad4x4x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, 
unsigned int max_sad); -unsigned int vp8_sad8x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad8x16 vp8_sad8x16_wmt - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned 
int vp8_sad8x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad8x8 vp8_sad8x8_wmt - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - void vp8_short_fdct4x4_c(short *input, short *output, int pitch); void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch); void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch); @@ -378,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char 
*src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_wmt - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -429,31 +322,6 @@ void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, uns void vp8_temporal_filter_apply_sse2(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); #define vp8_temporal_filter_apply vp8_temporal_filter_apply_sse2 -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, 
const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_wmt - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_wmt - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_wmt - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_wmt - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned 
char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_wmt - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -500,40 +368,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1; - vp8_sad16x16 = vp8_sad16x16_wmt; - if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; - if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; - if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8x3 = vp8_sad16x8x3_c; - if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; - if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4x3 = vp8_sad4x4x3_c; - if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; - if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; - if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16x3 = vp8_sad8x16x3_c; - if (flags & 
HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; - if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8x3 = vp8_sad8x8x3_c; - if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; - if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; diff --git a/media/libvpx/vp8_rtcd_x86_64-win64-gcc.h b/media/libvpx/vp8_rtcd_x86_64-win64-gcc.h index 6add5de8f39..985115bf1c5 100644 --- a/media/libvpx/vp8_rtcd_x86_64-win64-gcc.h +++ b/media/libvpx/vp8_rtcd_x86_64-win64-gcc.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); #define vp8_clear_system_state vpx_reset_mmx_state -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char 
*src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_mmx - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_sse2 - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_xmm -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const 
unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_wmt - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -241,94 +227,6 @@ void vp8_regular_quantize_b_sse2(struct block *, struct blockd *); void vp8_regular_quantize_b_sse4_1(struct block *, struct blockd *); RTCD_EXTERN void (*vp8_regular_quantize_b)(struct block *, struct blockd *); -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char 
*ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad16x8 vp8_sad16x8_wmt - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, 
unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad4x4 vp8_sad4x4_wmt - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - 
-void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad4x4x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad8x16 vp8_sad8x16_wmt - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x4d_sse3(const 
unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad8x8 vp8_sad8x8_wmt - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x4d)(const unsigned char *src_ptr, 
int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - void vp8_short_fdct4x4_c(short *input, short *output, int pitch); void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch); void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch); @@ -378,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_wmt - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, 
int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -429,31 +322,6 @@ void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, uns void vp8_temporal_filter_apply_sse2(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); #define vp8_temporal_filter_apply vp8_temporal_filter_apply_sse2 -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_wmt - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_wmt - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char 
*src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_wmt - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_wmt - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_wmt - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -500,40 +368,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1; - vp8_sad16x16 = vp8_sad16x16_wmt; - if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = 
vp8_sad16x16x3_c; - if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; - if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8x3 = vp8_sad16x8x3_c; - if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; - if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4x3 = vp8_sad4x4x3_c; - if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; - if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; - if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16x3 = vp8_sad8x16x3_c; - if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; - if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8x3 = vp8_sad8x8x3_c; - if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; - if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; diff --git a/media/libvpx/vp8_rtcd_x86_64-win64-vs12.h b/media/libvpx/vp8_rtcd_x86_64-win64-vs12.h index 6add5de8f39..985115bf1c5 100644 --- a/media/libvpx/vp8_rtcd_x86_64-win64-vs12.h +++ b/media/libvpx/vp8_rtcd_x86_64-win64-vs12.h @@ -74,10 +74,10 @@ void 
vp8_clear_system_state_c(); void vpx_reset_mmx_state(); #define vp8_clear_system_state vpx_reset_mmx_state -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int 
vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_mmx - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_sse2 - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_xmm -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_wmt - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -241,94 +227,6 @@ void vp8_regular_quantize_b_sse2(struct block *, struct blockd *); void vp8_regular_quantize_b_sse4_1(struct block *, struct blockd *); RTCD_EXTERN void (*vp8_regular_quantize_b)(struct block *, struct blockd *); -unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int 
src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x16_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -RTCD_EXTERN unsigned int (*vp8_sad16x16)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); - -void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, 
unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad16x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad16x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad16x8 vp8_sad16x8_wmt - -void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x3_ssse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad16x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad16x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad16x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); 
-RTCD_EXTERN void (*vp8_sad16x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad4x4_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad4x4 vp8_sad4x4_wmt - -void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad4x4x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad4x4x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad4x4x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x16_c(const 
unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_mmx(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x16_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad8x16 vp8_sad8x16_wmt - -void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x16x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x16x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x16x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - -unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_mmx(const unsigned char *src_ptr, int 
src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -unsigned int vp8_sad8x8_wmt(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); -#define vp8_sad8x8 vp8_sad8x8_wmt - -void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x3_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x3)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp8_sad8x8x4d_sse3(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x4d)(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -void vp8_sad8x8x8_sse4(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); -RTCD_EXTERN void (*vp8_sad8x8x8)(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); - void vp8_short_fdct4x4_c(short *input, short *output, int pitch); void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch); void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch); @@ -378,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, 
unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_wmt - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -429,31 +322,6 @@ void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, uns void vp8_temporal_filter_apply_sse2(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); #define vp8_temporal_filter_apply vp8_temporal_filter_apply_sse2 -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int 
*sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_wmt - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_wmt - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_wmt - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_wmt - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); 
-unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_wmt - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -500,40 +368,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1; - vp8_sad16x16 = vp8_sad16x16_wmt; - if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; - if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; - if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8x3 = vp8_sad16x8x3_c; - if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; - if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4x3 = vp8_sad4x4x3_c; - if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; - if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; - if (flags & 
HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16x3 = vp8_sad8x16x3_c; - if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; - if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8x3 = vp8_sad8x8x3_c; - if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; - if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; - if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; diff --git a/media/libvpx/vp9/common/arm/neon/vp9_avg_neon.c b/media/libvpx/vp9/common/arm/neon/vp9_convolve_avg_neon.c similarity index 100% rename from media/libvpx/vp9/common/arm/neon/vp9_avg_neon.c rename to media/libvpx/vp9/common/arm/neon/vp9_convolve_avg_neon.c diff --git a/media/libvpx/vp9/common/arm/neon/vp9_avg_neon_asm.asm b/media/libvpx/vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm similarity index 100% rename from media/libvpx/vp9/common/arm/neon/vp9_avg_neon_asm.asm rename to media/libvpx/vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm diff --git a/media/libvpx/vp9/common/arm/neon/vp9_convolve_neon.c b/media/libvpx/vp9/common/arm/neon/vp9_convolve_neon.c index f0881b5ae9c..2e28cb20ebd 100644 --- a/media/libvpx/vp9/common/arm/neon/vp9_convolve_neon.c +++ b/media/libvpx/vp9/common/arm/neon/vp9_convolve_neon.c @@ -20,7 +20,7 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4). 
*/ - DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72); + DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]); // Account for the vertical phase needing 3 lines prior and 4 lines post int intermediate_height = h + 7; @@ -56,7 +56,7 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72); + DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]); int intermediate_height = h + 7; if (x_step_q4 != 16 || y_step_q4 != 16) { diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c b/media/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c index 3c8c6a9348d..0233877dd38 100644 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c +++ b/media/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c @@ -9,6 +9,8 @@ */ #include + +#include "vpx_ports/mem.h" #include "vp9/common/vp9_idct.h" void vp9_idct16x16_1_add_neon( diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c b/media/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c index d0e4b4f4014..0ce45f2bfa8 100644 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c +++ b/media/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c @@ -9,10 +9,12 @@ */ #include -#include "vp9/common/vp9_idct.h" #include "./vpx_config.h" +#include "vpx_ports/mem.h" +#include "vp9/common/vp9_idct.h" + static INLINE void LD_16x8( uint8_t *d, int d_stride, diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c b/media/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c index 7c8a930b645..f0457358e6c 100644 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c +++ b/media/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c @@ -9,6 +9,8 @@ */ #include + +#include "vpx_ports/mem.h" #include "vp9/common/vp9_idct.h" void vp9_idct4x4_1_add_neon( diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c 
b/media/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c index 24c29fb77f6..5369697c7d1 100644 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c +++ b/media/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c @@ -9,6 +9,8 @@ */ #include + +#include "vpx_ports/mem.h" #include "vp9/common/vp9_idct.h" void vp9_idct8x8_1_add_neon( diff --git a/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c b/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c index d0beaa7208f..499c42ac3ac 100644 --- a/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c +++ b/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c @@ -8,466 +8,571 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include #include -void vp9_v_predictor_4x4_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +//------------------------------------------------------------------------------ +// DC 8x8 + +// 'do_above' and 'do_left' facilitate branch removal when inlined. 
+static INLINE void dc_8x8(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left, + int do_above, int do_left) { + uint16x8_t sum_top; + uint16x8_t sum_left; + uint8x8_t dc0; + + if (do_above) { + const uint8x8_t A = vld1_u8(above); // top row + const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top + const uint16x4_t p1 = vpadd_u16(p0, p0); + const uint16x4_t p2 = vpadd_u16(p1, p1); + sum_top = vcombine_u16(p2, p2); + } + + if (do_left) { + const uint8x8_t L = vld1_u8(left); // left border + const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left + const uint16x4_t p1 = vpadd_u16(p0, p0); + const uint16x4_t p2 = vpadd_u16(p1, p1); + sum_left = vcombine_u16(p2, p2); + } + + if (do_above && do_left) { + const uint16x8_t sum = vaddq_u16(sum_left, sum_top); + dc0 = vrshrn_n_u16(sum, 4); + } else if (do_above) { + dc0 = vrshrn_n_u16(sum_top, 3); + } else if (do_left) { + dc0 = vrshrn_n_u16(sum_left, 3); + } else { + dc0 = vdup_n_u8(0x80); + } + + { + const uint8x8_t dc = vdup_lane_u8(dc0, 0); int i; - uint32x2_t d0u32 = vdup_n_u32(0); - (void)left; - - d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0); - for (i = 0; i < 4; i++, dst += y_stride) - vst1_lane_u32((uint32_t *)dst, d0u32, 0); - return; -} - -void vp9_v_predictor_8x8_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int i; - uint8x8_t d0u8 = vdup_n_u8(0); - (void)left; - - d0u8 = vld1_u8(above); - for (i = 0; i < 8; i++, dst += y_stride) - vst1_u8(dst, d0u8); - return; -} - -void vp9_v_predictor_16x16_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int i; - uint8x16_t q0u8 = vdupq_n_u8(0); - (void)left; - - q0u8 = vld1q_u8(above); - for (i = 0; i < 16; i++, dst += y_stride) - vst1q_u8(dst, q0u8); - return; -} - -void vp9_v_predictor_32x32_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int i; - uint8x16_t q0u8 = 
vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)left; - - q0u8 = vld1q_u8(above); - q1u8 = vld1q_u8(above + 16); - for (i = 0; i < 32; i++, dst += y_stride) { - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); + for (i = 0; i < 8; ++i) { + vst1_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc)); } - return; + } } -void vp9_h_predictor_4x4_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - uint8x8_t d0u8 = vdup_n_u8(0); - uint32x2_t d1u32 = vdup_n_u32(0); - (void)above; - - d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0); - - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - return; +void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_8x8(dst, stride, above, left, 1, 1); } -void vp9_h_predictor_8x8_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - uint8x8_t d0u8 = vdup_n_u8(0); - uint64x1_t d1u64 = vdup_n_u64(0); - (void)above; - - d1u64 = vld1_u64((const uint64_t *)left); - - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4); - vst1_u8(dst, 
d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7); - vst1_u8(dst, d0u8); - return; +void vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + dc_8x8(dst, stride, NULL, left, 0, 1); } -void vp9_h_predictor_16x16_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int j; - uint8x8_t d2u8 = vdup_n_u8(0); - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)above; +void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + dc_8x8(dst, stride, above, NULL, 1, 0); +} +void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + (void)left; + dc_8x8(dst, stride, NULL, NULL, 0, 0); +} + +//------------------------------------------------------------------------------ +// DC 16x16 + +// 'do_above' and 'do_left' facilitate branch removal when inlined. 
+static INLINE void dc_16x16(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left, + int do_above, int do_left) { + uint16x8_t sum_top; + uint16x8_t sum_left; + uint8x8_t dc0; + + if (do_above) { + const uint8x16_t A = vld1q_u8(above); // top row + const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top + const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); + const uint16x4_t p2 = vpadd_u16(p1, p1); + const uint16x4_t p3 = vpadd_u16(p2, p2); + sum_top = vcombine_u16(p3, p3); + } + + if (do_left) { + const uint8x16_t L = vld1q_u8(left); // left row + const uint16x8_t p0 = vpaddlq_u8(L); // cascading summation of the left + const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); + const uint16x4_t p2 = vpadd_u16(p1, p1); + const uint16x4_t p3 = vpadd_u16(p2, p2); + sum_left = vcombine_u16(p3, p3); + } + + if (do_above && do_left) { + const uint16x8_t sum = vaddq_u16(sum_left, sum_top); + dc0 = vrshrn_n_u16(sum, 5); + } else if (do_above) { + dc0 = vrshrn_n_u16(sum_top, 4); + } else if (do_left) { + dc0 = vrshrn_n_u16(sum_left, 4); + } else { + dc0 = vdup_n_u8(0x80); + } + + { + const uint8x16_t dc = vdupq_lane_u8(dc0, 0); + int i; + for (i = 0; i < 16; ++i) { + vst1q_u8(dst + i * stride, dc); + } + } +} + +void vp9_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_16x16(dst, stride, above, left, 1, 1); +} + +void vp9_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + dc_16x16(dst, stride, NULL, left, 0, 1); +} + +void vp9_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)left; + dc_16x16(dst, stride, above, NULL, 1, 0); +} + +void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + (void)left; + dc_16x16(dst, stride, NULL, 
NULL, 0, 0); +} + +#if !HAVE_NEON_ASM + +void vp9_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint32x2_t d0u32 = vdup_n_u32(0); + (void)left; + + d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0); + for (i = 0; i < 4; i++, dst += stride) + vst1_lane_u32((uint32_t *)dst, d0u32, 0); +} + +void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint8x8_t d0u8 = vdup_n_u8(0); + (void)left; + + d0u8 = vld1_u8(above); + for (i = 0; i < 8; i++, dst += stride) + vst1_u8(dst, d0u8); +} + +void vp9_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint8x16_t q0u8 = vdupq_n_u8(0); + (void)left; + + q0u8 = vld1q_u8(above); + for (i = 0; i < 16; i++, dst += stride) + vst1q_u8(dst, q0u8); +} + +void vp9_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint8x16_t q0u8 = vdupq_n_u8(0); + uint8x16_t q1u8 = vdupq_n_u8(0); + (void)left; + + q0u8 = vld1q_u8(above); + q1u8 = vld1q_u8(above + 16); + for (i = 0; i < 32; i++, dst += stride) { + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q1u8); + } +} + +void vp9_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + uint8x8_t d0u8 = vdup_n_u8(0); + uint32x2_t d1u32 = vdup_n_u32(0); + (void)above; + + d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0); + + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3); + vst1_lane_u32((uint32_t 
*)dst, vreinterpret_u32_u8(d0u8), 0); +} + +void vp9_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + uint8x8_t d0u8 = vdup_n_u8(0); + uint64x1_t d1u64 = vdup_n_u64(0); + (void)above; + + d1u64 = vld1_u64((const uint64_t *)left); + + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7); + vst1_u8(dst, d0u8); +} + +void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j; + uint8x8_t d2u8 = vdup_n_u8(0); + uint8x16_t q0u8 = vdupq_n_u8(0); + uint8x16_t q1u8 = vdupq_n_u8(0); + (void)above; + + q1u8 = vld1q_u8(left); + d2u8 = vget_low_u8(q1u8); + for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { + q0u8 = vdupq_lane_u8(d2u8, 0); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 1); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 2); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 3); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 4); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 5); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 6); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 7); + vst1q_u8(dst, q0u8); + dst += stride; + } +} + +void vp9_h_predictor_32x32_neon(uint8_t 
*dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j, k; + uint8x8_t d2u8 = vdup_n_u8(0); + uint8x16_t q0u8 = vdupq_n_u8(0); + uint8x16_t q1u8 = vdupq_n_u8(0); + (void)above; + + for (k = 0; k < 2; k++, left += 16) { q1u8 = vld1q_u8(left); d2u8 = vget_low_u8(q1u8); for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { - q0u8 = vdupq_lane_u8(d2u8, 0); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 1); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 2); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 3); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 4); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 5); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 6); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 7); - vst1q_u8(dst, q0u8); - dst += y_stride; + q0u8 = vdupq_lane_u8(d2u8, 0); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 1); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 2); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 3); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 4); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 5); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 6); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 7); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; } - return; + } } -void vp9_h_predictor_32x32_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int j, k; - uint8x8_t d2u8 = vdup_n_u8(0); - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = 
vdupq_n_u8(0); - (void)above; +void vp9_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint16x8_t q1u16, q3u16; + int16x8_t q1s16; + uint8x8_t d0u8 = vdup_n_u8(0); + uint32x2_t d2u32 = vdup_n_u32(0); - for (k = 0; k < 2; k++, left += 16) { - q1u8 = vld1q_u8(left); - d2u8 = vget_low_u8(q1u8); - for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { - q0u8 = vdupq_lane_u8(d2u8, 0); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 1); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 2); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 3); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 4); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 5); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 6); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 7); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - } - } - return; + d0u8 = vld1_dup_u8(above - 1); + d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0); + q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8); + for (i = 0; i < 4; i++, dst += stride) { + q1u16 = vdupq_n_u16((uint16_t)left[i]); + q1s16 = vaddq_s16(vreinterpretq_s16_u16(q1u16), + vreinterpretq_s16_u16(q3u16)); + d0u8 = vqmovun_s16(q1s16); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + } } -void vp9_tm_predictor_4x4_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int i; - uint16x8_t q1u16, q3u16; - int16x8_t q1s16; - uint8x8_t d0u8 = vdup_n_u8(0); - uint32x2_t d2u32 = vdup_n_u32(0); +void vp9_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t 
*above, const uint8_t *left) { + int j; + uint16x8_t q0u16, q3u16, q10u16; + int16x8_t q0s16; + uint16x4_t d20u16; + uint8x8_t d0u8, d2u8, d30u8; - d0u8 = vdup_n_u8(above[-1]); - d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0); - q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8); - for (i = 0; i < 4; i++, dst += y_stride) { - q1u16 = vdupq_n_u16((uint16_t)left[i]); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q1u16), - vreinterpretq_s16_u16(q3u16)); - d0u8 = vqmovun_s16(q1s16); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - } - return; + d0u8 = vld1_dup_u8(above - 1); + d30u8 = vld1_u8(left); + d2u8 = vld1_u8(above); + q10u16 = vmovl_u8(d30u8); + q3u16 = vsubl_u8(d2u8, d0u8); + d20u16 = vget_low_u16(q10u16); + for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { + q0u16 = vdupq_lane_u16(d20u16, 0); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + q0u16 = vdupq_lane_u16(d20u16, 1); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + q0u16 = vdupq_lane_u16(d20u16, 2); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + q0u16 = vdupq_lane_u16(d20u16, 3); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + } } -void vp9_tm_predictor_8x8_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int j; - uint16x8_t q0u16, q3u16, q10u16; - int16x8_t q0s16; - uint16x4_t d20u16; - uint8x8_t d0u8, d2u8, d30u8; +void vp9_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, 
+ const uint8_t *above, const uint8_t *left) { + int j, k; + uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16; + uint8x16_t q0u8, q1u8; + int16x8_t q0s16, q1s16, q8s16, q11s16; + uint16x4_t d20u16; + uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8; - d0u8 = vdup_n_u8(above[-1]); - d30u8 = vld1_u8(left); - d2u8 = vld1_u8(above); - q10u16 = vmovl_u8(d30u8); - q3u16 = vsubl_u8(d2u8, d0u8); + q0u8 = vld1q_dup_u8(above - 1); + q1u8 = vld1q_u8(above); + q2u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); + q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); + for (k = 0; k < 2; k++, left += 8) { + d18u8 = vld1_u8(left); + q10u16 = vmovl_u8(d18u8); d20u16 = vget_low_u16(q10u16); for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { - q0u16 = vdupq_lane_u16(d20u16, 0); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += y_stride; - q0u16 = vdupq_lane_u16(d20u16, 1); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += y_stride; - q0u16 = vdupq_lane_u16(d20u16, 2); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += y_stride; - q0u16 = vdupq_lane_u16(d20u16, 3); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += y_stride; + q0u16 = vdupq_lane_u16(d20u16, 0); + q8u16 = vdupq_lane_u16(d20u16, 1); + q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q2u16)); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q3u16)); + q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q2u16)); + q8s16 = 
vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q3u16)); + d2u8 = vqmovun_s16(q1s16); + d3u8 = vqmovun_s16(q0s16); + d22u8 = vqmovun_s16(q11s16); + d23u8 = vqmovun_s16(q8s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); + dst += stride; + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d20u16, 2); + q8u16 = vdupq_lane_u16(d20u16, 3); + q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q2u16)); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q3u16)); + q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q2u16)); + q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q3u16)); + d2u8 = vqmovun_s16(q1s16); + d3u8 = vqmovun_s16(q0s16); + d22u8 = vqmovun_s16(q11s16); + d23u8 = vqmovun_s16(q8s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); + dst += stride; + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); + dst += stride; } - return; + } } -void vp9_tm_predictor_16x16_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int j, k; - uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16; - uint8x16_t q0u8, q1u8; - int16x8_t q0s16, q1s16, q8s16, q11s16; - uint16x4_t d20u16; - uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8; +void vp9_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j, k; + uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16; + uint8x16_t q0u8, q1u8, q2u8; + int16x8_t q12s16, q13s16, q14s16, q15s16; + uint16x4_t d6u16; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8; - q0u8 = vdupq_n_u8(above[-1]); - q1u8 = vld1q_u8(above); - q2u16 = 
vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); - q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); - for (k = 0; k < 2; k++, left += 8) { - d18u8 = vld1_u8(left); - q10u16 = vmovl_u8(d18u8); - d20u16 = vget_low_u16(q10u16); - for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { - q0u16 = vdupq_lane_u16(d20u16, 0); - q8u16 = vdupq_lane_u16(d20u16, 1); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q2u16)); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q3u16)); - q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q2u16)); - q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q3u16)); - d2u8 = vqmovun_s16(q1s16); - d3u8 = vqmovun_s16(q0s16); - d22u8 = vqmovun_s16(q11s16); - d23u8 = vqmovun_s16(q8s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); - dst += y_stride; - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); - dst += y_stride; + q0u8 = vld1q_dup_u8(above - 1); + q1u8 = vld1q_u8(above); + q2u8 = vld1q_u8(above + 16); + q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); + q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); + q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8)); + q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8)); + for (k = 0; k < 4; k++, left += 8) { + d26u8 = vld1_u8(left); + q3u16 = vmovl_u8(d26u8); + d6u16 = vget_low_u16(q3u16); + for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) { + q0u16 = vdupq_lane_u16(d6u16, 0); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = 
vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; - q0u16 = vdupq_lane_u16(d20u16, 2); - q8u16 = vdupq_lane_u16(d20u16, 3); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q2u16)); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q3u16)); - q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q2u16)); - q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q3u16)); - d2u8 = vqmovun_s16(q1s16); - d3u8 = vqmovun_s16(q0s16); - d22u8 = vqmovun_s16(q11s16); - d23u8 = vqmovun_s16(q8s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); - dst += y_stride; - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); - dst += y_stride; - } + q0u16 = vdupq_lane_u16(d6u16, 1); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d6u16, 2); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), 
+ vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d6u16, 3); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; } - return; -} - -void vp9_tm_predictor_32x32_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int j, k; - uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16; - uint8x16_t q0u8, q1u8, q2u8; - int16x8_t q12s16, q13s16, q14s16, q15s16; - uint16x4_t d6u16; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8; - - q0u8 = vdupq_n_u8(above[-1]); - q1u8 = vld1q_u8(above); - q2u8 = vld1q_u8(above + 16); - q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); - q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); - q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8)); - q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8)); - for (k = 0; k < 4; k++, left += 8) { - d26u8 = vld1_u8(left); - q3u16 = 
vmovl_u8(d26u8); - d6u16 = vget_low_u16(q3u16); - for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) { - q0u16 = vdupq_lane_u16(d6u16, 0); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += y_stride; - - q0u16 = vdupq_lane_u16(d6u16, 1); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += y_stride; - - q0u16 = vdupq_lane_u16(d6u16, 2); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = 
vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += y_stride; - - q0u16 = vdupq_lane_u16(d6u16, 3); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += y_stride; - } - } - return; + } } +#endif // !HAVE_NEON_ASM diff --git a/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm b/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm index dc9856fa887..14f574a50e1 100644 --- a/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm +++ b/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm @@ -298,8 +298,7 @@ loop_h |vp9_tm_predictor_4x4_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 d0, r12 + vld1.u8 {d0[]}, [r12] ; Load above 4 pixels vld1.32 {d2[0]}, [r2] @@ -309,10 +308,10 @@ loop_h ; Load left row by row and compute left + (above - ytop_left) ; 1st row and 2nd row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3]! 
+ vmovl.u8 q1, d2 + vmovl.u8 q2, d4 vadd.s16 q1, q1, q3 vadd.s16 q2, q2, q3 vqmovun.s16 d0, q1 @@ -321,10 +320,10 @@ loop_h vst1.32 {d1[0]}, [r0], r1 ; 3rd row and 4th row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3] + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 vadd.s16 q1, q1, q3 vadd.s16 q2, q2, q3 vqmovun.s16 d0, q1 @@ -345,8 +344,7 @@ loop_h |vp9_tm_predictor_8x8_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 d0, r12 + vld1.8 {d0[]}, [r12] ; preload 8 left vld1.8 {d30}, [r3] @@ -418,8 +416,7 @@ loop_h |vp9_tm_predictor_16x16_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 q0, r12 + vld1.8 {d0[]}, [r12] ; Load above 8 pixels vld1.8 {q1}, [r2] @@ -429,7 +426,7 @@ loop_h ; Compute above - ytop_left vsubl.u8 q2, d2, d0 - vsubl.u8 q3, d3, d1 + vsubl.u8 q3, d3, d0 vmovl.u8 q10, d18 @@ -512,8 +509,7 @@ loop_16x16_neon |vp9_tm_predictor_32x32_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 q0, r12 + vld1.8 {d0[]}, [r12] ; Load above 32 pixels vld1.8 {q1}, [r2]! 
@@ -524,9 +520,9 @@ loop_16x16_neon ; Compute above - ytop_left vsubl.u8 q8, d2, d0 - vsubl.u8 q9, d3, d1 + vsubl.u8 q9, d3, d0 vsubl.u8 q10, d4, d0 - vsubl.u8 q11, d5, d1 + vsubl.u8 q11, d5, d0 vmovl.u8 q3, d26 diff --git a/media/libvpx/vp9/common/vp9_alloccommon.c b/media/libvpx/vp9/common/vp9_alloccommon.c index e209788c3d0..8eda491de93 100644 --- a/media/libvpx/vp9/common/vp9_alloccommon.c +++ b/media/libvpx/vp9/common/vp9_alloccommon.c @@ -11,6 +11,7 @@ #include "./vpx_config.h" #include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_entropymv.h" @@ -56,6 +57,7 @@ static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { if (cm->seg_map_array[i] == NULL) return 1; } + cm->seg_map_alloc_size = seg_map_size; // Init the index. cm->seg_map_idx = 0; @@ -83,8 +85,7 @@ static void free_seg_map(VP9_COMMON *cm) { } } -void vp9_free_ref_frame_buffers(VP9_COMMON *cm) { - BufferPool *const pool = cm->buffer_pool; +void vp9_free_ref_frame_buffers(BufferPool *pool) { int i; for (i = 0; i < FRAME_BUFFERS; ++i) { @@ -97,10 +98,14 @@ void vp9_free_ref_frame_buffers(VP9_COMMON *cm) { pool->frame_bufs[i].mvs = NULL; vp9_free_frame_buffer(&pool->frame_bufs[i].buf); } +} +void vp9_free_postproc_buffers(VP9_COMMON *cm) { #if CONFIG_VP9_POSTPROC vp9_free_frame_buffer(&cm->post_proc_buffer); vp9_free_frame_buffer(&cm->post_proc_buffer_int); +#else + (void)cm; #endif } @@ -114,25 +119,36 @@ void vp9_free_context_buffers(VP9_COMMON *cm) { } int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { - vp9_free_context_buffers(cm); + int new_mi_size; vp9_set_mb_mi(cm, width, height); - if (cm->alloc_mi(cm, cm->mi_stride * calc_mi_size(cm->mi_rows))) - goto fail; + new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); + if (cm->mi_alloc_size < new_mi_size) { + cm->free_mi(cm); + if (cm->alloc_mi(cm, new_mi_size)) + goto fail; + } - // Create the segmentation 
map structure and set to 0. - free_seg_map(cm); - if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) - goto fail; + if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) { + // Create the segmentation map structure and set to 0. + free_seg_map(cm); + if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) + goto fail; + } - cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc( - 2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE, - sizeof(*cm->above_context)); - if (!cm->above_context) goto fail; + if (cm->above_context_alloc_cols < cm->mi_cols) { + vpx_free(cm->above_context); + cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc( + 2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE, + sizeof(*cm->above_context)); + if (!cm->above_context) goto fail; - cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc( - mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context)); - if (!cm->above_seg_context) goto fail; + vpx_free(cm->above_seg_context); + cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc( + mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context)); + if (!cm->above_seg_context) goto fail; + cm->above_context_alloc_cols = cm->mi_cols; + } return 0; @@ -142,7 +158,6 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { } void vp9_remove_common(VP9_COMMON *cm) { - vp9_free_ref_frame_buffers(cm); vp9_free_context_buffers(cm); vpx_free(cm->fc); @@ -154,7 +169,7 @@ void vp9_remove_common(VP9_COMMON *cm) { void vp9_init_context_buffers(VP9_COMMON *cm) { cm->setup_mi(cm); if (cm->last_frame_seg_map && !cm->frame_parallel_decode) - vpx_memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); + memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); } void vp9_swap_current_and_last_seg_map(VP9_COMMON *cm) { diff --git a/media/libvpx/vp9/common/vp9_alloccommon.h b/media/libvpx/vp9/common/vp9_alloccommon.h index d82397fa3c6..c0e51a6ce64 100644 --- a/media/libvpx/vp9/common/vp9_alloccommon.h +++ 
b/media/libvpx/vp9/common/vp9_alloccommon.h @@ -19,6 +19,7 @@ extern "C" { #endif struct VP9Common; +struct BufferPool; void vp9_remove_common(struct VP9Common *cm); @@ -26,7 +27,8 @@ int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height); void vp9_init_context_buffers(struct VP9Common *cm); void vp9_free_context_buffers(struct VP9Common *cm); -void vp9_free_ref_frame_buffers(struct VP9Common *cm); +void vp9_free_ref_frame_buffers(struct BufferPool *pool); +void vp9_free_postproc_buffers(struct VP9Common *cm); int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); void vp9_free_state_buffers(struct VP9Common *cm); diff --git a/media/libvpx/vp9/common/vp9_blockd.c b/media/libvpx/vp9/common/vp9_blockd.c index 3cd9f44e914..b2bb1818893 100644 --- a/media/libvpx/vp9/common/vp9_blockd.c +++ b/media/libvpx/vp9/common/vp9_blockd.c @@ -40,7 +40,7 @@ void vp9_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, foreach_transformed_block_visitor visit, void *arg) { const struct macroblockd_plane *const pd = &xd->plane[plane]; - const MB_MODE_INFO* mbmi = &xd->mi[0].src_mi->mbmi; + const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi; // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 // transform size varies per plane, look it up in a common way. 
@@ -103,7 +103,7 @@ void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, for (i = above_contexts; i < tx_size_in_blocks; ++i) a[i] = 0; } else { - vpx_memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); } // left @@ -120,7 +120,7 @@ void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, for (i = left_contexts; i < tx_size_in_blocks; ++i) l[i] = 0; } else { - vpx_memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); } } diff --git a/media/libvpx/vp9/common/vp9_blockd.h b/media/libvpx/vp9/common/vp9_blockd.h index 124057634c7..e53e15da92b 100644 --- a/media/libvpx/vp9/common/vp9_blockd.h +++ b/media/libvpx/vp9/common/vp9_blockd.h @@ -18,74 +18,28 @@ #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_common_data.h" -#include "vp9/common/vp9_filter.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_mv.h" #include "vp9/common/vp9_scale.h" +#include "vp9/common/vp9_seg_common.h" #ifdef __cplusplus extern "C" { #endif -#define BLOCK_SIZE_GROUPS 4 -#define SKIP_CONTEXTS 3 -#define INTER_MODE_CONTEXTS 7 - -/* Segment Feature Masks */ -#define MAX_MV_REF_CANDIDATES 2 - -#define INTRA_INTER_CONTEXTS 4 -#define COMP_INTER_CONTEXTS 5 -#define REF_CONTEXTS 5 - -typedef enum { - PLANE_TYPE_Y = 0, - PLANE_TYPE_UV = 1, - PLANE_TYPES -} PLANE_TYPE; - #define MAX_MB_PLANE 3 -typedef char ENTROPY_CONTEXT; - -static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, - ENTROPY_CONTEXT b) { - return (a != 0) + (b != 0); -} - typedef enum { KEY_FRAME = 0, INTER_FRAME = 1, FRAME_TYPES, } FRAME_TYPE; -typedef enum { - DC_PRED, // Average of above and left pixels - V_PRED, // Vertical - H_PRED, // Horizontal - D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi) - D135_PRED, // Directional 135 deg = 180 - 45 - D117_PRED, 
// Directional 117 deg = 180 - 63 - D153_PRED, // Directional 153 deg = 180 - 27 - D207_PRED, // Directional 207 deg = 180 + 27 - D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi) - TM_PRED, // True-motion - NEARESTMV, - NEARMV, - ZEROMV, - NEWMV, - MB_MODE_COUNT -} PREDICTION_MODE; - static INLINE int is_inter_mode(PREDICTION_MODE mode) { return mode >= NEARESTMV && mode <= NEWMV; } -#define INTRA_MODES (TM_PRED + 1) - -#define INTER_MODES (1 + NEWMV - NEARESTMV) - -#define INTER_OFFSET(mode) ((mode) - NEARESTMV) - /* For keyframes, intra block modes are predicted by the (already decoded) modes for the Y blocks to the left and above us; for interframes, there is a single probability table. */ @@ -130,7 +84,6 @@ typedef struct { } MB_MODE_INFO; typedef struct MODE_INFO { - struct MODE_INFO *src_mi; MB_MODE_INFO mbmi; b_mode_info bmi[4]; } MODE_INFO; @@ -171,9 +124,12 @@ struct macroblockd_plane { int subsampling_y; struct buf_2d dst; struct buf_2d pre[2]; - const int16_t *dequant; ENTROPY_CONTEXT *above_context; ENTROPY_CONTEXT *left_context; + int16_t seg_dequant[MAX_SEGMENTS][2]; + + // encoder + const int16_t *dequant; }; #define BLOCK_OFFSET(x, i) ((x) + (i) * 16) @@ -188,10 +144,10 @@ typedef struct RefBuffer { typedef struct macroblockd { struct macroblockd_plane plane[MAX_MB_PLANE]; - + FRAME_COUNTS *counts; int mi_stride; - MODE_INFO *mi; + MODE_INFO **mi; MODE_INFO *left_mi; MODE_INFO *above_mi; MB_MODE_INFO *left_mbmi; @@ -206,6 +162,9 @@ typedef struct macroblockd { int mb_to_top_edge; int mb_to_bottom_edge; + FRAME_CONTEXT *fc; + int frame_parallel_decoding_mode; + /* pointers to reference frames */ RefBuffer *block_refs[2]; @@ -218,13 +177,9 @@ typedef struct macroblockd { PARTITION_CONTEXT *above_seg_context; PARTITION_CONTEXT left_seg_context[8]; - /* mc buffer */ - DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); - #if CONFIG_VP9_HIGHBITDEPTH /* Bit depth: 8, 10, 12 */ int bd; - DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 
* 2]); #endif /* dqcoeff are shared by all the planes. So planes must be decoded serially */ @@ -245,7 +200,7 @@ extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd) { - const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mbmi)) return DCT_DCT; @@ -255,7 +210,7 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, const MACROBLOCKD *xd, int ib) { - const MODE_INFO *const mi = xd->mi[0].src_mi; + const MODE_INFO *const mi = xd->mi[0]; if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi)) return DCT_DCT; @@ -286,6 +241,27 @@ static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y]; } +static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) { + struct macroblockd_plane *const pd = &xd->plane[i]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + memset(pd->above_context, 0, + sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]); + memset(pd->left_context, 0, + sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]); + } +} + +static INLINE const vp9_prob *get_y_mode_probs(const MODE_INFO *mi, + const MODE_INFO *above_mi, + const MODE_INFO *left_mi, + int block) { + const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); + const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); + return vp9_kf_y_mode_prob[above][left]; +} + typedef void (*foreach_transformed_block_visitor)(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, diff --git a/media/libvpx/vp9/common/vp9_common.h b/media/libvpx/vp9/common/vp9_common.h index 6801dd3a2b6..9c2d7791e75 100644 
--- a/media/libvpx/vp9/common/vp9_common.h +++ b/media/libvpx/vp9/common/vp9_common.h @@ -27,26 +27,20 @@ extern "C" { #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #define MAX(x, y) (((x) > (y)) ? (x) : (y)) -#define ROUND_POWER_OF_TWO(value, n) \ - (((value) + (1 << ((n) - 1))) >> (n)) - -#define ALIGN_POWER_OF_TWO(value, n) \ - (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) - // Only need this for fixed-size arrays, for structs just assign. #define vp9_copy(dest, src) { \ assert(sizeof(dest) == sizeof(src)); \ - vpx_memcpy(dest, src, sizeof(src)); \ + memcpy(dest, src, sizeof(src)); \ } // Use this for variably-sized arrays. #define vp9_copy_array(dest, src, n) { \ assert(sizeof(*dest) == sizeof(*src)); \ - vpx_memcpy(dest, src, n * sizeof(*src)); \ + memcpy(dest, src, n * sizeof(*src)); \ } -#define vp9_zero(dest) vpx_memset(&(dest), 0, sizeof(dest)) -#define vp9_zero_array(dest, n) vpx_memset(dest, 0, n * sizeof(*dest)) +#define vp9_zero(dest) memset(&(dest), 0, sizeof(dest)) +#define vp9_zero_array(dest, n) memset(dest, 0, n * sizeof(*dest)) static INLINE uint8_t clip_pixel(int val) { return (val > 255) ? 255 : (val < 0) ? 0 : val; @@ -83,9 +77,6 @@ static INLINE uint16_t clip_pixel_highbd(int val, int bd) { typedef int64_t tran_high_t; typedef int32_t tran_low_t; -#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1)) -#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1 )) - #else // Note: diff --git a/media/libvpx/vp9/common/vp9_convolve.c b/media/libvpx/vp9/common/vp9_convolve.c index 7b65651ba88..90e337fd66e 100644 --- a/media/libvpx/vp9/common/vp9_convolve.c +++ b/media/libvpx/vp9/common/vp9_convolve.c @@ -236,7 +236,7 @@ void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_y, int y_step_q4, int w, int h) { /* Fixed size intermediate buffer places limits on parameters. 
*/ - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64); + DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]); assert(w <= 64); assert(h <= 64); @@ -256,7 +256,7 @@ void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, (void)filter_y; (void)filter_y_stride; for (r = h; r > 0; --r) { - vpx_memcpy(dst, src, w); + memcpy(dst, src, w); src += src_stride; dst += dst_stride; } @@ -501,7 +501,7 @@ void vp9_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { // Fixed size intermediate buffer places limits on parameters. - DECLARE_ALIGNED_ARRAY(16, uint16_t, temp, 64 * 64); + DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]); assert(w <= 64); assert(h <= 64); @@ -526,7 +526,7 @@ void vp9_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride, (void)bd; for (r = h; r > 0; --r) { - vpx_memcpy(dst, src, w * sizeof(uint16_t)); + memcpy(dst, src, w * sizeof(uint16_t)); src += src_stride; dst += dst_stride; } diff --git a/media/libvpx/vp9/common/vp9_debugmodes.c b/media/libvpx/vp9/common/vp9_debugmodes.c index e96bc4f2b02..3d80103d21b 100644 --- a/media/libvpx/vp9/common/vp9_debugmodes.c +++ b/media/libvpx/vp9/common/vp9_debugmodes.c @@ -25,7 +25,7 @@ static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) { static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, size_t member_offset) { int mi_row, mi_col; - MODE_INFO *mi = cm->mi; + MODE_INFO **mi = cm->mi_grid_visible; int rows = cm->mi_rows; int cols = cm->mi_cols; char prefix = descriptor[0]; @@ -35,7 +35,7 @@ static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, fprintf(file, "%c ", prefix); for (mi_col = 0; mi_col < cols; mi_col++) { fprintf(file, "%2d ", - *((int*) ((char *) (&mi->src_mi->mbmi) + + *((int*) ((char *) (&mi[0]->mbmi) + member_offset))); mi++; } @@ -49,7 +49,7 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { int mi_row; int mi_col; FILE 
*mvs = fopen(file, "a"); - MODE_INFO *mi = cm->mi; + MODE_INFO **mi = cm->mi_grid_visible; int rows = cm->mi_rows; int cols = cm->mi_cols; @@ -64,7 +64,7 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(mvs, "S "); for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%2d ", mi->src_mi->mbmi.skip); + fprintf(mvs, "%2d ", mi[0]->mbmi.skip); mi++; } fprintf(mvs, "\n"); @@ -74,12 +74,12 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { // output motion vectors. log_frame_info(cm, "Vectors ", mvs); - mi = cm->mi; + mi = cm->mi_grid_visible; for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(mvs, "V "); for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%4d:%4d ", mi->src_mi->mbmi.mv[0].as_mv.row, - mi->src_mi->mbmi.mv[0].as_mv.col); + fprintf(mvs, "%4d:%4d ", mi[0]->mbmi.mv[0].as_mv.row, + mi[0]->mbmi.mv[0].as_mv.col); mi++; } fprintf(mvs, "\n"); diff --git a/media/libvpx/vp9/common/vp9_entropy.c b/media/libvpx/vp9/common/vp9_entropy.c index 77a8709f0ec..a2584e8da5b 100644 --- a/media/libvpx/vp9/common/vp9_entropy.c +++ b/media/libvpx/vp9/common/vp9_entropy.c @@ -15,6 +15,18 @@ #include "vpx_mem/vpx_mem.h" #include "vpx/vpx_integer.h" +// Unconstrained Node Tree +const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { + 2, 6, // 0 = LOW_VAL + -TWO_TOKEN, 4, // 1 = TWO + -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE + 8, 10, // 3 = HIGH_LOW + -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE + 12, 14, // 5 = CAT_THREEFOUR + -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE + -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE +}; + const vp9_prob vp9_cat1_prob[] = { 159 }; const vp9_prob vp9_cat2_prob[] = { 165, 145 }; const vp9_prob vp9_cat3_prob[] = { 173, 148, 140 }; @@ -737,13 +749,13 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { }; static void extend_to_full_distribution(vp9_prob *probs, vp9_prob 
p) { - vpx_memcpy(probs, vp9_pareto8_full[p = 0 ? 0 : p - 1], - MODEL_NODES * sizeof(vp9_prob)); + memcpy(probs, vp9_pareto8_full[p = 0 ? 0 : p - 1], + MODEL_NODES * sizeof(vp9_prob)); } void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full) { if (full != model) - vpx_memcpy(full, model, sizeof(vp9_prob) * UNCONSTRAINED_NODES); + memcpy(full, model, sizeof(vp9_prob) * UNCONSTRAINED_NODES); extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]); } diff --git a/media/libvpx/vp9/common/vp9_entropy.h b/media/libvpx/vp9/common/vp9_entropy.h index 4eb2e64135e..4e02630e6c6 100644 --- a/media/libvpx/vp9/common/vp9_entropy.h +++ b/media/libvpx/vp9/common/vp9_entropy.h @@ -14,8 +14,8 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" #include "vp9/common/vp9_prob.h" -#include "vp9/common/vp9_scan.h" #ifdef __cplusplus extern "C" { @@ -137,18 +137,6 @@ struct VP9Common; void vp9_default_coef_probs(struct VP9Common *cm); void vp9_adapt_coef_probs(struct VP9Common *cm); -static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { - int i; - for (i = 0; i < MAX_MB_PLANE; i++) { - struct macroblockd_plane *const pd = &xd->plane[i]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - vpx_memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * - num_4x4_blocks_wide_lookup[plane_bsize]); - vpx_memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * - num_4x4_blocks_high_lookup[plane_bsize]); - } -} - // This is the index in the scan order beyond which all coefficients for // 8x8 transform and above are in the top band. 
// This macro is currently unused but may be used by certain implementations @@ -173,6 +161,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { #define PIVOT_NODE 2 // which node is pivot #define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES) +extern const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)]; extern const vp9_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES]; typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS] @@ -184,6 +173,13 @@ typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS] void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full); +typedef char ENTROPY_CONTEXT; + +static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, + ENTROPY_CONTEXT b) { + return (a != 0) + (b != 0); +} + static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) { ENTROPY_CONTEXT above_ec = 0, left_ec = 0; @@ -213,18 +209,6 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, return combine_entropy_contexts(above_ec, left_ec); } -static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, - PLANE_TYPE type, int block_idx) { - const MODE_INFO *const mi = xd->mi[0].src_mi; - - if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) { - return &vp9_default_scan_orders[tx_size]; - } else { - const PREDICTION_MODE mode = get_y_mode(mi, block_idx); - return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; - } -} - #ifdef __cplusplus } // extern "C" #endif diff --git a/media/libvpx/vp9/common/vp9_entropymode.c b/media/libvpx/vp9/common/vp9_entropymode.c index 7ba078b2b52..424451fee39 100644 --- a/media/libvpx/vp9/common/vp9_entropymode.c +++ b/media/libvpx/vp9/common/vp9_entropymode.c @@ -430,10 +430,10 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { cm->seg.abs_delta = SEGMENT_DELTADATA; if (cm->last_frame_seg_map && !cm->frame_parallel_decode) - 
vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); + memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); if (cm->current_frame_seg_map) - vpx_memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); + memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); // Reset the mode ref deltas for loop filter vp9_zero(lf->last_ref_deltas); @@ -460,8 +460,8 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { // prev_mip will only be allocated in encoder. if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode) - vpx_memset(cm->prev_mip, 0, cm->mi_stride * (cm->mi_rows + 1) * - sizeof(*cm->prev_mip)); + memset(cm->prev_mip, 0, + cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip)); vp9_zero(cm->ref_frame_sign_bias); diff --git a/media/libvpx/vp9/common/vp9_entropymode.h b/media/libvpx/vp9/common/vp9_entropymode.h index f4e20e1af8b..a0619ec6fa9 100644 --- a/media/libvpx/vp9/common/vp9_entropymode.h +++ b/media/libvpx/vp9/common/vp9_entropymode.h @@ -11,7 +11,7 @@ #ifndef VP9_COMMON_VP9_ENTROPYMODE_H_ #define VP9_COMMON_VP9_ENTROPYMODE_H_ -#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_filter.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymv.h" @@ -19,8 +19,12 @@ extern "C" { #endif +#define BLOCK_SIZE_GROUPS 4 + #define TX_SIZE_CONTEXTS 2 +#define INTER_OFFSET(mode) ((mode) - NEARESTMV) + struct VP9Common; struct tx_probs { @@ -97,15 +101,6 @@ void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, unsigned int (*ct_8x8p)[2]); -static INLINE const vp9_prob *get_y_mode_probs(const MODE_INFO *mi, - const MODE_INFO *above_mi, - const MODE_INFO *left_mi, - int block) { - const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); - const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); - return vp9_kf_y_mode_prob[above][left]; -} - #ifdef __cplusplus } 
// extern "C" #endif diff --git a/media/libvpx/vp9/common/vp9_enums.h b/media/libvpx/vp9/common/vp9_enums.h index 7938fc10a11..048202593a9 100644 --- a/media/libvpx/vp9/common/vp9_enums.h +++ b/media/libvpx/vp9/common/vp9_enums.h @@ -104,6 +104,44 @@ typedef enum { VP9_ALT_FLAG = 1 << 2, } VP9_REFFRAME; +typedef enum { + PLANE_TYPE_Y = 0, + PLANE_TYPE_UV = 1, + PLANE_TYPES +} PLANE_TYPE; + +typedef enum { + DC_PRED, // Average of above and left pixels + V_PRED, // Vertical + H_PRED, // Horizontal + D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi) + D135_PRED, // Directional 135 deg = 180 - 45 + D117_PRED, // Directional 117 deg = 180 - 63 + D153_PRED, // Directional 153 deg = 180 - 27 + D207_PRED, // Directional 207 deg = 180 + 27 + D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi) + TM_PRED, // True-motion + NEARESTMV, + NEARMV, + ZEROMV, + NEWMV, + MB_MODE_COUNT +} PREDICTION_MODE; + +#define INTRA_MODES (TM_PRED + 1) + +#define INTER_MODES (1 + NEWMV - NEARESTMV) + +#define SKIP_CONTEXTS 3 +#define INTER_MODE_CONTEXTS 7 + +/* Segment Feature Masks */ +#define MAX_MV_REF_CANDIDATES 2 + +#define INTRA_INTER_CONTEXTS 4 +#define COMP_INTER_CONTEXTS 5 +#define REF_CONTEXTS 5 + #ifdef __cplusplus } // extern "C" #endif diff --git a/media/libvpx/vp9/common/vp9_filter.c b/media/libvpx/vp9/common/vp9_filter.c index afcdf22ec63..b256d4af500 100644 --- a/media/libvpx/vp9/common/vp9_filter.c +++ b/media/libvpx/vp9/common/vp9_filter.c @@ -12,7 +12,8 @@ #include "vp9/common/vp9_filter.h" -const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS] = { +DECLARE_ALIGNED(256, const InterpKernel, + vp9_bilinear_filters[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 }, { 0, 0, 0, 112, 16, 0, 0, 0 }, diff --git a/media/libvpx/vp9/common/vp9_filter.h b/media/libvpx/vp9/common/vp9_filter.h index d963ee23569..808a270facb 100644 --- a/media/libvpx/vp9/common/vp9_filter.h +++ b/media/libvpx/vp9/common/vp9_filter.h @@ -43,14 +43,6 @@ 
typedef int16_t InterpKernel[SUBPEL_TAPS]; const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter); -DECLARE_ALIGNED(256, extern const InterpKernel, - vp9_bilinear_filters[SUBPEL_SHIFTS]); - -// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear -// filter kernel as a 2 tap filter. -#define BILINEAR_FILTERS_2TAP(x) \ - (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1) - #ifdef __cplusplus } // extern "C" #endif diff --git a/media/libvpx/vp9/common/vp9_frame_buffers.c b/media/libvpx/vp9/common/vp9_frame_buffers.c index 34795b74ec2..0f41d66985f 100644 --- a/media/libvpx/vp9/common/vp9_frame_buffers.c +++ b/media/libvpx/vp9/common/vp9_frame_buffers.c @@ -64,7 +64,7 @@ int vp9_get_frame_buffer(void *cb_priv, size_t min_size, // This memset is needed for fixing valgrind error from C loop filter // due to access uninitialized memory in frame border. It could be // removed if border is totally removed. - vpx_memset(int_fb_list->int_fb[i].data, 0, min_size); + memset(int_fb_list->int_fb[i].data, 0, min_size); int_fb_list->int_fb[i].size = min_size; } diff --git a/media/libvpx/vp9/common/vp9_idct.c b/media/libvpx/vp9/common/vp9_idct.c index b48d52230e8..174b96e21ad 100644 --- a/media/libvpx/vp9/common/vp9_idct.c +++ b/media/libvpx/vp9/common/vp9_idct.c @@ -11,6 +11,7 @@ #include #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_systemdependent.h" @@ -1276,7 +1277,7 @@ void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, if (zero_coeff[0] | zero_coeff[1]) idct32(input, outptr); else - vpx_memset(outptr, 0, sizeof(tran_low_t) * 32); + memset(outptr, 0, sizeof(tran_low_t) * 32); input += 32; outptr += 32; } @@ -1676,7 +1677,7 @@ static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { (void) bd; if (!(x0 | x1 | x2 | x3)) { - vpx_memset(output, 0, 4 * sizeof(*output)); + memset(output, 0, 4 * sizeof(*output)); return; } 
@@ -1752,7 +1753,7 @@ static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) { (void) bd; if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { - vpx_memset(output, 0, 8 * sizeof(*output)); + memset(output, 0, 8 * sizeof(*output)); return; } @@ -2095,7 +2096,7 @@ static void highbd_iadst16(const tran_low_t *input, tran_low_t *output, if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { - vpx_memset(output, 0, 16 * sizeof(*output)); + memset(output, 0, 16 * sizeof(*output)); return; } @@ -2712,7 +2713,7 @@ void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, if (zero_coeff[0] | zero_coeff[1]) highbd_idct32(input, outptr, bd); else - vpx_memset(outptr, 0, sizeof(tran_low_t) * 32); + memset(outptr, 0, sizeof(tran_low_t) * 32); input += 32; outptr += 32; } diff --git a/media/libvpx/vp9/common/vp9_idct.h b/media/libvpx/vp9/common/vp9_idct.h index 6e2551dd4bc..cee1682a67f 100644 --- a/media/libvpx/vp9/common/vp9_idct.h +++ b/media/libvpx/vp9/common/vp9_idct.h @@ -14,6 +14,7 @@ #include #include "./vpx_config.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_enums.h" diff --git a/media/libvpx/vp9/common/vp9_loopfilter.c b/media/libvpx/vp9/common/vp9_loopfilter.c index 676382dc8a9..484e457df8d 100644 --- a/media/libvpx/vp9/common/vp9_loopfilter.c +++ b/media/libvpx/vp9/common/vp9_loopfilter.c @@ -13,6 +13,7 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_reconinter.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_seg_common.h" @@ -222,9 +223,9 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { if (block_inside_limit < 1) block_inside_limit = 1; - vpx_memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); - vpx_memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), - SIMD_WIDTH); + memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); + 
memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), + SIMD_WIDTH); } } @@ -245,7 +246,7 @@ void vp9_loop_filter_init(VP9_COMMON *cm) { // init hev threshold const vectors for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) - vpx_memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); + memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); } void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { @@ -276,7 +277,7 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { if (!lf->mode_ref_delta_enabled) { // we could get rid of this if we assume that deltas are set to // zero when not in use; encoder always uses deltas - vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); + memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); } else { int ref, mode; const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; @@ -293,7 +294,7 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { } } -static void filter_selectively_vert_row2(PLANE_TYPE plane_type, +static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s, int pitch, unsigned int mask_16x16_l, unsigned int mask_8x8_l, @@ -301,9 +302,9 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type, unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n, const uint8_t *lfl) { - const int mask_shift = plane_type ? 4 : 8; - const int mask_cutoff = plane_type ? 0xf : 0xff; - const int lfl_forward = plane_type ? 4 : 8; + const int mask_shift = subsampling_factor ? 4 : 8; + const int mask_cutoff = subsampling_factor ? 0xf : 0xff; + const int lfl_forward = subsampling_factor ? 
4 : 8; unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; @@ -393,7 +394,7 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type, } #if CONFIG_VP9_HIGHBITDEPTH -static void highbd_filter_selectively_vert_row2(PLANE_TYPE plane_type, +static void highbd_filter_selectively_vert_row2(int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16_l, unsigned int mask_8x8_l, @@ -401,9 +402,9 @@ static void highbd_filter_selectively_vert_row2(PLANE_TYPE plane_type, unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) { - const int mask_shift = plane_type ? 4 : 8; - const int mask_cutoff = plane_type ? 0xf : 0xff; - const int lfl_forward = plane_type ? 4 : 8; + const int mask_shift = subsampling_factor ? 4 : 8; + const int mask_cutoff = subsampling_factor ? 0xf : 0xff; + const int lfl_forward = subsampling_factor ? 4 : 8; unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; @@ -727,7 +728,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n, const int h = num_8x8_blocks_high_lookup[block_size]; int index = shift_y; for (i = 0; i < h; i++) { - vpx_memset(&lfm->lfl_y[index], filter_level, w); + memset(&lfm->lfl_y[index], filter_level, w); index += 8; } } @@ -801,7 +802,7 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, const int h = num_8x8_blocks_high_lookup[block_size]; int index = shift_y; for (i = 0; i < h; i++) { - vpx_memset(&lfm->lfl_y[index], filter_level, w); + memset(&lfm->lfl_y[index], filter_level, w); index += 8; } } @@ -826,12 +827,12 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, // by mi_row, mi_col. // TODO(JBB): This function only works for yv12. 
void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, - MODE_INFO *mi, const int mode_info_stride, + MODE_INFO **mi, const int mode_info_stride, LOOP_FILTER_MASK *lfm) { int idx_32, idx_16, idx_8; const loop_filter_info_n *const lfi_n = &cm->lf_info; - MODE_INFO *mip = mi; - MODE_INFO *mip2 = mi; + MODE_INFO **mip = mi; + MODE_INFO **mip2 = mi; // These are offsets to the next mi in the 64x64 block. It is what gets // added to the mi ptr as we go through each loop. It helps us to avoid @@ -859,28 +860,28 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, cm->mi_cols - mi_col : MI_BLOCK_SIZE); vp9_zero(*lfm); - assert(mip != NULL); + assert(mip[0] != NULL); // TODO(jimbankoski): Try moving most of the following code into decode // loop and storing lfm in the mbmi structure so that we don't have to go // through the recursive loop structure multiple times. - switch (mip->mbmi.sb_type) { + switch (mip[0]->mbmi.sb_type) { case BLOCK_64X64: - build_masks(lfi_n, mip , 0, 0, lfm); + build_masks(lfi_n, mip[0] , 0, 0, lfm); break; case BLOCK_64X32: - build_masks(lfi_n, mip, 0, 0, lfm); + build_masks(lfi_n, mip[0], 0, 0, lfm); mip2 = mip + mode_info_stride * 4; if (4 >= max_rows) break; - build_masks(lfi_n, mip2, 32, 8, lfm); + build_masks(lfi_n, mip2[0], 32, 8, lfm); break; case BLOCK_32X64: - build_masks(lfi_n, mip, 0, 0, lfm); + build_masks(lfi_n, mip[0], 0, 0, lfm); mip2 = mip + 4; if (4 >= max_cols) break; - build_masks(lfi_n, mip2, 4, 2, lfm); + build_masks(lfi_n, mip2[0], 4, 2, lfm); break; default: for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) { @@ -890,23 +891,23 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, const int mi_32_row_offset = ((idx_32 >> 1) << 2); if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) continue; - switch (mip->mbmi.sb_type) { + switch (mip[0]->mbmi.sb_type) { case BLOCK_32X32: - build_masks(lfi_n, mip, shift_y, shift_uv, 
lfm); + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; case BLOCK_32X16: - build_masks(lfi_n, mip, shift_y, shift_uv, lfm); + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_32_row_offset + 2 >= max_rows) continue; mip2 = mip + mode_info_stride * 2; - build_masks(lfi_n, mip2, shift_y + 16, shift_uv + 4, lfm); + build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm); break; case BLOCK_16X32: - build_masks(lfi_n, mip, shift_y, shift_uv, lfm); + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_32_col_offset + 2 >= max_cols) continue; mip2 = mip + 2; - build_masks(lfi_n, mip2, shift_y + 2, shift_uv + 1, lfm); + build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); break; default: for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { @@ -920,29 +921,29 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) continue; - switch (mip->mbmi.sb_type) { + switch (mip[0]->mbmi.sb_type) { case BLOCK_16X16: - build_masks(lfi_n, mip, shift_y, shift_uv, lfm); + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; case BLOCK_16X8: - build_masks(lfi_n, mip, shift_y, shift_uv, lfm); + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_16_row_offset + 1 >= max_rows) continue; mip2 = mip + mode_info_stride; - build_y_mask(lfi_n, mip2, shift_y+8, lfm); + build_y_mask(lfi_n, mip2[0], shift_y+8, lfm); break; case BLOCK_8X16: - build_masks(lfi_n, mip, shift_y, shift_uv, lfm); + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_16_col_offset +1 >= max_cols) continue; mip2 = mip + 1; - build_y_mask(lfi_n, mip2, shift_y+1, lfm); + build_y_mask(lfi_n, mip2[0], shift_y+1, lfm); break; default: { const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0]; - build_masks(lfi_n, mip, shift_y, shift_uv, lfm); + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); mip += offset[0]; for (idx_8 = 1; idx_8 < 4; mip += 
offset[idx_8], ++idx_8) { const int shift_y = shift_32_y[idx_32] + @@ -956,7 +957,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, if (mi_8_col_offset >= max_cols || mi_8_row_offset >= max_rows) continue; - build_y_mask(lfi_n, mip, shift_y, lfm); + build_y_mask(lfi_n, mip[0], shift_y, lfm); } break; } @@ -1151,7 +1152,7 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, void vp9_filter_block_plane_non420(VP9_COMMON *cm, struct macroblockd_plane *plane, - MODE_INFO *mi_8x8, + MODE_INFO **mi_8x8, int mi_row, int mi_col) { const int ss_x = plane->subsampling_x; const int ss_y = plane->subsampling_y; @@ -1175,7 +1176,7 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, // Determine the vertical edges that need filtering for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { - const MODE_INFO *mi = mi_8x8[c].src_mi; + const MODE_INFO *mi = mi_8x8[c]; const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type; const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi); // left edge of current unit is block/partition edge -> no skip @@ -1326,248 +1327,203 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, } } -void vp9_filter_block_plane(VP9_COMMON *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm) { +void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm) { struct buf_2d *const dst = &plane->dst; - uint8_t* const dst0 = dst->buf; - int r, c; + uint8_t *const dst0 = dst->buf; + int r; + uint64_t mask_16x16 = lfm->left_y[TX_16X16]; + uint64_t mask_8x8 = lfm->left_y[TX_8X8]; + uint64_t mask_4x4 = lfm->left_y[TX_4X4]; + uint64_t mask_4x4_int = lfm->int_4x4_y; - if (!plane->plane_type) { - uint64_t mask_16x16 = lfm->left_y[TX_16X16]; - uint64_t mask_8x8 = lfm->left_y[TX_8X8]; - uint64_t mask_4x4 = lfm->left_y[TX_4X4]; - uint64_t mask_4x4_int = lfm->int_4x4_y; + 
assert(plane->subsampling_x == 0 && plane->subsampling_y == 0); - // Vertical pass: do 2 rows at one time - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - unsigned int mask_16x16_l = mask_16x16 & 0xffff; - unsigned int mask_8x8_l = mask_8x8 & 0xffff; - unsigned int mask_4x4_l = mask_4x4 & 0xffff; - unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + unsigned int mask_16x16_l = mask_16x16 & 0xffff; + unsigned int mask_8x8_l = mask_8x8 & 0xffff; + unsigned int mask_4x4_l = mask_4x4 & 0xffff; + unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; - // Disable filtering on the leftmost column. +// Disable filtering on the leftmost column. #if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2(plane->plane_type, - CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, &lfm->lfl_y[r << 3], - (int)cm->bit_depth); - } else { - filter_selectively_vert_row2(plane->plane_type, - dst->buf, dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, - &lfm->lfl_y[r << 3]); - } + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert_row2( + plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, + mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_y[r << 3], (int)cm->bit_depth); + } else { + filter_selectively_vert_row2( + plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, + mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); + } #else - filter_selectively_vert_row2(plane->plane_type, - dst->buf, dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, &lfm->lfl_y[r << 3]); + filter_selectively_vert_row2( + plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, + mask_4x4_l, 
mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 16 * dst->stride; - mask_16x16 >>= 16; - mask_8x8 >>= 16; - mask_4x4 >>= 16; - mask_4x4_int >>= 16; + dst->buf += 16 * dst->stride; + mask_16x16 >>= 16; + mask_8x8 >>= 16; + mask_4x4 >>= 16; + mask_4x4_int >>= 16; + } + + // Horizontal pass + dst->buf = dst0; + mask_16x16 = lfm->above_y[TX_16X16]; + mask_8x8 = lfm->above_y[TX_8X8]; + mask_4x4 = lfm->above_y[TX_4X4]; + mask_4x4_int = lfm->int_4x4_y; + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16 & 0xff; + mask_8x8_r = mask_8x8 & 0xff; + mask_4x4_r = mask_4x4 & 0xff; } - // Horizontal pass - dst->buf = dst0; - mask_16x16 = lfm->above_y[TX_16X16]; - mask_8x8 = lfm->above_y[TX_8X8]; - mask_4x4 = lfm->above_y[TX_4X4]; - mask_4x4_int = lfm->int_4x4_y; - - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { - unsigned int mask_16x16_r; - unsigned int mask_8x8_r; - unsigned int mask_4x4_r; - - if (mi_row + r == 0) { - mask_16x16_r = 0; - mask_8x8_r = 0; - mask_4x4_r = 0; - } else { - mask_16x16_r = mask_16x16 & 0xff; - mask_8x8_r = mask_8x8 & 0xff; - mask_4x4_r = mask_4x4 & 0xff; - } - #if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int & 0xff, - &cm->lf_info, - &lfm->lfl_y[r << 3], - (int)cm->bit_depth); - } else { - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int & 0xff, - &cm->lf_info, - &lfm->lfl_y[r << 3]); - } -#else - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int & 0xff, - &cm->lf_info, + if (cm->use_highbitdepth) { + 
highbd_filter_selectively_horiz( + CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3], + (int)cm->bit_depth); + } else { + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3]); + } +#else + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, + &lfm->lfl_y[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; + dst->buf += 8 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; + } +} + +void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm) { + struct buf_2d *const dst = &plane->dst; + uint8_t *const dst0 = dst->buf; + int r, c; + + uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; + uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; + uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; + uint16_t mask_4x4_int = lfm->int_4x4_uv; + + assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); + + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { + if (plane->plane_type == 1) { + for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { + lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; + lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; + } + } + + { + unsigned int mask_16x16_l = mask_16x16 & 0xff; + unsigned int mask_8x8_l = mask_8x8 & 0xff; + unsigned int mask_4x4_l = mask_4x4 & 0xff; + unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; + +// Disable filtering on the leftmost column. 
+#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert_row2( + plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, + mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_uv[r << 1], (int)cm->bit_depth); + } else { + filter_selectively_vert_row2( + plane->subsampling_x, dst->buf, dst->stride, + mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_uv[r << 1]); + } +#else + filter_selectively_vert_row2( + plane->subsampling_x, dst->buf, dst->stride, + mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_uv[r << 1]); +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 16 * dst->stride; mask_16x16 >>= 8; mask_8x8 >>= 8; mask_4x4 >>= 8; mask_4x4_int >>= 8; } - } else { - uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; - uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; - uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; - uint16_t mask_4x4_int = lfm->int_4x4_uv; + } - // Vertical pass: do 2 rows at one time - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { - if (plane->plane_type == 1) { - for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { - lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; - lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + - (c << 1)]; - } - } + // Horizontal pass + dst->buf = dst0; + mask_16x16 = lfm->above_uv[TX_16X16]; + mask_8x8 = lfm->above_uv[TX_8X8]; + mask_4x4 = lfm->above_uv[TX_4X4]; + mask_4x4_int = lfm->int_4x4_uv; - { - unsigned int mask_16x16_l = mask_16x16 & 0xff; - unsigned int mask_8x8_l = mask_8x8 & 0xff; - unsigned int mask_4x4_l = mask_4x4 & 0xff; - unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; + const unsigned int mask_4x4_int_r = + skip_border_4x4_r ? 
0 : (mask_4x4_int & 0xf); + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; - // Disable filtering on the leftmost column. -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2(plane->plane_type, - CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, - &lfm->lfl_uv[r << 1], - (int)cm->bit_depth); - } else { - filter_selectively_vert_row2(plane->plane_type, - dst->buf, dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, - &lfm->lfl_uv[r << 1]); - } -#else - filter_selectively_vert_row2(plane->plane_type, - dst->buf, dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, - &lfm->lfl_uv[r << 1]); -#endif // CONFIG_VP9_HIGHBITDEPTH - - dst->buf += 16 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; - } + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16 & 0xf; + mask_8x8_r = mask_8x8 & 0xf; + mask_4x4_r = mask_4x4 & 0xf; } - // Horizontal pass - dst->buf = dst0; - mask_16x16 = lfm->above_uv[TX_16X16]; - mask_8x8 = lfm->above_uv[TX_8X8]; - mask_4x4 = lfm->above_uv[TX_4X4]; - mask_4x4_int = lfm->int_4x4_uv; - - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; - const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 
- 0 : (mask_4x4_int & 0xf); - unsigned int mask_16x16_r; - unsigned int mask_8x8_r; - unsigned int mask_4x4_r; - - if (mi_row + r == 0) { - mask_16x16_r = 0; - mask_8x8_r = 0; - mask_4x4_r = 0; - } else { - mask_16x16_r = mask_16x16 & 0xf; - mask_8x8_r = mask_8x8 & 0xf; - mask_4x4_r = mask_4x4 & 0xf; - } - #if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, - &lfm->lfl_uv[r << 1], - (int)cm->bit_depth); - } else { - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, - &lfm->lfl_uv[r << 1]); - } -#else - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, &cm->lf_info, + &lfm->lfl_uv[r << 1], (int)cm->bit_depth); + } else { + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, &cm->lf_info, &lfm->lfl_uv[r << 1]); + } +#else + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, &cm->lf_info, + &lfm->lfl_uv[r << 1]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; - mask_16x16 >>= 4; - mask_8x8 >>= 4; - mask_4x4 >>= 4; - mask_4x4_int >>= 4; - } + dst->buf += 8 * dst->stride; + mask_16x16 >>= 4; + mask_8x8 >>= 4; + mask_4x4 >>= 4; + mask_4x4_int >>= 4; } } @@ -1576,13 +1532,21 @@ void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only) { const int num_planes = y_only ? 
1 : MAX_MB_PLANE; - const int use_420 = y_only || (planes[1].subsampling_y == 1 && - planes[1].subsampling_x == 1); + enum lf_path path; LOOP_FILTER_MASK lfm; int mi_row, mi_col; + if (y_only) + path = LF_PATH_444; + else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) + path = LF_PATH_420; + else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) + path = LF_PATH_444; + else + path = LF_PATH_SLOW; + for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { - MODE_INFO *mi = cm->mi + mi_row * cm->mi_stride; + MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { int plane; @@ -1590,16 +1554,23 @@ void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); // TODO(JBB): Make setup_mask work for non 420. - if (use_420) - vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, - &lfm); + vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, + &lfm); - for (plane = 0; plane < num_planes; ++plane) { - if (use_420) - vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); - else - vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); + for (plane = 1; plane < num_planes; ++plane) { + switch (path) { + case LF_PATH_420: + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm); + break; + case LF_PATH_444: + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm); + break; + case LF_PATH_SLOW: + vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); + break; + } } } } @@ -1633,7 +1604,7 @@ void vp9_loop_filter_data_reset( lf_data->start = 0; lf_data->stop = 0; lf_data->y_only = 0; - vpx_memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); + memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); } int vp9_loop_filter_worker(LFWorkerData *const lf_data, 
void *unused) { diff --git a/media/libvpx/vp9/common/vp9_loopfilter.h b/media/libvpx/vp9/common/vp9_loopfilter.h index 6d7cabf7cd4..f7cbde678de 100644 --- a/media/libvpx/vp9/common/vp9_loopfilter.h +++ b/media/libvpx/vp9/common/vp9_loopfilter.h @@ -29,6 +29,12 @@ extern "C" { #define MAX_REF_LF_DELTAS 4 #define MAX_MODE_LF_DELTAS 2 +enum lf_path { + LF_PATH_420, + LF_PATH_444, + LF_PATH_SLOW, +}; + struct loopfilter { int filter_level; @@ -89,17 +95,22 @@ struct VP9LfSyncData; // by mi_row, mi_col. void vp9_setup_mask(struct VP9Common *const cm, const int mi_row, const int mi_col, - MODE_INFO *mi_8x8, const int mode_info_stride, + MODE_INFO **mi_8x8, const int mode_info_stride, LOOP_FILTER_MASK *lfm); -void vp9_filter_block_plane(struct VP9Common *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm); +void vp9_filter_block_plane_ss00(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_ss11(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm); void vp9_filter_block_plane_non420(struct VP9Common *cm, struct macroblockd_plane *plane, - MODE_INFO *mi_8x8, + MODE_INFO **mi_8x8, int mi_row, int mi_col); void vp9_loop_filter_init(struct VP9Common *cm); diff --git a/media/libvpx/vp9/common/vp9_loopfilter_filters.c b/media/libvpx/vp9/common/vp9_loopfilter_filters.c index 2e32c40b85e..3cf4c32253e 100644 --- a/media/libvpx/vp9/common/vp9_loopfilter_filters.c +++ b/media/libvpx/vp9/common/vp9_loopfilter_filters.c @@ -9,6 +9,7 @@ */ #include "./vpx_config.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" diff --git a/media/libvpx/vp9/common/vp9_mfqe.c b/media/libvpx/vp9/common/vp9_mfqe.c index 1494c3fd70a..bebb37eda07 100644 --- a/media/libvpx/vp9/common/vp9_mfqe.c +++ b/media/libvpx/vp9/common/vp9_mfqe.c @@ 
-9,8 +9,9 @@ */ #include "./vpx_config.h" -#include "./vpx_scale_rtcd.h" #include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_postproc.h" @@ -170,14 +171,14 @@ static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u, get_thr(bs, qdiff, &sad_thr, &vdiff_thr); if (bs == BLOCK_16X16) { - vdiff = (vp9_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8; - sad = (vp9_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8; + vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8; + sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8; } else if (bs == BLOCK_32X32) { - vdiff = (vp9_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10; - sad = (vp9_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10; + vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10; + sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10; } else /* if (bs == BLOCK_64X64) */ { - vdiff = (vp9_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12; - sad = (vp9_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12; + vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12; + sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12; } // vdiff > sad * 3 means vdiff should not be too small, otherwise, diff --git a/media/libvpx/vp9/common/vp9_mvref_common.c b/media/libvpx/vp9/common/vp9_mvref_common.c index 52a731329d3..ce6952752a1 100644 --- a/media/libvpx/vp9/common/vp9_mvref_common.c +++ b/media/libvpx/vp9/common/vp9_mvref_common.c @@ -28,7 +28,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; // Blank the reference vector list - vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); + memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); // The nearest 2 blocks are treated differently 
// if the size < 8x8 we get the mv from the bmi substructure, @@ -37,7 +37,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride].src_mi; + xd->mi_stride]; const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; // Keep counts for entropy encoding. context_counter += mode_2_counter[candidate->mode]; @@ -59,7 +59,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride].src_mi->mbmi; + xd->mi_stride]->mbmi; different_ref_found = 1; if (candidate->ref_frame[0] == ref_frame) @@ -101,7 +101,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row - * xd->mi_stride].src_mi->mbmi; + * xd->mi_stride]->mbmi; // If the candidate is INTRA we don't want to consider its mv. 
IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias, @@ -183,7 +183,7 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block, int ref, int mi_row, int mi_col, int_mv *nearest_mv, int_mv *near_mv) { int_mv mv_list[MAX_MV_REF_CANDIDATES]; - MODE_INFO *const mi = xd->mi[0].src_mi; + MODE_INFO *const mi = xd->mi[0]; b_mode_info *bmi = mi->bmi; int n; @@ -223,6 +223,6 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, break; } default: - assert("Invalid block index."); + assert(0 && "Invalid block index."); } } diff --git a/media/libvpx/vp9/common/vp9_onyxc_int.h b/media/libvpx/vp9/common/vp9_onyxc_int.h index 7ca24a56ee5..3af2a41bd28 100644 --- a/media/libvpx/vp9/common/vp9_onyxc_int.h +++ b/media/libvpx/vp9/common/vp9_onyxc_int.h @@ -14,6 +14,7 @@ #include "./vpx_config.h" #include "vpx/internal/vpx_codec_internal.h" #include "./vp9_rtcd.h" +#include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_entropy.h" @@ -88,7 +89,7 @@ typedef struct { int col; } RefCntBuffer; -typedef struct { +typedef struct BufferPool { // Protect BufferPool from being accessed by several FrameWorkers at // the same time during frame parallel decode. // TODO(hkuang): Try to use atomic variable instead of locking the whole pool. @@ -184,6 +185,8 @@ typedef struct VP9Common { int y_dc_delta_q; int uv_dc_delta_q; int uv_ac_delta_q; + int16_t y_dequant[MAX_SEGMENTS][2]; + int16_t uv_dequant[MAX_SEGMENTS][2]; /* We allocate a MODE_INFO struct for each macroblock, together with an extra row on top and column on the left to simplify prediction. */ @@ -201,6 +204,12 @@ typedef struct VP9Common { void (*free_mi)(struct VP9Common *cm); void (*setup_mi)(struct VP9Common *cm); + // Grid of pointers to 8x8 MODE_INFO structs. Any 8x8 not in the visible + // area will be NULL. 
+ MODE_INFO **mi_grid_base; + MODE_INFO **mi_grid_visible; + MODE_INFO **prev_mi_grid_base; + MODE_INFO **prev_mi_grid_visible; // Whether to use previous frame's motion vectors for prediction. int use_prev_frame_mvs; @@ -212,6 +221,7 @@ typedef struct VP9Common { uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS]; uint8_t *last_frame_seg_map; uint8_t *current_frame_seg_map; + int seg_map_alloc_size; INTERP_FILTER interp_filter; @@ -254,6 +264,7 @@ typedef struct VP9Common { int log2_tile_cols, log2_tile_rows; int byte_alignment; + int skip_loop_filter; // Private data associated with the frame buffer callbacks. void *cb_priv; @@ -268,6 +279,7 @@ typedef struct VP9Common { PARTITION_CONTEXT *above_seg_context; ENTROPY_CONTEXT *above_context; + int above_context_alloc_cols; } VP9_COMMON; // TODO(hkuang): Don't need to lock the whole pool after implementing atomic @@ -297,8 +309,13 @@ static INLINE int get_free_fb(VP9_COMMON *cm) { if (frame_bufs[i].ref_count == 0) break; - assert(i < FRAME_BUFFERS); - frame_bufs[i].ref_count = 1; + if (i != FRAME_BUFFERS) { + frame_bufs[i].ref_count = 1; + } else { + // Reset i to be INVALID_IDX to indicate no free buffer found. 
+ i = INVALID_IDX; + } + unlock_buffer_pool(cm->buffer_pool); return i; } @@ -325,6 +342,14 @@ static INLINE void init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd) { xd->plane[i].dqcoeff = xd->dqcoeff; xd->above_context[i] = cm->above_context + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); + + if (xd->plane[i].plane_type == PLANE_TYPE_Y) { + memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); + } else { + memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); + } + xd->fc = cm->fc; + xd->frame_parallel_decoding_mode = cm->frame_parallel_decoding_mode; } xd->above_seg_context = cm->above_seg_context; @@ -371,7 +396,8 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, xd->up_available = (mi_row != 0); xd->left_available = (mi_col > tile->mi_col_start); if (xd->up_available) { - xd->above_mi = xd->mi[-xd->mi_stride].src_mi; + xd->above_mi = xd->mi[-xd->mi_stride]; + // above_mi may be NULL in VP9 encoder's first pass. xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL; } else { xd->above_mi = NULL; @@ -379,7 +405,8 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, } if (xd->left_available) { - xd->left_mi = xd->mi[-1].src_mi; + xd->left_mi = xd->mi[-1]; + // left_mi may be NULL in VP9 encoder's first pass. xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL; } else { xd->left_mi = NULL; @@ -400,8 +427,8 @@ static INLINE void update_partition_context(MACROBLOCKD *xd, // update the partition context at the end notes. set partition bits // of block sizes larger than the current one to be one, and partition // bits of smaller block sizes to be zero. 
- vpx_memset(above_ctx, partition_context_lookup[subsize].above, bs); - vpx_memset(left_ctx, partition_context_lookup[subsize].left, bs); + memset(above_ctx, partition_context_lookup[subsize].above, bs); + memset(left_ctx, partition_context_lookup[subsize].left, bs); } static INLINE int partition_plane_context(const MACROBLOCKD *xd, @@ -409,21 +436,12 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd, BLOCK_SIZE bsize) { const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); - const int bsl = mi_width_log2_lookup[bsize]; - const int bs = 1 << bsl; - int above = 0, left = 0, i; + int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1; assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); assert(bsl >= 0); - for (i = 0; i < bs; i++) { - above |= above_ctx[i]; - left |= left_ctx[i]; - } - above = (above & bs) > 0; - left = (left & bs) > 0; - return (left * 2 + above) + bsl * PARTITION_PLOFFSET; } diff --git a/media/libvpx/vp9/common/vp9_postproc.c b/media/libvpx/vp9/common/vp9_postproc.c index bb21ade4037..d26a6eb5c88 100644 --- a/media/libvpx/vp9/common/vp9_postproc.c +++ b/media/libvpx/vp9/common/vp9_postproc.c @@ -16,12 +16,10 @@ #include "./vpx_scale_rtcd.h" #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vpx_scale/vpx_scale.h" #include "vpx_scale/yv12config.h" -#if CONFIG_VP9_HIGHBITDEPTH -#include "vp9/common/vp9_common.h" -#endif #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_postproc.h" #include "vp9/common/vp9_systemdependent.h" @@ -91,10 +89,7 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr, int flimit) { uint8_t const *p_src; uint8_t *p_dst; - int row; - int col; - int i; - int v; + int row, col, i, v, kernel; int pitch = src_pixels_per_line; uint8_t d[8]; (void)dst_pixels_per_line; @@ -105,8 +100,8 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr, p_dst = dst_ptr; 
for (col = 0; col < cols; col++) { - int kernel = 4; - int v = p_src[col]; + kernel = 4; + v = p_src[col]; for (i = -2; i <= 2; i++) { if (abs(v - p_src[col + i * pitch]) > flimit) @@ -128,7 +123,7 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr, d[i] = p_src[i]; for (col = 0; col < cols; col++) { - int kernel = 4; + kernel = 4; v = p_src[col]; d[col & 7] = v; @@ -168,10 +163,7 @@ void vp9_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr, int flimit) { uint16_t const *p_src; uint16_t *p_dst; - int row; - int col; - int i; - int v; + int row, col, i, v, kernel; int pitch = src_pixels_per_line; uint16_t d[8]; @@ -181,8 +173,8 @@ void vp9_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr, p_dst = dst_ptr; for (col = 0; col < cols; col++) { - int kernel = 4; - int v = p_src[col]; + kernel = 4; + v = p_src[col]; for (i = -2; i <= 2; i++) { if (abs(v - p_src[col + i * pitch]) > flimit) @@ -205,7 +197,7 @@ void vp9_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr, d[i] = p_src[i]; for (col = 0; col < cols; col++) { - int kernel = 4; + kernel = 4; v = p_src[col]; d[col & 7] = v; @@ -518,22 +510,24 @@ void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, assert((src->flags & YV12_FLAG_HIGHBITDEPTH) == (dst->flags & YV12_FLAG_HIGHBITDEPTH)); if (src->flags & YV12_FLAG_HIGHBITDEPTH) { - const uint16_t *const src = CONVERT_TO_SHORTPTR(srcs[i] + 2 * src_stride - + 2); - uint16_t *const dst = CONVERT_TO_SHORTPTR(dsts[i] + 2 * dst_stride + 2); - vp9_highbd_post_proc_down_and_across(src, dst, src_stride, dst_stride, - src_height, src_width, ppl); + const uint16_t *const src_plane = CONVERT_TO_SHORTPTR( + srcs[i] + 2 * src_stride + 2); + uint16_t *const dst_plane = CONVERT_TO_SHORTPTR( + dsts[i] + 2 * dst_stride + 2); + vp9_highbd_post_proc_down_and_across(src_plane, dst_plane, src_stride, + dst_stride, src_height, src_width, + ppl); } else { - const uint8_t *const src = srcs[i] + 2 * src_stride + 2; - uint8_t *const 
dst = dsts[i] + 2 * dst_stride + 2; + const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2; + uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2; - vp9_post_proc_down_and_across(src, dst, src_stride, dst_stride, - src_height, src_width, ppl); + vp9_post_proc_down_and_across(src_plane, dst_plane, src_stride, + dst_stride, src_height, src_width, ppl); } #else - const uint8_t *const src = srcs[i] + 2 * src_stride + 2; - uint8_t *const dst = dsts[i] + 2 * dst_stride + 2; - vp9_post_proc_down_and_across(src, dst, src_stride, dst_stride, + const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2; + uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2; + vp9_post_proc_down_and_across(src_plane, dst_plane, src_stride, dst_stride, src_height, src_width, ppl); #endif } @@ -558,16 +552,15 @@ static void fillrd(struct postproc_state *state, int q, int a) { * a gaussian distribution with sigma determined by q. */ { - double i; int next, j; next = 0; for (i = -32; i < 32; i++) { - int a = (int)(0.5 + 256 * gaussian(sigma, 0, i)); + int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i)); - if (a) { - for (j = 0; j < a; j++) { + if (a_i) { + for (j = 0; j < a_i; j++) { char_dist[next + j] = (char) i; } @@ -656,8 +649,8 @@ int vp9_post_proc_frame(struct VP9Common *cm, return 1; } ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1; - vpx_memset(ppstate->prev_mip, 0, - cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); + memset(ppstate->prev_mip, 0, + cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); } // Allocate post_proc_buffer_int if needed. @@ -679,8 +672,8 @@ int vp9_post_proc_frame(struct VP9Common *cm, // Ensure that postproc is set to all 0s so that post proc // doesn't pull random data in from edge. 
- vpx_memset(cm->post_proc_buffer_int.buffer_alloc, 128, - cm->post_proc_buffer.frame_size); + memset(cm->post_proc_buffer_int.buffer_alloc, 128, + cm->post_proc_buffer.frame_size); } } diff --git a/media/libvpx/vp9/common/vp9_pred_common.c b/media/libvpx/vp9/common/vp9_pred_common.c index fd735f48361..0aac4a9e677 100644 --- a/media/libvpx/vp9/common/vp9_pred_common.c +++ b/media/libvpx/vp9/common/vp9_pred_common.c @@ -344,7 +344,7 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { // left of the entries corresponding to real blocks. // The prediction flags in these dummy entries are initialized to 0. int vp9_get_tx_size_context(const MACROBLOCKD *xd) { - const int max_tx_size = max_txsize_lookup[xd->mi[0].src_mi->mbmi.sb_type]; + const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type]; const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; const int has_above = xd->up_available; diff --git a/media/libvpx/vp9/common/vp9_reconinter.c b/media/libvpx/vp9/common/vp9_reconinter.c index 1be358e874a..11eaf2e2d70 100644 --- a/media/libvpx/vp9/common/vp9_reconinter.c +++ b/media/libvpx/vp9/common/vp9_reconinter.c @@ -172,7 +172,7 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi[0].src_mi; + const MODE_INFO *mi = xd->mi[0]; const int is_compound = has_second_ref(&mi->mbmi); const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); int ref; @@ -246,7 +246,7 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - if (xd->mi[0].src_mi->mbmi.sb_type < BLOCK_8X8) { + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) @@ -285,10 +285,10 @@ void 
vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { - uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, - src->alpha_buffer}; - const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, - src->alpha_stride}; + uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, + src->v_buffer}; + const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, + src->uv_stride}; int i; for (i = 0; i < MAX_MB_PLANE; ++i) { @@ -304,11 +304,10 @@ void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, const struct scale_factors *sf) { if (src != NULL) { int i; - uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, - src->alpha_buffer}; - const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, - src->alpha_stride}; - + uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, + src->v_buffer}; + const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, + src->uv_stride}; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, diff --git a/media/libvpx/vp9/common/vp9_reconintra.c b/media/libvpx/vp9/common/vp9_reconintra.c index 1668b99ce22..3312f297730 100644 --- a/media/libvpx/vp9/common/vp9_reconintra.c +++ b/media/libvpx/vp9/common/vp9_reconintra.c @@ -12,6 +12,7 @@ #include "./vp9_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vpx_ports/vpx_once.h" #include "vp9/common/vp9_reconintra.h" @@ -30,6 +31,25 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { ADST_ADST, // TM }; +enum { + NEED_LEFT = 1 << 1, + NEED_ABOVE = 1 << 2, + NEED_ABOVERIGHT = 1 << 3, +}; + +static const uint8_t extend_modes[INTRA_MODES] = { + NEED_ABOVE | NEED_LEFT, // DC + NEED_ABOVE, // V + NEED_LEFT, // H + 
NEED_ABOVERIGHT, // D45 + NEED_LEFT | NEED_ABOVE, // D135 + NEED_LEFT | NEED_ABOVE, // D117 + NEED_LEFT | NEED_ABOVE, // D153 + NEED_LEFT, // D207 + NEED_ABOVERIGHT, // D63 + NEED_LEFT | NEED_ABOVE, // TM +}; + // This serves as a wrapper function, so that all the prediction functions // can be unified and accessed as a pointer array. Note that the boundary // above and left are not necessarily used all the time. @@ -59,6 +79,15 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { intra_pred_highbd_sized(type, 16) \ intra_pred_highbd_sized(type, 32) +#define intra_pred_no_4x4(type) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) \ + intra_pred_highbd_sized(type, 4) \ + intra_pred_highbd_sized(type, 8) \ + intra_pred_highbd_sized(type, 16) \ + intra_pred_highbd_sized(type, 32) + #else #define intra_pred_allsizes(type) \ @@ -66,8 +95,17 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { intra_pred_sized(type, 8) \ intra_pred_sized(type, 16) \ intra_pred_sized(type, 32) + +#define intra_pred_no_4x4(type) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) #endif // CONFIG_VP9_HIGHBITDEPTH +#define DST(x, y) dst[(x) + (y) * stride] +#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) +#define AVG2(a, b) (((a) + (b) + 1) >> 1) + #if CONFIG_VP9_HIGHBITDEPTH static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, @@ -78,18 +116,16 @@ static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, // First column. for (r = 0; r < bs - 1; ++r) { - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1); + dst[r * stride] = AVG2(left[r], left[r + 1]); } dst[(bs - 1) * stride] = left[bs - 1]; dst++; // Second column. 
for (r = 0; r < bs - 2; ++r) { - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1] * 2 + - left[r + 2], 2); + dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); } - dst[(bs - 2) * stride] = ROUND_POWER_OF_TWO(left[bs - 2] + - left[bs - 1] * 3, 2); + dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; @@ -111,11 +147,9 @@ static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, (void) bd; for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) { - dst[c] = r & 1 ? ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1] * 2 + - above[r/2 + c + 2], 2) - : ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1], 1); + dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], + above[(r >> 1) + c + 2]) + : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); } dst += stride; } @@ -129,9 +163,8 @@ static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, (void) bd; for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) { - dst[c] = r + c + 2 < bs * 2 ? ROUND_POWER_OF_TWO(above[r + c] + - above[r + c + 1] * 2 + - above[r + c + 2], 2) + dst[c] = r + c + 2 < bs * 2 ? 
AVG3(above[r + c], above[r + c + 1], + above[r + c + 2]) : above[bs * 2 - 1]; } dst += stride; @@ -146,20 +179,19 @@ static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, // first row for (c = 0; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c], 1); + dst[c] = AVG2(above[c - 1], above[c]); dst += stride; // second row - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); dst += stride; // the rest of first col - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(above[-1], left[0], left[1]); for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = ROUND_POWER_OF_TWO(left[r - 3] + left[r - 2] * 2 + - left[r - 1], 2); + dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); // the rest of the block for (r = 2; r < bs; ++r) { @@ -174,14 +206,13 @@ static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride, const uint16_t *left, int bd) { int r, c; (void) bd; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst += stride; for (r = 1; r < bs; ++r) { @@ -196,20 +227,19 @@ static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride, const uint16_t *left, int bd) { int r, c; (void) bd; - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0], 1); 
+ dst[0] = AVG2(above[-1], left[0]); for (r = 1; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 1] + left[r], 1); + dst[r * stride] = AVG2(left[r - 1], left[r]); dst++; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst++; for (c = 0; c < bs - 2; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c] * 2 + above[c + 1], 2); + dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); dst += stride; for (r = 1; r < bs; ++r) { @@ -226,7 +256,7 @@ static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, (void) left; (void) bd; for (r = 0; r < bs; r++) { - vpx_memcpy(dst, above, bs * sizeof(uint16_t)); + memcpy(dst, above, bs * sizeof(uint16_t)); dst += stride; } } @@ -325,22 +355,37 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, } #endif // CONFIG_VP9_HIGHBITDEPTH +void vp9_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + (void)above; + DST(0, 0) = AVG2(I, J); + DST(2, 0) = DST(0, 1) = AVG2(J, K); + DST(2, 1) = DST(0, 2) = AVG2(K, L); + DST(1, 0) = AVG3(I, J, K); + DST(3, 0) = DST(1, 1) = AVG3(J, K, L); + DST(3, 1) = DST(1, 2) = AVG3(K, L, L); + DST(3, 2) = DST(2, 2) = + DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; +} + static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; (void) above; // first column for (r = 0; r < bs - 1; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1); + dst[r * stride] = 
AVG2(left[r], left[r + 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; // second column for (r = 0; r < bs - 2; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1] * 2 + - left[r + 2], 2); - dst[(bs - 2) * stride] = ROUND_POWER_OF_TWO(left[bs - 2] + - left[bs - 1] * 3, 2); + dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); + dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; @@ -352,38 +397,110 @@ static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, for (c = 0; c < bs - 2; ++c) dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; } -intra_pred_allsizes(d207) +intra_pred_no_4x4(d207) + +void vp9_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + (void)left; + DST(0, 0) = AVG2(A, B); + DST(1, 0) = DST(0, 2) = AVG2(B, C); + DST(2, 0) = DST(1, 2) = AVG2(C, D); + DST(3, 0) = DST(2, 2) = AVG2(D, E); + DST(3, 2) = AVG2(E, F); // differs from vp8 + + DST(0, 1) = AVG3(A, B, C); + DST(1, 1) = DST(0, 3) = AVG3(B, C, D); + DST(2, 1) = DST(1, 3) = AVG3(C, D, E); + DST(3, 1) = DST(2, 3) = AVG3(D, E, F); + DST(3, 3) = AVG3(E, F, G); // differs from vp8 +} static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; - (void) left; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) - dst[c] = r & 1 ? 
ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1] * 2 + - above[r/2 + c + 2], 2) - : ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1], 1); - dst += stride; + int size; + (void)left; + for (c = 0; c < bs; ++c) { + dst[c] = AVG2(above[c], above[c + 1]); + dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]); + } + for (r = 2, size = bs - 2; r < bs; r += 2, --size) { + memcpy(dst + (r + 0) * stride, dst + (r >> 1), size); + memset(dst + (r + 0) * stride + size, above[bs - 1], bs - size); + memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size); + memset(dst + (r + 1) * stride + size, above[bs - 1], bs - size); } } -intra_pred_allsizes(d63) +intra_pred_no_4x4(d63) + +void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + const int H = above[7]; + (void)stride; + (void)left; + DST(0, 0) = AVG3(A, B, C); + DST(1, 0) = DST(0, 1) = AVG3(B, C, D); + DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); + DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); + DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); + DST(3, 2) = DST(2, 3) = AVG3(F, G, H); + DST(3, 3) = H; // differs from vp8 +} static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { - int r, c; - (void) left; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) - dst[c] = r + c + 2 < bs * 2 ? 
ROUND_POWER_OF_TWO(above[r + c] + - above[r + c + 1] * 2 + - above[r + c + 2], 2) - : above[bs * 2 - 1]; + const uint8_t above_right = above[bs - 1]; + int x, size; + uint8_t avg[31]; // TODO(jzern): this could be block size specific + (void)left; + + for (x = 0; x < bs - 1; ++x) { + avg[x] = AVG3(above[x], above[x + 1], above[x + 2]); + } + for (x = 0, size = bs - 1; x < bs; ++x, --size) { + memcpy(dst, avg + x, size); + memset(dst + size, above_right, x + 1); dst += stride; } } -intra_pred_allsizes(d45) +intra_pred_no_4x4(d45) + +void vp9_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + DST(0, 0) = DST(1, 2) = AVG2(X, A); + DST(1, 0) = DST(2, 2) = AVG2(A, B); + DST(2, 0) = DST(3, 2) = AVG2(B, C); + DST(3, 0) = AVG2(C, D); + + DST(0, 3) = AVG3(K, J, I); + DST(0, 2) = AVG3(J, I, X); + DST(0, 1) = DST(1, 3) = AVG3(I, X, A); + DST(1, 1) = DST(2, 3) = AVG3(X, A, B); + DST(2, 1) = DST(3, 3) = AVG3(A, B, C); + DST(3, 1) = AVG3(B, C, D); +} static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { @@ -391,20 +508,19 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, // first row for (c = 0; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c], 1); + dst[c] = AVG2(above[c - 1], above[c]); dst += stride; // second row - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); dst += stride; // the rest of first col - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = 
AVG3(above[-1], left[0], left[1]); for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = ROUND_POWER_OF_TWO(left[r - 3] + left[r - 2] * 2 + - left[r - 1], 2); + dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); // the rest of the block for (r = 2; r < bs; ++r) { @@ -413,19 +529,39 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst += stride; } } -intra_pred_allsizes(d117) +intra_pred_no_4x4(d117) + +void vp9_d135_predictor_4x4(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + (void)stride; + DST(0, 3) = AVG3(J, K, L); + DST(1, 3) = DST(0, 2) = AVG3(I, J, K); + DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); + DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); + DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); + DST(3, 1) = DST(2, 0) = AVG3(C, B, A); + DST(3, 0) = AVG3(D, C, B); +} static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst += stride; for (r = 1; r < bs; ++r) { @@ -434,25 +570,48 @@ static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst += stride; } } 
-intra_pred_allsizes(d135) +intra_pred_no_4x4(d135) + +void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + + DST(0, 0) = DST(2, 1) = AVG2(I, X); + DST(0, 1) = DST(2, 2) = AVG2(J, I); + DST(0, 2) = DST(2, 3) = AVG2(K, J); + DST(0, 3) = AVG2(L, K); + + DST(3, 0) = AVG3(A, B, C); + DST(2, 0) = AVG3(X, A, B); + DST(1, 0) = DST(3, 1) = AVG3(I, X, A); + DST(1, 1) = DST(3, 2) = AVG3(J, I, X); + DST(1, 2) = DST(3, 3) = AVG3(K, J, I); + DST(1, 3) = AVG3(L, K, J); +} static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0], 1); + dst[0] = AVG2(above[-1], left[0]); for (r = 1; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 1] + left[r], 1); + dst[r * stride] = AVG2(left[r - 1], left[r]); dst++; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst++; for (c = 0; c < bs - 2; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c] * 2 + above[c + 1], 2); + dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); dst += stride; for (r = 1; r < bs; ++r) { @@ -461,7 +620,7 @@ static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst += stride; } } -intra_pred_allsizes(d153) +intra_pred_no_4x4(d153) static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { 
@@ -469,7 +628,7 @@ static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, (void) left; for (r = 0; r < bs; r++) { - vpx_memcpy(dst, above, bs); + memcpy(dst, above, bs); dst += stride; } } @@ -481,7 +640,7 @@ static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs, (void) above; for (r = 0; r < bs; r++) { - vpx_memset(dst, left[r], bs); + memset(dst, left[r], bs); dst += stride; } } @@ -507,7 +666,7 @@ static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs, (void) left; for (r = 0; r < bs; r++) { - vpx_memset(dst, 128, bs); + memset(dst, 128, bs); dst += stride; } } @@ -524,7 +683,7 @@ static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs, expected_dc = (sum + (bs >> 1)) / bs; for (r = 0; r < bs; r++) { - vpx_memset(dst, expected_dc, bs); + memset(dst, expected_dc, bs); dst += stride; } } @@ -540,7 +699,7 @@ static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs, expected_dc = (sum + (bs >> 1)) / bs; for (r = 0; r < bs; r++) { - vpx_memset(dst, expected_dc, bs); + memset(dst, expected_dc, bs); dst += stride; } } @@ -559,7 +718,7 @@ static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs, expected_dc = (sum + (count >> 1)) / count; for (r = 0; r < bs; r++) { - vpx_memset(dst, expected_dc, bs); + memset(dst, expected_dc, bs); dst += stride; } } @@ -638,8 +797,8 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, int i; uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 64); - DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 128 + 16); + DECLARE_ALIGNED(16, uint16_t, left_col[32]); + DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); uint16_t *above_row = above_data + 16; const uint16_t *const_above_row = above_row; const int bs = 4 << tx_size; @@ -699,24 +858,24 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, /* slower path if the block needs 
border extension */ if (x0 + 2 * bs <= frame_width) { if (right_available && bs == 4) { - vpx_memcpy(above_row, above_ref, 2 * bs * sizeof(uint16_t)); + memcpy(above_row, above_ref, 2 * bs * sizeof(uint16_t)); } else { - vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t)); + memcpy(above_row, above_ref, bs * sizeof(uint16_t)); vpx_memset16(above_row + bs, above_row[bs - 1], bs); } } else if (x0 + bs <= frame_width) { const int r = frame_width - x0; if (right_available && bs == 4) { - vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t)); + memcpy(above_row, above_ref, r * sizeof(uint16_t)); vpx_memset16(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); } else { - vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t)); + memcpy(above_row, above_ref, bs * sizeof(uint16_t)); vpx_memset16(above_row + bs, above_row[bs - 1], bs); } } else if (x0 <= frame_width) { const int r = frame_width - x0; - vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t)); + memcpy(above_row, above_ref, r * sizeof(uint16_t)); vpx_memset16(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); } @@ -727,9 +886,9 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, if (bs == 4 && right_available && left_available) { const_above_row = above_ref; } else { - vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t)); + memcpy(above_row, above_ref, bs * sizeof(uint16_t)); if (bs == 4 && right_available) - vpx_memcpy(above_row + bs, above_ref + bs, bs * sizeof(uint16_t)); + memcpy(above_row + bs, above_ref + bs, bs * sizeof(uint16_t)); else vpx_memset16(above_row + bs, above_row[bs - 1], bs); // TODO(Peter): this value should probably change for high bitdepth @@ -761,8 +920,8 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, int right_available, int x, int y, int plane) { int i; - DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64); - DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 128 + 16); + DECLARE_ALIGNED(16, uint8_t, left_col[32]); + 
DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); uint8_t *above_row = above_data + 16; const uint8_t *const_above_row = above_row; const int bs = 4 << tx_size; @@ -790,75 +949,103 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - vpx_memset(left_col, 129, 64); - - // left - if (left_available) { - if (xd->mb_to_bottom_edge < 0) { - /* slower path if the block needs border extension */ - if (y0 + bs <= frame_height) { + // NEED_LEFT + if (extend_modes[mode] & NEED_LEFT) { + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } + } else { + /* faster path if the block does not need extension */ for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; - } else { - const int extend_bottom = frame_height - y0; - for (i = 0; i < extend_bottom; ++i) - left_col[i] = ref[i * ref_stride - 1]; - for (; i < bs; ++i) - left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; } } else { - /* faster path if the block does not need extension */ - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; + memset(left_col, 129, bs); } } - // TODO(hkuang) do not extend 2*bs pixels for all modes. 
- // above - if (up_available) { - const uint8_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + 2 * bs <= frame_width) { - if (right_available && bs == 4) { - vpx_memcpy(above_row, above_ref, 2 * bs); - } else { - vpx_memcpy(above_row, above_ref, bs); - vpx_memset(above_row + bs, above_row[bs - 1], bs); + // NEED_ABOVE + if (extend_modes[mode] & NEED_ABOVE) { + if (up_available) { + const uint8_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + bs <= frame_width) { + memcpy(above_row, above_ref, bs); + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + bs - frame_width); } - } else if (x0 + bs <= frame_width) { - const int r = frame_width - x0; - if (right_available && bs == 4) { - vpx_memcpy(above_row, above_ref, r); - vpx_memset(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; } else { - vpx_memcpy(above_row, above_ref, bs); - vpx_memset(above_row + bs, above_row[bs - 1], bs); + memcpy(above_row, above_ref, bs); } - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - vpx_memcpy(above_row, above_ref, r); - vpx_memset(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); } above_row[-1] = left_available ? above_ref[-1] : 129; } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - vpx_memcpy(above_row, above_ref, bs); - if (bs == 4 && right_available) - vpx_memcpy(above_row + bs, above_ref + bs, bs); - else - vpx_memset(above_row + bs, above_row[bs - 1], bs); - above_row[-1] = left_available ? 
above_ref[-1] : 129; - } + memset(above_row, 127, bs); + above_row[-1] = 127; + } + } + + // NEED_ABOVERIGHT + if (extend_modes[mode] & NEED_ABOVERIGHT) { + if (up_available) { + const uint8_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + memcpy(above_row, above_ref, 2 * bs); + } else { + memcpy(above_row, above_ref, bs); + memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } else { + memcpy(above_row, above_ref, bs); + memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs); + if (bs == 4 && right_available) + memcpy(above_row + bs, above_ref + bs, bs); + else + memset(above_row + bs, above_row[bs - 1], bs); + } + } + above_row[-1] = left_available ? 
above_ref[-1] : 129; + } else { + memset(above_row, 127, bs * 2); + above_row[-1] = 127; } - } else { - vpx_memset(above_row, 127, bs * 2); - above_row[-1] = 127; } // predict @@ -896,6 +1083,6 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, have_top, have_left, have_right, x, y, plane); } -void vp9_init_intra_predictors() { +void vp9_init_intra_predictors(void) { once(vp9_init_intra_predictors_internal); } diff --git a/media/libvpx/vp9/common/vp9_reconintra.h b/media/libvpx/vp9/common/vp9_reconintra.h index 845f3bcaac7..da5e435b132 100644 --- a/media/libvpx/vp9/common/vp9_reconintra.h +++ b/media/libvpx/vp9/common/vp9_reconintra.h @@ -18,7 +18,7 @@ extern "C" { #endif -void vp9_init_intra_predictors(); +void vp9_init_intra_predictors(void); void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, TX_SIZE tx_size, PREDICTION_MODE mode, diff --git a/media/libvpx/vp9/common/vp9_rtcd.c b/media/libvpx/vp9/common/vp9_rtcd.c index c777bc81f26..2dfa09f50e0 100644 --- a/media/libvpx/vp9/common/vp9_rtcd.c +++ b/media/libvpx/vp9/common/vp9_rtcd.c @@ -12,10 +12,7 @@ #include "./vp9_rtcd.h" #include "vpx_ports/vpx_once.h" -void vpx_scale_rtcd(void); - void vp9_rtcd() { - vpx_scale_rtcd(); // TODO(JBB): Remove this once, by insuring that both the encoder and // decoder setup functions are protected by once(); once(setup_rtcd_internal); diff --git a/media/libvpx/vp9/common/vp9_scan.h b/media/libvpx/vp9/common/vp9_scan.h index 65e2aa69a54..1d86b5cfe2c 100644 --- a/media/libvpx/vp9/common/vp9_scan.h +++ b/media/libvpx/vp9/common/vp9_scan.h @@ -38,6 +38,18 @@ static INLINE int get_coef_context(const int16_t *neighbors, token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; } +static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, + PLANE_TYPE type, int block_idx) { + const MODE_INFO *const mi = xd->mi[0]; + + if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) { + return 
&vp9_default_scan_orders[tx_size]; + } else { + const PREDICTION_MODE mode = get_y_mode(mi, block_idx); + return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; + } +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/media/libvpx/vp9/common/vp9_systemdependent.h b/media/libvpx/vp9/common/vp9_systemdependent.h index fba9283899b..fc77762def5 100644 --- a/media/libvpx/vp9/common/vp9_systemdependent.h +++ b/media/libvpx/vp9/common/vp9_systemdependent.h @@ -11,14 +11,13 @@ #ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ #define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ +#include "vpx_ports/msvc.h" + #ifdef _MSC_VER # include // the ceil() definition must precede intrin.h # if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86)) # include -# define USE_MSC_INTRIN -# endif -# if _MSC_VER < 1900 -# define snprintf _snprintf +# define USE_MSC_INTRINSICS # endif #endif @@ -50,7 +49,7 @@ static INLINE int round(double x) { static INLINE int get_msb(unsigned int n) { return 31 ^ __builtin_clz(n); } -#elif defined(USE_MSC_INTRIN) +#elif defined(USE_MSC_INTRINSICS) #pragma intrinsic(_BitScanReverse) static INLINE int get_msb(unsigned int n) { @@ -58,7 +57,7 @@ static INLINE int get_msb(unsigned int n) { _BitScanReverse(&first_set_bit, n); return first_set_bit; } -#undef USE_MSC_INTRIN +#undef USE_MSC_INTRINSICS #else // Returns (int)floor(log2(n)). n must be > 0. 
static INLINE int get_msb(unsigned int n) { diff --git a/media/libvpx/vp9/common/vp9_thread_common.c b/media/libvpx/vp9/common/vp9_thread_common.c index 7a3871c4123..cba57ff41aa 100644 --- a/media/libvpx/vp9/common/vp9_thread_common.c +++ b/media/libvpx/vp9/common/vp9_thread_common.c @@ -13,6 +13,7 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_thread_common.h" #include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_loopfilter.h" #if CONFIG_MULTITHREAD static INLINE void mutex_lock(pthread_mutex_t *const mutex) { @@ -92,14 +93,21 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, int start, int stop, int y_only, VP9LfSync *const lf_sync) { const int num_planes = y_only ? 1 : MAX_MB_PLANE; - const int use_420 = y_only || (planes[1].subsampling_y == 1 && - planes[1].subsampling_x == 1); const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; int mi_row, mi_col; + enum lf_path path; + if (y_only) + path = LF_PATH_444; + else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) + path = LF_PATH_420; + else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) + path = LF_PATH_444; + else + path = LF_PATH_SLOW; for (mi_row = start; mi_row < stop; mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { - MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride; + MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { const int r = mi_row >> MI_BLOCK_SIZE_LOG2; @@ -112,16 +120,23 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); // TODO(JBB): Make setup_mask work for non 420. 
- if (use_420) - vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, - &lfm); + vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, + &lfm); - for (plane = 0; plane < num_planes; ++plane) { - if (use_420) - vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); - else - vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); + for (plane = 1; plane < num_planes; ++plane) { + switch (path) { + case LF_PATH_420: + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm); + break; + case LF_PATH_444: + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm); + break; + case LF_PATH_SLOW: + vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); + break; + } } sync_write(lf_sync, r, c, sb_cols); @@ -160,7 +175,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, } // Initialize cur_sb_col to -1 for all SB rows. - vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); + memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); // Set up loopfilter thread data. // The decoder is capping num_workers because it has been observed that using diff --git a/media/libvpx/vp9/common/x86/convolve.h b/media/libvpx/vp9/common/x86/convolve.h new file mode 100644 index 00000000000..de2df47e5e5 --- /dev/null +++ b/media/libvpx/vp9/common/x86/convolve.h @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef VP9_COMMON_X86_CONVOLVE_H_ +#define VP9_COMMON_X86_CONVOLVE_H_ + +#include + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +typedef void filter8_1dfunction ( + const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter +); + +#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ + void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h) { \ + if (step_q4 == 16 && filter[3] != 128) { \ + if (filter[0] || filter[1] || filter[2]) { \ + while (w >= 16) { \ + vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } else { \ + while (w >= 16) { \ + vp9_filter_block1d16_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_filter_block1d8_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_filter_block1d4_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } \ + } \ + if (w) { \ + vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, 
h); \ + } \ +} + +#define FUN_CONV_2D(avg, opt) \ +void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h) { \ + assert(w <= 64); \ + assert(h <= 64); \ + if (x_step_q4 == 16 && y_step_q4 == 16) { \ + if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ + filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ + vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h + 7); \ + vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, \ + y_step_q4, w, h); \ + } else { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ + vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h + 1); \ + vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, \ + y_step_q4, w, h); \ + } \ + } else { \ + vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ + } \ +} + +#if CONFIG_VP9_HIGHBITDEPTH + +typedef void highbd_filter8_1dfunction ( + const uint16_t *src_ptr, + const ptrdiff_t src_pitch, + uint16_t *output_ptr, + ptrdiff_t out_pitch, + unsigned int output_height, + const int16_t *filter, + int bd +); + +#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ + void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ + ptrdiff_t src_stride, \ + uint8_t *dst8, \ + ptrdiff_t dst_stride, \ + const int16_t *filter_x, \ + int x_step_q4, \ + const int16_t *filter_y, \ + int y_step_q4, \ + int w, int h, int bd) { \ + if (step_q4 == 16 && filter[3] != 128) { \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ 
+ if (filter[0] || filter[1] || filter[2]) { \ + while (w >= 16) { \ + vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } else { \ + while (w >= 16) { \ + vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } \ + } \ + if (w) { \ + vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h, bd); \ + } \ +} + +#define HIGH_FUN_CONV_2D(avg, opt) \ +void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h, int bd) { \ + assert(w <= 64); \ + assert(h <= 64); \ + if (x_step_q4 == 16 && y_step_q4 == 16) { \ + if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ + filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ + 
vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ + CONVERT_TO_BYTEPTR(fdata2), 64, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h + 7, bd); \ + vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ + 64, dst, dst_stride, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h, bd); \ + } else { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ + vp9_highbd_convolve8_horiz_##opt(src, src_stride, \ + CONVERT_TO_BYTEPTR(fdata2), 64, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h + 1, bd); \ + vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ + dst, dst_stride, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h, bd); \ + } \ + } else { \ + vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, w, \ + h, bd); \ + } \ +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#endif // VP9_COMMON_X86_CONVOLVE_H_ diff --git a/media/libvpx/vp9/common/x86/vp9_asm_stubs.c b/media/libvpx/vp9/common/x86/vp9_asm_stubs.c index a0a599691c8..fd55fb8c664 100644 --- a/media/libvpx/vp9/common/x86/vp9_asm_stubs.c +++ b/media/libvpx/vp9/common/x86/vp9_asm_stubs.c @@ -8,421 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include - -#include "./vpx_config.h" #include "./vp9_rtcd.h" -#include "vpx_ports/mem.h" - -typedef void filter8_1dfunction ( - const unsigned char *src_ptr, - const ptrdiff_t src_pitch, - unsigned char *output_ptr, - ptrdiff_t out_pitch, - unsigned int output_height, - const short *filter -); - -#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h) { \ - if (step_q4 == 16 && filter[3] != 128) { \ - if (filter[0] || filter[1] || filter[2]) { \ - while (w >= 16) { \ - vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } else { \ - while (w >= 16) { \ - vp9_filter_block1d16_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_filter_block1d8_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_filter_block1d4_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } \ - } \ - if (w) { \ - vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h); \ - } \ -} - -#define FUN_CONV_2D(avg, opt) \ -void 
vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h) { \ - assert(w <= 64); \ - assert(h <= 64); \ - if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ - filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ - DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \ - vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h + 7); \ - vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } else { \ - DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 65); \ - vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h + 1); \ - vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } \ - } else { \ - vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ - } \ -} - -#if CONFIG_VP9_HIGHBITDEPTH - -typedef void highbd_filter8_1dfunction ( - const uint16_t *src_ptr, - const ptrdiff_t src_pitch, - uint16_t *output_ptr, - ptrdiff_t out_pitch, - unsigned int output_height, - const int16_t *filter, - int bd -); - -#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ - ptrdiff_t src_stride, \ - uint8_t *dst8, \ - ptrdiff_t dst_stride, \ - const int16_t *filter_x, \ - int x_step_q4, \ - const int16_t *filter_y, \ - int y_step_q4, \ - int w, int h, int bd) { \ - if (step_q4 == 16 && filter[3] != 128) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ - if (filter[0] || filter[1] || 
filter[2]) { \ - while (w >= 16) { \ - vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } else { \ - while (w >= 16) { \ - vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } \ - } \ - if (w) { \ - vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h, bd); \ - } \ -} - -#define HIGH_FUN_CONV_2D(avg, opt) \ -void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h, int bd) { \ - assert(w <= 64); \ - assert(h <= 64); \ - if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ - filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ - DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 71); \ - vp9_highbd_convolve8_horiz_##opt(src - 3 * 
src_stride, src_stride, \ - CONVERT_TO_BYTEPTR(fdata2), 64, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h + 7, bd); \ - vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ - 64, dst, dst_stride, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h, bd); \ - } else { \ - DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 65); \ - vp9_highbd_convolve8_horiz_##opt(src, src_stride, \ - CONVERT_TO_BYTEPTR(fdata2), 64, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h + 1, bd); \ - vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ - dst, dst_stride, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h, bd); \ - } \ - } else { \ - vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, w, \ - h, bd); \ - } \ -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -#if HAVE_AVX2 && HAVE_SSSE3 -filter8_1dfunction vp9_filter_block1d16_v8_avx2; -filter8_1dfunction vp9_filter_block1d16_h8_avx2; -filter8_1dfunction vp9_filter_block1d4_v8_ssse3; -#if ARCH_X86_64 -filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; -#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 -#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 -#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 -#else // ARCH_X86 -filter8_1dfunction vp9_filter_block1d8_v8_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_ssse3; -#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 -#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 -#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 -#endif // ARCH_X86_64 / ARCH_X86 -filter8_1dfunction vp9_filter_block1d16_v2_ssse3; -filter8_1dfunction vp9_filter_block1d16_h2_ssse3; -filter8_1dfunction 
vp9_filter_block1d8_v2_ssse3; -filter8_1dfunction vp9_filter_block1d8_h2_ssse3; -filter8_1dfunction vp9_filter_block1d4_v2_ssse3; -filter8_1dfunction vp9_filter_block1d4_h2_ssse3; -#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 -#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 -#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 -#define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 -#define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3 -#define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3 -#define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3 -// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); -FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); - -// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, avx2); -#endif // HAVE_AX2 && HAVE_SSSE3 -#if HAVE_SSSE3 -#if ARCH_X86_64 -filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d4_v8_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; -#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 -#define vp9_filter_block1d16_h8_ssse3 
vp9_filter_block1d16_h8_intrin_ssse3 -#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 -#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 -#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 -#else // ARCH_X86 -filter8_1dfunction vp9_filter_block1d16_v8_ssse3; -filter8_1dfunction vp9_filter_block1d16_h8_ssse3; -filter8_1dfunction vp9_filter_block1d8_v8_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_ssse3; -filter8_1dfunction vp9_filter_block1d4_v8_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_ssse3; -#endif // ARCH_X86_64 / ARCH_X86 -filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; -filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; -filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; -filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; - -filter8_1dfunction vp9_filter_block1d16_v2_ssse3; -filter8_1dfunction vp9_filter_block1d16_h2_ssse3; -filter8_1dfunction vp9_filter_block1d8_v2_ssse3; -filter8_1dfunction vp9_filter_block1d8_h2_ssse3; -filter8_1dfunction vp9_filter_block1d4_v2_ssse3; -filter8_1dfunction vp9_filter_block1d4_h2_ssse3; -filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3; -filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3; -filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3; -filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3; -filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3; -filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3; - -// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, 
-// int w, int h); -// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); -FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); -FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); -FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, - ssse3); - -// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, ssse3); -FUN_CONV_2D(avg_ , ssse3); -#endif // HAVE_SSSE3 +#include "./vpx_config.h" +#include "vp9/common/x86/convolve.h" #if HAVE_SSE2 filter8_1dfunction vp9_filter_block1d16_v8_sse2; diff --git a/media/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c b/media/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c index 4bd8ac4a315..b40669c6375 100644 --- a/media/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c +++ b/media/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c @@ -11,6 +11,7 @@ #include // SSE2 #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_loopfilter.h" #include "vpx_ports/emmintrin_compat.h" @@ -524,12 +525,12 @@ void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, const uint8_t *_limit, const uint8_t 
*_thresh, int count, int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_op2, 16); - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_op1, 16); - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_op0, 16); - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_oq2, 16); - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_oq1, 16); - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_oq0, 16); + DECLARE_ALIGNED(16, uint16_t, flat_op2[16]); + DECLARE_ALIGNED(16, uint16_t, flat_op1[16]); + DECLARE_ALIGNED(16, uint16_t, flat_op0[16]); + DECLARE_ALIGNED(16, uint16_t, flat_oq2[16]); + DECLARE_ALIGNED(16, uint16_t, flat_oq1[16]); + DECLARE_ALIGNED(16, uint16_t, flat_oq0[16]); const __m128i zero = _mm_set1_epi16(0); __m128i blimit, limit, thresh; __m128i mask, hev, flat; @@ -1059,7 +1060,7 @@ void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 8 * 8); + DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]); uint16_t *src[1]; uint16_t *dst[1]; (void)count; @@ -1089,7 +1090,7 @@ void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 16 * 8); + DECLARE_ALIGNED(16, uint16_t, t_dst[16 * 8]); uint16_t *src[2]; uint16_t *dst[2]; @@ -1113,7 +1114,7 @@ void vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 8 * 8); + DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]); uint16_t *src[1]; uint16_t *dst[1]; (void)count; @@ -1143,7 +1144,7 @@ void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 16 * 8); + DECLARE_ALIGNED(16, uint16_t, t_dst[16 * 8]); uint16_t *src[2]; uint16_t *dst[2]; @@ -1168,7 +1169,7 @@ void vp9_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, const uint8_t *limit, const uint8_t *thresh, int 
bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 8 * 16); + DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 16]); uint16_t *src[2]; uint16_t *dst[2]; @@ -1198,7 +1199,7 @@ void vp9_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, const uint8_t *limit, const uint8_t *thresh, int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 256); + DECLARE_ALIGNED(16, uint16_t, t_dst[256]); // Transpose 16x16 highbd_transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16); diff --git a/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c index 42e0baa05b4..ce010df3b8a 100644 --- a/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/x86/vp9_idct_intrin_sse2.h" #include "vp9/common/vp9_idct.h" @@ -17,17 +19,16 @@ d0 = _mm_unpacklo_epi8(d0, zero); \ d0 = _mm_add_epi16(in_x, d0); \ d0 = _mm_packus_epi16(d0, d0); \ - *(int *)dest = _mm_cvtsi128_si32(d0); \ - dest += stride; \ + *(int *)(dest) = _mm_cvtsi128_si32(d0); \ } void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i eight = _mm_set1_epi16(8); - const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64, - (int16_t)cospi_16_64, (int16_t)-cospi_16_64, - (int16_t)cospi_24_64, (int16_t)-cospi_8_64, - (int16_t)cospi_8_64, (int16_t)cospi_24_64); + const __m128i cst = _mm_setr_epi16( + (int16_t)cospi_16_64, (int16_t)cospi_16_64, (int16_t)cospi_16_64, + (int16_t)-cospi_16_64, (int16_t)cospi_24_64, (int16_t)-cospi_8_64, + (int16_t)cospi_8_64, (int16_t)cospi_24_64); const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); __m128i input0, input1, input2, input3; @@ -126,28 +127,28 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { // Reconstruction and Store 
{ - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); - __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); - d0 = _mm_unpacklo_epi32(d0, - _mm_cvtsi32_si128(*(const int *) (dest + stride))); - d2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128( - *(const int *) (dest + stride * 3)), d2); - d0 = _mm_unpacklo_epi8(d0, zero); - d2 = _mm_unpacklo_epi8(d2, zero); - d0 = _mm_add_epi16(d0, input2); - d2 = _mm_add_epi16(d2, input3); - d0 = _mm_packus_epi16(d0, d2); - // store input0 - *(int *)dest = _mm_cvtsi128_si32(d0); - // store input1 - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); - // store input2 - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); - // store input3 - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); + __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); + __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); + d0 = _mm_unpacklo_epi32(d0, + _mm_cvtsi32_si128(*(const int *)(dest + stride))); + d2 = _mm_unpacklo_epi32( + _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)), d2); + d0 = _mm_unpacklo_epi8(d0, zero); + d2 = _mm_unpacklo_epi8(d2, zero); + d0 = _mm_add_epi16(d0, input2); + d2 = _mm_add_epi16(d2, input3); + d0 = _mm_packus_epi16(d0, d2); + // store input0 + *(int *)dest = _mm_cvtsi128_si32(d0); + // store input1 + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); + // store input2 + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); + // store input3 + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); } } @@ -162,10 +163,10 @@ void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { dc_value = _mm_set1_epi16(a); - RECON_AND_STORE4X4(dest, dc_value); - RECON_AND_STORE4X4(dest, dc_value); - RECON_AND_STORE4X4(dest, dc_value); - RECON_AND_STORE4X4(dest, dc_value); + RECON_AND_STORE4X4(dest + 0 * stride, 
dc_value); + RECON_AND_STORE4X4(dest + 1 * stride, dc_value); + RECON_AND_STORE4X4(dest + 2 * stride, dc_value); + RECON_AND_STORE4X4(dest + 3 * stride, dc_value); } static INLINE void transpose_4x4(__m128i *res) { @@ -267,8 +268,8 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride, const __m128i zero = _mm_setzero_si128(); const __m128i eight = _mm_set1_epi16(8); - in[0]= _mm_loadu_si128((const __m128i *)(input)); - in[1]= _mm_loadu_si128((const __m128i *)(input + 8)); + in[0] = _mm_loadu_si128((const __m128i *)(input)); + in[1] = _mm_loadu_si128((const __m128i *)(input + 8)); switch (tx_type) { case 0: // DCT_DCT @@ -301,28 +302,28 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride, // Reconstruction and Store { - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); - __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); - d0 = _mm_unpacklo_epi32(d0, - _mm_cvtsi32_si128(*(const int *) (dest + stride))); - d2 = _mm_unpacklo_epi32(d2, _mm_cvtsi32_si128( - *(const int *) (dest + stride * 3))); - d0 = _mm_unpacklo_epi8(d0, zero); - d2 = _mm_unpacklo_epi8(d2, zero); - d0 = _mm_add_epi16(d0, in[0]); - d2 = _mm_add_epi16(d2, in[1]); - d0 = _mm_packus_epi16(d0, d2); - // store result[0] - *(int *)dest = _mm_cvtsi128_si32(d0); - // store result[1] - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); - // store result[2] - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); - // store result[3] - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); + __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); + __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); + d0 = _mm_unpacklo_epi32(d0, + _mm_cvtsi32_si128(*(const int *)(dest + stride))); + d2 = _mm_unpacklo_epi32( + d2, _mm_cvtsi32_si128(*(const int *)(dest + stride * 3))); + d0 = _mm_unpacklo_epi8(d0, zero); + d2 = _mm_unpacklo_epi8(d2, zero); + d0 = 
_mm_add_epi16(d0, in[0]); + d2 = _mm_add_epi16(d2, in[1]); + d0 = _mm_packus_epi16(d0, d2); + // store result[0] + *(int *)dest = _mm_cvtsi128_si32(d0); + // store result[1] + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); + // store result[2] + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); + // store result[3] + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); } } @@ -517,7 +518,7 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride, void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1<<4); + const __m128i final_rounding = _mm_set1_epi16(1 << 4); const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); @@ -551,7 +552,7 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { // 4-stage 1D idct8x8 IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + in0, in1, in2, in3, in4, in5, in6, in7); } // Final rounding and shift @@ -573,14 +574,14 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { in6 = _mm_srai_epi16(in6, 5); in7 = _mm_srai_epi16(in7, 5); - RECON_AND_STORE(dest, in0); - RECON_AND_STORE(dest, in1); - RECON_AND_STORE(dest, in2); - RECON_AND_STORE(dest, in3); - RECON_AND_STORE(dest, in4); - RECON_AND_STORE(dest, in5); - RECON_AND_STORE(dest, in6); - RECON_AND_STORE(dest, in7); + RECON_AND_STORE(dest + 0 * stride, in0); + RECON_AND_STORE(dest + 1 * stride, in1); + RECON_AND_STORE(dest + 2 * stride, in2); + RECON_AND_STORE(dest + 3 * stride, in3); + RECON_AND_STORE(dest + 4 * stride, in4); + RECON_AND_STORE(dest + 5 * stride, 
in5); + RECON_AND_STORE(dest + 6 * stride, in6); + RECON_AND_STORE(dest + 7 * stride, in7); } void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { @@ -594,14 +595,14 @@ void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { dc_value = _mm_set1_epi16(a); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest + 0 * stride, dc_value); + RECON_AND_STORE(dest + 1 * stride, dc_value); + RECON_AND_STORE(dest + 2 * stride, dc_value); + RECON_AND_STORE(dest + 3 * stride, dc_value); + RECON_AND_STORE(dest + 4 * stride, dc_value); + RECON_AND_STORE(dest + 5 * stride, dc_value); + RECON_AND_STORE(dest + 6 * stride, dc_value); + RECON_AND_STORE(dest + 7 * stride, dc_value); } static void idct8_sse2(__m128i *in) { @@ -626,7 +627,7 @@ static void idct8_sse2(__m128i *in) { // 4-stage 1D idct8x8 IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]); + in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]); } static void iadst8_sse2(__m128i *in) { @@ -656,14 +657,14 @@ static void iadst8_sse2(__m128i *in) { array_transpose_8x8(in, in); // properly aligned for butterfly input - in0 = in[7]; - in1 = in[0]; - in2 = in[5]; - in3 = in[2]; - in4 = in[3]; - in5 = in[4]; - in6 = in[1]; - in7 = in[6]; + in0 = in[7]; + in1 = in[0]; + in2 = in[5]; + in3 = in[2]; + in4 = in[3]; + in5 = in[4]; + in6 = in[1]; + in7 = in[6]; // column transformation // stage 1 @@ -857,12 +858,11 @@ static void iadst8_sse2(__m128i *in) { in[7] = _mm_sub_epi16(k__const_0, s1); } - void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride, int tx_type) { __m128i in[8]; const __m128i zero = _mm_setzero_si128(); - const __m128i final_rounding = 
_mm_set1_epi16(1<<4); + const __m128i final_rounding = _mm_set1_epi16(1 << 4); // load input data in[0] = _mm_load_si128((const __m128i *)input); @@ -915,20 +915,20 @@ void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride, in[6] = _mm_srai_epi16(in[6], 5); in[7] = _mm_srai_epi16(in[7], 5); - RECON_AND_STORE(dest, in[0]); - RECON_AND_STORE(dest, in[1]); - RECON_AND_STORE(dest, in[2]); - RECON_AND_STORE(dest, in[3]); - RECON_AND_STORE(dest, in[4]); - RECON_AND_STORE(dest, in[5]); - RECON_AND_STORE(dest, in[6]); - RECON_AND_STORE(dest, in[7]); + RECON_AND_STORE(dest + 0 * stride, in[0]); + RECON_AND_STORE(dest + 1 * stride, in[1]); + RECON_AND_STORE(dest + 2 * stride, in[2]); + RECON_AND_STORE(dest + 3 * stride, in[3]); + RECON_AND_STORE(dest + 4 * stride, in[4]); + RECON_AND_STORE(dest + 5 * stride, in[5]); + RECON_AND_STORE(dest + 6 * stride, in[6]); + RECON_AND_STORE(dest + 7 * stride, in[7]); } void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1<<4); + const __m128i final_rounding = _mm_set1_epi16(1 << 4); const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); @@ -953,7 +953,7 @@ void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { // 8x4 Transpose TRANSPOSE_8X8_10(in0, in1, in2, in3, in0, in1); // Stage1 - { //NOLINT + { const __m128i lo_17 = _mm_unpackhi_epi16(in0, zero); const __m128i lo_35 = _mm_unpackhi_epi16(in1, zero); @@ -976,7 +976,7 @@ void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { } // Stage2 - { //NOLINT + { const __m128i lo_04 = _mm_unpacklo_epi16(in0, zero); const __m128i lo_26 = _mm_unpacklo_epi16(in1, zero); @@ -1006,7 +1006,7 @@ void 
vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { } // Stage3 - { //NOLINT + { const __m128i lo_56 = _mm_unpacklo_epi16(stp2_5, stp2_6); tmp4 = _mm_adds_epi16(stp2_0, stp2_2); @@ -1035,7 +1035,7 @@ void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3) IDCT8(in0, in1, in2, in3, zero, zero, zero, zero, - in0, in1, in2, in3, in4, in5, in6, in7); + in0, in1, in2, in3, in4, in5, in6, in7); // Final rounding and shift in0 = _mm_adds_epi16(in0, final_rounding); in1 = _mm_adds_epi16(in1, final_rounding); @@ -1055,14 +1055,14 @@ void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { in6 = _mm_srai_epi16(in6, 5); in7 = _mm_srai_epi16(in7, 5); - RECON_AND_STORE(dest, in0); - RECON_AND_STORE(dest, in1); - RECON_AND_STORE(dest, in2); - RECON_AND_STORE(dest, in3); - RECON_AND_STORE(dest, in4); - RECON_AND_STORE(dest, in5); - RECON_AND_STORE(dest, in6); - RECON_AND_STORE(dest, in7); + RECON_AND_STORE(dest + 0 * stride, in0); + RECON_AND_STORE(dest + 1 * stride, in1); + RECON_AND_STORE(dest + 2 * stride, in2); + RECON_AND_STORE(dest + 3 * stride, in3); + RECON_AND_STORE(dest + 4 * stride, in4); + RECON_AND_STORE(dest + 5 * stride, in5); + RECON_AND_STORE(dest + 6 * stride, in6); + RECON_AND_STORE(dest + 7 * stride, in7); } #define IDCT16 \ @@ -1305,7 +1305,7 @@ void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1<<5); + const __m128i final_rounding = _mm_set1_epi16(1 << 5); const __m128i zero = _mm_setzero_si128(); const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); @@ -1344,130 +1344,86 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, curr1 = l; for (i = 0; i < 2; i++) { - // 1-D idct + // 1-D 
idct - // Load input data. - in[0] = _mm_load_si128((const __m128i *)input); - in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1)); - in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3)); - in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4)); - in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5)); - in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6)); - in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7)); - in[4] = _mm_load_si128((const __m128i *)(input + 8 * 8)); - in[12] = _mm_load_si128((const __m128i *)(input + 8 * 9)); - in[5] = _mm_load_si128((const __m128i *)(input + 8 * 10)); - in[13] = _mm_load_si128((const __m128i *)(input + 8 * 11)); - in[6] = _mm_load_si128((const __m128i *)(input + 8 * 12)); - in[14] = _mm_load_si128((const __m128i *)(input + 8 * 13)); - in[7] = _mm_load_si128((const __m128i *)(input + 8 * 14)); - in[15] = _mm_load_si128((const __m128i *)(input + 8 * 15)); + // Load input data. 
+ in[0] = _mm_load_si128((const __m128i *)input); + in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1)); + in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2)); + in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3)); + in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4)); + in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5)); + in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6)); + in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7)); + in[4] = _mm_load_si128((const __m128i *)(input + 8 * 8)); + in[12] = _mm_load_si128((const __m128i *)(input + 8 * 9)); + in[5] = _mm_load_si128((const __m128i *)(input + 8 * 10)); + in[13] = _mm_load_si128((const __m128i *)(input + 8 * 11)); + in[6] = _mm_load_si128((const __m128i *)(input + 8 * 12)); + in[14] = _mm_load_si128((const __m128i *)(input + 8 * 13)); + in[7] = _mm_load_si128((const __m128i *)(input + 8 * 14)); + in[15] = _mm_load_si128((const __m128i *)(input + 8 * 15)); - array_transpose_8x8(in, in); - array_transpose_8x8(in+8, in+8); + array_transpose_8x8(in, in); + array_transpose_8x8(in + 8, in + 8); - IDCT16 + IDCT16 - // Stage7 - curr1[0] = _mm_add_epi16(stp2_0, stp1_15); - curr1[1] = _mm_add_epi16(stp2_1, stp1_14); - curr1[2] = _mm_add_epi16(stp2_2, stp2_13); - curr1[3] = _mm_add_epi16(stp2_3, stp2_12); - curr1[4] = _mm_add_epi16(stp2_4, stp2_11); - curr1[5] = _mm_add_epi16(stp2_5, stp2_10); - curr1[6] = _mm_add_epi16(stp2_6, stp1_9); - curr1[7] = _mm_add_epi16(stp2_7, stp1_8); - curr1[8] = _mm_sub_epi16(stp2_7, stp1_8); - curr1[9] = _mm_sub_epi16(stp2_6, stp1_9); - curr1[10] = _mm_sub_epi16(stp2_5, stp2_10); - curr1[11] = _mm_sub_epi16(stp2_4, stp2_11); - curr1[12] = _mm_sub_epi16(stp2_3, stp2_12); - curr1[13] = _mm_sub_epi16(stp2_2, stp2_13); - curr1[14] = _mm_sub_epi16(stp2_1, stp1_14); - curr1[15] = _mm_sub_epi16(stp2_0, stp1_15); + // Stage7 + curr1[0] = _mm_add_epi16(stp2_0, stp1_15); + curr1[1] = _mm_add_epi16(stp2_1, stp1_14); + curr1[2] = _mm_add_epi16(stp2_2, stp2_13); + 
curr1[3] = _mm_add_epi16(stp2_3, stp2_12); + curr1[4] = _mm_add_epi16(stp2_4, stp2_11); + curr1[5] = _mm_add_epi16(stp2_5, stp2_10); + curr1[6] = _mm_add_epi16(stp2_6, stp1_9); + curr1[7] = _mm_add_epi16(stp2_7, stp1_8); + curr1[8] = _mm_sub_epi16(stp2_7, stp1_8); + curr1[9] = _mm_sub_epi16(stp2_6, stp1_9); + curr1[10] = _mm_sub_epi16(stp2_5, stp2_10); + curr1[11] = _mm_sub_epi16(stp2_4, stp2_11); + curr1[12] = _mm_sub_epi16(stp2_3, stp2_12); + curr1[13] = _mm_sub_epi16(stp2_2, stp2_13); + curr1[14] = _mm_sub_epi16(stp2_1, stp1_14); + curr1[15] = _mm_sub_epi16(stp2_0, stp1_15); - curr1 = r; - input += 128; + curr1 = r; + input += 128; } for (i = 0; i < 2; i++) { - // 1-D idct - array_transpose_8x8(l+i*8, in); - array_transpose_8x8(r+i*8, in+8); + int j; + // 1-D idct + array_transpose_8x8(l + i * 8, in); + array_transpose_8x8(r + i * 8, in + 8); - IDCT16 + IDCT16 - // 2-D - in[0] = _mm_add_epi16(stp2_0, stp1_15); - in[1] = _mm_add_epi16(stp2_1, stp1_14); - in[2] = _mm_add_epi16(stp2_2, stp2_13); - in[3] = _mm_add_epi16(stp2_3, stp2_12); - in[4] = _mm_add_epi16(stp2_4, stp2_11); - in[5] = _mm_add_epi16(stp2_5, stp2_10); - in[6] = _mm_add_epi16(stp2_6, stp1_9); - in[7] = _mm_add_epi16(stp2_7, stp1_8); - in[8] = _mm_sub_epi16(stp2_7, stp1_8); - in[9] = _mm_sub_epi16(stp2_6, stp1_9); - in[10] = _mm_sub_epi16(stp2_5, stp2_10); - in[11] = _mm_sub_epi16(stp2_4, stp2_11); - in[12] = _mm_sub_epi16(stp2_3, stp2_12); - in[13] = _mm_sub_epi16(stp2_2, stp2_13); - in[14] = _mm_sub_epi16(stp2_1, stp1_14); - in[15] = _mm_sub_epi16(stp2_0, stp1_15); + // 2-D + in[0] = _mm_add_epi16(stp2_0, stp1_15); + in[1] = _mm_add_epi16(stp2_1, stp1_14); + in[2] = _mm_add_epi16(stp2_2, stp2_13); + in[3] = _mm_add_epi16(stp2_3, stp2_12); + in[4] = _mm_add_epi16(stp2_4, stp2_11); + in[5] = _mm_add_epi16(stp2_5, stp2_10); + in[6] = _mm_add_epi16(stp2_6, stp1_9); + in[7] = _mm_add_epi16(stp2_7, stp1_8); + in[8] = _mm_sub_epi16(stp2_7, stp1_8); + in[9] = _mm_sub_epi16(stp2_6, stp1_9); + in[10] = 
_mm_sub_epi16(stp2_5, stp2_10); + in[11] = _mm_sub_epi16(stp2_4, stp2_11); + in[12] = _mm_sub_epi16(stp2_3, stp2_12); + in[13] = _mm_sub_epi16(stp2_2, stp2_13); + in[14] = _mm_sub_epi16(stp2_1, stp1_14); + in[15] = _mm_sub_epi16(stp2_0, stp1_15); + for (j = 0; j < 16; ++j) { // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - in[8] = _mm_adds_epi16(in[8], final_rounding); - in[9] = _mm_adds_epi16(in[9], final_rounding); - in[10] = _mm_adds_epi16(in[10], final_rounding); - in[11] = _mm_adds_epi16(in[11], final_rounding); - in[12] = _mm_adds_epi16(in[12], final_rounding); - in[13] = _mm_adds_epi16(in[13], final_rounding); - in[14] = _mm_adds_epi16(in[14], final_rounding); - in[15] = _mm_adds_epi16(in[15], final_rounding); + in[j] = _mm_adds_epi16(in[j], final_rounding); + in[j] = _mm_srai_epi16(in[j], 6); + RECON_AND_STORE(dest + j * stride, in[j]); + } - in[0] = _mm_srai_epi16(in[0], 6); - in[1] = _mm_srai_epi16(in[1], 6); - in[2] = _mm_srai_epi16(in[2], 6); - in[3] = _mm_srai_epi16(in[3], 6); - in[4] = _mm_srai_epi16(in[4], 6); - in[5] = _mm_srai_epi16(in[5], 6); - in[6] = _mm_srai_epi16(in[6], 6); - in[7] = _mm_srai_epi16(in[7], 6); - in[8] = _mm_srai_epi16(in[8], 6); - in[9] = _mm_srai_epi16(in[9], 6); - in[10] = _mm_srai_epi16(in[10], 6); - in[11] = _mm_srai_epi16(in[11], 6); - in[12] = _mm_srai_epi16(in[12], 6); - in[13] = _mm_srai_epi16(in[13], 6); - in[14] = _mm_srai_epi16(in[14], 6); - in[15] = _mm_srai_epi16(in[15], 6); - - RECON_AND_STORE(dest, in[0]); - RECON_AND_STORE(dest, in[1]); - RECON_AND_STORE(dest, in[2]); - RECON_AND_STORE(dest, in[3]); - RECON_AND_STORE(dest, in[4]); - 
RECON_AND_STORE(dest, in[5]); - RECON_AND_STORE(dest, in[6]); - RECON_AND_STORE(dest, in[7]); - RECON_AND_STORE(dest, in[8]); - RECON_AND_STORE(dest, in[9]); - RECON_AND_STORE(dest, in[10]); - RECON_AND_STORE(dest, in[11]); - RECON_AND_STORE(dest, in[12]); - RECON_AND_STORE(dest, in[13]); - RECON_AND_STORE(dest, in[14]); - RECON_AND_STORE(dest, in[15]); - - dest += 8 - (stride * 16); + dest += 8; } } @@ -1483,23 +1439,23 @@ void vp9_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { dc_value = _mm_set1_epi16(a); for (i = 0; i < 2; ++i) { - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - dest += 8 - (stride * 16); + RECON_AND_STORE(dest + 0 * stride, dc_value); + RECON_AND_STORE(dest + 1 * stride, dc_value); + RECON_AND_STORE(dest + 2 * stride, dc_value); + RECON_AND_STORE(dest + 3 * stride, dc_value); + RECON_AND_STORE(dest + 4 * stride, dc_value); + RECON_AND_STORE(dest + 5 * stride, dc_value); + RECON_AND_STORE(dest + 6 * stride, dc_value); + RECON_AND_STORE(dest + 7 * stride, dc_value); + RECON_AND_STORE(dest + 8 * stride, dc_value); + RECON_AND_STORE(dest + 9 * stride, dc_value); + RECON_AND_STORE(dest + 10 * stride, dc_value); + RECON_AND_STORE(dest + 11 * stride, dc_value); + RECON_AND_STORE(dest + 12 * stride, dc_value); + RECON_AND_STORE(dest + 13 * stride, dc_value); + RECON_AND_STORE(dest + 14 * stride, dc_value); + RECON_AND_STORE(dest + 15 * stride, dc_value); + dest += 8; } } @@ -2367,7 +2323,7 @@ void 
vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride, void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1<<5); + const __m128i final_rounding = _mm_set1_epi16(1 << 5); const __m128i zero = _mm_setzero_si128(); const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); @@ -2406,7 +2362,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, // Stage2 { const __m128i lo_1_15 = _mm_unpackhi_epi16(in[0], zero); - const __m128i lo_13_3 = _mm_unpackhi_epi16(zero, in[1]); + const __m128i lo_13_3 = _mm_unpackhi_epi16(zero, in[1]); tmp0 = _mm_madd_epi16(lo_1_15, stg2_0); tmp2 = _mm_madd_epi16(lo_1_15, stg2_1); @@ -2567,7 +2523,8 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, // Second 1-D inverse transform, performed per 8x16 block for (i = 0; i < 2; i++) { - array_transpose_4X8(l + 8*i, in); + int j; + array_transpose_4X8(l + 8 * i, in); IDCT16_10 @@ -2589,59 +2546,14 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, in[14] = _mm_sub_epi16(stp2_1, stp1_14); in[15] = _mm_sub_epi16(stp2_0, stp1_15); - // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - in[8] = _mm_adds_epi16(in[8], final_rounding); - in[9] = _mm_adds_epi16(in[9], final_rounding); - in[10] = _mm_adds_epi16(in[10], final_rounding); - in[11] = _mm_adds_epi16(in[11], final_rounding); - in[12] = _mm_adds_epi16(in[12], final_rounding); - in[13] = _mm_adds_epi16(in[13], final_rounding); - in[14] = _mm_adds_epi16(in[14], 
final_rounding); - in[15] = _mm_adds_epi16(in[15], final_rounding); + for (j = 0; j < 16; ++j) { + // Final rounding and shift + in[j] = _mm_adds_epi16(in[j], final_rounding); + in[j] = _mm_srai_epi16(in[j], 6); + RECON_AND_STORE(dest + j * stride, in[j]); + } - in[0] = _mm_srai_epi16(in[0], 6); - in[1] = _mm_srai_epi16(in[1], 6); - in[2] = _mm_srai_epi16(in[2], 6); - in[3] = _mm_srai_epi16(in[3], 6); - in[4] = _mm_srai_epi16(in[4], 6); - in[5] = _mm_srai_epi16(in[5], 6); - in[6] = _mm_srai_epi16(in[6], 6); - in[7] = _mm_srai_epi16(in[7], 6); - in[8] = _mm_srai_epi16(in[8], 6); - in[9] = _mm_srai_epi16(in[9], 6); - in[10] = _mm_srai_epi16(in[10], 6); - in[11] = _mm_srai_epi16(in[11], 6); - in[12] = _mm_srai_epi16(in[12], 6); - in[13] = _mm_srai_epi16(in[13], 6); - in[14] = _mm_srai_epi16(in[14], 6); - in[15] = _mm_srai_epi16(in[15], 6); - - RECON_AND_STORE(dest, in[0]); - RECON_AND_STORE(dest, in[1]); - RECON_AND_STORE(dest, in[2]); - RECON_AND_STORE(dest, in[3]); - RECON_AND_STORE(dest, in[4]); - RECON_AND_STORE(dest, in[5]); - RECON_AND_STORE(dest, in[6]); - RECON_AND_STORE(dest, in[7]); - RECON_AND_STORE(dest, in[8]); - RECON_AND_STORE(dest, in[9]); - RECON_AND_STORE(dest, in[10]); - RECON_AND_STORE(dest, in[11]); - RECON_AND_STORE(dest, in[12]); - RECON_AND_STORE(dest, in[13]); - RECON_AND_STORE(dest, in[14]); - RECON_AND_STORE(dest, in[15]); - - dest += 8 - (stride * 16); + dest += 8; } } @@ -3286,41 +3198,27 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, // Only upper-left 8x8 has non-zero coeff void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, - int stride) { + int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); // idct constants for each stage const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64); const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64); - const __m128i stg1_2 = pair_set_epi16(cospi_15_64, -cospi_17_64); - 
const __m128i stg1_3 = pair_set_epi16(cospi_17_64, cospi_15_64); - const __m128i stg1_4 = pair_set_epi16(cospi_23_64, -cospi_9_64); - const __m128i stg1_5 = pair_set_epi16(cospi_9_64, cospi_23_64); const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64); const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64); const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64); const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64); - const __m128i stg1_10 = pair_set_epi16(cospi_11_64, -cospi_21_64); - const __m128i stg1_11 = pair_set_epi16(cospi_21_64, cospi_11_64); - const __m128i stg1_12 = pair_set_epi16(cospi_19_64, -cospi_13_64); - const __m128i stg1_13 = pair_set_epi16(cospi_13_64, cospi_19_64); const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64); const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64); const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64); const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64); const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64); const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64); const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64); @@ -3330,8 +3228,6 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, const __m128i stg4_0 = pair_set_epi16(cospi_16_64, 
cospi_16_64); const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); @@ -3351,47 +3247,29 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, stp2_30, stp2_31; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i; - // Load input data. - LOAD_DQCOEFF(in[0], input); - LOAD_DQCOEFF(in[8], input); - LOAD_DQCOEFF(in[16], input); - LOAD_DQCOEFF(in[24], input); - LOAD_DQCOEFF(in[1], input); - LOAD_DQCOEFF(in[9], input); - LOAD_DQCOEFF(in[17], input); - LOAD_DQCOEFF(in[25], input); - LOAD_DQCOEFF(in[2], input); - LOAD_DQCOEFF(in[10], input); - LOAD_DQCOEFF(in[18], input); - LOAD_DQCOEFF(in[26], input); - LOAD_DQCOEFF(in[3], input); - LOAD_DQCOEFF(in[11], input); - LOAD_DQCOEFF(in[19], input); - LOAD_DQCOEFF(in[27], input); - LOAD_DQCOEFF(in[4], input); - LOAD_DQCOEFF(in[12], input); - LOAD_DQCOEFF(in[20], input); - LOAD_DQCOEFF(in[28], input); - LOAD_DQCOEFF(in[5], input); - LOAD_DQCOEFF(in[13], input); - LOAD_DQCOEFF(in[21], input); - LOAD_DQCOEFF(in[29], input); - LOAD_DQCOEFF(in[6], input); - LOAD_DQCOEFF(in[14], input); - LOAD_DQCOEFF(in[22], input); - LOAD_DQCOEFF(in[30], input); - LOAD_DQCOEFF(in[7], input); - LOAD_DQCOEFF(in[15], input); - LOAD_DQCOEFF(in[23], input); - LOAD_DQCOEFF(in[31], input); + // Load input data. Only need to load the top left 8x8 block. 
+ in[0] = _mm_load_si128((const __m128i *)input); + in[1] = _mm_load_si128((const __m128i *)(input + 32)); + in[2] = _mm_load_si128((const __m128i *)(input + 64)); + in[3] = _mm_load_si128((const __m128i *)(input + 96)); + in[4] = _mm_load_si128((const __m128i *)(input + 128)); + in[5] = _mm_load_si128((const __m128i *)(input + 160)); + in[6] = _mm_load_si128((const __m128i *)(input + 192)); + in[7] = _mm_load_si128((const __m128i *)(input + 224)); + + for (i = 8; i < 32; ++i) { + in[i] = _mm_setzero_si128(); + } array_transpose_8x8(in, in); - array_transpose_8x8(in+8, in+8); - array_transpose_8x8(in+16, in+16); - array_transpose_8x8(in+24, in+24); + // TODO(hkuang): Following transposes are unnecessary. But remove them will + // lead to performance drop on some devices. + array_transpose_8x8(in + 8, in + 8); + array_transpose_8x8(in + 16, in + 16); + array_transpose_8x8(in + 24, in + 24); - IDCT32 + IDCT32_34 // 1_D: Store 32 intermediate results for each 8x32 block. col[0] = _mm_add_epi16(stp1_0, stp1_31); @@ -3427,153 +3305,61 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, col[30] = _mm_sub_epi16(stp1_1, stp1_30); col[31] = _mm_sub_epi16(stp1_0, stp1_31); for (i = 0; i < 4; i++) { - const __m128i zero = _mm_setzero_si128(); - // Transpose 32x8 block to 8x32 block - array_transpose_8x8(col+i*8, in); - IDCT32_34 + int j; + const __m128i zero = _mm_setzero_si128(); + // Transpose 32x8 block to 8x32 block + array_transpose_8x8(col + i * 8, in); + IDCT32_34 - // 2_D: Calculate the results and store them to destination. 
- in[0] = _mm_add_epi16(stp1_0, stp1_31); - in[1] = _mm_add_epi16(stp1_1, stp1_30); - in[2] = _mm_add_epi16(stp1_2, stp1_29); - in[3] = _mm_add_epi16(stp1_3, stp1_28); - in[4] = _mm_add_epi16(stp1_4, stp1_27); - in[5] = _mm_add_epi16(stp1_5, stp1_26); - in[6] = _mm_add_epi16(stp1_6, stp1_25); - in[7] = _mm_add_epi16(stp1_7, stp1_24); - in[8] = _mm_add_epi16(stp1_8, stp1_23); - in[9] = _mm_add_epi16(stp1_9, stp1_22); - in[10] = _mm_add_epi16(stp1_10, stp1_21); - in[11] = _mm_add_epi16(stp1_11, stp1_20); - in[12] = _mm_add_epi16(stp1_12, stp1_19); - in[13] = _mm_add_epi16(stp1_13, stp1_18); - in[14] = _mm_add_epi16(stp1_14, stp1_17); - in[15] = _mm_add_epi16(stp1_15, stp1_16); - in[16] = _mm_sub_epi16(stp1_15, stp1_16); - in[17] = _mm_sub_epi16(stp1_14, stp1_17); - in[18] = _mm_sub_epi16(stp1_13, stp1_18); - in[19] = _mm_sub_epi16(stp1_12, stp1_19); - in[20] = _mm_sub_epi16(stp1_11, stp1_20); - in[21] = _mm_sub_epi16(stp1_10, stp1_21); - in[22] = _mm_sub_epi16(stp1_9, stp1_22); - in[23] = _mm_sub_epi16(stp1_8, stp1_23); - in[24] = _mm_sub_epi16(stp1_7, stp1_24); - in[25] = _mm_sub_epi16(stp1_6, stp1_25); - in[26] = _mm_sub_epi16(stp1_5, stp1_26); - in[27] = _mm_sub_epi16(stp1_4, stp1_27); - in[28] = _mm_sub_epi16(stp1_3, stp1_28); - in[29] = _mm_sub_epi16(stp1_2, stp1_29); - in[30] = _mm_sub_epi16(stp1_1, stp1_30); - in[31] = _mm_sub_epi16(stp1_0, stp1_31); + // 2_D: Calculate the results and store them to destination. 
+ in[0] = _mm_add_epi16(stp1_0, stp1_31); + in[1] = _mm_add_epi16(stp1_1, stp1_30); + in[2] = _mm_add_epi16(stp1_2, stp1_29); + in[3] = _mm_add_epi16(stp1_3, stp1_28); + in[4] = _mm_add_epi16(stp1_4, stp1_27); + in[5] = _mm_add_epi16(stp1_5, stp1_26); + in[6] = _mm_add_epi16(stp1_6, stp1_25); + in[7] = _mm_add_epi16(stp1_7, stp1_24); + in[8] = _mm_add_epi16(stp1_8, stp1_23); + in[9] = _mm_add_epi16(stp1_9, stp1_22); + in[10] = _mm_add_epi16(stp1_10, stp1_21); + in[11] = _mm_add_epi16(stp1_11, stp1_20); + in[12] = _mm_add_epi16(stp1_12, stp1_19); + in[13] = _mm_add_epi16(stp1_13, stp1_18); + in[14] = _mm_add_epi16(stp1_14, stp1_17); + in[15] = _mm_add_epi16(stp1_15, stp1_16); + in[16] = _mm_sub_epi16(stp1_15, stp1_16); + in[17] = _mm_sub_epi16(stp1_14, stp1_17); + in[18] = _mm_sub_epi16(stp1_13, stp1_18); + in[19] = _mm_sub_epi16(stp1_12, stp1_19); + in[20] = _mm_sub_epi16(stp1_11, stp1_20); + in[21] = _mm_sub_epi16(stp1_10, stp1_21); + in[22] = _mm_sub_epi16(stp1_9, stp1_22); + in[23] = _mm_sub_epi16(stp1_8, stp1_23); + in[24] = _mm_sub_epi16(stp1_7, stp1_24); + in[25] = _mm_sub_epi16(stp1_6, stp1_25); + in[26] = _mm_sub_epi16(stp1_5, stp1_26); + in[27] = _mm_sub_epi16(stp1_4, stp1_27); + in[28] = _mm_sub_epi16(stp1_3, stp1_28); + in[29] = _mm_sub_epi16(stp1_2, stp1_29); + in[30] = _mm_sub_epi16(stp1_1, stp1_30); + in[31] = _mm_sub_epi16(stp1_0, stp1_31); + for (j = 0; j < 32; ++j) { // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - in[8] = _mm_adds_epi16(in[8], final_rounding); - in[9] = _mm_adds_epi16(in[9], final_rounding); - in[10] = _mm_adds_epi16(in[10], final_rounding); - in[11] = 
_mm_adds_epi16(in[11], final_rounding); - in[12] = _mm_adds_epi16(in[12], final_rounding); - in[13] = _mm_adds_epi16(in[13], final_rounding); - in[14] = _mm_adds_epi16(in[14], final_rounding); - in[15] = _mm_adds_epi16(in[15], final_rounding); - in[16] = _mm_adds_epi16(in[16], final_rounding); - in[17] = _mm_adds_epi16(in[17], final_rounding); - in[18] = _mm_adds_epi16(in[18], final_rounding); - in[19] = _mm_adds_epi16(in[19], final_rounding); - in[20] = _mm_adds_epi16(in[20], final_rounding); - in[21] = _mm_adds_epi16(in[21], final_rounding); - in[22] = _mm_adds_epi16(in[22], final_rounding); - in[23] = _mm_adds_epi16(in[23], final_rounding); - in[24] = _mm_adds_epi16(in[24], final_rounding); - in[25] = _mm_adds_epi16(in[25], final_rounding); - in[26] = _mm_adds_epi16(in[26], final_rounding); - in[27] = _mm_adds_epi16(in[27], final_rounding); - in[28] = _mm_adds_epi16(in[28], final_rounding); - in[29] = _mm_adds_epi16(in[29], final_rounding); - in[30] = _mm_adds_epi16(in[30], final_rounding); - in[31] = _mm_adds_epi16(in[31], final_rounding); - - in[0] = _mm_srai_epi16(in[0], 6); - in[1] = _mm_srai_epi16(in[1], 6); - in[2] = _mm_srai_epi16(in[2], 6); - in[3] = _mm_srai_epi16(in[3], 6); - in[4] = _mm_srai_epi16(in[4], 6); - in[5] = _mm_srai_epi16(in[5], 6); - in[6] = _mm_srai_epi16(in[6], 6); - in[7] = _mm_srai_epi16(in[7], 6); - in[8] = _mm_srai_epi16(in[8], 6); - in[9] = _mm_srai_epi16(in[9], 6); - in[10] = _mm_srai_epi16(in[10], 6); - in[11] = _mm_srai_epi16(in[11], 6); - in[12] = _mm_srai_epi16(in[12], 6); - in[13] = _mm_srai_epi16(in[13], 6); - in[14] = _mm_srai_epi16(in[14], 6); - in[15] = _mm_srai_epi16(in[15], 6); - in[16] = _mm_srai_epi16(in[16], 6); - in[17] = _mm_srai_epi16(in[17], 6); - in[18] = _mm_srai_epi16(in[18], 6); - in[19] = _mm_srai_epi16(in[19], 6); - in[20] = _mm_srai_epi16(in[20], 6); - in[21] = _mm_srai_epi16(in[21], 6); - in[22] = _mm_srai_epi16(in[22], 6); - in[23] = _mm_srai_epi16(in[23], 6); - in[24] = _mm_srai_epi16(in[24], 6); - 
in[25] = _mm_srai_epi16(in[25], 6); - in[26] = _mm_srai_epi16(in[26], 6); - in[27] = _mm_srai_epi16(in[27], 6); - in[28] = _mm_srai_epi16(in[28], 6); - in[29] = _mm_srai_epi16(in[29], 6); - in[30] = _mm_srai_epi16(in[30], 6); - in[31] = _mm_srai_epi16(in[31], 6); - - RECON_AND_STORE(dest, in[0]); - RECON_AND_STORE(dest, in[1]); - RECON_AND_STORE(dest, in[2]); - RECON_AND_STORE(dest, in[3]); - RECON_AND_STORE(dest, in[4]); - RECON_AND_STORE(dest, in[5]); - RECON_AND_STORE(dest, in[6]); - RECON_AND_STORE(dest, in[7]); - RECON_AND_STORE(dest, in[8]); - RECON_AND_STORE(dest, in[9]); - RECON_AND_STORE(dest, in[10]); - RECON_AND_STORE(dest, in[11]); - RECON_AND_STORE(dest, in[12]); - RECON_AND_STORE(dest, in[13]); - RECON_AND_STORE(dest, in[14]); - RECON_AND_STORE(dest, in[15]); - RECON_AND_STORE(dest, in[16]); - RECON_AND_STORE(dest, in[17]); - RECON_AND_STORE(dest, in[18]); - RECON_AND_STORE(dest, in[19]); - RECON_AND_STORE(dest, in[20]); - RECON_AND_STORE(dest, in[21]); - RECON_AND_STORE(dest, in[22]); - RECON_AND_STORE(dest, in[23]); - RECON_AND_STORE(dest, in[24]); - RECON_AND_STORE(dest, in[25]); - RECON_AND_STORE(dest, in[26]); - RECON_AND_STORE(dest, in[27]); - RECON_AND_STORE(dest, in[28]); - RECON_AND_STORE(dest, in[29]); - RECON_AND_STORE(dest, in[30]); - RECON_AND_STORE(dest, in[31]); - - dest += 8 - (stride * 32); + in[j] = _mm_adds_epi16(in[j], final_rounding); + in[j] = _mm_srai_epi16(in[j], 6); + RECON_AND_STORE(dest + j * stride, in[j]); } + + dest += 8; } +} void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1<<5); + const __m128i final_rounding = _mm_set1_epi16(1 << 5); const __m128i zero = _mm_setzero_si128(); // idct constants for each stage @@ -3640,304 +3426,211 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest, for (i = 0; i < 4; i++) { i32 = (i << 5); - // First 1-D idct - // Load 
input data. - LOAD_DQCOEFF(in[0], input); - LOAD_DQCOEFF(in[8], input); - LOAD_DQCOEFF(in[16], input); - LOAD_DQCOEFF(in[24], input); - LOAD_DQCOEFF(in[1], input); - LOAD_DQCOEFF(in[9], input); - LOAD_DQCOEFF(in[17], input); - LOAD_DQCOEFF(in[25], input); - LOAD_DQCOEFF(in[2], input); - LOAD_DQCOEFF(in[10], input); - LOAD_DQCOEFF(in[18], input); - LOAD_DQCOEFF(in[26], input); - LOAD_DQCOEFF(in[3], input); - LOAD_DQCOEFF(in[11], input); - LOAD_DQCOEFF(in[19], input); - LOAD_DQCOEFF(in[27], input); + // First 1-D idct + // Load input data. + LOAD_DQCOEFF(in[0], input); + LOAD_DQCOEFF(in[8], input); + LOAD_DQCOEFF(in[16], input); + LOAD_DQCOEFF(in[24], input); + LOAD_DQCOEFF(in[1], input); + LOAD_DQCOEFF(in[9], input); + LOAD_DQCOEFF(in[17], input); + LOAD_DQCOEFF(in[25], input); + LOAD_DQCOEFF(in[2], input); + LOAD_DQCOEFF(in[10], input); + LOAD_DQCOEFF(in[18], input); + LOAD_DQCOEFF(in[26], input); + LOAD_DQCOEFF(in[3], input); + LOAD_DQCOEFF(in[11], input); + LOAD_DQCOEFF(in[19], input); + LOAD_DQCOEFF(in[27], input); - LOAD_DQCOEFF(in[4], input); - LOAD_DQCOEFF(in[12], input); - LOAD_DQCOEFF(in[20], input); - LOAD_DQCOEFF(in[28], input); - LOAD_DQCOEFF(in[5], input); - LOAD_DQCOEFF(in[13], input); - LOAD_DQCOEFF(in[21], input); - LOAD_DQCOEFF(in[29], input); - LOAD_DQCOEFF(in[6], input); - LOAD_DQCOEFF(in[14], input); - LOAD_DQCOEFF(in[22], input); - LOAD_DQCOEFF(in[30], input); - LOAD_DQCOEFF(in[7], input); - LOAD_DQCOEFF(in[15], input); - LOAD_DQCOEFF(in[23], input); - LOAD_DQCOEFF(in[31], input); + LOAD_DQCOEFF(in[4], input); + LOAD_DQCOEFF(in[12], input); + LOAD_DQCOEFF(in[20], input); + LOAD_DQCOEFF(in[28], input); + LOAD_DQCOEFF(in[5], input); + LOAD_DQCOEFF(in[13], input); + LOAD_DQCOEFF(in[21], input); + LOAD_DQCOEFF(in[29], input); + LOAD_DQCOEFF(in[6], input); + LOAD_DQCOEFF(in[14], input); + LOAD_DQCOEFF(in[22], input); + LOAD_DQCOEFF(in[30], input); + LOAD_DQCOEFF(in[7], input); + LOAD_DQCOEFF(in[15], input); + LOAD_DQCOEFF(in[23], input); + 
LOAD_DQCOEFF(in[31], input); - // checking if all entries are zero - zero_idx[0] = _mm_or_si128(in[0], in[1]); - zero_idx[1] = _mm_or_si128(in[2], in[3]); - zero_idx[2] = _mm_or_si128(in[4], in[5]); - zero_idx[3] = _mm_or_si128(in[6], in[7]); - zero_idx[4] = _mm_or_si128(in[8], in[9]); - zero_idx[5] = _mm_or_si128(in[10], in[11]); - zero_idx[6] = _mm_or_si128(in[12], in[13]); - zero_idx[7] = _mm_or_si128(in[14], in[15]); - zero_idx[8] = _mm_or_si128(in[16], in[17]); - zero_idx[9] = _mm_or_si128(in[18], in[19]); - zero_idx[10] = _mm_or_si128(in[20], in[21]); - zero_idx[11] = _mm_or_si128(in[22], in[23]); - zero_idx[12] = _mm_or_si128(in[24], in[25]); - zero_idx[13] = _mm_or_si128(in[26], in[27]); - zero_idx[14] = _mm_or_si128(in[28], in[29]); - zero_idx[15] = _mm_or_si128(in[30], in[31]); + // checking if all entries are zero + zero_idx[0] = _mm_or_si128(in[0], in[1]); + zero_idx[1] = _mm_or_si128(in[2], in[3]); + zero_idx[2] = _mm_or_si128(in[4], in[5]); + zero_idx[3] = _mm_or_si128(in[6], in[7]); + zero_idx[4] = _mm_or_si128(in[8], in[9]); + zero_idx[5] = _mm_or_si128(in[10], in[11]); + zero_idx[6] = _mm_or_si128(in[12], in[13]); + zero_idx[7] = _mm_or_si128(in[14], in[15]); + zero_idx[8] = _mm_or_si128(in[16], in[17]); + zero_idx[9] = _mm_or_si128(in[18], in[19]); + zero_idx[10] = _mm_or_si128(in[20], in[21]); + zero_idx[11] = _mm_or_si128(in[22], in[23]); + zero_idx[12] = _mm_or_si128(in[24], in[25]); + zero_idx[13] = _mm_or_si128(in[26], in[27]); + zero_idx[14] = _mm_or_si128(in[28], in[29]); + zero_idx[15] = _mm_or_si128(in[30], in[31]); - zero_idx[0] = _mm_or_si128(zero_idx[0], zero_idx[1]); - zero_idx[1] = _mm_or_si128(zero_idx[2], zero_idx[3]); - zero_idx[2] = _mm_or_si128(zero_idx[4], zero_idx[5]); - zero_idx[3] = _mm_or_si128(zero_idx[6], zero_idx[7]); - zero_idx[4] = _mm_or_si128(zero_idx[8], zero_idx[9]); - zero_idx[5] = _mm_or_si128(zero_idx[10], zero_idx[11]); - zero_idx[6] = _mm_or_si128(zero_idx[12], zero_idx[13]); - zero_idx[7] = 
_mm_or_si128(zero_idx[14], zero_idx[15]); + zero_idx[0] = _mm_or_si128(zero_idx[0], zero_idx[1]); + zero_idx[1] = _mm_or_si128(zero_idx[2], zero_idx[3]); + zero_idx[2] = _mm_or_si128(zero_idx[4], zero_idx[5]); + zero_idx[3] = _mm_or_si128(zero_idx[6], zero_idx[7]); + zero_idx[4] = _mm_or_si128(zero_idx[8], zero_idx[9]); + zero_idx[5] = _mm_or_si128(zero_idx[10], zero_idx[11]); + zero_idx[6] = _mm_or_si128(zero_idx[12], zero_idx[13]); + zero_idx[7] = _mm_or_si128(zero_idx[14], zero_idx[15]); - zero_idx[8] = _mm_or_si128(zero_idx[0], zero_idx[1]); - zero_idx[9] = _mm_or_si128(zero_idx[2], zero_idx[3]); - zero_idx[10] = _mm_or_si128(zero_idx[4], zero_idx[5]); - zero_idx[11] = _mm_or_si128(zero_idx[6], zero_idx[7]); - zero_idx[12] = _mm_or_si128(zero_idx[8], zero_idx[9]); - zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]); - zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]); + zero_idx[8] = _mm_or_si128(zero_idx[0], zero_idx[1]); + zero_idx[9] = _mm_or_si128(zero_idx[2], zero_idx[3]); + zero_idx[10] = _mm_or_si128(zero_idx[4], zero_idx[5]); + zero_idx[11] = _mm_or_si128(zero_idx[6], zero_idx[7]); + zero_idx[12] = _mm_or_si128(zero_idx[8], zero_idx[9]); + zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]); + zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]); - if (_mm_movemask_epi8(_mm_cmpeq_epi32(zero_idx[14], zero)) == 0xFFFF) { - col[i32 + 0] = _mm_setzero_si128(); - col[i32 + 1] = _mm_setzero_si128(); - col[i32 + 2] = _mm_setzero_si128(); - col[i32 + 3] = _mm_setzero_si128(); - col[i32 + 4] = _mm_setzero_si128(); - col[i32 + 5] = _mm_setzero_si128(); - col[i32 + 6] = _mm_setzero_si128(); - col[i32 + 7] = _mm_setzero_si128(); - col[i32 + 8] = _mm_setzero_si128(); - col[i32 + 9] = _mm_setzero_si128(); - col[i32 + 10] = _mm_setzero_si128(); - col[i32 + 11] = _mm_setzero_si128(); - col[i32 + 12] = _mm_setzero_si128(); - col[i32 + 13] = _mm_setzero_si128(); - col[i32 + 14] = _mm_setzero_si128(); - col[i32 + 15] = _mm_setzero_si128(); - col[i32 + 
16] = _mm_setzero_si128(); - col[i32 + 17] = _mm_setzero_si128(); - col[i32 + 18] = _mm_setzero_si128(); - col[i32 + 19] = _mm_setzero_si128(); - col[i32 + 20] = _mm_setzero_si128(); - col[i32 + 21] = _mm_setzero_si128(); - col[i32 + 22] = _mm_setzero_si128(); - col[i32 + 23] = _mm_setzero_si128(); - col[i32 + 24] = _mm_setzero_si128(); - col[i32 + 25] = _mm_setzero_si128(); - col[i32 + 26] = _mm_setzero_si128(); - col[i32 + 27] = _mm_setzero_si128(); - col[i32 + 28] = _mm_setzero_si128(); - col[i32 + 29] = _mm_setzero_si128(); - col[i32 + 30] = _mm_setzero_si128(); - col[i32 + 31] = _mm_setzero_si128(); - continue; - } - - // Transpose 32x8 block to 8x32 block - array_transpose_8x8(in, in); - array_transpose_8x8(in+8, in+8); - array_transpose_8x8(in+16, in+16); - array_transpose_8x8(in+24, in+24); - - IDCT32 - - // 1_D: Store 32 intermediate results for each 8x32 block. - col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31); - col[i32 + 1] = _mm_add_epi16(stp1_1, stp1_30); - col[i32 + 2] = _mm_add_epi16(stp1_2, stp1_29); - col[i32 + 3] = _mm_add_epi16(stp1_3, stp1_28); - col[i32 + 4] = _mm_add_epi16(stp1_4, stp1_27); - col[i32 + 5] = _mm_add_epi16(stp1_5, stp1_26); - col[i32 + 6] = _mm_add_epi16(stp1_6, stp1_25); - col[i32 + 7] = _mm_add_epi16(stp1_7, stp1_24); - col[i32 + 8] = _mm_add_epi16(stp1_8, stp1_23); - col[i32 + 9] = _mm_add_epi16(stp1_9, stp1_22); - col[i32 + 10] = _mm_add_epi16(stp1_10, stp1_21); - col[i32 + 11] = _mm_add_epi16(stp1_11, stp1_20); - col[i32 + 12] = _mm_add_epi16(stp1_12, stp1_19); - col[i32 + 13] = _mm_add_epi16(stp1_13, stp1_18); - col[i32 + 14] = _mm_add_epi16(stp1_14, stp1_17); - col[i32 + 15] = _mm_add_epi16(stp1_15, stp1_16); - col[i32 + 16] = _mm_sub_epi16(stp1_15, stp1_16); - col[i32 + 17] = _mm_sub_epi16(stp1_14, stp1_17); - col[i32 + 18] = _mm_sub_epi16(stp1_13, stp1_18); - col[i32 + 19] = _mm_sub_epi16(stp1_12, stp1_19); - col[i32 + 20] = _mm_sub_epi16(stp1_11, stp1_20); - col[i32 + 21] = _mm_sub_epi16(stp1_10, stp1_21); - col[i32 + 
22] = _mm_sub_epi16(stp1_9, stp1_22); - col[i32 + 23] = _mm_sub_epi16(stp1_8, stp1_23); - col[i32 + 24] = _mm_sub_epi16(stp1_7, stp1_24); - col[i32 + 25] = _mm_sub_epi16(stp1_6, stp1_25); - col[i32 + 26] = _mm_sub_epi16(stp1_5, stp1_26); - col[i32 + 27] = _mm_sub_epi16(stp1_4, stp1_27); - col[i32 + 28] = _mm_sub_epi16(stp1_3, stp1_28); - col[i32 + 29] = _mm_sub_epi16(stp1_2, stp1_29); - col[i32 + 30] = _mm_sub_epi16(stp1_1, stp1_30); - col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31); + if (_mm_movemask_epi8(_mm_cmpeq_epi32(zero_idx[14], zero)) == 0xFFFF) { + col[i32 + 0] = _mm_setzero_si128(); + col[i32 + 1] = _mm_setzero_si128(); + col[i32 + 2] = _mm_setzero_si128(); + col[i32 + 3] = _mm_setzero_si128(); + col[i32 + 4] = _mm_setzero_si128(); + col[i32 + 5] = _mm_setzero_si128(); + col[i32 + 6] = _mm_setzero_si128(); + col[i32 + 7] = _mm_setzero_si128(); + col[i32 + 8] = _mm_setzero_si128(); + col[i32 + 9] = _mm_setzero_si128(); + col[i32 + 10] = _mm_setzero_si128(); + col[i32 + 11] = _mm_setzero_si128(); + col[i32 + 12] = _mm_setzero_si128(); + col[i32 + 13] = _mm_setzero_si128(); + col[i32 + 14] = _mm_setzero_si128(); + col[i32 + 15] = _mm_setzero_si128(); + col[i32 + 16] = _mm_setzero_si128(); + col[i32 + 17] = _mm_setzero_si128(); + col[i32 + 18] = _mm_setzero_si128(); + col[i32 + 19] = _mm_setzero_si128(); + col[i32 + 20] = _mm_setzero_si128(); + col[i32 + 21] = _mm_setzero_si128(); + col[i32 + 22] = _mm_setzero_si128(); + col[i32 + 23] = _mm_setzero_si128(); + col[i32 + 24] = _mm_setzero_si128(); + col[i32 + 25] = _mm_setzero_si128(); + col[i32 + 26] = _mm_setzero_si128(); + col[i32 + 27] = _mm_setzero_si128(); + col[i32 + 28] = _mm_setzero_si128(); + col[i32 + 29] = _mm_setzero_si128(); + col[i32 + 30] = _mm_setzero_si128(); + col[i32 + 31] = _mm_setzero_si128(); + continue; } + + // Transpose 32x8 block to 8x32 block + array_transpose_8x8(in, in); + array_transpose_8x8(in + 8, in + 8); + array_transpose_8x8(in + 16, in + 16); + array_transpose_8x8(in + 24, 
in + 24); + + IDCT32 + + // 1_D: Store 32 intermediate results for each 8x32 block. + col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31); + col[i32 + 1] = _mm_add_epi16(stp1_1, stp1_30); + col[i32 + 2] = _mm_add_epi16(stp1_2, stp1_29); + col[i32 + 3] = _mm_add_epi16(stp1_3, stp1_28); + col[i32 + 4] = _mm_add_epi16(stp1_4, stp1_27); + col[i32 + 5] = _mm_add_epi16(stp1_5, stp1_26); + col[i32 + 6] = _mm_add_epi16(stp1_6, stp1_25); + col[i32 + 7] = _mm_add_epi16(stp1_7, stp1_24); + col[i32 + 8] = _mm_add_epi16(stp1_8, stp1_23); + col[i32 + 9] = _mm_add_epi16(stp1_9, stp1_22); + col[i32 + 10] = _mm_add_epi16(stp1_10, stp1_21); + col[i32 + 11] = _mm_add_epi16(stp1_11, stp1_20); + col[i32 + 12] = _mm_add_epi16(stp1_12, stp1_19); + col[i32 + 13] = _mm_add_epi16(stp1_13, stp1_18); + col[i32 + 14] = _mm_add_epi16(stp1_14, stp1_17); + col[i32 + 15] = _mm_add_epi16(stp1_15, stp1_16); + col[i32 + 16] = _mm_sub_epi16(stp1_15, stp1_16); + col[i32 + 17] = _mm_sub_epi16(stp1_14, stp1_17); + col[i32 + 18] = _mm_sub_epi16(stp1_13, stp1_18); + col[i32 + 19] = _mm_sub_epi16(stp1_12, stp1_19); + col[i32 + 20] = _mm_sub_epi16(stp1_11, stp1_20); + col[i32 + 21] = _mm_sub_epi16(stp1_10, stp1_21); + col[i32 + 22] = _mm_sub_epi16(stp1_9, stp1_22); + col[i32 + 23] = _mm_sub_epi16(stp1_8, stp1_23); + col[i32 + 24] = _mm_sub_epi16(stp1_7, stp1_24); + col[i32 + 25] = _mm_sub_epi16(stp1_6, stp1_25); + col[i32 + 26] = _mm_sub_epi16(stp1_5, stp1_26); + col[i32 + 27] = _mm_sub_epi16(stp1_4, stp1_27); + col[i32 + 28] = _mm_sub_epi16(stp1_3, stp1_28); + col[i32 + 29] = _mm_sub_epi16(stp1_2, stp1_29); + col[i32 + 30] = _mm_sub_epi16(stp1_1, stp1_30); + col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31); + } for (i = 0; i < 4; i++) { - // Second 1-D idct - j = i << 3; + // Second 1-D idct + j = i << 3; - // Transpose 32x8 block to 8x32 block - array_transpose_8x8(col+j, in); - array_transpose_8x8(col+j+32, in+8); - array_transpose_8x8(col+j+64, in+16); - array_transpose_8x8(col+j+96, in+24); + // Transpose 32x8 
block to 8x32 block + array_transpose_8x8(col + j, in); + array_transpose_8x8(col + j + 32, in + 8); + array_transpose_8x8(col + j + 64, in + 16); + array_transpose_8x8(col + j + 96, in + 24); - IDCT32 + IDCT32 - // 2_D: Calculate the results and store them to destination. - in[0] = _mm_add_epi16(stp1_0, stp1_31); - in[1] = _mm_add_epi16(stp1_1, stp1_30); - in[2] = _mm_add_epi16(stp1_2, stp1_29); - in[3] = _mm_add_epi16(stp1_3, stp1_28); - in[4] = _mm_add_epi16(stp1_4, stp1_27); - in[5] = _mm_add_epi16(stp1_5, stp1_26); - in[6] = _mm_add_epi16(stp1_6, stp1_25); - in[7] = _mm_add_epi16(stp1_7, stp1_24); - in[8] = _mm_add_epi16(stp1_8, stp1_23); - in[9] = _mm_add_epi16(stp1_9, stp1_22); - in[10] = _mm_add_epi16(stp1_10, stp1_21); - in[11] = _mm_add_epi16(stp1_11, stp1_20); - in[12] = _mm_add_epi16(stp1_12, stp1_19); - in[13] = _mm_add_epi16(stp1_13, stp1_18); - in[14] = _mm_add_epi16(stp1_14, stp1_17); - in[15] = _mm_add_epi16(stp1_15, stp1_16); - in[16] = _mm_sub_epi16(stp1_15, stp1_16); - in[17] = _mm_sub_epi16(stp1_14, stp1_17); - in[18] = _mm_sub_epi16(stp1_13, stp1_18); - in[19] = _mm_sub_epi16(stp1_12, stp1_19); - in[20] = _mm_sub_epi16(stp1_11, stp1_20); - in[21] = _mm_sub_epi16(stp1_10, stp1_21); - in[22] = _mm_sub_epi16(stp1_9, stp1_22); - in[23] = _mm_sub_epi16(stp1_8, stp1_23); - in[24] = _mm_sub_epi16(stp1_7, stp1_24); - in[25] = _mm_sub_epi16(stp1_6, stp1_25); - in[26] = _mm_sub_epi16(stp1_5, stp1_26); - in[27] = _mm_sub_epi16(stp1_4, stp1_27); - in[28] = _mm_sub_epi16(stp1_3, stp1_28); - in[29] = _mm_sub_epi16(stp1_2, stp1_29); - in[30] = _mm_sub_epi16(stp1_1, stp1_30); - in[31] = _mm_sub_epi16(stp1_0, stp1_31); + // 2_D: Calculate the results and store them to destination. 
+ in[0] = _mm_add_epi16(stp1_0, stp1_31); + in[1] = _mm_add_epi16(stp1_1, stp1_30); + in[2] = _mm_add_epi16(stp1_2, stp1_29); + in[3] = _mm_add_epi16(stp1_3, stp1_28); + in[4] = _mm_add_epi16(stp1_4, stp1_27); + in[5] = _mm_add_epi16(stp1_5, stp1_26); + in[6] = _mm_add_epi16(stp1_6, stp1_25); + in[7] = _mm_add_epi16(stp1_7, stp1_24); + in[8] = _mm_add_epi16(stp1_8, stp1_23); + in[9] = _mm_add_epi16(stp1_9, stp1_22); + in[10] = _mm_add_epi16(stp1_10, stp1_21); + in[11] = _mm_add_epi16(stp1_11, stp1_20); + in[12] = _mm_add_epi16(stp1_12, stp1_19); + in[13] = _mm_add_epi16(stp1_13, stp1_18); + in[14] = _mm_add_epi16(stp1_14, stp1_17); + in[15] = _mm_add_epi16(stp1_15, stp1_16); + in[16] = _mm_sub_epi16(stp1_15, stp1_16); + in[17] = _mm_sub_epi16(stp1_14, stp1_17); + in[18] = _mm_sub_epi16(stp1_13, stp1_18); + in[19] = _mm_sub_epi16(stp1_12, stp1_19); + in[20] = _mm_sub_epi16(stp1_11, stp1_20); + in[21] = _mm_sub_epi16(stp1_10, stp1_21); + in[22] = _mm_sub_epi16(stp1_9, stp1_22); + in[23] = _mm_sub_epi16(stp1_8, stp1_23); + in[24] = _mm_sub_epi16(stp1_7, stp1_24); + in[25] = _mm_sub_epi16(stp1_6, stp1_25); + in[26] = _mm_sub_epi16(stp1_5, stp1_26); + in[27] = _mm_sub_epi16(stp1_4, stp1_27); + in[28] = _mm_sub_epi16(stp1_3, stp1_28); + in[29] = _mm_sub_epi16(stp1_2, stp1_29); + in[30] = _mm_sub_epi16(stp1_1, stp1_30); + in[31] = _mm_sub_epi16(stp1_0, stp1_31); + for (j = 0; j < 32; ++j) { // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - in[8] = _mm_adds_epi16(in[8], final_rounding); - in[9] = _mm_adds_epi16(in[9], final_rounding); - in[10] = _mm_adds_epi16(in[10], final_rounding); - in[11] = 
_mm_adds_epi16(in[11], final_rounding); - in[12] = _mm_adds_epi16(in[12], final_rounding); - in[13] = _mm_adds_epi16(in[13], final_rounding); - in[14] = _mm_adds_epi16(in[14], final_rounding); - in[15] = _mm_adds_epi16(in[15], final_rounding); - in[16] = _mm_adds_epi16(in[16], final_rounding); - in[17] = _mm_adds_epi16(in[17], final_rounding); - in[18] = _mm_adds_epi16(in[18], final_rounding); - in[19] = _mm_adds_epi16(in[19], final_rounding); - in[20] = _mm_adds_epi16(in[20], final_rounding); - in[21] = _mm_adds_epi16(in[21], final_rounding); - in[22] = _mm_adds_epi16(in[22], final_rounding); - in[23] = _mm_adds_epi16(in[23], final_rounding); - in[24] = _mm_adds_epi16(in[24], final_rounding); - in[25] = _mm_adds_epi16(in[25], final_rounding); - in[26] = _mm_adds_epi16(in[26], final_rounding); - in[27] = _mm_adds_epi16(in[27], final_rounding); - in[28] = _mm_adds_epi16(in[28], final_rounding); - in[29] = _mm_adds_epi16(in[29], final_rounding); - in[30] = _mm_adds_epi16(in[30], final_rounding); - in[31] = _mm_adds_epi16(in[31], final_rounding); - - in[0] = _mm_srai_epi16(in[0], 6); - in[1] = _mm_srai_epi16(in[1], 6); - in[2] = _mm_srai_epi16(in[2], 6); - in[3] = _mm_srai_epi16(in[3], 6); - in[4] = _mm_srai_epi16(in[4], 6); - in[5] = _mm_srai_epi16(in[5], 6); - in[6] = _mm_srai_epi16(in[6], 6); - in[7] = _mm_srai_epi16(in[7], 6); - in[8] = _mm_srai_epi16(in[8], 6); - in[9] = _mm_srai_epi16(in[9], 6); - in[10] = _mm_srai_epi16(in[10], 6); - in[11] = _mm_srai_epi16(in[11], 6); - in[12] = _mm_srai_epi16(in[12], 6); - in[13] = _mm_srai_epi16(in[13], 6); - in[14] = _mm_srai_epi16(in[14], 6); - in[15] = _mm_srai_epi16(in[15], 6); - in[16] = _mm_srai_epi16(in[16], 6); - in[17] = _mm_srai_epi16(in[17], 6); - in[18] = _mm_srai_epi16(in[18], 6); - in[19] = _mm_srai_epi16(in[19], 6); - in[20] = _mm_srai_epi16(in[20], 6); - in[21] = _mm_srai_epi16(in[21], 6); - in[22] = _mm_srai_epi16(in[22], 6); - in[23] = _mm_srai_epi16(in[23], 6); - in[24] = _mm_srai_epi16(in[24], 6); - 
in[25] = _mm_srai_epi16(in[25], 6); - in[26] = _mm_srai_epi16(in[26], 6); - in[27] = _mm_srai_epi16(in[27], 6); - in[28] = _mm_srai_epi16(in[28], 6); - in[29] = _mm_srai_epi16(in[29], 6); - in[30] = _mm_srai_epi16(in[30], 6); - in[31] = _mm_srai_epi16(in[31], 6); - - RECON_AND_STORE(dest, in[0]); - RECON_AND_STORE(dest, in[1]); - RECON_AND_STORE(dest, in[2]); - RECON_AND_STORE(dest, in[3]); - RECON_AND_STORE(dest, in[4]); - RECON_AND_STORE(dest, in[5]); - RECON_AND_STORE(dest, in[6]); - RECON_AND_STORE(dest, in[7]); - RECON_AND_STORE(dest, in[8]); - RECON_AND_STORE(dest, in[9]); - RECON_AND_STORE(dest, in[10]); - RECON_AND_STORE(dest, in[11]); - RECON_AND_STORE(dest, in[12]); - RECON_AND_STORE(dest, in[13]); - RECON_AND_STORE(dest, in[14]); - RECON_AND_STORE(dest, in[15]); - RECON_AND_STORE(dest, in[16]); - RECON_AND_STORE(dest, in[17]); - RECON_AND_STORE(dest, in[18]); - RECON_AND_STORE(dest, in[19]); - RECON_AND_STORE(dest, in[20]); - RECON_AND_STORE(dest, in[21]); - RECON_AND_STORE(dest, in[22]); - RECON_AND_STORE(dest, in[23]); - RECON_AND_STORE(dest, in[24]); - RECON_AND_STORE(dest, in[25]); - RECON_AND_STORE(dest, in[26]); - RECON_AND_STORE(dest, in[27]); - RECON_AND_STORE(dest, in[28]); - RECON_AND_STORE(dest, in[29]); - RECON_AND_STORE(dest, in[30]); - RECON_AND_STORE(dest, in[31]); - - dest += 8 - (stride * 32); + in[j] = _mm_adds_epi16(in[j], final_rounding); + in[j] = _mm_srai_epi16(in[j], 6); + RECON_AND_STORE(dest + j * stride, in[j]); } -} //NOLINT + + dest += 8; + } +} void vp9_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { __m128i dc_value; @@ -3951,66 +3644,38 @@ void vp9_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { dc_value = _mm_set1_epi16(a); for (i = 0; i < 4; ++i) { - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - 
RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - RECON_AND_STORE(dest, dc_value); - dest += 8 - (stride * 32); + int j; + for (j = 0; j < 32; ++j) { + RECON_AND_STORE(dest + j * stride, dc_value); + } + dest += 8; } } #if CONFIG_VP9_HIGHBITDEPTH static INLINE __m128i clamp_high_sse2(__m128i value, int bd) { - __m128i ubounded, retval; - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi16(1); - const __m128i max = _mm_subs_epi16(_mm_slli_epi16(one, bd), one); - ubounded = _mm_cmpgt_epi16(value, max); - retval = _mm_andnot_si128(ubounded, value); - ubounded = _mm_and_si128(ubounded, max); - retval = _mm_or_si128(retval, ubounded); - retval = _mm_and_si128(retval, _mm_cmpgt_epi16(retval, zero)); - return retval; + __m128i ubounded, retval; + const __m128i zero = _mm_set1_epi16(0); + const __m128i one = _mm_set1_epi16(1); + const __m128i max = _mm_subs_epi16(_mm_slli_epi16(one, bd), one); + ubounded = _mm_cmpgt_epi16(value, max); + retval = _mm_andnot_si128(ubounded, value); + ubounded = _mm_and_si128(ubounded, max); + retval = _mm_or_si128(retval, ubounded); + retval = _mm_and_si128(retval, _mm_cmpgt_epi16(retval, zero)); + return 
retval; } void vp9_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[4 * 4]; tran_low_t *outptr = out; int i, j; __m128i inptr[4]; __m128i sign_bits[2]; - __m128i temp_mm, min_input, max_input; + __m128i temp_mm, min_input, max_input; int test; - uint16_t * dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); int optimised_cols = 0; const __m128i zero = _mm_set1_epi16(0); const __m128i eight = _mm_set1_epi16(8); @@ -4053,10 +3718,10 @@ void vp9_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, inptr[2] = _mm_unpacklo_epi16(inptr[1], sign_bits[1]); inptr[1] = _mm_unpackhi_epi16(inptr[0], sign_bits[0]); inptr[0] = _mm_unpacklo_epi16(inptr[0], sign_bits[0]); - _mm_storeu_si128((__m128i*)outptr, inptr[0]); - _mm_storeu_si128((__m128i*)(outptr + 4), inptr[1]); - _mm_storeu_si128((__m128i*)(outptr + 8), inptr[2]); - _mm_storeu_si128((__m128i*)(outptr + 12), inptr[3]); + _mm_storeu_si128((__m128i *)outptr, inptr[0]); + _mm_storeu_si128((__m128i *)(outptr + 4), inptr[1]); + _mm_storeu_si128((__m128i *)(outptr + 8), inptr[2]); + _mm_storeu_si128((__m128i *)(outptr + 12), inptr[3]); } else { // Set to use the optimised transform for the column optimised_cols = 1; @@ -4084,10 +3749,10 @@ void vp9_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, { __m128i d0 = _mm_loadl_epi64((const __m128i *)dest); __m128i d2 = _mm_loadl_epi64((const __m128i *)(dest + stride * 2)); - d0 = _mm_unpacklo_epi64(d0, - _mm_loadl_epi64((const __m128i *)(dest + stride))); - d2 = _mm_unpacklo_epi64(d2, - _mm_loadl_epi64((const __m128i *)(dest + stride * 3))); + d0 = _mm_unpacklo_epi64( + d0, _mm_loadl_epi64((const __m128i *)(dest + stride))); + d2 = _mm_unpacklo_epi64( + d2, _mm_loadl_epi64((const __m128i *)(dest + stride * 3))); d0 = clamp_high_sse2(_mm_adds_epi16(d0, inptr[0]), bd); d2 = clamp_high_sse2(_mm_adds_epi16(d2, inptr[1]), bd); // store input0 
@@ -4118,13 +3783,13 @@ void vp9_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, } void vp9_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[8 * 8]; tran_low_t *outptr = out; int i, j, test; __m128i inptr[8]; __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t * dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); const __m128i zero = _mm_set1_epi16(0); const __m128i sixteen = _mm_set1_epi16(16); const __m128i max = _mm_set1_epi16(6201); @@ -4133,8 +3798,8 @@ void vp9_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, // Load input into __m128i & pack to 16 bits for (i = 0; i < 8; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 8*i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 8*i + 4)); + temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4)); inptr[i] = _mm_packs_epi32(temp1, temp2); } @@ -4172,8 +3837,8 @@ void vp9_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, sign_bits = _mm_cmplt_epi16(inptr[i], zero); temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits); temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits); - _mm_storeu_si128((__m128i*)(outptr + 4*(2*i+1)), temp1); - _mm_storeu_si128((__m128i*)(outptr + 4*(2*i)), temp2); + _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2); } } else { // Set to use the optimised transform for the column @@ -4219,13 +3884,13 @@ void vp9_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, } void vp9_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[8 * 8] = { 0 }; tran_low_t *outptr = out; int i, j, test; __m128i inptr[8]; __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t * dest = 
CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); const __m128i zero = _mm_set1_epi16(0); const __m128i sixteen = _mm_set1_epi16(16); const __m128i max = _mm_set1_epi16(6201); @@ -4234,8 +3899,8 @@ void vp9_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8, // Load input into __m128i & pack to 16 bits for (i = 0; i < 8; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 8*i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 8*i + 4)); + temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4)); inptr[i] = _mm_packs_epi32(temp1, temp2); } @@ -4276,8 +3941,8 @@ void vp9_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8, sign_bits = _mm_cmplt_epi16(inptr[i], zero); temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits); temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits); - _mm_storeu_si128((__m128i*)(outptr + 4*(2*i+1)), temp1); - _mm_storeu_si128((__m128i*)(outptr + 4*(2*i)), temp2); + _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2); } } else { // Set to use the optimised transform for the column @@ -4323,13 +3988,13 @@ void vp9_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8, } void vp9_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[16 * 16]; tran_low_t *outptr = out; int i, j, test; __m128i inptr[32]; __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t * dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); const __m128i zero = _mm_set1_epi16(0); const __m128i rounding = _mm_set1_epi16(32); const __m128i max = _mm_set1_epi16(3155); @@ -4338,11 +4003,11 @@ void vp9_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8, // Load input into __m128i & pack to 16 bits for (i = 0; i < 16; i++) { - temp1 = 
_mm_loadu_si128((const __m128i *)(input + 16*i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16*i + 4)); + temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4)); inptr[i] = _mm_packs_epi32(temp1, temp2); - temp1 = _mm_loadu_si128((const __m128i *)(input + 16*i + 8)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16*i + 12)); + temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12)); inptr[i + 16] = _mm_packs_epi32(temp1, temp2); } @@ -4378,15 +4043,15 @@ void vp9_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8, array_transpose_16x16(inptr, inptr + 16); for (i = 0; i < 16; i++) { sign_bits = _mm_cmplt_epi16(inptr[i], zero); - temp1 = _mm_unpacklo_epi16(inptr[i ], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i ], sign_bits); - _mm_storeu_si128((__m128i*)(outptr + 4*(i*4)), temp1); - _mm_storeu_si128((__m128i*)(outptr + 4*(i*4+1)), temp2); - sign_bits = _mm_cmplt_epi16(inptr[i+16], zero); - temp1 = _mm_unpacklo_epi16(inptr[i+16], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i+16], sign_bits); - _mm_storeu_si128((__m128i*)(outptr + 4*(i*4+2)), temp1); - _mm_storeu_si128((__m128i*)(outptr + 4*(i*4+3)), temp2); + temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits); + temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2); + sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero); + temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits); + temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2); } } else { // Set to use the optimised transform for the column @@ -4437,13 +4102,13 @@ void vp9_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t 
*dest8, } void vp9_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; int i, j, test; __m128i inptr[32]; __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t * dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); const __m128i zero = _mm_set1_epi16(0); const __m128i rounding = _mm_set1_epi16(32); const __m128i max = _mm_set1_epi16(3155); @@ -4452,11 +4117,11 @@ void vp9_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8, // Load input into __m128i & pack to 16 bits for (i = 0; i < 16; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 16*i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16*i + 4)); + temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4)); inptr[i] = _mm_packs_epi32(temp1, temp2); - temp1 = _mm_loadu_si128((const __m128i *)(input + 16*i + 8)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16*i + 12)); + temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12)); inptr[i + 16] = _mm_packs_epi32(temp1, temp2); } @@ -4497,15 +4162,15 @@ void vp9_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8, array_transpose_8x8(inptr + 8, inptr + 16); for (i = 0; i < 4; i++) { sign_bits = _mm_cmplt_epi16(inptr[i], zero); - temp1 = _mm_unpacklo_epi16(inptr[i ], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i ], sign_bits); - _mm_storeu_si128((__m128i*)(outptr + 4*(i*4)), temp1); - _mm_storeu_si128((__m128i*)(outptr + 4*(i*4+1)), temp2); - sign_bits = _mm_cmplt_epi16(inptr[i+16], zero); - temp1 = _mm_unpacklo_epi16(inptr[i+16], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i+16], sign_bits); - _mm_storeu_si128((__m128i*)(outptr + 4*(i*4+2)), temp1); - _mm_storeu_si128((__m128i*)(outptr + 4*(i*4+3)), 
temp2); + temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits); + temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2); + sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero); + temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits); + temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2); } } else { // Set to use the optimised transform for the column diff --git a/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h b/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h index 0f179b49a57..984363d4035 100644 --- a/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h +++ b/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h @@ -115,7 +115,6 @@ static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) { d0 = _mm_add_epi16(in_x, d0); \ d0 = _mm_packus_epi16(d0, d0); \ _mm_storel_epi64((__m128i *)(dest), d0); \ - dest += stride; \ } static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { @@ -156,20 +155,20 @@ static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { in[14] = _mm_srai_epi16(in[14], 6); in[15] = _mm_srai_epi16(in[15], 6); - RECON_AND_STORE(dest, in[0]); - RECON_AND_STORE(dest, in[1]); - RECON_AND_STORE(dest, in[2]); - RECON_AND_STORE(dest, in[3]); - RECON_AND_STORE(dest, in[4]); - RECON_AND_STORE(dest, in[5]); - RECON_AND_STORE(dest, in[6]); - RECON_AND_STORE(dest, in[7]); - RECON_AND_STORE(dest, in[8]); - RECON_AND_STORE(dest, in[9]); - RECON_AND_STORE(dest, in[10]); - RECON_AND_STORE(dest, in[11]); - RECON_AND_STORE(dest, in[12]); - RECON_AND_STORE(dest, in[13]); - RECON_AND_STORE(dest, in[14]); - RECON_AND_STORE(dest, in[15]); + RECON_AND_STORE(dest + 0 * stride, in[0]); + RECON_AND_STORE(dest + 1 * stride, in[1]); + RECON_AND_STORE(dest + 2 * stride, in[2]); + 
RECON_AND_STORE(dest + 3 * stride, in[3]); + RECON_AND_STORE(dest + 4 * stride, in[4]); + RECON_AND_STORE(dest + 5 * stride, in[5]); + RECON_AND_STORE(dest + 6 * stride, in[6]); + RECON_AND_STORE(dest + 7 * stride, in[7]); + RECON_AND_STORE(dest + 8 * stride, in[8]); + RECON_AND_STORE(dest + 9 * stride, in[9]); + RECON_AND_STORE(dest + 10 * stride, in[10]); + RECON_AND_STORE(dest + 11 * stride, in[11]); + RECON_AND_STORE(dest + 12 * stride, in[12]); + RECON_AND_STORE(dest + 13 * stride, in[13]); + RECON_AND_STORE(dest + 14 * stride, in[14]); + RECON_AND_STORE(dest + 15 * stride, in[15]); } diff --git a/media/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c b/media/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c deleted file mode 100644 index b9d2ef22b97..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c +++ /dev/null @@ -1,762 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#if defined(_MSC_VER) && _MSC_VER <= 1500 -// Need to include math.h before calling tmmintrin.h/intrin.h -// in certain versions of MSVS. 
-#include -#endif -#include // SSSE3 -#include "vp9/common/x86/vp9_idct_intrin_sse2.h" - -static void idct16_8col(__m128i *in, int round) { - const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); - const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i k__cospi_p16_p16_x2 = pair_set_epi16(23170, 23170); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); - const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i v[16], u[16], s[16], t[16]; - - // stage 1 - s[0] = in[0]; - s[1] = in[8]; - s[2] = in[4]; - s[3] = in[12]; - s[4] = in[2]; - s[5] = in[10]; - s[6] = in[6]; - s[7] = in[14]; - s[8] = in[1]; - s[9] = in[9]; - s[10] = in[5]; - s[11] = in[13]; - 
s[12] = in[3]; - s[13] = in[11]; - s[14] = in[7]; - s[15] = in[15]; - - // stage 2 - u[0] = _mm_unpacklo_epi16(s[8], s[15]); - u[1] = _mm_unpackhi_epi16(s[8], s[15]); - u[2] = _mm_unpacklo_epi16(s[9], s[14]); - u[3] = _mm_unpackhi_epi16(s[9], s[14]); - u[4] = _mm_unpacklo_epi16(s[10], s[13]); - u[5] = _mm_unpackhi_epi16(s[10], s[13]); - u[6] = _mm_unpacklo_epi16(s[11], s[12]); - u[7] = _mm_unpackhi_epi16(s[11], s[12]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p30_m02); - v[1] = _mm_madd_epi16(u[1], k__cospi_p30_m02); - v[2] = _mm_madd_epi16(u[0], k__cospi_p02_p30); - v[3] = _mm_madd_epi16(u[1], k__cospi_p02_p30); - v[4] = _mm_madd_epi16(u[2], k__cospi_p14_m18); - v[5] = _mm_madd_epi16(u[3], k__cospi_p14_m18); - v[6] = _mm_madd_epi16(u[2], k__cospi_p18_p14); - v[7] = _mm_madd_epi16(u[3], k__cospi_p18_p14); - v[8] = _mm_madd_epi16(u[4], k__cospi_p22_m10); - v[9] = _mm_madd_epi16(u[5], k__cospi_p22_m10); - v[10] = _mm_madd_epi16(u[4], k__cospi_p10_p22); - v[11] = _mm_madd_epi16(u[5], k__cospi_p10_p22); - v[12] = _mm_madd_epi16(u[6], k__cospi_p06_m26); - v[13] = _mm_madd_epi16(u[7], k__cospi_p06_m26); - v[14] = _mm_madd_epi16(u[6], k__cospi_p26_p06); - v[15] = _mm_madd_epi16(u[7], k__cospi_p26_p06); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(v[14], 
k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - s[8] = _mm_packs_epi32(u[0], u[1]); - s[15] = _mm_packs_epi32(u[2], u[3]); - s[9] = _mm_packs_epi32(u[4], u[5]); - s[14] = _mm_packs_epi32(u[6], u[7]); - s[10] = _mm_packs_epi32(u[8], u[9]); - s[13] = _mm_packs_epi32(u[10], u[11]); - s[11] = _mm_packs_epi32(u[12], u[13]); - s[12] = _mm_packs_epi32(u[14], u[15]); - - // stage 3 - t[0] = s[0]; - t[1] = s[1]; - t[2] = s[2]; - t[3] = s[3]; - u[0] = _mm_unpacklo_epi16(s[4], s[7]); - u[1] = _mm_unpackhi_epi16(s[4], s[7]); - u[2] = _mm_unpacklo_epi16(s[5], s[6]); - u[3] = _mm_unpackhi_epi16(s[5], s[6]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p28_m04); - v[1] = _mm_madd_epi16(u[1], k__cospi_p28_m04); - v[2] = _mm_madd_epi16(u[0], k__cospi_p04_p28); - v[3] = _mm_madd_epi16(u[1], k__cospi_p04_p28); - v[4] = _mm_madd_epi16(u[2], k__cospi_p12_m20); - v[5] = _mm_madd_epi16(u[3], k__cospi_p12_m20); - v[6] = _mm_madd_epi16(u[2], k__cospi_p20_p12); - v[7] = _mm_madd_epi16(u[3], k__cospi_p20_p12); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], 
k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - - t[4] = _mm_packs_epi32(u[0], u[1]); - t[7] = _mm_packs_epi32(u[2], u[3]); - t[5] = _mm_packs_epi32(u[4], u[5]); - t[6] = _mm_packs_epi32(u[6], u[7]); - t[8] = _mm_add_epi16(s[8], s[9]); - t[9] = _mm_sub_epi16(s[8], s[9]); - t[10] = _mm_sub_epi16(s[11], s[10]); - t[11] = _mm_add_epi16(s[10], s[11]); - t[12] = _mm_add_epi16(s[12], s[13]); - t[13] = _mm_sub_epi16(s[12], s[13]); - t[14] = _mm_sub_epi16(s[15], s[14]); - t[15] = _mm_add_epi16(s[14], s[15]); - - // stage 4 - u[0] = _mm_add_epi16(t[0], t[1]); - u[1] = _mm_sub_epi16(t[0], t[1]); - u[2] = _mm_unpacklo_epi16(t[2], t[3]); - u[3] = _mm_unpackhi_epi16(t[2], t[3]); - u[4] = _mm_unpacklo_epi16(t[9], t[14]); - u[5] = _mm_unpackhi_epi16(t[9], t[14]); - u[6] = _mm_unpacklo_epi16(t[10], t[13]); - u[7] = _mm_unpackhi_epi16(t[10], t[13]); - - s[0] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2); - s[1] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2); - v[4] = _mm_madd_epi16(u[2], k__cospi_p24_m08); - v[5] = _mm_madd_epi16(u[3], k__cospi_p24_m08); - v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); - v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); - v[8] = _mm_madd_epi16(u[4], k__cospi_m08_p24); - v[9] = _mm_madd_epi16(u[5], k__cospi_m08_p24); - v[10] = _mm_madd_epi16(u[4], k__cospi_p24_p08); - v[11] = _mm_madd_epi16(u[5], k__cospi_p24_p08); - v[12] = _mm_madd_epi16(u[6], k__cospi_m24_m08); - v[13] = _mm_madd_epi16(u[7], 
k__cospi_m24_m08); - v[14] = _mm_madd_epi16(u[6], k__cospi_m08_p24); - v[15] = _mm_madd_epi16(u[7], k__cospi_m08_p24); - - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); - - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - s[2] = _mm_packs_epi32(u[4], u[5]); - s[3] = _mm_packs_epi32(u[6], u[7]); - s[4] = _mm_add_epi16(t[4], t[5]); - s[5] = _mm_sub_epi16(t[4], t[5]); - s[6] = _mm_sub_epi16(t[7], t[6]); - s[7] = _mm_add_epi16(t[6], t[7]); - s[8] = t[8]; - s[15] = t[15]; - s[9] = _mm_packs_epi32(u[8], u[9]); - s[14] = _mm_packs_epi32(u[10], u[11]); - s[10] = _mm_packs_epi32(u[12], u[13]); - s[13] = _mm_packs_epi32(u[14], u[15]); - s[11] = t[11]; - s[12] = t[12]; - - // stage 5 - t[0] = _mm_add_epi16(s[0], s[3]); - t[1] = _mm_add_epi16(s[1], s[2]); - t[2] = _mm_sub_epi16(s[1], s[2]); - t[3] = _mm_sub_epi16(s[0], s[3]); - t[4] = s[4]; - t[7] = s[7]; - - u[0] = 
_mm_sub_epi16(s[6], s[5]); - u[1] = _mm_add_epi16(s[6], s[5]); - t[5] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2); - t[6] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2); - - t[8] = _mm_add_epi16(s[8], s[11]); - t[9] = _mm_add_epi16(s[9], s[10]); - t[10] = _mm_sub_epi16(s[9], s[10]); - t[11] = _mm_sub_epi16(s[8], s[11]); - t[12] = _mm_sub_epi16(s[15], s[12]); - t[13] = _mm_sub_epi16(s[14], s[13]); - t[14] = _mm_add_epi16(s[13], s[14]); - t[15] = _mm_add_epi16(s[12], s[15]); - - // stage 6 - if (round == 1) { - s[0] = _mm_add_epi16(t[0], t[7]); - s[1] = _mm_add_epi16(t[1], t[6]); - s[2] = _mm_add_epi16(t[2], t[5]); - s[3] = _mm_add_epi16(t[3], t[4]); - s[4] = _mm_sub_epi16(t[3], t[4]); - s[5] = _mm_sub_epi16(t[2], t[5]); - s[6] = _mm_sub_epi16(t[1], t[6]); - s[7] = _mm_sub_epi16(t[0], t[7]); - s[8] = t[8]; - s[9] = t[9]; - - u[0] = _mm_unpacklo_epi16(t[10], t[13]); - u[1] = _mm_unpackhi_epi16(t[10], t[13]); - u[2] = _mm_unpacklo_epi16(t[11], t[12]); - u[3] = _mm_unpackhi_epi16(t[11], t[12]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); - v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16); - v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16); - v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16); - v[4] = _mm_madd_epi16(u[2], k__cospi_m16_p16); - v[5] = _mm_madd_epi16(u[3], k__cospi_m16_p16); - v[6] = _mm_madd_epi16(u[2], k__cospi_p16_p16); - v[7] = _mm_madd_epi16(u[3], k__cospi_p16_p16); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], 
DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - - s[10] = _mm_packs_epi32(u[0], u[1]); - s[13] = _mm_packs_epi32(u[2], u[3]); - s[11] = _mm_packs_epi32(u[4], u[5]); - s[12] = _mm_packs_epi32(u[6], u[7]); - s[14] = t[14]; - s[15] = t[15]; - } else { - s[0] = _mm_add_epi16(t[0], t[7]); - s[1] = _mm_add_epi16(t[1], t[6]); - s[2] = _mm_add_epi16(t[2], t[5]); - s[3] = _mm_add_epi16(t[3], t[4]); - s[4] = _mm_sub_epi16(t[3], t[4]); - s[5] = _mm_sub_epi16(t[2], t[5]); - s[6] = _mm_sub_epi16(t[1], t[6]); - s[7] = _mm_sub_epi16(t[0], t[7]); - s[8] = t[8]; - s[9] = t[9]; - - u[0] = _mm_sub_epi16(t[13], t[10]); - u[1] = _mm_add_epi16(t[13], t[10]); - u[2] = _mm_sub_epi16(t[12], t[11]); - u[3] = _mm_add_epi16(t[12], t[11]); - - s[10] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2); - s[13] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2); - s[11] = _mm_mulhrs_epi16(u[2], k__cospi_p16_p16_x2); - s[12] = _mm_mulhrs_epi16(u[3], k__cospi_p16_p16_x2); - s[14] = t[14]; - s[15] = t[15]; - } - - // stage 7 - in[0] = _mm_add_epi16(s[0], s[15]); - in[1] = _mm_add_epi16(s[1], s[14]); - in[2] = _mm_add_epi16(s[2], s[13]); - in[3] = _mm_add_epi16(s[3], s[12]); - in[4] = _mm_add_epi16(s[4], s[11]); - in[5] = _mm_add_epi16(s[5], s[10]); - in[6] = _mm_add_epi16(s[6], s[9]); - in[7] = _mm_add_epi16(s[7], s[8]); - in[8] = _mm_sub_epi16(s[7], s[8]); - in[9] = _mm_sub_epi16(s[6], s[9]); - in[10] = _mm_sub_epi16(s[5], s[10]); - in[11] = _mm_sub_epi16(s[4], s[11]); - in[12] = _mm_sub_epi16(s[3], s[12]); - in[13] = _mm_sub_epi16(s[2], s[13]); - in[14] = _mm_sub_epi16(s[1], s[14]); - in[15] = _mm_sub_epi16(s[0], s[15]); -} - -static void idct16_sse2(__m128i *in0, __m128i *in1, int round) { - array_transpose_16x16(in0, in1); - idct16_8col(in0, round); - idct16_8col(in1, round); -} - -void vp9_idct16x16_256_add_ssse3(const int16_t *input, uint8_t 
*dest, - int stride) { - __m128i in0[16], in1[16]; - - load_buffer_8x16(input, in0); - input += 8; - load_buffer_8x16(input, in1); - - idct16_sse2(in0, in1, 0); - idct16_sse2(in0, in1, 1); - - write_buffer_8x16(dest, in0, stride); - dest += 8; - write_buffer_8x16(dest, in1, stride); -} - -static void idct16_10_r1(__m128i *in, __m128i *l) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i zero = _mm_setzero_si128(); - - const __m128i stg2_01 = dual_set_epi16(3212, 32610); - const __m128i stg2_67 = dual_set_epi16(-9512, 31358); - const __m128i stg3_01 = dual_set_epi16(6392, 32138); - const __m128i stg4_01 = dual_set_epi16(23170, 23170); - - - - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64); - - __m128i stp1_0, stp1_1, stp1_4, stp1_6, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4; - - // Stage2 - { - const __m128i lo_1_15 = _mm_unpackhi_epi64(in[0], in[0]); - const __m128i lo_13_3 = _mm_unpackhi_epi64(in[1], in[1]); - - stp2_8 = _mm_mulhrs_epi16(lo_1_15, stg2_01); - stp2_11 = _mm_mulhrs_epi16(lo_13_3, stg2_67); - } - - // Stage3 - { - const __m128i lo_2_14 = _mm_unpacklo_epi64(in[1], in[1]); - stp1_4 = _mm_mulhrs_epi16(lo_2_14, stg3_01); - - stp1_13 = _mm_unpackhi_epi64(stp2_11, zero); - stp1_14 = _mm_unpackhi_epi64(stp2_8, zero); - } - - // Stage4 - { - const __m128i lo_0_8 = _mm_unpacklo_epi64(in[0], in[0]); - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp1_14); - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp1_13); - - tmp0 = _mm_mulhrs_epi16(lo_0_8, stg4_01); - tmp1 = _mm_madd_epi16(lo_9_14, stg4_4); - tmp3 = 
_mm_madd_epi16(lo_9_14, stg4_5); - tmp2 = _mm_madd_epi16(lo_10_13, stg4_6); - tmp4 = _mm_madd_epi16(lo_10_13, stg4_7); - - tmp1 = _mm_add_epi32(tmp1, rounding); - tmp3 = _mm_add_epi32(tmp3, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - - stp1_0 = _mm_unpacklo_epi64(tmp0, tmp0); - stp1_1 = _mm_unpackhi_epi64(tmp0, tmp0); - stp2_9 = _mm_packs_epi32(tmp1, tmp3); - stp2_10 = _mm_packs_epi32(tmp2, tmp4); - - stp2_6 = _mm_unpackhi_epi64(stp1_4, zero); - } - - // Stage5 and Stage6 - { - tmp0 = _mm_add_epi16(stp2_8, stp2_11); - tmp1 = _mm_sub_epi16(stp2_8, stp2_11); - tmp2 = _mm_add_epi16(stp2_9, stp2_10); - tmp3 = _mm_sub_epi16(stp2_9, stp2_10); - - stp1_9 = _mm_unpacklo_epi64(tmp2, zero); - stp1_10 = _mm_unpacklo_epi64(tmp3, zero); - stp1_8 = _mm_unpacklo_epi64(tmp0, zero); - stp1_11 = _mm_unpacklo_epi64(tmp1, zero); - - stp1_13 = _mm_unpackhi_epi64(tmp3, zero); - stp1_14 = _mm_unpackhi_epi64(tmp2, zero); - stp1_12 = _mm_unpackhi_epi64(tmp1, zero); - stp1_15 = _mm_unpackhi_epi64(tmp0, zero); - } - - // Stage6 - { - const __m128i lo_6_5 = _mm_add_epi16(stp2_6, stp1_4); - const __m128i lo_6_6 = _mm_sub_epi16(stp2_6, stp1_4); - const __m128i lo_10_13 = _mm_sub_epi16(stp1_13, stp1_10); - const __m128i lo_10_14 = _mm_add_epi16(stp1_13, stp1_10); - const __m128i lo_11_12 = _mm_sub_epi16(stp1_12, stp1_11); - const __m128i lo_11_13 = _mm_add_epi16(stp1_12, stp1_11); - - tmp1 = _mm_unpacklo_epi64(lo_6_5, lo_6_6); - tmp0 = _mm_unpacklo_epi64(lo_10_13, lo_10_14); - tmp4 = _mm_unpacklo_epi64(lo_11_12, lo_11_13); - - stp1_6 = _mm_mulhrs_epi16(tmp1, stg4_01); - tmp0 = _mm_mulhrs_epi16(tmp0, stg4_01); - tmp4 = _mm_mulhrs_epi16(tmp4, stg4_01); - - stp2_10 = _mm_unpacklo_epi64(tmp0, zero); - stp2_13 = _mm_unpackhi_epi64(tmp0, zero); - stp2_11 = 
_mm_unpacklo_epi64(tmp4, zero); - stp2_12 = _mm_unpackhi_epi64(tmp4, zero); - - tmp0 = _mm_add_epi16(stp1_0, stp1_4); - tmp1 = _mm_sub_epi16(stp1_0, stp1_4); - tmp2 = _mm_add_epi16(stp1_1, stp1_6); - tmp3 = _mm_sub_epi16(stp1_1, stp1_6); - - stp2_0 = _mm_unpackhi_epi64(tmp0, zero); - stp2_1 = _mm_unpacklo_epi64(tmp2, zero); - stp2_2 = _mm_unpackhi_epi64(tmp2, zero); - stp2_3 = _mm_unpacklo_epi64(tmp0, zero); - stp2_4 = _mm_unpacklo_epi64(tmp1, zero); - stp2_5 = _mm_unpackhi_epi64(tmp3, zero); - stp2_6 = _mm_unpacklo_epi64(tmp3, zero); - stp2_7 = _mm_unpackhi_epi64(tmp1, zero); - } - - // Stage7. Left 8x16 only. - l[0] = _mm_add_epi16(stp2_0, stp1_15); - l[1] = _mm_add_epi16(stp2_1, stp1_14); - l[2] = _mm_add_epi16(stp2_2, stp2_13); - l[3] = _mm_add_epi16(stp2_3, stp2_12); - l[4] = _mm_add_epi16(stp2_4, stp2_11); - l[5] = _mm_add_epi16(stp2_5, stp2_10); - l[6] = _mm_add_epi16(stp2_6, stp1_9); - l[7] = _mm_add_epi16(stp2_7, stp1_8); - l[8] = _mm_sub_epi16(stp2_7, stp1_8); - l[9] = _mm_sub_epi16(stp2_6, stp1_9); - l[10] = _mm_sub_epi16(stp2_5, stp2_10); - l[11] = _mm_sub_epi16(stp2_4, stp2_11); - l[12] = _mm_sub_epi16(stp2_3, stp2_12); - l[13] = _mm_sub_epi16(stp2_2, stp2_13); - l[14] = _mm_sub_epi16(stp2_1, stp1_14); - l[15] = _mm_sub_epi16(stp2_0, stp1_15); -} - -static void idct16_10_r2(__m128i *in) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - - const __m128i stg2_0 = dual_set_epi16(3212, 3212); - const __m128i stg2_1 = dual_set_epi16(32610, 32610); - const __m128i stg2_6 = dual_set_epi16(-9512, -9512); - const __m128i stg2_7 = dual_set_epi16(31358, 31358); - const __m128i stg3_0 = dual_set_epi16(6392, 6392); - const __m128i stg3_1 = dual_set_epi16(32138, 32138); - const __m128i stg4_01 = dual_set_epi16(23170, 23170); - - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i stg4_7 = 
pair_set_epi16(-cospi_8_64, cospi_24_64); - - __m128i stp1_0, stp1_2, stp1_3, stp1_5, stp1_6, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_8_0, stp1_12_0; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - - /* Stage2 */ - { - stp1_8_0 = _mm_mulhrs_epi16(in[1], stg2_0); - stp1_15 = _mm_mulhrs_epi16(in[1], stg2_1); - stp1_11 = _mm_mulhrs_epi16(in[3], stg2_6); - stp1_12_0 = _mm_mulhrs_epi16(in[3], stg2_7); - } - - /* Stage3 */ - { - stp2_4 = _mm_mulhrs_epi16(in[2], stg3_0); - stp2_7 = _mm_mulhrs_epi16(in[2], stg3_1); - - stp1_9 = stp1_8_0; - stp1_10 = stp1_11; - - stp1_13 = stp1_12_0; - stp1_14 = stp1_15; - } - - /* Stage4 */ - { - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); - - stp1_0 = _mm_mulhrs_epi16(in[0], stg4_01); - - stp2_5 = stp2_4; - stp2_6 = stp2_7; - - - tmp0 = _mm_madd_epi16(lo_9_14, stg4_4); - tmp1 = _mm_madd_epi16(hi_9_14, stg4_4); - tmp2 = _mm_madd_epi16(lo_9_14, stg4_5); - tmp3 = _mm_madd_epi16(hi_9_14, stg4_5); - tmp4 = _mm_madd_epi16(lo_10_13, stg4_6); - tmp5 = _mm_madd_epi16(hi_10_13, stg4_6); - tmp6 = _mm_madd_epi16(lo_10_13, stg4_7); - tmp7 = _mm_madd_epi16(hi_10_13, stg4_7); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp1 = _mm_add_epi32(tmp1, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp3 = _mm_add_epi32(tmp3, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp5 = _mm_add_epi32(tmp5, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); - tmp7 = _mm_add_epi32(tmp7, rounding); - - tmp0 = _mm_srai_epi32(tmp0, 14); - tmp1 = _mm_srai_epi32(tmp1, 14); - tmp2 = _mm_srai_epi32(tmp2, 14); - tmp3 = _mm_srai_epi32(tmp3, 14); - tmp4 = _mm_srai_epi32(tmp4, 14); - 
tmp5 = _mm_srai_epi32(tmp5, 14); - tmp6 = _mm_srai_epi32(tmp6, 14); - tmp7 = _mm_srai_epi32(tmp7, 14); - - stp2_9 = _mm_packs_epi32(tmp0, tmp1); - stp2_14 = _mm_packs_epi32(tmp2, tmp3); - stp2_10 = _mm_packs_epi32(tmp4, tmp5); - stp2_13 = _mm_packs_epi32(tmp6, tmp7); - } - - /* Stage5 */ - { - stp1_2 = stp1_0; - stp1_3 = stp1_0; - - tmp0 = _mm_sub_epi16(stp2_6, stp2_5); - tmp1 = _mm_add_epi16(stp2_6, stp2_5); - - stp1_5 = _mm_mulhrs_epi16(tmp0, stg4_01); - stp1_6 = _mm_mulhrs_epi16(tmp1, stg4_01); - - stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); - stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); - - stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); - stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); - } - - /* Stage6 */ - { - stp2_0 = _mm_add_epi16(stp1_0, stp2_7); - stp2_1 = _mm_add_epi16(stp1_0, stp1_6); - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); - stp2_3 = _mm_add_epi16(stp1_3, stp2_4); - - tmp0 = _mm_sub_epi16(stp1_13, stp1_10); - tmp1 = _mm_add_epi16(stp1_13, stp1_10); - tmp2 = _mm_sub_epi16(stp1_12, stp1_11); - tmp3 = _mm_add_epi16(stp1_12, stp1_11); - - stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); - stp2_6 = _mm_sub_epi16(stp1_0, stp1_6); - stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); - - stp2_10 = _mm_mulhrs_epi16(tmp0, stg4_01); - stp2_13 = _mm_mulhrs_epi16(tmp1, stg4_01); - stp2_11 = _mm_mulhrs_epi16(tmp2, stg4_01); - stp2_12 = _mm_mulhrs_epi16(tmp3, stg4_01); - } - - // Stage7 - in[0] = _mm_add_epi16(stp2_0, stp1_15); - in[1] = _mm_add_epi16(stp2_1, stp1_14); - in[2] = _mm_add_epi16(stp2_2, stp2_13); - in[3] = _mm_add_epi16(stp2_3, stp2_12); - in[4] = _mm_add_epi16(stp2_4, stp2_11); - in[5] = _mm_add_epi16(stp2_5, stp2_10); - in[6] = _mm_add_epi16(stp2_6, stp1_9); - in[7] = _mm_add_epi16(stp2_7, stp1_8); - in[8] = _mm_sub_epi16(stp2_7, stp1_8); - in[9] = 
_mm_sub_epi16(stp2_6, stp1_9); - in[10] = _mm_sub_epi16(stp2_5, stp2_10); - in[11] = _mm_sub_epi16(stp2_4, stp2_11); - in[12] = _mm_sub_epi16(stp2_3, stp2_12); - in[13] = _mm_sub_epi16(stp2_2, stp2_13); - in[14] = _mm_sub_epi16(stp2_1, stp1_14); - in[15] = _mm_sub_epi16(stp2_0, stp1_15); -} - -void vp9_idct16x16_10_add_ssse3(const int16_t *input, uint8_t *dest, - int stride) { - const __m128i final_rounding = _mm_set1_epi16(1<<5); - const __m128i zero = _mm_setzero_si128(); - __m128i in[16], l[16]; - - int i; - // First 1-D inverse DCT - // Load input data. - in[0] = _mm_load_si128((const __m128i *)input); - in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4)); - in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6)); - - TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]); - - idct16_10_r1(in, l); - - // Second 1-D inverse transform, performed per 8x16 block - for (i = 0; i < 2; i++) { - array_transpose_4X8(l + 8*i, in); - - idct16_10_r2(in); - - // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - in[8] = _mm_adds_epi16(in[8], final_rounding); - in[9] = _mm_adds_epi16(in[9], final_rounding); - in[10] = _mm_adds_epi16(in[10], final_rounding); - in[11] = _mm_adds_epi16(in[11], final_rounding); - in[12] = _mm_adds_epi16(in[12], final_rounding); - in[13] = _mm_adds_epi16(in[13], final_rounding); - in[14] = _mm_adds_epi16(in[14], final_rounding); - in[15] = _mm_adds_epi16(in[15], final_rounding); - - in[0] = _mm_srai_epi16(in[0], 6); - in[1] = _mm_srai_epi16(in[1], 6); - in[2] = _mm_srai_epi16(in[2], 6); - in[3] = _mm_srai_epi16(in[3], 
6); - in[4] = _mm_srai_epi16(in[4], 6); - in[5] = _mm_srai_epi16(in[5], 6); - in[6] = _mm_srai_epi16(in[6], 6); - in[7] = _mm_srai_epi16(in[7], 6); - in[8] = _mm_srai_epi16(in[8], 6); - in[9] = _mm_srai_epi16(in[9], 6); - in[10] = _mm_srai_epi16(in[10], 6); - in[11] = _mm_srai_epi16(in[11], 6); - in[12] = _mm_srai_epi16(in[12], 6); - in[13] = _mm_srai_epi16(in[13], 6); - in[14] = _mm_srai_epi16(in[14], 6); - in[15] = _mm_srai_epi16(in[15], 6); - - RECON_AND_STORE(dest, in[0]); - RECON_AND_STORE(dest, in[1]); - RECON_AND_STORE(dest, in[2]); - RECON_AND_STORE(dest, in[3]); - RECON_AND_STORE(dest, in[4]); - RECON_AND_STORE(dest, in[5]); - RECON_AND_STORE(dest, in[6]); - RECON_AND_STORE(dest, in[7]); - RECON_AND_STORE(dest, in[8]); - RECON_AND_STORE(dest, in[9]); - RECON_AND_STORE(dest, in[10]); - RECON_AND_STORE(dest, in[11]); - RECON_AND_STORE(dest, in[12]); - RECON_AND_STORE(dest, in[13]); - RECON_AND_STORE(dest, in[14]); - RECON_AND_STORE(dest, in[15]); - - dest += 8 - (stride * 16); - } -} diff --git a/media/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm b/media/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm index 69b07f64575..22b5731886c 100644 --- a/media/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm +++ b/media/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm @@ -15,6 +15,11 @@ pw_4: times 8 dw 4 pw_8: times 8 dw 8 pw_16: times 8 dw 16 pw_32: times 8 dw 32 +dc_128: times 16 db 128 +pw2_4: times 8 dw 2 +pw2_8: times 8 dw 4 +pw2_16: times 8 dw 8 +pw2_32: times 8 dw 16 SECTION .text @@ -39,6 +44,46 @@ cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset RESTORE_GOT RET +INIT_MMX sse +cglobal dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + movd m0, [leftq] + psadbw m0, m1 + paddw m0, [GLOBAL(pw2_4)] + psraw m0, 2 + pshufw m0, m0, 0x0 + packuswb m0, m0 + movd [dstq ], m0 + movd [dstq+strideq], m0 + lea dstq, [dstq+strideq*2] + movd [dstq ], m0 + movd [dstq+strideq], m0 + + RESTORE_GOT + RET + +INIT_MMX 
sse +cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + movd m0, [aboveq] + psadbw m0, m1 + paddw m0, [GLOBAL(pw2_4)] + psraw m0, 2 + pshufw m0, m0, 0x0 + packuswb m0, m0 + movd [dstq ], m0 + movd [dstq+strideq], m0 + lea dstq, [dstq+strideq*2] + movd [dstq ], m0 + movd [dstq+strideq], m0 + + RESTORE_GOT + RET + INIT_MMX sse cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq @@ -68,6 +113,91 @@ cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset RESTORE_GOT RET +INIT_MMX sse +cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + movq m0, [aboveq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + psadbw m0, m1 + paddw m0, [GLOBAL(pw2_8)] + psraw m0, 3 + pshufw m0, m0, 0x0 + packuswb m0, m0 + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + + RESTORE_GOT + RET + +INIT_MMX sse +cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + movq m0, [leftq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + psadbw m0, m1 + paddw m0, [GLOBAL(pw2_8)] + psraw m0, 3 + pshufw m0, m0, 0x0 + packuswb m0, m0 + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + + RESTORE_GOT + RET + +INIT_MMX sse +cglobal dc_128_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + movd m0, [GLOBAL(dc_128)] + movd [dstq ], m0 + movd [dstq+strideq ], m0 + movd [dstq+strideq*2], m0 + movd [dstq+stride3q ], m0 
+ RESTORE_GOT + RET + +INIT_MMX sse +cglobal dc_128_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + movq m0, [GLOBAL(dc_128)] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + RESTORE_GOT + RET + INIT_XMM sse2 cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq @@ -100,6 +230,91 @@ cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset RESTORE_GOT REP_RET + +INIT_XMM sse2 +cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + pxor m2, m2 + mova m0, [aboveq] + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 4 + psadbw m0, m1 + psadbw m2, m1 + paddw m0, m2 + movhlps m2, m0 + paddw m0, m2 + paddw m0, [GLOBAL(pw2_16)] + psraw m0, 4 + pshuflw m0, m0, 0x0 + punpcklqdq m0, m0 + packuswb m0, m0 +.loop: + mova [dstq ], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + + RESTORE_GOT + REP_RET + +INIT_XMM sse2 +cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + pxor m2, m2 + mova m0, [leftq] + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 4 + psadbw m0, m1 + psadbw m2, m1 + paddw m0, m2 + movhlps m2, m0 + paddw m0, m2 + paddw m0, [GLOBAL(pw2_16)] + psraw m0, 4 + pshuflw m0, m0, 0x0 + punpcklqdq m0, m0 + packuswb m0, m0 +.loop: + mova [dstq ], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + + RESTORE_GOT + REP_RET + +INIT_XMM sse2 +cglobal dc_128_predictor_16x16, 4, 5, 3, 
dst, stride, above, left, goffset + GET_GOT goffsetq + + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 4 + mova m0, [GLOBAL(dc_128)] +.loop: + mova [dstq ], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + RESTORE_GOT + RET + + INIT_XMM sse2 cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset GET_GOT goffsetq @@ -142,6 +357,101 @@ cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset RESTORE_GOT REP_RET +INIT_XMM sse2 +cglobal dc_top_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + mova m0, [aboveq] + mova m2, [aboveq+16] + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 8 + psadbw m0, m1 + psadbw m2, m1 + paddw m0, m2 + movhlps m2, m0 + paddw m0, m2 + paddw m0, [GLOBAL(pw2_32)] + psraw m0, 5 + pshuflw m0, m0, 0x0 + punpcklqdq m0, m0 + packuswb m0, m0 +.loop: + mova [dstq ], m0 + mova [dstq +16], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq +16], m0 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q ], m0 + mova [dstq+stride3q +16], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + + RESTORE_GOT + REP_RET + +INIT_XMM sse2 +cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + mova m0, [leftq] + mova m2, [leftq+16] + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 8 + psadbw m0, m1 + psadbw m2, m1 + paddw m0, m2 + movhlps m2, m0 + paddw m0, m2 + paddw m0, [GLOBAL(pw2_32)] + psraw m0, 5 + pshuflw m0, m0, 0x0 + punpcklqdq m0, m0 + packuswb m0, m0 +.loop: + mova [dstq ], m0 + mova [dstq +16], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq +16], m0 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q ], m0 + mova [dstq+stride3q +16], m0 + lea dstq, 
[dstq+strideq*4] + dec lines4d + jnz .loop + + RESTORE_GOT + REP_RET + +INIT_XMM sse2 +cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 8 + mova m0, [GLOBAL(dc_128)] +.loop: + mova [dstq ], m0 + mova [dstq +16], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq +16], m0 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q ], m0 + mova [dstq+stride3q +16], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + RESTORE_GOT + RET + INIT_MMX sse cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above movd m0, [aboveq] diff --git a/media/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c b/media/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c index 0cb0912ad62..770a65f4ca1 100644 --- a/media/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c +++ b/media/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c @@ -9,6 +9,8 @@ */ #include /* AVX2 */ + +#include "./vp9_rtcd.h" #include "vpx_ports/mem.h" static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p, diff --git a/media/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/media/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c index 320328e2129..e321dbebe39 100644 --- a/media/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c +++ b/media/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c @@ -9,6 +9,8 @@ */ #include // SSE2 + +#include "./vp9_rtcd.h" #include "vp9/common/vp9_loopfilter.h" #include "vpx_ports/emmintrin_compat.h" @@ -729,12 +731,12 @@ void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p, const unsigned char *_blimit, const unsigned char *_limit, const unsigned char *_thresh, int count) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16); - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16); - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16); - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq2, 16); - 
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq1, 16); - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq0, 16); + DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); const __m128i zero = _mm_set1_epi16(0); const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); const __m128i limit = _mm_load_si128((const __m128i *)_limit); @@ -948,12 +950,12 @@ void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit1, const uint8_t *_limit1, const uint8_t *_thresh1) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16); - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16); - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16); - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq2, 16); - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq1, 16); - DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq0, 16); + DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); const __m128i zero = _mm_set1_epi16(0); const __m128i blimit = _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0), @@ -1461,7 +1463,7 @@ void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8); + DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 8]); unsigned char *src[2]; unsigned char *dst[2]; @@ -1484,7 +1486,7 @@ void vp9_lpf_vertical_8_sse2(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, 
const unsigned char *thresh, int count) { - DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 8); + DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]); unsigned char *src[1]; unsigned char *dst[1]; (void)count; @@ -1511,7 +1513,7 @@ void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8); + DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 8]); unsigned char *src[2]; unsigned char *dst[2]; @@ -1535,7 +1537,7 @@ void vp9_lpf_vertical_16_sse2(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh) { - DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 16); + DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 16]); unsigned char *src[2]; unsigned char *dst[2]; @@ -1562,7 +1564,7 @@ void vp9_lpf_vertical_16_sse2(unsigned char *s, int p, void vp9_lpf_vertical_16_dual_sse2(unsigned char *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256); + DECLARE_ALIGNED(16, unsigned char, t_dst[256]); // Transpose 16x16 transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16); diff --git a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c index 3bc7d3918b7..cee8d1e76ac 100644 --- a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c +++ b/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c @@ -8,7 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. */ +// Due to a header conflict between math.h and intrinsics includes with ceil() +// in certain configurations under vs9 this include needs to precede +// immintrin.h. 
+#include "./vp9_rtcd.h" + #include + +#include "vp9/common/x86/convolve.h" #include "vpx_ports/mem.h" // filters for 16_h8 and 16_v8 @@ -53,23 +60,23 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { # define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) #endif // __clang__ -void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { +static void vp9_filter_block1d16_h8_avx2(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i filtersReg; __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg; __m256i firstFilters, secondFilters, thirdFilters, forthFilters; __m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3; __m256i srcReg32b1, srcReg32b2, filtersReg32; unsigned int i; - unsigned int src_stride, dst_stride; + ptrdiff_t src_stride, dst_stride; // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. 
filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -104,9 +111,9 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, for (i = output_height; i > 1; i-=2) { // load the 2 strides of source srcReg32b1 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr-3))); + _mm_loadu_si128((const __m128i *)(src_ptr - 3))); srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, - _mm_loadu_si128((__m128i *) + _mm_loadu_si128((const __m128i *) (src_ptr+src_pixels_per_line-3)), 1); // filter the source buffer @@ -135,9 +142,9 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, // reading 2 strides of the next 16 bytes // (part of it was being read by earlier read) srcReg32b2 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+5))); + _mm_loadu_si128((const __m128i *)(src_ptr + 5))); srcReg32b2 = _mm256_inserti128_si256(srcReg32b2, - _mm_loadu_si128((__m128i *) + _mm_loadu_si128((const __m128i *) (src_ptr+src_pixels_per_line+5)), 1); // add and saturate the results together @@ -202,7 +209,7 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, __m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1; __m128i srcRegFilt2, srcRegFilt3; - srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3)); + srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); // filter the source buffer srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, @@ -237,7 +244,7 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, // reading the next 16 bytes // (part of it was being read by earlier read) - srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5)); + srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); // add and saturate the results together srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, @@ -297,12 +304,12 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, } } -void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { +static 
void vp9_filter_block1d16_v8_avx2(const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i filtersReg; __m256i addFilterReg64; __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5; @@ -310,11 +317,11 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, __m256i srcReg32b11, srcReg32b12, filtersReg32; __m256i firstFilters, secondFilters, thirdFilters, forthFilters; unsigned int i; - unsigned int src_stride, dst_stride; + ptrdiff_t src_stride, dst_stride; // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the // same data in both lanes of 128 bit register. filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -344,19 +351,19 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, // load 16 bytes 7 times in stride of src_pitch srcReg32b1 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr))); + _mm_loadu_si128((const __m128i *)(src_ptr))); srcReg32b2 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch))); srcReg32b3 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*2))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2))); srcReg32b4 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*3))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3))); srcReg32b5 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*4))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4))); srcReg32b6 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*5))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5))); 
srcReg32b7 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*6))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6))); // have each consecutive loads on the same 256 register srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, @@ -393,11 +400,11 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, // load the last 2 loads of 16 bytes and have every two // consecutive loads in the same 256 bit register srcReg32b8 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7))); srcReg32b7 = _mm256_inserti128_si256(srcReg32b7, _mm256_castsi256_si128(srcReg32b8), 1); srcReg32b9 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*8))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 8))); srcReg32b8 = _mm256_inserti128_si256(srcReg32b8, _mm256_castsi256_si128(srcReg32b9), 1); @@ -476,7 +483,7 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, __m128i srcRegFilt1, srcRegFilt3, srcRegFilt4, srcRegFilt5; __m128i srcRegFilt6, srcRegFilt7, srcRegFilt8; // load the last 16 bytes - srcRegFilt8 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7)); + srcRegFilt8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); // merge the last 2 results together srcRegFilt4 = _mm_unpacklo_epi8( @@ -542,3 +549,54 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); } } + +#if HAVE_AVX2 && HAVE_SSSE3 +filter8_1dfunction vp9_filter_block1d4_v8_ssse3; +#if ARCH_X86_64 +filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; +#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 +#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 +#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 +#else // 
ARCH_X86 +filter8_1dfunction vp9_filter_block1d8_v8_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_ssse3; +#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 +#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 +#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 +#endif // ARCH_X86_64 +filter8_1dfunction vp9_filter_block1d16_v2_ssse3; +filter8_1dfunction vp9_filter_block1d16_h2_ssse3; +filter8_1dfunction vp9_filter_block1d8_v2_ssse3; +filter8_1dfunction vp9_filter_block1d8_h2_ssse3; +filter8_1dfunction vp9_filter_block1d4_v2_ssse3; +filter8_1dfunction vp9_filter_block1d4_h2_ssse3; +#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 +#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 +#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 +#define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 +#define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3 +#define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3 +#define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3 +// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); +FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); + +// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_2D(, avx2); +#endif // HAVE_AX2 && HAVE_SSSE3 
diff --git a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c b/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c index 71dbb402dd4..5fd2857e140 100644 --- a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c +++ b/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c @@ -8,7 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. */ +// Due to a header conflict between math.h and intrinsics includes with ceil() +// in certain configurations under vs9 this include needs to precede +// tmmintrin.h. +#include "./vp9_rtcd.h" + #include + +#include "vp9/common/x86/convolve.h" #include "vpx_ports/mem.h" #include "vpx_ports/emmintrin_compat.h" @@ -38,12 +45,17 @@ DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = { 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 }; -void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { +// These are reused by the avx2 intrinsics. 
+filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; + +void vp9_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i firstFilters, secondFilters, shuffle1, shuffle2; __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; __m128i addFilterReg64, filtersReg, srcReg, minReg; @@ -51,7 +63,7 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr, // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 =_mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -72,7 +84,7 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr, shuffle2 = _mm_load_si128((__m128i const *)filt2_4_h8); for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3)); + srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); // filter the source buffer srcRegFilt1= _mm_shuffle_epi8(srcReg, shuffle1); @@ -109,12 +121,12 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr, } } -void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { +void vp9_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg; __m128i filt1Reg, filt2Reg, filt3Reg, 
filt4Reg; __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; @@ -123,7 +135,7 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr, // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -147,7 +159,7 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr, filt4Reg = _mm_load_si128((__m128i const *)filt4_global); for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3)); + srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); // filter the source buffer srcRegFilt1= _mm_shuffle_epi8(srcReg, filt1Reg); @@ -189,12 +201,12 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr, } } -void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { +static void vp9_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i addFilterReg64, filtersReg, srcReg1, srcReg2; __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; __m128i firstFilters, secondFilters, thirdFilters, forthFilters; @@ -203,7 +215,7 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 
bit register. filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -227,7 +239,7 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, filt4Reg = _mm_load_si128((__m128i const *)filt4_global); for (i = 0; i < output_height; i++) { - srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3)); + srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); // filter the source buffer srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg); @@ -254,7 +266,7 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, // reading the next 16 bytes. // (part of it was being read by earlier read) - srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5)); + srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); // add and saturate the results together srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, @@ -306,12 +318,12 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, } } -void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { +void vp9_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i addFilterReg64, filtersReg, minReg; __m128i firstFilters, secondFilters, thirdFilters, forthFilters; __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt5; @@ -321,7 +333,7 @@ void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr, // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. 
filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -336,17 +348,17 @@ void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr, forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); // load the first 7 rows of 8 bytes - srcReg1 = _mm_loadl_epi64((__m128i *)&src_ptr[0]); - srcReg2 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch)[0]); - srcReg3 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 2)[0]); - srcReg4 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 3)[0]); - srcReg5 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 4)[0]); - srcReg6 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 5)[0]); - srcReg7 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 6)[0]); + srcReg1 = _mm_loadl_epi64((const __m128i *)src_ptr); + srcReg2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); + srcReg3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); + srcReg4 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); + srcReg5 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4)); + srcReg6 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5)); + srcReg7 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)); for (i = 0; i < output_height; i++) { // load the last 8 bytes - srcReg8 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 7)[0]); + srcReg8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)); // merge the result together srcRegFilt1 = _mm_unpacklo_epi8(srcReg1, srcReg2); @@ -394,12 +406,12 @@ void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr, } } -void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { +static void vp9_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i 
addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt3; __m128i firstFilters, secondFilters, thirdFilters, forthFilters; __m128i srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8; @@ -409,7 +421,7 @@ void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr, // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -424,17 +436,17 @@ void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr, forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); // load the first 7 rows of 16 bytes - srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr)); - srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch)); - srcReg3 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 2)); - srcReg4 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 3)); - srcReg5 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 4)); - srcReg6 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 5)); - srcReg7 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 6)); + srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr)); + srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch)); + srcReg3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2)); + srcReg4 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3)); + srcReg5 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)); + srcReg6 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)); + srcReg7 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)); for (i = 0; i < output_height; i++) { // load the last 16 bytes - srcReg8 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 7)); + srcReg8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); // merge 
the result together srcRegFilt5 = _mm_unpacklo_epi8(srcReg1, srcReg2); @@ -508,3 +520,82 @@ void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr, output_ptr+=out_pitch; } } + +#if ARCH_X86_64 +filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d4_v8_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; +#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 +#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 +#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 +#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 +#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 +#else // ARCH_X86 +filter8_1dfunction vp9_filter_block1d16_v8_ssse3; +filter8_1dfunction vp9_filter_block1d16_h8_ssse3; +filter8_1dfunction vp9_filter_block1d8_v8_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_ssse3; +filter8_1dfunction vp9_filter_block1d4_v8_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_ssse3; +#endif // ARCH_X86_64 +filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; +filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; +filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; +filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; + +filter8_1dfunction vp9_filter_block1d16_v2_ssse3; +filter8_1dfunction vp9_filter_block1d16_h2_ssse3; +filter8_1dfunction vp9_filter_block1d8_v2_ssse3; +filter8_1dfunction vp9_filter_block1d8_h2_ssse3; +filter8_1dfunction vp9_filter_block1d4_v2_ssse3; +filter8_1dfunction vp9_filter_block1d4_h2_ssse3; +filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3; +filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3; 
+filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3; +filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3; +filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3; +filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3; + +// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); +FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); +FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); +FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, + ssse3); + +// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_2D(, ssse3); +FUN_CONV_2D(avg_ , ssse3); diff --git a/media/libvpx/vp9/decoder/vp9_decodeframe.c b/media/libvpx/vp9/decoder/vp9_decodeframe.c 
index 8840750fdbf..30ca2d08a7e 100644 --- a/media/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/media/libvpx/vp9/decoder/vp9_decodeframe.c @@ -15,6 +15,7 @@ #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vpx_ports/mem_ops.h" #include "vpx_scale/vpx_scale.h" @@ -276,39 +277,33 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, #endif // CONFIG_VP9_HIGHBITDEPTH if (eob == 1) { - vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0])); + memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0])); } else { if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) - vpx_memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); else if (tx_size == TX_32X32 && eob <= 34) - vpx_memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); + memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); else - vpx_memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); + memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); } } } struct intra_args { - VP9_COMMON *cm; MACROBLOCKD *xd; - FRAME_COUNTS *counts; vp9_reader *r; - const int16_t *const y_dequant; - const int16_t *const uv_dequant; + int seg_id; }; static void predict_and_reconstruct_intra_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct intra_args *const args = (struct intra_args *)arg; - VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; - MODE_INFO *const mi = xd->mi[0].src_mi; + MODE_INFO *const mi = xd->mi[0]; const PREDICTION_MODE mode = (plane == 0) ? get_y_mode(mi, block) : mi->mbmi.uv_mode; - const int16_t *const dequant = (plane == 0) ? 
args->y_dequant - : args->uv_dequant; int x, y; uint8_t *dst; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); @@ -320,37 +315,31 @@ static void predict_and_reconstruct_intra_block(int plane, int block, x, y, plane); if (!mi->mbmi.skip) { - const int eob = vp9_decode_block_tokens(cm, xd, args->counts, plane, block, + const int eob = vp9_decode_block_tokens(xd, plane, block, plane_bsize, x, y, tx_size, - args->r, dequant); + args->r, args->seg_id); inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride, eob); } } struct inter_args { - VP9_COMMON *cm; MACROBLOCKD *xd; vp9_reader *r; - FRAME_COUNTS *counts; int *eobtotal; - const int16_t *const y_dequant; - const int16_t *const uv_dequant; + int seg_id; }; static void reconstruct_inter_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct inter_args *args = (struct inter_args *)arg; - VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; - const int16_t *const dequant = (plane == 0) ? args->y_dequant - : args->uv_dequant; int x, y, eob; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); - eob = vp9_decode_block_tokens(cm, xd, args->counts, plane, block, plane_bsize, - x, y, tx_size, args->r, dequant); + eob = vp9_decode_block_tokens(xd, plane, block, plane_bsize, + x, y, tx_size, args->r, args->seg_id); inverse_transform_block(xd, plane, block, tx_size, &pd->dst.buf[4 * y * pd->dst.stride + 4 * x], pd->dst.stride, eob); @@ -367,13 +356,12 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, const int offset = mi_row * cm->mi_stride + mi_col; int x, y; - xd->mi = cm->mi + offset; - xd->mi[0].src_mi = &xd->mi[0]; // Point to self. 
- xd->mi[0].mbmi.sb_type = bsize; - + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + xd->mi[0]->mbmi.sb_type = bsize; for (y = 0; y < y_mis; ++y) for (x = !y; x < x_mis; ++x) { - xd->mi[y * cm->mi_stride + x].src_mi = &xd->mi[0]; + xd->mi[y * cm->mi_stride + x] = xd->mi[0]; } set_skip_context(xd, mi_row, mi_col); @@ -383,37 +371,36 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); - return &xd->mi[0].mbmi; + return &xd->mi[0]->mbmi; } static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &pbi->common; const int less8x8 = bsize < BLOCK_8X8; - int16_t y_dequant[2], uv_dequant[2]; - int qindex = cm->base_qindex; MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col); - vp9_read_mode_info(pbi, xd, counts, tile, mi_row, mi_col, r); + + if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { + const BLOCK_SIZE uv_subsize = + ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; + if (uv_subsize == BLOCK_INVALID) + vpx_internal_error(xd->error_info, + VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); + } + + vp9_read_mode_info(pbi, xd, tile, mi_row, mi_col, r); if (less8x8) bsize = BLOCK_8X8; if (mbmi->skip) { reset_skip_context(xd, bsize); - } else if (cm->seg.enabled) { - qindex = vp9_get_qindex(&cm->seg, mbmi->segment_id, cm->base_qindex); } - y_dequant[0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth); - y_dequant[1] = vp9_ac_quant(qindex, 0, cm->bit_depth); - uv_dequant[0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth); - uv_dequant[1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth); - if (!is_inter_block(mbmi)) { - struct intra_args arg = {cm, xd, 
counts, r , y_dequant, uv_dequant}; + struct intra_args arg = {xd, r, mbmi->segment_id}; vp9_foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block, &arg); } else { @@ -423,8 +410,7 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, // Reconstruction if (!mbmi->skip) { int eobtotal = 0; - struct inter_args arg = {cm, xd, r, counts, &eobtotal, y_dequant, - uv_dequant}; + struct inter_args arg = {xd, r, &eobtotal, mbmi->segment_id}; vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg); if (!less8x8 && eobtotal == 0) mbmi->skip = 1; // skip loopfilter @@ -435,13 +421,14 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, } static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, - FRAME_COUNTS *counts, int hbs, + int hbs, int mi_row, int mi_col, BLOCK_SIZE bsize, vp9_reader *r) { const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vp9_prob *const probs = get_partition_probs(cm, ctx); const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; + FRAME_COUNTS *counts = xd->counts; PARTITION_TYPE p; if (has_rows && has_cols) @@ -453,56 +440,48 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, else p = PARTITION_SPLIT; - if (!cm->frame_parallel_decoding_mode) + if (counts) ++counts->partition[ctx][p]; return p; } static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader* r, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &pbi->common; const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; PARTITION_TYPE partition; - BLOCK_SIZE subsize, uv_subsize; + BLOCK_SIZE subsize; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - partition = read_partition(cm, xd, counts, hbs, mi_row, mi_col, bsize, r); + partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r); subsize = 
get_subsize(bsize, partition); - uv_subsize = ss_size_lookup[subsize][cm->subsampling_x][cm->subsampling_y]; - if (subsize >= BLOCK_8X8 && uv_subsize == BLOCK_INVALID) - vpx_internal_error(xd->error_info, - VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); - if (subsize < BLOCK_8X8) { - decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize); + if (bsize == BLOCK_8X8) { + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); } else { switch (partition) { case PARTITION_NONE: - decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); break; case PARTITION_HORZ: - decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); if (mi_row + hbs < cm->mi_rows) - decode_block(pbi, xd, counts, tile, mi_row + hbs, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize); break; case PARTITION_VERT: - decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); if (mi_col + hbs < cm->mi_cols) - decode_block(pbi, xd, counts, tile, mi_row, mi_col + hbs, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize); break; case PARTITION_SPLIT: - decode_partition(pbi, xd, counts, tile, mi_row, mi_col, r, subsize); - decode_partition(pbi, xd, counts, tile, mi_row, mi_col + hbs, r, - subsize); - decode_partition(pbi, xd, counts, tile, mi_row + hbs, mi_col, r, - subsize); - decode_partition(pbi, xd, counts, tile, mi_row + hbs, mi_col + hbs, r, - subsize); + decode_partition(pbi, xd, tile, mi_row, mi_col, r, subsize); + decode_partition(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_partition(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize); + decode_partition(pbi, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); break; default: assert(0 && "Invalid partition type"); @@ -648,11 +627,39 @@ static void 
setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; + #if CONFIG_VP9_HIGHBITDEPTH xd->bd = (int)cm->bit_depth; #endif } +static void setup_segmentation_dequant(VP9_COMMON *const cm) { + // Build y/uv dequant values based on segmentation. + if (cm->seg.enabled) { + int i; + for (i = 0; i < MAX_SEGMENTS; ++i) { + const int qindex = vp9_get_qindex(&cm->seg, i, cm->base_qindex); + cm->y_dequant[i][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, + cm->bit_depth); + cm->y_dequant[i][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); + cm->uv_dequant[i][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, + cm->bit_depth); + cm->uv_dequant[i][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, + cm->bit_depth); + } + } else { + const int qindex = cm->base_qindex; + // When segmentation is disabled, only the first value is used. The + // remaining are don't cares. + cm->y_dequant[0][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth); + cm->y_dequant[0][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); + cm->uv_dequant[0][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, + cm->bit_depth); + cm->uv_dequant[0][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, + cm->bit_depth); + } +} + static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) { const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH, EIGHTTAP, @@ -687,7 +694,8 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { #if CONFIG_SIZE_LIMIT if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Width and height beyond allowed size."); + "Dimensions of %dx%d beyond allowed size of %dx%d.", + width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT); #endif if (cm->width != width || cm->height != height) { const int new_mi_rows = @@ -918,7 +926,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, int mi_row, mi_col; TileData *tile_data = NULL; - if 
(cm->lf.filter_level && pbi->lf_worker.data1 == NULL) { + if (cm->lf.filter_level && !cm->skip_loop_filter && + pbi->lf_worker.data1 == NULL) { CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_memalign(32, sizeof(LFWorkerData))); pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; @@ -928,7 +937,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, } } - if (cm->lf.filter_level) { + if (cm->lf.filter_level && !cm->skip_loop_filter) { LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; // Be sure to sync as we might be resuming after a failed frame decode. winterface->sync(&pbi->lf_worker); @@ -941,11 +950,11 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(cm->above_context, 0, - sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); + memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); - vpx_memset(cm->above_seg_context, 0, - sizeof(*cm->above_seg_context) * aligned_cols); + memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_cols); get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); @@ -968,6 +977,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, tile_data->cm = cm; tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; + tile_data->xd.counts = cm->frame_parallel_decoding_mode ? 
+ NULL : &cm->counts; vp9_tile_init(&tile, tile_data->cm, tile_row, tile_col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, @@ -990,7 +1001,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(pbi, &tile_data->xd, &cm->counts, &tile, mi_row, + decode_partition(pbi, &tile_data->xd, &tile, mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } pbi->mb.corrupted |= tile_data->xd.corrupted; @@ -999,7 +1010,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, "Failed to decode tile data"); } // Loopfilter one row. - if (cm->lf.filter_level) { + if (cm->lf.filter_level && !cm->skip_loop_filter) { const int lf_start = mi_row - MI_BLOCK_SIZE; LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; @@ -1028,7 +1039,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, } // Loopfilter remaining rows in the frame. 
- if (cm->lf.filter_level) { + if (cm->lf.filter_level && !cm->skip_loop_filter) { LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; winterface->sync(&pbi->lf_worker); lf_data->start = lf_data->stop; @@ -1063,7 +1074,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data, vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(tile_data->pbi, &tile_data->xd, &tile_data->counts, + decode_partition(tile_data->pbi, &tile_data->xd, tile, mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } @@ -1075,13 +1086,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data, static int compare_tile_buffers(const void *a, const void *b) { const TileBuffer *const buf1 = (const TileBuffer*)a; const TileBuffer *const buf2 = (const TileBuffer*)b; - if (buf1->size < buf2->size) { - return 1; - } else if (buf1->size == buf2->size) { - return 0; - } else { - return -1; - } + return (int)(buf2->size - buf1->size); } static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, @@ -1142,10 +1147,10 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(cm->above_context, 0, - sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); - vpx_memset(cm->above_seg_context, 0, - sizeof(*cm->above_seg_context) * aligned_mi_cols); + memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); + memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_mi_cols); // Load tile data into tile_buffers get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); @@ -1193,6 +1198,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, tile_data->pbi = pbi; tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; + tile_data->xd.counts = cm->frame_parallel_decoding_mode ? 
+ 0 : &tile_data->counts; vp9_tile_init(tile, cm, 0, buf->col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, @@ -1306,8 +1313,8 @@ static void read_bitdepth_colorspace_sampling( static size_t read_uncompressed_header(VP9Decoder *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - BufferPool *const pool = pbi->common.buffer_pool; + BufferPool *const pool = cm->buffer_pool; + RefCntBuffer *const frame_bufs = pool->frame_bufs; int i, mask, ref_index = 0; size_t sz; @@ -1367,7 +1374,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, setup_frame_size(cm, rb); if (pbi->need_resync) { - vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); pbi->need_resync = 0; } } else { @@ -1398,7 +1405,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); setup_frame_size(cm, rb); if (pbi->need_resync) { - vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); pbi->need_resync = 0; } } else if (pbi->need_resync != 1) { /* Skip if need resync */ @@ -1487,6 +1494,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, setup_loopfilter(&cm->lf, rb); setup_quantization(cm, &pbi->mb, rb); setup_segmentation(&cm->seg, rb); + setup_segmentation_dequant(cm); setup_tile_info(cm, rb); sz = vp9_rb_read_literal(rb, 16); @@ -1655,7 +1663,7 @@ void vp9_decode_frame(VP9Decoder *pbi, vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Decode failed. 
Frame data header is corrupted."); - if (cm->lf.filter_level) { + if (cm->lf.filter_level && !cm->skip_loop_filter) { vp9_loop_filter_frame_init(cm, cm->lf.filter_level); } @@ -1681,11 +1689,13 @@ void vp9_decode_frame(VP9Decoder *pbi, // Multi-threaded tile decoder *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); if (!xd->corrupted) { - // If multiple threads are used to decode tiles, then we use those threads - // to do parallel loopfiltering. - vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level, - 0, 0, pbi->tile_workers, pbi->num_tile_workers, - &pbi->lf_row_sync); + if (!cm->skip_loop_filter) { + // If multiple threads are used to decode tiles, then we use those + // threads to do parallel loopfiltering. + vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, + cm->lf.filter_level, 0, 0, pbi->tile_workers, + pbi->num_tile_workers, &pbi->lf_row_sync); + } } else { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Decode failed. Frame data is corrupted."); @@ -1806,197 +1816,210 @@ static void high_build_mc_border(const uint8_t *src8, int src_stride, } #endif // CONFIG_VP9_HIGHBITDEPTH -void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, - int plane, int block, int bw, int bh, int x, - int y, int w, int h, int mi_x, int mi_y) { +#if CONFIG_VP9_HIGHBITDEPTH +static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, + int x0, int y0, int b_w, int b_h, + int frame_width, int frame_height, + int border_offset, + uint8_t *const dst, int dst_buf_stride, + int subpel_x, int subpel_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + MACROBLOCKD *xd, + int w, int h, int ref, int xs, int ys) { + DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]); + const uint8_t *buf_ptr; + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = 
CONVERT_TO_BYTEPTR(mc_buf_high) + border_offset; + } else { + build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = ((uint8_t *)mc_buf_high) + border_offset; + } + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + } else { + inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); + } +} +#else +static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, + int x0, int y0, int b_w, int b_h, + int frame_width, int frame_height, + int border_offset, + uint8_t *const dst, int dst_buf_stride, + int subpel_x, int subpel_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + int w, int h, int ref, int xs, int ys) { + DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); + const uint8_t *buf_ptr; + + build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = mc_buf + border_offset; + + inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, + int plane, int bw, int bh, int x, + int y, int w, int h, int mi_x, int mi_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + struct buf_2d *pre_buf, + struct buf_2d *dst_buf, const MV* mv, + RefCntBuffer *ref_frame_buf, + int is_scaled, int ref) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi[0].src_mi; - const int is_compound = has_second_ref(&mi->mbmi); - const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); - int ref; + uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; + MV32 scaled_mv; + int xs, ys, x0, y0, x0_16, y0_16, frame_width, 
frame_height, + buf_stride, subpel_x, subpel_y; + uint8_t *ref_frame, *buf_ptr; - for (ref = 0; ref < 1 + is_compound; ++ref) { - const struct scale_factors *const sf = &xd->block_refs[ref]->sf; - struct buf_2d *const pre_buf = &pd->pre[ref]; - struct buf_2d *const dst_buf = &pd->dst; - uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - const MV mv = mi->mbmi.sb_type < BLOCK_8X8 - ? average_split_mvs(pd, mi, ref, block) - : mi->mbmi.mv[ref].as_mv; + // Get reference frame pointer, width and height. + if (plane == 0) { + frame_width = ref_frame_buf->buf.y_crop_width; + frame_height = ref_frame_buf->buf.y_crop_height; + ref_frame = ref_frame_buf->buf.y_buffer; + } else { + frame_width = ref_frame_buf->buf.uv_crop_width; + frame_height = ref_frame_buf->buf.uv_crop_height; + ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer + : ref_frame_buf->buf.v_buffer; + } - const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, + if (is_scaled) { + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh, pd->subsampling_x, pd->subsampling_y); + // Co-ordinate of containing block to pixel precision. + int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); - MV32 scaled_mv; - int xs, ys, x0, y0, x0_16, y0_16, y1, frame_width, frame_height, - buf_stride, subpel_x, subpel_y; - uint8_t *ref_frame, *buf_ptr; - const int idx = xd->block_refs[ref]->idx; - BufferPool *const pool = pbi->common.buffer_pool; - RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; - const int is_scaled = vp9_is_scaled(sf); + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = (x_start + x) << SUBPEL_BITS; + y0_16 = (y_start + y) << SUBPEL_BITS; - // Get reference frame pointer, width and height. 
- if (plane == 0) { - frame_width = ref_frame_buf->buf.y_crop_width; - frame_height = ref_frame_buf->buf.y_crop_height; - ref_frame = ref_frame_buf->buf.y_buffer; - } else { - frame_width = ref_frame_buf->buf.uv_crop_width; - frame_height = ref_frame_buf->buf.uv_crop_height; - ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer - : ref_frame_buf->buf.v_buffer; + // Co-ordinate of current block in reference frame + // to 1/16th pixel precision. + x0_16 = sf->scale_value_x(x0_16, sf); + y0_16 = sf->scale_value_y(y0_16, sf); + + // Map the top left corner of the block into the reference frame. + x0 = sf->scale_value_x(x_start + x, sf); + y0 = sf->scale_value_y(y_start + y, sf); + + // Scale the MV and incorporate the sub-pixel offset of the block + // in the reference frame. + scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); + xs = sf->x_step_q4; + ys = sf->y_step_q4; + } else { + // Co-ordinate of containing block to pixel precision. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = x0 << SUBPEL_BITS; + y0_16 = y0 << SUBPEL_BITS; + + scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y)); + scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x)); + xs = ys = 16; + } + subpel_x = scaled_mv.col & SUBPEL_MASK; + subpel_y = scaled_mv.row & SUBPEL_MASK; + + // Calculate the top left corner of the best matching block in the + // reference frame. + x0 += scaled_mv.col >> SUBPEL_BITS; + y0 += scaled_mv.row >> SUBPEL_BITS; + x0_16 += scaled_mv.col; + y0_16 += scaled_mv.row; + + // Get reference block pointer. + buf_ptr = ref_frame + y0 * pre_buf->stride + x0; + buf_stride = pre_buf->stride; + + // Do border extension if there is motion or the + // width/height is not a multiple of 8 pixels. 
+ if (is_scaled || scaled_mv.col || scaled_mv.row || + (frame_width & 0x7) || (frame_height & 0x7)) { + int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; + + // Get reference block bottom right horizontal coordinate. + int x1 = (x0_16 + (w - 1) * xs) >> SUBPEL_BITS; + int x_pad = 0, y_pad = 0; + + if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) { + x0 -= VP9_INTERP_EXTEND - 1; + x1 += VP9_INTERP_EXTEND; + x_pad = 1; } - if (is_scaled) { - // Co-ordinate of containing block to pixel precision. - int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); - int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); - - // Co-ordinate of the block to 1/16th pixel precision. - x0_16 = (x_start + x) << SUBPEL_BITS; - y0_16 = (y_start + y) << SUBPEL_BITS; - - // Co-ordinate of current block in reference frame - // to 1/16th pixel precision. - x0_16 = sf->scale_value_x(x0_16, sf); - y0_16 = sf->scale_value_y(y0_16, sf); - - // Map the top left corner of the block into the reference frame. - x0 = sf->scale_value_x(x_start + x, sf); - y0 = sf->scale_value_y(y_start + y, sf); - - // Scale the MV and incorporate the sub-pixel offset of the block - // in the reference frame. - scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); - xs = sf->x_step_q4; - ys = sf->y_step_q4; - } else { - // Co-ordinate of containing block to pixel precision. - x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - - // Co-ordinate of the block to 1/16th pixel precision. - x0_16 = x0 << SUBPEL_BITS; - y0_16 = y0 << SUBPEL_BITS; - - scaled_mv.row = mv_q4.row; - scaled_mv.col = mv_q4.col; - xs = ys = 16; + if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) { + y0 -= VP9_INTERP_EXTEND - 1; + y1 += VP9_INTERP_EXTEND; + y_pad = 1; } - subpel_x = scaled_mv.col & SUBPEL_MASK; - subpel_y = scaled_mv.row & SUBPEL_MASK; - // Calculate the top left corner of the best matching block in the - // reference frame. 
- x0 += scaled_mv.col >> SUBPEL_BITS; - y0 += scaled_mv.row >> SUBPEL_BITS; - x0_16 += scaled_mv.col; - y0_16 += scaled_mv.row; + // Wait until reference block is ready. Pad 7 more pixels as last 7 + // pixels of each superblock row can be changed by next superblock row. + if (pbi->frame_parallel_decode) + vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, + MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); - // Get reference block pointer. - buf_ptr = ref_frame + y0 * pre_buf->stride + x0; - buf_stride = pre_buf->stride; + // Skip border extension if block is inside the frame. + if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || + y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { + // Extend the border. + const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0; + const int b_w = x1 - x0 + 1; + const int b_h = y1 - y0 + 1; + const int border_offset = y_pad * 3 * b_w + x_pad * 3; - // Get reference block bottom right vertical coordinate. - y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; - - // Do border extension if there is motion or the - // width/height is not a multiple of 8 pixels. - if (is_scaled || scaled_mv.col || scaled_mv.row || - (frame_width & 0x7) || (frame_height & 0x7)) { - // Get reference block bottom right horizontal coordinate. - int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; - int x_pad = 0, y_pad = 0; - - if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) { - x0 -= VP9_INTERP_EXTEND - 1; - x1 += VP9_INTERP_EXTEND; - x_pad = 1; - } - - if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) { - y0 -= VP9_INTERP_EXTEND - 1; - y1 += VP9_INTERP_EXTEND; - y_pad = 1; - } - - // Wait until reference block is ready. Pad 7 more pixels as last 7 - // pixels of each superblock row can be changed by next superblock row. - if (pbi->frame_parallel_decode) - vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, - MAX(0, (y1 + 7) << (plane == 0 ? 
0 : 1))); - - // Skip border extension if block is inside the frame. - if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || - y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { - uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0; - // Extend the border. + extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, + frame_width, frame_height, border_offset, + dst, dst_buf->stride, + subpel_x, subpel_y, + kernel, sf, #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_build_mc_border(buf_ptr1, - pre_buf->stride, - xd->mc_buf_high, - x1 - x0 + 1, - x0, - y0, - x1 - x0 + 1, - y1 - y0 + 1, - frame_width, - frame_height); - buf_stride = x1 - x0 + 1; - buf_ptr = CONVERT_TO_BYTEPTR(xd->mc_buf_high) + - y_pad * 3 * buf_stride + x_pad * 3; - } else { - build_mc_border(buf_ptr1, - pre_buf->stride, - xd->mc_buf, - x1 - x0 + 1, - x0, - y0, - x1 - x0 + 1, - y1 - y0 + 1, - frame_width, - frame_height); - buf_stride = x1 - x0 + 1; - buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3; - } -#else - build_mc_border(buf_ptr1, - pre_buf->stride, - xd->mc_buf, - x1 - x0 + 1, - x0, - y0, - x1 - x0 + 1, - y1 - y0 + 1, - frame_width, - frame_height); - buf_stride = x1 - x0 + 1; - buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3; -#endif // CONFIG_VP9_HIGHBITDEPTH - } - } else { - // Wait until reference block is ready. Pad 7 more pixels as last 7 - // pixels of each superblock row can be changed by next superblock row. - if (pbi->frame_parallel_decode) - vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, - MAX(0, (y1 + 7) << (plane == 0 ? 0 : 1))); + xd, +#endif + w, h, ref, xs, ys); + return; } + } else { + // Wait until reference block is ready. Pad 7 more pixels as last 7 + // pixels of each superblock row can be changed by next superblock row. 
+ if (pbi->frame_parallel_decode) { + const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; + vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, + MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); + } + } #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); - } else { - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); - } -#else + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + } else { inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); -#endif // CONFIG_VP9_HIGHBITDEPTH } +#else + inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); +#endif // CONFIG_VP9_HIGHBITDEPTH } void vp9_dec_build_inter_predictors_sb(VP9Decoder *const pbi, MACROBLOCKD *xd, @@ -2005,24 +2028,50 @@ void vp9_dec_build_inter_predictors_sb(VP9Decoder *const pbi, MACROBLOCKD *xd, int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; + const MODE_INFO *mi = xd->mi[0]; + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); + const BLOCK_SIZE sb_type = mi->mbmi.sb_type; + const int is_compound = has_second_ref(&mi->mbmi); + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; + int ref; - if (xd->mi[0].src_mi->mbmi.sb_type < 
BLOCK_8X8) { - int i = 0, x, y; - assert(bsize == BLOCK_8X8); - for (y = 0; y < num_4x4_h; ++y) - for (x = 0; x < num_4x4_w; ++x) - dec_build_inter_predictors(pbi, xd, plane, i++, bw, bh, - 4 * x, 4 * y, 4, 4, mi_x, mi_y); - } else { - dec_build_inter_predictors(pbi, xd, plane, 0, bw, bh, - 0, 0, bw, bh, mi_x, mi_y); + for (ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + const int idx = xd->block_refs[ref]->idx; + BufferPool *const pool = pbi->common.buffer_pool; + RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; + const int is_scaled = vp9_is_scaled(sf); + + if (sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) { + for (x = 0; x < num_4x4_w; ++x) { + const MV mv = average_split_mvs(pd, mi, ref, i++); + dec_build_inter_predictors(pbi, xd, plane, bw, bh, + 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel, + sf, pre_buf, dst_buf, &mv, + ref_frame_buf, is_scaled, ref); + } + } + } else { + const MV mv = mi->mbmi.mv[ref].as_mv; + dec_build_inter_predictors(pbi, xd, plane, bw, bh, + 0, 0, bw, bh, mi_x, mi_y, kernel, + sf, pre_buf, dst_buf, &mv, ref_frame_buf, + is_scaled, ref); + } } } } diff --git a/media/libvpx/vp9/decoder/vp9_decodemv.c b/media/libvpx/vp9/decoder/vp9_decodemv.c index 072f6b7b71a..7ce3389e88c 100644 --- a/media/libvpx/vp9/decoder/vp9_decodemv.c +++ b/media/libvpx/vp9/decoder/vp9_decodemv.c @@ -27,30 +27,33 @@ static PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { return (PREDICTION_MODE)vp9_read_tree(r, vp9_intra_mode_tree, p); } -static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, FRAME_COUNTS *counts, +static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, MACROBLOCKD *xd, vp9_reader *r, int size_group) { const PREDICTION_MODE y_mode = read_intra_mode(r, cm->fc->y_mode_prob[size_group]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = 
xd->counts; + if (counts) ++counts->y_mode[size_group][y_mode]; return y_mode; } -static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, FRAME_COUNTS *counts, +static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, MACROBLOCKD *xd, vp9_reader *r, PREDICTION_MODE y_mode) { const PREDICTION_MODE uv_mode = read_intra_mode(r, cm->fc->uv_mode_prob[y_mode]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->uv_mode[y_mode][uv_mode]; return uv_mode; } -static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, FRAME_COUNTS *counts, +static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, MACROBLOCKD *xd, vp9_reader *r, int ctx) { const int mode = vp9_read_tree(r, vp9_inter_mode_tree, cm->fc->inter_mode_probs[ctx]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->inter_mode[ctx][mode]; return NEARESTMV + mode; @@ -61,8 +64,8 @@ static int read_segment_id(vp9_reader *r, const struct segmentation *seg) { } static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, - FRAME_COUNTS *counts, TX_SIZE max_tx_size, vp9_reader *r) { + FRAME_COUNTS *counts = xd->counts; const int ctx = vp9_get_tx_size_context(xd); const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs); int tx_size = vp9_read(r, tx_probs[0]); @@ -72,19 +75,18 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, tx_size += vp9_read(r, tx_probs[2]); } - if (!cm->frame_parallel_decoding_mode) + if (counts) ++get_tx_counts(max_tx_size, ctx, &counts->tx)[tx_size]; return (TX_SIZE)tx_size; } static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, - FRAME_COUNTS *counts, int allow_select, vp9_reader *r) { TX_MODE tx_mode = cm->tx_mode; - BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type; + BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) - return 
read_selected_tx_size(cm, xd, counts, max_tx_size, r); + return read_selected_tx_size(cm, xd, max_tx_size, r); else return MIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]); } @@ -145,7 +147,7 @@ static int read_intra_segment_id(VP9_COMMON *const cm, BLOCK_SIZE bsize, static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; int predicted_segment_id, segment_id; @@ -174,14 +176,14 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, } static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, - FRAME_COUNTS *counts, int segment_id, vp9_reader *r) { if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { return 1; } else { const int ctx = vp9_get_skip_context(xd); const int skip = vp9_read(r, cm->fc->skip_probs[ctx]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->skip[ctx][skip]; return skip; } @@ -189,18 +191,17 @@ static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, static void read_intra_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, int mi_row, int mi_col, vp9_reader *r) { - MODE_INFO *const mi = xd->mi[0].src_mi; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; - const MODE_INFO *above_mi = xd->mi[-cm->mi_stride].src_mi; - const MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1].src_mi : NULL; + const MODE_INFO *above_mi = xd->above_mi; + const MODE_INFO *left_mi = xd->left_mi; const BLOCK_SIZE bsize = mbmi->sb_type; int i; mbmi->segment_id = read_intra_segment_id(cm, bsize, mi_row, mi_col, r); - mbmi->skip = read_skip(cm, xd, counts, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(cm, xd, counts, 1, r); + mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); + mbmi->tx_size = read_tx_size(cm, xd, 1, r); mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE; @@ -285,13 +286,13 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm, const MACROBLOCKD *xd, - FRAME_COUNTS *counts, vp9_reader *r) { if (cm->reference_mode == REFERENCE_MODE_SELECT) { const int ctx = vp9_get_reference_mode_context(cm, xd); const REFERENCE_MODE mode = (REFERENCE_MODE)vp9_read(r, cm->fc->comp_inter_prob[ctx]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->comp_inter[ctx][mode]; return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE } else { @@ -301,34 +302,35 @@ static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm, // Read the referncence frame static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, vp9_reader *r, + vp9_reader *r, int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { FRAME_CONTEXT *const fc = cm->fc; + FRAME_COUNTS *counts = xd->counts; if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { ref_frame[0] = (MV_REFERENCE_FRAME)vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME); ref_frame[1] = NONE; } else { - const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, counts, r); + const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r); // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding if (mode == COMPOUND_REFERENCE) { const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int ctx = 
vp9_get_pred_context_comp_ref_p(cm, xd); const int bit = vp9_read(r, fc->comp_ref_prob[ctx]); - if (!cm->frame_parallel_decoding_mode) + if (counts) ++counts->comp_ref[ctx][bit]; ref_frame[idx] = cm->comp_fixed_ref; ref_frame[!idx] = cm->comp_var_ref[bit]; } else if (mode == SINGLE_REFERENCE) { const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]); - if (!cm->frame_parallel_decoding_mode) + if (counts) ++counts->single_ref[ctx0][0][bit0]; if (bit0) { const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]); - if (!cm->frame_parallel_decoding_mode) + if (counts) ++counts->single_ref[ctx1][1][bit1]; ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; } else { @@ -345,18 +347,19 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, static INLINE INTERP_FILTER read_switchable_interp_filter( VP9_COMMON *const cm, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, vp9_reader *r) { + vp9_reader *r) { const int ctx = vp9_get_pred_context_switchable_interp(xd); const INTERP_FILTER type = (INTERP_FILTER)vp9_read_tree(r, vp9_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->switchable_interp[ctx][type]; return type; } static void read_intra_block_mode_info(VP9_COMMON *const cm, - FRAME_COUNTS *counts, MODE_INFO *mi, + MACROBLOCKD *const xd, MODE_INFO *mi, vp9_reader *r) { MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mi->mbmi.sb_type; @@ -368,26 +371,26 @@ static void read_intra_block_mode_info(VP9_COMMON *const cm, switch (bsize) { case BLOCK_4X4: for (i = 0; i < 4; ++i) - mi->bmi[i].as_mode = read_intra_mode_y(cm, counts, r, 0); + mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0); mbmi->mode = mi->bmi[3].as_mode; break; case BLOCK_4X8: - mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, 
counts, + mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd, r, 0); mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode = - read_intra_mode_y(cm, counts, r, 0); + read_intra_mode_y(cm, xd, r, 0); break; case BLOCK_8X4: - mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, counts, + mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd, r, 0); mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode = - read_intra_mode_y(cm, counts, r, 0); + read_intra_mode_y(cm, xd, r, 0); break; default: - mbmi->mode = read_intra_mode_y(cm, counts, r, size_group_lookup[bsize]); + mbmi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); } - mbmi->uv_mode = read_intra_mode_uv(cm, counts, r, mbmi->mode); + mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode); } static INLINE int is_mv_valid(const MV *mv) { @@ -395,7 +398,7 @@ static INLINE int is_mv_valid(const MV *mv) { mv->col > MV_LOW && mv->col < MV_UPP; } -static INLINE int assign_mv(VP9_COMMON *cm, FRAME_COUNTS *counts, +static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, PREDICTION_MODE mode, int_mv mv[2], int_mv ref_mv[2], int_mv nearest_mv[2], int_mv near_mv[2], @@ -405,8 +408,8 @@ static INLINE int assign_mv(VP9_COMMON *cm, FRAME_COUNTS *counts, switch (mode) { case NEWMV: { - nmv_context_counts *const mv_counts = cm->frame_parallel_decoding_mode ? - NULL : &counts->mv; + FRAME_COUNTS *counts = xd->counts; + nmv_context_counts *const mv_counts = counts ? 
&counts->mv : NULL; for (i = 0; i < 1 + is_compound; ++i) { read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts, allow_hp); @@ -440,7 +443,6 @@ static INLINE int assign_mv(VP9_COMMON *cm, FRAME_COUNTS *counts, } static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, int segment_id, vp9_reader *r) { if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { return vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != @@ -448,7 +450,8 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, } else { const int ctx = vp9_get_intra_inter_context(xd); const int is_inter = vp9_read(r, cm->fc->intra_inter_prob[ctx]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->intra_inter[ctx][is_inter]; return is_inter; } @@ -462,7 +465,6 @@ static void fpm_sync(void *const data, int mi_row) { static void read_inter_block_mode_info(VP9Decoder *const pbi, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, const TileInfo *const tile, MODE_INFO *const mi, int mi_row, int mi_col, vp9_reader *r) { @@ -473,7 +475,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, int_mv nearestmv[2], nearmv[2]; int inter_mode_ctx, ref, is_compound; - read_ref_frames(cm, xd, counts, r, mbmi->segment_id, mbmi->ref_frame); + read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame); is_compound = has_second_ref(mbmi); for (ref = 0; ref < 1 + is_compound; ++ref) { @@ -500,7 +502,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, } } else { if (bsize >= BLOCK_8X8) - mbmi->mode = read_inter_mode(cm, counts, r, inter_mode_ctx); + mbmi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx); } if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) { @@ -511,7 +513,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, } mbmi->interp_filter = (cm->interp_filter == SWITCHABLE) - ? read_switchable_interp_filter(cm, xd, counts, r) + ? 
read_switchable_interp_filter(cm, xd, r) : cm->interp_filter; if (bsize < BLOCK_8X8) { @@ -524,7 +526,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, for (idx = 0; idx < 2; idx += num_4x4_w) { int_mv block[2]; const int j = idy * 2 + idx; - b_mode = read_inter_mode(cm, counts, r, inter_mode_ctx); + b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx); if (b_mode == NEARESTMV || b_mode == NEARMV) for (ref = 0; ref < 1 + is_compound; ++ref) @@ -532,7 +534,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, &nearest_sub8x8[ref], &near_sub8x8[ref]); - if (!assign_mv(cm, counts, b_mode, block, nearestmv, + if (!assign_mv(cm, xd, b_mode, block, nearestmv, nearest_sub8x8, near_sub8x8, is_compound, allow_hp, r)) { xd->corrupted |= 1; @@ -555,40 +557,38 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; } else { - xd->corrupted |= !assign_mv(cm, counts, mbmi->mode, mbmi->mv, nearestmv, + xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv, nearestmv, nearestmv, nearmv, is_compound, allow_hp, r); } } static void read_inter_frame_mode_info(VP9Decoder *const pbi, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; - MODE_INFO *const mi = xd->mi[0].src_mi; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; int inter_block; mbmi->mv[0].as_int = 0; mbmi->mv[1].as_int = 0; mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r); - mbmi->skip = read_skip(cm, xd, counts, mbmi->segment_id, r); - inter_block = read_is_inter_block(cm, xd, counts, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(cm, xd, counts, !mbmi->skip || !inter_block, r); + mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); + inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); + mbmi->tx_size = read_tx_size(cm, xd, 
!mbmi->skip || !inter_block, r); if (inter_block) - read_inter_block_mode_info(pbi, xd, counts, tile, mi, mi_row, mi_col, r); + read_inter_block_mode_info(pbi, xd, tile, mi, mi_row, mi_col, r); else - read_intra_block_mode_info(cm, counts, mi, r); + read_intra_block_mode_info(cm, xd, mi, r); } void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, - FRAME_COUNTS *counts, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; - MODE_INFO *const mi = xd->mi[0].src_mi; + MODE_INFO *const mi = xd->mi[0]; const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type]; const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type]; const int x_mis = MIN(bw, cm->mi_cols - mi_col); @@ -597,18 +597,18 @@ void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, int w, h; if (frame_is_intra_only(cm)) - read_intra_frame_mode_info(cm, xd, counts, mi_row, mi_col, r); + read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r); else - read_inter_frame_mode_info(pbi, xd, counts, tile, mi_row, mi_col, r); + read_inter_frame_mode_info(pbi, xd, tile, mi_row, mi_col, r); for (h = 0; h < y_mis; ++h) { MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { MV_REF *const mv = frame_mv + w; - mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0]; - mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1]; - mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int; - mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int; + mv->ref_frame[0] = mi->mbmi.ref_frame[0]; + mv->ref_frame[1] = mi->mbmi.ref_frame[1]; + mv->mv[0].as_int = mi->mbmi.mv[0].as_int; + mv->mv[1].as_int = mi->mbmi.mv[1].as_int; } } } diff --git a/media/libvpx/vp9/decoder/vp9_decodemv.h b/media/libvpx/vp9/decoder/vp9_decodemv.h index c79dff71888..dd97d8da030 100644 --- a/media/libvpx/vp9/decoder/vp9_decodemv.h +++ b/media/libvpx/vp9/decoder/vp9_decodemv.h @@ -21,7 +21,6 @@ extern "C" { struct TileInfo; void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, 
- FRAME_COUNTS *counts, const struct TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r); diff --git a/media/libvpx/vp9/decoder/vp9_decoder.c b/media/libvpx/vp9/decoder/vp9_decoder.c index 358f22a8dc1..7991a39e610 100644 --- a/media/libvpx/vp9/decoder/vp9_decoder.c +++ b/media/libvpx/vp9/decoder/vp9_decoder.c @@ -12,6 +12,8 @@ #include #include +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" @@ -39,6 +41,8 @@ static void initialize_dec(void) { if (!init_done) { vp9_rtcd(); + vpx_dsp_rtcd(); + vpx_scale_rtcd(); vp9_init_intra_predictors(); init_done = 1; } @@ -46,7 +50,9 @@ static void initialize_dec(void) { static void vp9_dec_setup_mi(VP9_COMMON *cm) { cm->mi = cm->mip + cm->mi_stride + 1; - vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + memset(cm->mi_grid_base, 0, + cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); } static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) { @@ -54,12 +60,17 @@ static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) { if (!cm->mip) return 1; cm->mi_alloc_size = mi_size; + cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO*)); + if (!cm->mi_grid_base) + return 1; return 0; } static void vp9_dec_free_mi(VP9_COMMON *cm) { vpx_free(cm->mip); cm->mip = NULL; + vpx_free(cm->mi_grid_base); + cm->mi_grid_base = NULL; } VP9Decoder *vp9_decoder_create(BufferPool *const pool) { @@ -89,8 +100,8 @@ VP9Decoder *vp9_decoder_create(BufferPool *const pool) { once(initialize_dec); // Initialize the references to not point to any frame buffers. 
- vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - vpx_memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map)); + memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map)); cm->current_video_frame = 0; pbi->ready_for_new_data = 1; @@ -200,6 +211,9 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, // Find an empty frame buffer. const int free_fb = get_free_fb(cm); + if (cm->new_fb_idx == INVALID_IDX) + return VPX_CODEC_MEM_ERROR; + // Decrease ref_count since it will be increased again in // ref_cnt_fb() below. --frame_bufs[free_fb].ref_count; @@ -287,7 +301,10 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, && frame_bufs[cm->new_fb_idx].ref_count == 0) pool->release_fb_cb(pool->cb_priv, &frame_bufs[cm->new_fb_idx].raw_frame_buffer); + // Find a free frame buffer. Return error if can not find any. cm->new_fb_idx = get_free_fb(cm); + if (cm->new_fb_idx == INVALID_IDX) + return VPX_CODEC_MEM_ERROR; // Assign a MV array to the frame buffer. 
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; diff --git a/media/libvpx/vp9/decoder/vp9_detokenize.c b/media/libvpx/vp9/decoder/vp9_detokenize.c index 35690b89b72..3304e64b2d5 100644 --- a/media/libvpx/vp9/decoder/vp9_detokenize.c +++ b/media/libvpx/vp9/decoder/vp9_detokenize.c @@ -17,6 +17,7 @@ #if CONFIG_COEFFICIENT_RANGE_CHECKING #include "vp9/common/vp9_idct.h" #endif +#include "vp9/common/vp9_scan.h" #include "vp9/decoder/vp9_detokenize.h" @@ -34,7 +35,7 @@ #define INCREMENT_COUNT(token) \ do { \ - if (!cm->frame_parallel_decoding_mode) \ + if (counts) \ ++coef_counts[band][ctx][token]; \ } while (0) @@ -45,33 +46,21 @@ static INLINE int read_coeff(const vp9_prob *probs, int n, vp9_reader *r) { return val; } -static const vp9_tree_index coeff_subtree_high[TREE_SIZE(ENTROPY_TOKENS)] = { - 2, 6, /* 0 = LOW_VAL */ - -TWO_TOKEN, 4, /* 1 = TWO */ - -THREE_TOKEN, -FOUR_TOKEN, /* 2 = THREE */ - 8, 10, /* 3 = HIGH_LOW */ - -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, /* 4 = CAT_ONE */ - 12, 14, /* 5 = CAT_THREEFOUR */ - -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, /* 6 = CAT_THREE */ - -CATEGORY5_TOKEN, -CATEGORY6_TOKEN /* 7 = CAT_FIVE */ -}; - -static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, - FRAME_COUNTS *counts, PLANE_TYPE type, +static int decode_coefs(const MACROBLOCKD *xd, + PLANE_TYPE type, tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, int ctx, const int16_t *scan, const int16_t *nb, vp9_reader *r) { + FRAME_COUNTS *counts = xd->counts; const int max_eob = 16 << (tx_size << 1); - const FRAME_CONTEXT *const fc = cm->fc; - const int ref = is_inter_block(&xd->mi[0].src_mi->mbmi); + const FRAME_CONTEXT *const fc = xd->fc; + const int ref = is_inter_block(&xd->mi[0]->mbmi); int band, c = 0; const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; const vp9_prob *prob; - unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1] = - counts->coef[tx_size][type][ref]; - unsigned int 
(*eob_branch_count)[COEFF_CONTEXTS] = - counts->eob_branch[tx_size][type][ref]; + unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; + unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; uint8_t token_cache[32 * 32]; const uint8_t *band_translate = get_band_translate(tx_size); const int dq_shift = (tx_size == TX_32X32); @@ -84,9 +73,14 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, const uint8_t *cat5_prob; const uint8_t *cat6_prob; + if (counts) { + coef_counts = counts->coef[tx_size][type][ref]; + eob_branch_count = counts->eob_branch[tx_size][type][ref]; + } + #if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - if (cm->bit_depth == VPX_BITS_10) { + if (xd->bd > VPX_BITS_8) { + if (xd->bd == VPX_BITS_10) { cat1_prob = vp9_cat1_prob_high10; cat2_prob = vp9_cat2_prob_high10; cat3_prob = vp9_cat3_prob_high10; @@ -122,7 +116,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, int val = -1; band = *band_translate++; prob = coef_probs[band][ctx]; - if (!cm->frame_parallel_decoding_mode) + if (counts) ++eob_branch_count[band][ctx]; if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) { INCREMENT_COUNT(EOB_MODEL_TOKEN); @@ -147,7 +141,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, val = 1; } else { INCREMENT_COUNT(TWO_TOKEN); - token = vp9_read_tree(r, coeff_subtree_high, + token = vp9_read_tree(r, vp9_coef_con_tree, vp9_pareto8_full[prob[PIVOT_NODE] - 1]); switch (token) { case TWO_TOKEN: @@ -172,7 +166,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, break; case CATEGORY6_TOKEN: #if CONFIG_VP9_HIGHBITDEPTH - switch (cm->bit_depth) { + switch (xd->bd) { case VPX_BITS_8: val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r); break; @@ -196,7 +190,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, #if CONFIG_COEFFICIENT_RANGE_CHECKING #if CONFIG_VP9_HIGHBITDEPTH dqcoeff[scan[c]] = highbd_check_range((vp9_read_bit(r) ? 
-v : v), - cm->bit_depth); + xd->bd); #else dqcoeff[scan[c]] = check_range(vp9_read_bit(r) ? -v : v); #endif // CONFIG_VP9_HIGHBITDEPTH @@ -212,16 +206,17 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, return c; } -int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, - FRAME_COUNTS *counts, int plane, int block, +int vp9_decode_block_tokens(MACROBLOCKD *xd, + int plane, int block, BLOCK_SIZE plane_bsize, int x, int y, TX_SIZE tx_size, vp9_reader *r, - const int16_t *const dequant) { + int seg_id) { struct macroblockd_plane *const pd = &xd->plane[plane]; + const int16_t *const dequant = pd->seg_dequant[seg_id]; const int ctx = get_entropy_context(tx_size, pd->above_context + x, pd->left_context + y); const scan_order *so = get_scan(xd, tx_size, pd->plane_type, block); - const int eob = decode_coefs(cm, xd, counts, pd->plane_type, + const int eob = decode_coefs(xd, pd->plane_type, BLOCK_OFFSET(pd->dqcoeff, block), tx_size, dequant, ctx, so->scan, so->neighbors, r); vp9_set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y); diff --git a/media/libvpx/vp9/decoder/vp9_detokenize.h b/media/libvpx/vp9/decoder/vp9_detokenize.h index 6682b5ecc76..df176066898 100644 --- a/media/libvpx/vp9/decoder/vp9_detokenize.h +++ b/media/libvpx/vp9/decoder/vp9_detokenize.h @@ -19,11 +19,11 @@ extern "C" { #endif -int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, - FRAME_COUNTS *counts, int plane, int block, +int vp9_decode_block_tokens(MACROBLOCKD *xd, + int plane, int block, BLOCK_SIZE plane_bsize, int x, int y, TX_SIZE tx_size, vp9_reader *r, - const int16_t *const dequant); + int seg_id); #ifdef __cplusplus } // extern "C" diff --git a/media/libvpx/vp9/decoder/vp9_dthread.c b/media/libvpx/vp9/decoder/vp9_dthread.c index be2e6cd15ef..96a63bd9e14 100644 --- a/media/libvpx/vp9/decoder/vp9_dthread.c +++ b/media/libvpx/vp9/decoder/vp9_dthread.c @@ -155,6 +155,10 @@ void vp9_frameworker_copy_context(VP9Worker *const dst_worker, 
dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync; vp9_frameworker_unlock_stats(src_worker); + dst_cm->bit_depth = src_cm->bit_depth; +#if CONFIG_VP9_HIGHBITDEPTH + dst_cm->use_highbitdepth = src_cm->use_highbitdepth; +#endif dst_cm->prev_frame = src_cm->show_existing_frame ? src_cm->prev_frame : src_cm->cur_frame; dst_cm->last_width = !src_cm->show_existing_frame ? diff --git a/media/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c b/media/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c index 9cf1e5e2c10..47363c75ba5 100644 --- a/media/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c +++ b/media/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c @@ -111,8 +111,8 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0); } } else { - vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t)); - vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t)); + memset(qcoeff_ptr, 0, count * sizeof(int16_t)); + memset(dqcoeff_ptr, 0, count * sizeof(int16_t)); *eob_ptr = 0; } } diff --git a/media/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c b/media/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c index b1ad83731ad..0ac194e92b0 100644 --- a/media/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c +++ b/media/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c @@ -10,106 +10,24 @@ #include #include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" -#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_filter.h" -#include "vp9/encoder/vp9_variance.h" - -enum { kWidth8 = 8 }; -enum { kHeight8 = 8 }; -enum { kHeight8PlusOne = 9 }; -enum { kWidth16 = 16 }; -enum { kHeight16 = 16 }; -enum { kHeight16PlusOne = 17 }; -enum { kWidth32 = 32 }; -enum { kHeight32 = 32 }; -enum { kHeight32PlusOne = 33 }; -enum { kWidth64 = 64 }; -enum { kHeight64 = 64 }; -enum { kHeight64PlusOne = 65 }; -enum { kPixelStepOne = 1 }; -enum { kAlign16 = 16 }; - 
-static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) { - const int32x4_t a = vpaddlq_s16(v_16x8); - const int64x2_t b = vpaddlq_s32(a); - const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), - vreinterpret_s32_s64(vget_high_s64(b))); - return vget_lane_s32(c, 0); -} - -static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) { - const int64x2_t b = vpaddlq_s32(v_32x4); - const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), - vreinterpret_s32_s64(vget_high_s64(b))); - return vget_lane_s32(c, 0); -} - -// w * h must be less than 2048 or local variable v_sum may overflow. -static void variance_neon_w8(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int w, int h, uint32_t *sse, int *sum) { - int i, j; - int16x8_t v_sum = vdupq_n_s16(0); - int32x4_t v_sse_lo = vdupq_n_s32(0); - int32x4_t v_sse_hi = vdupq_n_s32(0); - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 8) { - const uint8x8_t v_a = vld1_u8(&a[j]); - const uint8x8_t v_b = vld1_u8(&b[j]); - const uint16x8_t v_diff = vsubl_u8(v_a, v_b); - const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff); - v_sum = vaddq_s16(v_sum, sv_diff); - v_sse_lo = vmlal_s16(v_sse_lo, - vget_low_s16(sv_diff), - vget_low_s16(sv_diff)); - v_sse_hi = vmlal_s16(v_sse_hi, - vget_high_s16(sv_diff), - vget_high_s16(sv_diff)); - } - a += a_stride; - b += b_stride; - } - - *sum = horizontal_add_s16x8(v_sum); - *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi)); -} - -void vp9_get8x8var_neon(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, kWidth8, - kHeight8, sse, sum); -} - -unsigned int vp9_variance8x8_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum; - variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum); - return *sse - (((int64_t)sum * sum) 
>> 6); // >> 6 = / 8 * 8 -} - -void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, kWidth16, - kHeight16, sse, sum); -} - -unsigned int vp9_variance16x16_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum; - variance_neon_w8(a, a_stride, b, b_stride, kWidth16, kHeight16, sse, &sum); - return *sse - (((int64_t)sum * sum) >> 8); // >> 8 = / 16 * 16 -} +static uint8_t bilinear_filters[8][2] = { + { 128, 0, }, + { 112, 16, }, + { 96, 32, }, + { 80, 48, }, + { 64, 64, }, + { 48, 80, }, + { 32, 96, }, + { 16, 112, }, +}; static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, uint8_t *output_ptr, @@ -117,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { - const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]); - const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]); + const uint8_t *vp9_filter) { + const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]); + const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]); unsigned int i; for (i = 0; i < output_height; ++i) { const uint8x8_t src_0 = vld1_u8(&src_ptr[0]); @@ -140,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr, int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { - const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]); - const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]); + const uint8_t *vp9_filter) { + const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]); + const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]); unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; j += 16) { @@ -169,15 +87,15 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src, const uint8_t *dst, int dst_stride, unsigned 
int *sse) { - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight8 * kWidth8); - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight8PlusOne * kWidth8); + DECLARE_ALIGNED(16, uint8_t, temp2[8 * 8]); + DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]); - var_filter_block2d_bil_w8(src, fdata3, src_stride, kPixelStepOne, - kHeight8PlusOne, kWidth8, - BILINEAR_FILTERS_2TAP(xoffset)); - var_filter_block2d_bil_w8(fdata3, temp2, kWidth8, kWidth8, kHeight8, - kWidth8, BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance8x8_neon(temp2, kWidth8, dst, dst_stride, sse); + var_filter_block2d_bil_w8(src, fdata3, src_stride, 1, + 9, 8, + bilinear_filters[xoffset]); + var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8, + 8, bilinear_filters[yoffset]); + return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse); } unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src, @@ -187,85 +105,15 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src, const uint8_t *dst, int dst_stride, unsigned int *sse) { - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight16 * kWidth16); - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight16PlusOne * kWidth16); + DECLARE_ALIGNED(16, uint8_t, temp2[16 * 16]); + DECLARE_ALIGNED(16, uint8_t, fdata3[17 * 16]); - var_filter_block2d_bil_w16(src, fdata3, src_stride, kPixelStepOne, - kHeight16PlusOne, kWidth16, - BILINEAR_FILTERS_2TAP(xoffset)); - var_filter_block2d_bil_w16(fdata3, temp2, kWidth16, kWidth16, kHeight16, - kWidth16, BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance16x16_neon(temp2, kWidth16, dst, dst_stride, sse); -} - -void vp9_get32x32var_neon(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, kWidth32, - kHeight32, sse, sum); -} - -unsigned int vp9_variance32x32_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum; - 
variance_neon_w8(a, a_stride, b, b_stride, kWidth32, kHeight32, sse, &sum); - return *sse - (((int64_t)sum * sum) >> 10); // >> 10 = / 32 * 32 -} - -unsigned int vp9_variance32x64_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum1, sum2; - uint32_t sse1, sse2; - variance_neon_w8(a, a_stride, b, b_stride, kWidth32, kHeight32, &sse1, &sum1); - variance_neon_w8(a + (kHeight32 * a_stride), a_stride, - b + (kHeight32 * b_stride), b_stride, kWidth32, kHeight32, - &sse2, &sum2); - *sse = sse1 + sse2; - sum1 += sum2; - return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64 -} - -unsigned int vp9_variance64x32_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum1, sum2; - uint32_t sse1, sse2; - variance_neon_w8(a, a_stride, b, b_stride, kWidth64, kHeight16, &sse1, &sum1); - variance_neon_w8(a + (kHeight16 * a_stride), a_stride, - b + (kHeight16 * b_stride), b_stride, kWidth64, kHeight16, - &sse2, &sum2); - *sse = sse1 + sse2; - sum1 += sum2; - return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64 -} - -unsigned int vp9_variance64x64_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum1, sum2; - uint32_t sse1, sse2; - - variance_neon_w8(a, a_stride, b, b_stride, kWidth64, kHeight16, &sse1, &sum1); - variance_neon_w8(a + (kHeight16 * a_stride), a_stride, - b + (kHeight16 * b_stride), b_stride, kWidth64, kHeight16, - &sse2, &sum2); - sse1 += sse2; - sum1 += sum2; - - variance_neon_w8(a + (kHeight16 * 2 * a_stride), a_stride, - b + (kHeight16 * 2 * b_stride), b_stride, - kWidth64, kHeight16, &sse2, &sum2); - sse1 += sse2; - sum1 += sum2; - - variance_neon_w8(a + (kHeight16 * 3 * a_stride), a_stride, - b + (kHeight16 * 3 * b_stride), b_stride, - kWidth64, kHeight16, &sse2, &sum2); - *sse = sse1 + sse2; - sum1 += sum2; - return *sse - (((int64_t)sum1 * sum1) >> 12); // >> 12 = / 64 * 64 + 
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, + 17, 16, + bilinear_filters[xoffset]); + var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16, + 16, bilinear_filters[yoffset]); + return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse); } unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src, @@ -275,15 +123,15 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src, const uint8_t *dst, int dst_stride, unsigned int *sse) { - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight32 * kWidth32); - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight32PlusOne * kWidth32); + DECLARE_ALIGNED(16, uint8_t, temp2[32 * 32]); + DECLARE_ALIGNED(16, uint8_t, fdata3[33 * 32]); - var_filter_block2d_bil_w16(src, fdata3, src_stride, kPixelStepOne, - kHeight32PlusOne, kWidth32, - BILINEAR_FILTERS_2TAP(xoffset)); - var_filter_block2d_bil_w16(fdata3, temp2, kWidth32, kWidth32, kHeight32, - kWidth32, BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance32x32_neon(temp2, kWidth32, dst, dst_stride, sse); + var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, + 33, 32, + bilinear_filters[xoffset]); + var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32, + 32, bilinear_filters[yoffset]); + return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse); } unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src, @@ -293,13 +141,13 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src, const uint8_t *dst, int dst_stride, unsigned int *sse) { - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight64 * kWidth64); - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight64PlusOne * kWidth64); + DECLARE_ALIGNED(16, uint8_t, temp2[64 * 64]); + DECLARE_ALIGNED(16, uint8_t, fdata3[65 * 64]); - var_filter_block2d_bil_w16(src, fdata3, src_stride, kPixelStepOne, - kHeight64PlusOne, kWidth64, - BILINEAR_FILTERS_2TAP(xoffset)); - var_filter_block2d_bil_w16(fdata3, temp2, kWidth64, kWidth64, kHeight64, - kWidth64, 
BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance64x64_neon(temp2, kWidth64, dst, dst_stride, sse); + var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, + 65, 64, + bilinear_filters[xoffset]); + var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64, + 64, bilinear_filters[yoffset]); + return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse); } diff --git a/media/libvpx/vp9/encoder/vp9_aq_complexity.c b/media/libvpx/vp9/encoder/vp9_aq_complexity.c index 22e5217b620..bea7653d2a7 100644 --- a/media/libvpx/vp9/encoder/vp9_aq_complexity.c +++ b/media/libvpx/vp9/encoder/vp9_aq_complexity.c @@ -11,6 +11,7 @@ #include #include +#include "vp9/encoder/vp9_aq_complexity.h" #include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/common/vp9_seg_common.h" @@ -55,8 +56,7 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth); // Clear down the segment map. - vpx_memset(cpi->segmentation_map, DEFAULT_AQ2_SEG, - cm->mi_rows * cm->mi_cols); + memset(cpi->segmentation_map, DEFAULT_AQ2_SEG, cm->mi_rows * cm->mi_cols); vp9_clearall_segfeatures(seg); diff --git a/media/libvpx/vp9/encoder/vp9_aq_complexity.h b/media/libvpx/vp9/encoder/vp9_aq_complexity.h index c0dce6c5b7d..e9acb1ca504 100644 --- a/media/libvpx/vp9/encoder/vp9_aq_complexity.h +++ b/media/libvpx/vp9/encoder/vp9_aq_complexity.h @@ -16,6 +16,8 @@ extern "C" { #endif +#include "vp9/common/vp9_enums.h" + struct VP9_COMP; struct macroblock; diff --git a/media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index 4bd083c27cc..4b1c9595750 100644 --- a/media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -279,29 +279,68 @@ void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) { // period. Depending on past encoding stats, GF flag may be reset and update // may not occur until next baseline_gf_interval. 
if (cr->percent_refresh > 0) - rc->baseline_gf_interval = 2 * (100 / cr->percent_refresh); + rc->baseline_gf_interval = 4 * (100 / cr->percent_refresh); else - rc->baseline_gf_interval = 20; + rc->baseline_gf_interval = 40; } -// Update some encoding stats (from the just encoded frame), and if the golden -// reference is to be updated check if we should NOT update the golden ref. +// Update some encoding stats (from the just encoded frame). If this frame's +// background has high motion, refresh the golden frame. Otherwise, if the +// golden reference is to be updated check if we should NOT update the golden +// ref. void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; int mi_row, mi_col; double fraction_low = 0.0; int low_content_frame = 0; - for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) - for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { - if (cr->map[mi_row * cm->mi_cols + mi_col] < 1) + + MODE_INFO **mi = cm->mi_grid_visible; + RATE_CONTROL *const rc = &cpi->rc; + const int rows = cm->mi_rows, cols = cm->mi_cols; + int cnt1 = 0, cnt2 = 0; + int force_gf_refresh = 0; + + for (mi_row = 0; mi_row < rows; mi_row++) { + for (mi_col = 0; mi_col < cols; mi_col++) { + int16_t abs_mvr = mi[0]->mbmi.mv[0].as_mv.row >= 0 ? + mi[0]->mbmi.mv[0].as_mv.row : -1 * mi[0]->mbmi.mv[0].as_mv.row; + int16_t abs_mvc = mi[0]->mbmi.mv[0].as_mv.col >= 0 ? + mi[0]->mbmi.mv[0].as_mv.col : -1 * mi[0]->mbmi.mv[0].as_mv.col; + + // Calculate the motion of the background. + if (abs_mvr <= 16 && abs_mvc <= 16) { + cnt1++; + if (abs_mvr == 0 && abs_mvc == 0) + cnt2++; + } + mi++; + + // Accumulate low_content_frame. + if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++; } + mi += 8; + } + + // For video conference clips, if the background has high motion in current + // frame because of the camera movement, set this frame as the golden frame. 
+ // Use 70% and 5% as the thresholds for golden frame refreshing. + if (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1) { + vp9_cyclic_refresh_set_golden_update(cpi); + rc->frames_till_gf_update_due = rc->baseline_gf_interval; + + if (rc->frames_till_gf_update_due > rc->frames_to_key) + rc->frames_till_gf_update_due = rc->frames_to_key; + cpi->refresh_golden_frame = 1; + force_gf_refresh = 1; + } + fraction_low = - (double)low_content_frame / (cm->mi_rows * cm->mi_cols); + (double)low_content_frame / (rows * cols); // Update average. cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4; - if (cpi->refresh_golden_frame == 1) { + if (!force_gf_refresh && cpi->refresh_golden_frame == 1) { // Don't update golden reference if the amount of low_content for the // current encoded frame is small, or if the recursive average of the // low_content over the update interval window falls below threshold. @@ -318,13 +357,13 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) { // 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock. // Blocks labeled as BOOST1 may later get set to BOOST2 (during the // encoding of the superblock). -void vp9_cyclic_refresh_update_map(VP9_COMP *const cpi) { +static void cyclic_refresh_update_map(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; unsigned char *const seg_map = cpi->segmentation_map; int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; int xmis, ymis, x, y; - vpx_memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols); + memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols); sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; sbs_in_frame = sb_cols * sb_rows; @@ -411,7 +450,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { (cpi->svc.spatial_layer_id > 0)) { // Set segmentation map to 0 and disable. 
unsigned char *const seg_map = cpi->segmentation_map; - vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols); + memset(seg_map, 0, cm->mi_rows * cm->mi_cols); vp9_disable_segmentation(&cm->seg); if (cm->frame_type == KEY_FRAME) cr->sb_index = 0; @@ -471,7 +510,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta); // Update the segmentation and refresh map. - vp9_cyclic_refresh_update_map(cpi); + cyclic_refresh_update_map(cpi); } } diff --git a/media/libvpx/vp9/encoder/vp9_aq_variance.c b/media/libvpx/vp9/encoder/vp9_aq_variance.c index be6f7e4ee53..f072717f1d9 100644 --- a/media/libvpx/vp9/encoder/vp9_aq_variance.c +++ b/media/libvpx/vp9/encoder/vp9_aq_variance.c @@ -10,6 +10,8 @@ #include +#include "vpx_ports/mem.h" + #include "vp9/encoder/vp9_aq_variance.h" #include "vp9/common/vp9_seg_common.h" @@ -80,6 +82,61 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) { } } +/* TODO(agrange, paulwilkins): The block_variance calls the unoptimized versions + * of variance() and highbd_8_variance(). It should not. 
+ */ +static void aq_variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + int i, j; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + + a += a_stride; + b += b_stride; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void aq_highbd_variance64(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, uint64_t *sse, uint64_t *sum) { + int i, j; + + uint16_t *a = CONVERT_TO_SHORTPTR(a8); + uint16_t *b = CONVERT_TO_SHORTPTR(b8); + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + a += a_stride; + b += b_stride; + } +} + +static void aq_highbd_8_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + aq_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + *sse = (unsigned int)sse_long; + *sum = (int)sum_long; +} +#endif // CONFIG_VP9_HIGHBITDEPTH static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { @@ -96,18 +153,18 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, int avg; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_variance(x->plane[0].src.buf, x->plane[0].src.stride, - CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, - &sse, &avg); + aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride, + CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, + &sse, &avg); sse >>= 2 * (xd->bd - 8); avg >>= (xd->bd - 8); } else { - variance(x->plane[0].src.buf, x->plane[0].src.stride, - vp9_64_zeros, 0, bw, bh, &sse, &avg); + aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, + vp9_64_zeros, 0, bw, bh, &sse, &avg); } #else - 
variance(x->plane[0].src.buf, x->plane[0].src.stride, - vp9_64_zeros, 0, bw, bh, &sse, &avg); + aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, + vp9_64_zeros, 0, bw, bh, &sse, &avg); #endif // CONFIG_VP9_HIGHBITDEPTH var = sse - (((int64_t)avg * avg) / (bw * bh)); return (256 * var) / (bw * bh); diff --git a/media/libvpx/vp9/encoder/vp9_avg.c b/media/libvpx/vp9/encoder/vp9_avg.c index dc5cfe26863..b9987c1ce99 100644 --- a/media/libvpx/vp9/encoder/vp9_avg.c +++ b/media/libvpx/vp9/encoder/vp9_avg.c @@ -7,6 +7,7 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp9_rtcd.h" #include "vp9/common/vp9_common.h" #include "vpx_ports/mem.h" @@ -28,6 +29,94 @@ unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) { return (sum + 8) >> 4; } +static void hadamard_col8(const int16_t *src_diff, int src_stride, + int16_t *coeff) { + int16_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride]; + int16_t b1 = src_diff[0 * src_stride] - src_diff[1 * src_stride]; + int16_t b2 = src_diff[2 * src_stride] + src_diff[3 * src_stride]; + int16_t b3 = src_diff[2 * src_stride] - src_diff[3 * src_stride]; + int16_t b4 = src_diff[4 * src_stride] + src_diff[5 * src_stride]; + int16_t b5 = src_diff[4 * src_stride] - src_diff[5 * src_stride]; + int16_t b6 = src_diff[6 * src_stride] + src_diff[7 * src_stride]; + int16_t b7 = src_diff[6 * src_stride] - src_diff[7 * src_stride]; + + int16_t c0 = b0 + b2; + int16_t c1 = b1 + b3; + int16_t c2 = b0 - b2; + int16_t c3 = b1 - b3; + int16_t c4 = b4 + b6; + int16_t c5 = b5 + b7; + int16_t c6 = b4 - b6; + int16_t c7 = b5 - b7; + + coeff[0] = c0 + c4; + coeff[7] = c1 + c5; + coeff[3] = c2 + c6; + coeff[4] = c3 + c7; + coeff[2] = c0 - c4; + coeff[6] = c1 - c5; + coeff[1] = c2 - c6; + coeff[5] = c3 - c7; +} + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, + int16_t *coeff) { + int idx; + int16_t buffer[64]; + int16_t *tmp_buf = &buffer[0]; 
+ for (idx = 0; idx < 8; ++idx) { + hadamard_col8(src_diff, src_stride, tmp_buf); + tmp_buf += 8; + ++src_diff; + } + + tmp_buf = &buffer[0]; + for (idx = 0; idx < 8; ++idx) { + hadamard_col8(tmp_buf, 8, coeff); + coeff += 8; + ++tmp_buf; + } +} + +// In place 16x16 2D Hadamard transform +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, + int16_t *coeff) { + int idx; + for (idx = 0; idx < 4; ++idx) { + int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + + (idx & 0x01) * 8; + vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); + } + + for (idx = 0; idx < 64; ++idx) { + int16_t a0 = coeff[0]; + int16_t a1 = coeff[64]; + int16_t a2 = coeff[128]; + int16_t a3 = coeff[192]; + + int16_t b0 = a0 + a1; + int16_t b1 = a0 - a1; + int16_t b2 = a2 + a3; + int16_t b3 = a2 - a3; + + coeff[0] = (b0 + b2) >> 1; + coeff[64] = (b1 + b3) >> 1; + coeff[128] = (b0 - b2) >> 1; + coeff[192] = (b1 - b3) >> 1; + + ++coeff; + } +} + +int16_t vp9_satd_c(const int16_t *coeff, int length) { + int i; + int satd = 0; + for (i = 0; i < length; ++i) + satd += abs(coeff[i]); + + return (int16_t)satd; +} + // Integer projection onto row vectors. void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height) { @@ -67,6 +156,20 @@ int vp9_vector_var_c(int16_t const *ref, int16_t const *src, return var; } +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, + int *min, int *max) { + int i, j; + *min = 255; + *max = 0; + for (i = 0; i < 8; ++i, s += p, d += dp) { + for (j = 0; j < 8; ++j) { + int diff = abs(s[j]-d[j]); + *min = diff < *min ? diff : *min; + *max = diff > *max ? 
diff : *max; + } + } +} + #if CONFIG_VP9_HIGHBITDEPTH unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) { int i, j; @@ -87,6 +190,22 @@ unsigned int vp9_highbd_avg_4x4_c(const uint8_t *s8, int p) { return (sum + 8) >> 4; } + +void vp9_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8, + int dp, int *min, int *max) { + int i, j; + const uint16_t* s = CONVERT_TO_SHORTPTR(s8); + const uint16_t* d = CONVERT_TO_SHORTPTR(d8); + *min = 255; + *max = 0; + for (i = 0; i < 8; ++i, s += p, d += dp) { + for (j = 0; j < 8; ++j) { + int diff = abs(s[j]-d[j]); + *min = diff < *min ? diff : *min; + *max = diff > *max ? diff : *max; + } + } +} #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/vp9/encoder/vp9_bitstream.c b/media/libvpx/vp9/encoder/vp9_bitstream.c index b24fe2950a1..d20e067669f 100644 --- a/media/libvpx/vp9/encoder/vp9_bitstream.c +++ b/media/libvpx/vp9/encoder/vp9_bitstream.c @@ -78,8 +78,8 @@ static void prob_diff_update(const vp9_tree_index *tree, static void write_selected_tx_size(const VP9_COMMON *cm, const MACROBLOCKD *xd, vp9_writer *w) { - TX_SIZE tx_size = xd->mi[0].src_mi->mbmi.tx_size; - BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type; + TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size; + BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; const vp9_prob *const tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); @@ -201,7 +201,7 @@ static void write_segment_id(vp9_writer *w, const struct segmentation *seg, // This function encodes the reference frame static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd, vp9_writer *w) { - const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int is_compound = has_second_ref(mbmi); const int segment_id = mbmi->segment_id; @@ -268,8 +268,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, 
xd)); if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && - !(is_inter && - (skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) { + !(is_inter && skip)) { write_selected_tx_size(cm, xd, w); } @@ -339,9 +338,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, } static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd, - MODE_INFO *mi_8x8, vp9_writer *w) { + MODE_INFO **mi_8x8, vp9_writer *w) { const struct segmentation *const seg = &cm->seg; - const MODE_INFO *const mi = mi_8x8; + const MODE_INFO *const mi = mi_8x8[0]; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const MB_MODE_INFO *const mbmi = &mi->mbmi; @@ -382,8 +381,8 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; MODE_INFO *m; - xd->mi = cm->mi + (mi_row * cm->mi_stride + mi_col); - m = xd->mi; + xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + m = xd->mi[0]; set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type], @@ -437,7 +436,7 @@ static void write_modes_sb(VP9_COMP *cpi, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - m = cm->mi[mi_row * cm->mi_stride + mi_col].src_mi; + m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]; partition = partition_lookup[bsl][m->mbmi.sb_type]; write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); @@ -929,8 +928,8 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; - vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * - mi_cols_aligned_to_sb(cm->mi_cols)); + memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols)); for (tile_row = 0; tile_row < tile_rows; tile_row++) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { diff --git 
a/media/libvpx/vp9/encoder/vp9_blockiness.c b/media/libvpx/vp9/encoder/vp9_blockiness.c new file mode 100644 index 00000000000..b8629bd3bb5 --- /dev/null +++ b/media/libvpx/vp9/encoder/vp9_blockiness.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vp9_rtcd.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_convolve.h" +#include "vp9/common/vp9_filter.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +static int horizontal_filter(const uint8_t *s) { + return (s[1] - s[-2]) * 2 + (s[-1] - s[0]) * 6; +} + +static int vertical_filter(const uint8_t *s, int p) { + return (s[p] - s[-2 * p]) * 2 + (s[-p] - s[0]) * 6; +} + +static int variance(int sum, int sum_squared, int size) { + return sum_squared / size - (sum / size) * (sum / size); +} +// Calculate a blockiness level for a vertical block edge. +// This function returns a new blockiness metric that's defined as + +// p0 p1 p2 p3 +// q0 q1 q2 q3 +// block edge -> +// r0 r1 r2 r3 +// s0 s1 s2 s3 + +// blockiness = p0*-2+q0*6+r0*-6+s0*2 + +// p1*-2+q1*6+r1*-6+s1*2 + +// p2*-2+q2*6+r2*-6+s2*2 + +// p3*-2+q3*6+r3*-6+s3*2 ; + +// reconstructed_blockiness = abs(blockiness from reconstructed buffer - +// blockiness from source buffer,0) +// +// I make the assumption that flat blocks are much more visible than high +// contrast blocks. 
As such, I scale the result of the blockiness calc +// by dividing the blockiness by the variance of the pixels on either side +// of the edge as follows: +// var_0 = (q0^2+q1^2+q2^2+q3^2) - ((q0 + q1 + q2 + q3) / 4 )^2 +// var_1 = (r0^2+r1^2+r2^2+r3^2) - ((r0 + r1 + r2 + r3) / 4 )^2 +// The returned blockiness is the scaled value +// Reconstructed blockiness / ( 1 + var_0 + var_1 ) ; +int blockiness_vertical(const uint8_t *s, int sp, const uint8_t *r, int rp, + int size) { + int s_blockiness = 0; + int r_blockiness = 0; + int sum_0 = 0; + int sum_sq_0 = 0; + int sum_1 = 0; + int sum_sq_1 = 0; + int i; + int var_0; + int var_1; + for (i = 0; i < size; ++i, s += sp, r += rp) { + s_blockiness += horizontal_filter(s); + r_blockiness += horizontal_filter(r); + sum_0 += s[0]; + sum_sq_0 += s[0]*s[0]; + sum_1 += s[-1]; + sum_sq_1 += s[-1]*s[-1]; + } + var_0 = variance(sum_0, sum_sq_0, size); + var_1 = variance(sum_1, sum_sq_1, size); + r_blockiness = abs(r_blockiness); + s_blockiness = abs(s_blockiness); + + if (r_blockiness > s_blockiness) + return (r_blockiness - s_blockiness) / (1 + var_0 + var_1); + else + return 0; +} + +// Calculate a blockiness level for a horizontal block edge +// same as above. 
+int blockiness_horizontal(const uint8_t *s, int sp, const uint8_t *r, int rp, + int size) { + int s_blockiness = 0; + int r_blockiness = 0; + int sum_0 = 0; + int sum_sq_0 = 0; + int sum_1 = 0; + int sum_sq_1 = 0; + int i; + int var_0; + int var_1; + for (i = 0; i < size; ++i, ++s, ++r) { + s_blockiness += vertical_filter(s, sp); + r_blockiness += vertical_filter(r, rp); + sum_0 += s[0]; + sum_sq_0 += s[0] * s[0]; + sum_1 += s[-sp]; + sum_sq_1 += s[-sp] * s[-sp]; + } + var_0 = variance(sum_0, sum_sq_0, size); + var_1 = variance(sum_1, sum_sq_1, size); + r_blockiness = abs(r_blockiness); + s_blockiness = abs(s_blockiness); + + if (r_blockiness > s_blockiness) + return (r_blockiness - s_blockiness) / (1 + var_0 + var_1); + else + return 0; +} + +// This function returns the blockiness for the entire frame currently by +// looking at all borders in steps of 4. +double vp9_get_blockiness(const unsigned char *img1, int img1_pitch, + const unsigned char *img2, int img2_pitch, + int width, int height ) { + double blockiness = 0; + int i, j; + vp9_clear_system_state(); + for (i = 0; i < height; i += 4, img1 += img1_pitch * 4, + img2 += img2_pitch * 4) { + for (j = 0; j < width; j += 4) { + if (i > 0 && i < height && j > 0 && j < width) { + blockiness += blockiness_vertical(img1 + j, img1_pitch, + img2 + j, img2_pitch, 4); + blockiness += blockiness_horizontal(img1 + j, img1_pitch, + img2 + j, img2_pitch, 4); + } + } + } + blockiness /= width * height / 16; + return blockiness; +} diff --git a/media/libvpx/vp9/encoder/vp9_dct.c b/media/libvpx/vp9/encoder/vp9_dct.c index 41f72f89bb9..414d2bb1502 100644 --- a/media/libvpx/vp9/encoder/vp9_dct.c +++ b/media/libvpx/vp9/encoder/vp9_dct.c @@ -14,6 +14,7 @@ #include "./vpx_config.h" #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_systemdependent.h" @@ -417,8 +418,8 @@ void vp9_fdct8x8_quant_c(const int16_t *input, int stride, 
(void)quant_shift_ptr; (void)iscan; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { // Quantization pass: All coefficients with index >= zero_flag are diff --git a/media/libvpx/vp9/encoder/vp9_denoiser.c b/media/libvpx/vp9/encoder/vp9_denoiser.c index cf67e115e77..08134e152aa 100644 --- a/media/libvpx/vp9/encoder/vp9_denoiser.c +++ b/media/libvpx/vp9/encoder/vp9_denoiser.c @@ -203,7 +203,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, int sse_diff = ctx->zeromv_sse - ctx->newmv_sse; MV_REFERENCE_FRAME frame; MACROBLOCKD *filter_mbd = &mb->e_mbd; - MB_MODE_INFO *mbmi = &filter_mbd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &filter_mbd->mi[0]->mbmi; MB_MODE_INFO saved_mbmi; int i, j; struct buf_2d saved_dst[MAX_MB_PLANE]; @@ -357,7 +357,7 @@ static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) { assert(dest.y_height == src.y_height); for (r = 0; r < dest.y_height; ++r) { - vpx_memcpy(destbuf, srcbuf, dest.y_width); + memcpy(destbuf, srcbuf, dest.y_width); destbuf += dest.y_stride; srcbuf += src.y_stride; } diff --git a/media/libvpx/vp9/encoder/vp9_encodeframe.c b/media/libvpx/vp9/encoder/vp9_encodeframe.c index 9aeb9f8f7a0..49e8887687d 100644 --- a/media/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/media/libvpx/vp9/encoder/vp9_encodeframe.c @@ -13,8 +13,10 @@ #include #include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" +#include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" #include "vp9/common/vp9_common.h" @@ -99,9 +101,9 @@ static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { }; #endif // CONFIG_VP9_HIGHBITDEPTH -static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, - const struct buf_2d *ref, - BLOCK_SIZE bs) { +unsigned int 
vp9_get_sby_perpixel_variance(VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs) { unsigned int sse; const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse); @@ -109,7 +111,7 @@ static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, } #if CONFIG_VP9_HIGHBITDEPTH -static unsigned int high_get_sby_perpixel_variance( +unsigned int vp9_high_get_sby_perpixel_variance( VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) { unsigned int var, sse; switch (bd) { @@ -165,21 +167,6 @@ static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x, return BLOCK_8X8; } -static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi, - MACROBLOCK *x, - int mi_row, - int mi_col) { - unsigned int var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, - mi_row, mi_col, - BLOCK_64X64); - if (var < 4) - return BLOCK_64X64; - else if (var < 10) - return BLOCK_32X32; - else - return BLOCK_16X16; -} - // Lighter version of set_offsets that only sets the mode info // pointers. static INLINE void set_mode_info_offsets(VP9_COMMON *const cm, @@ -187,8 +174,8 @@ static INLINE void set_mode_info_offsets(VP9_COMMON *const cm, int mi_row, int mi_col) { const int idx_str = xd->mi_stride * mi_row + mi_col; - xd->mi = cm->mi + idx_str; - xd->mi[0].src_mi = &xd->mi[0]; + xd->mi = cm->mi_grid_visible + idx_str; + xd->mi[0] = cm->mi + idx_str; } static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, @@ -205,7 +192,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, set_mode_info_offsets(cm, xd, mi_row, mi_col); - mbmi = &xd->mi[0].src_mi->mbmi; + mbmi = &xd->mi[0]->mbmi; // Set up destination pointers. 
vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); @@ -254,7 +241,7 @@ static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd, for (j = 0; j < block_height; ++j) for (i = 0; i < block_width; ++i) { if (mi_row + j < cm->mi_rows && mi_col + i < cm->mi_cols) - xd->mi[j * xd->mi_stride + i].src_mi = &xd->mi[0]; + xd->mi[j * xd->mi_stride + i] = xd->mi[0]; } } @@ -264,7 +251,7 @@ static void set_block_size(VP9_COMP * const cpi, BLOCK_SIZE bsize) { if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { set_mode_info_offsets(&cpi->common, xd, mi_row, mi_col); - xd->mi[0].src_mi->mbmi.sb_type = bsize; + xd->mi[0]->mbmi.sb_type = bsize; } } @@ -375,7 +362,7 @@ static void get_variance(var *v) { ((v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count); } -void sum_2_variances(const var *a, const var *b, var *r) { +static void sum_2_variances(const var *a, const var *b, var *r) { assert(a->log2_count == b->log2_count); fill_variance(a->sum_square_error + b->sum_square_error, a->sum_error + b->sum_error, a->log2_count + 1, r); @@ -405,18 +392,21 @@ static int set_vt_partitioning(VP9_COMP *cpi, variance_node vt; const int block_width = num_8x8_blocks_wide_lookup[bsize]; const int block_height = num_8x8_blocks_high_lookup[bsize]; + const int low_res = (cm->width <= 352 && cm->height <= 288); assert(block_height == block_width); tree_to_node(data, bsize, &vt); - if (force_split) + if (force_split == 1) return 0; // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if // variance is below threshold, otherwise split will be selected. // No check for vert/horiz split as too few samples for variance. if (bsize == bsize_min) { - get_variance(&vt.part_variances->none); + // Variance already computed to set the force_split. 
+ if (low_res || cm->frame_type == KEY_FRAME) + get_variance(&vt.part_variances->none); if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && vt.part_variances->none.variance < threshold) { @@ -425,11 +415,10 @@ static int set_vt_partitioning(VP9_COMP *cpi, } return 0; } else if (bsize > bsize_min) { - // Variance is already computed for 32x32 blocks to set the force_split. - if (bsize != BLOCK_32X32) + // Variance already computed to set the force_split. + if (low_res || cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none); - // For key frame or low_res: for bsize above 32X32 or very high variance, - // take split. + // For key frame: take split for bsize above 32X32 or very high variance. if (cm->frame_type == KEY_FRAME && (bsize > BLOCK_32X32 || vt.part_variances->none.variance > (threshold << 4))) { @@ -445,11 +434,12 @@ static int set_vt_partitioning(VP9_COMP *cpi, // Check vertical split. if (mi_row + block_height / 2 < cm->mi_rows) { + BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); get_variance(&vt.part_variances->vert[0]); get_variance(&vt.part_variances->vert[1]); if (vt.part_variances->vert[0].variance < threshold && - vt.part_variances->vert[1].variance < threshold) { - BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); + vt.part_variances->vert[1].variance < threshold && + get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { set_block_size(cpi, xd, mi_row, mi_col, subsize); set_block_size(cpi, xd, mi_row, mi_col + block_width / 2, subsize); return 1; @@ -457,11 +447,12 @@ static int set_vt_partitioning(VP9_COMP *cpi, } // Check horizontal split. 
if (mi_col + block_width / 2 < cm->mi_cols) { + BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); get_variance(&vt.part_variances->horz[0]); get_variance(&vt.part_variances->horz[1]); if (vt.part_variances->horz[0].variance < threshold && - vt.part_variances->horz[1].variance < threshold) { - BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); + vt.part_variances->horz[1].variance < threshold && + get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { set_block_size(cpi, xd, mi_row, mi_col, subsize); set_block_size(cpi, xd, mi_row + block_height / 2, mi_col, subsize); return 1; @@ -473,48 +464,184 @@ static int set_vt_partitioning(VP9_COMP *cpi, return 0; } +// Set the variance split thresholds for following the block sizes: +// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16, +// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is +// currently only used on key frame. +static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { + VP9_COMMON *const cm = &cpi->common; + const int is_key_frame = (cm->frame_type == KEY_FRAME); + const int threshold_multiplier = is_key_frame ? 
20 : 1; + const int64_t threshold_base = (int64_t)(threshold_multiplier * + cpi->y_dequant[q][1]); + if (is_key_frame) { + thresholds[0] = threshold_base; + thresholds[1] = threshold_base >> 2; + thresholds[2] = threshold_base >> 2; + thresholds[3] = threshold_base << 2; + } else { + thresholds[1] = threshold_base; + if (cm->width <= 352 && cm->height <= 288) { + thresholds[0] = threshold_base >> 2; + thresholds[2] = threshold_base << 3; + } else { + thresholds[0] = threshold_base; + thresholds[1] = (5 * threshold_base) >> 2; + if (cm->width >= 1920 && cm->height >= 1080) + thresholds[1] = (7 * threshold_base) >> 2; + thresholds[2] = threshold_base << cpi->oxcf.speed; + } + } +} -void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) { +void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) { + VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; + const int is_key_frame = (cm->frame_type == KEY_FRAME); if (sf->partition_search_type != VAR_BASED_PARTITION && sf->partition_search_type != REFERENCE_PARTITION) { return; } else { - VP9_COMMON *const cm = &cpi->common; - const int is_key_frame = (cm->frame_type == KEY_FRAME); - const int threshold_multiplier = is_key_frame ? 80 : 4; - const int64_t threshold_base = (int64_t)(threshold_multiplier * - vp9_convert_qindex_to_q(q, cm->bit_depth)); - - // TODO(marpan): Allow 4x4 partitions for inter-frames. - // use_4x4_partition = (variance4x4downsample[i2 + j] == 1); - // If 4x4 partition is not used, then 8x8 partition will be selected - // if variance of 16x16 block is very high, so use larger threshold - // for 16x16 (threshold_bsize_min) in that case. + set_vbp_thresholds(cpi, cpi->vbp_thresholds, q); + // The thresholds below are not changed locally. 
if (is_key_frame) { - cpi->vbp_threshold = threshold_base >> 2; - cpi->vbp_threshold_bsize_max = threshold_base; - cpi->vbp_threshold_bsize_min = threshold_base << 2; - cpi->vbp_threshold_16x16 = cpi->vbp_threshold; + cpi->vbp_threshold_sad = 0; cpi->vbp_bsize_min = BLOCK_8X8; } else { - cpi->vbp_threshold = threshold_base; - if (cm->width <= 352 && cm->height <= 288) { - cpi->vbp_threshold_bsize_max = threshold_base >> 2; - cpi->vbp_threshold_bsize_min = threshold_base << 3; - } else { - cpi->vbp_threshold_bsize_max = threshold_base; - cpi->vbp_threshold_bsize_min = threshold_base << cpi->oxcf.speed; - } - cpi->vbp_threshold_16x16 = cpi->vbp_threshold_bsize_min; + if (cm->width <= 352 && cm->height <= 288) + cpi->vbp_threshold_sad = 100; + else + cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 ? + (cpi->y_dequant[q][1] << 1) : 1000; cpi->vbp_bsize_min = BLOCK_16X16; } + cpi->vbp_threshold_minmax = 15 + (q >> 3); + } +} + +// Compute the minmax over the 8x8 subblocks. +static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, + int dp, int x16_idx, int y16_idx, +#if CONFIG_VP9_HIGHBITDEPTH + int highbd_flag, +#endif + int pixels_wide, + int pixels_high) { + int k; + int minmax_max = 0; + int minmax_min = 255; + // Loop over the 4 8x8 subblocks. 
+ for (k = 0; k < 4; k++) { + int x8_idx = x16_idx + ((k & 1) << 3); + int y8_idx = y16_idx + ((k >> 1) << 3); + int min = 0; + int max = 0; + if (x8_idx < pixels_wide && y8_idx < pixels_high) { +#if CONFIG_VP9_HIGHBITDEPTH + if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp, + d + y8_idx * dp + x8_idx, dp, + &min, &max); + } else { + vp9_minmax_8x8(s + y8_idx * sp + x8_idx, sp, + d + y8_idx * dp + x8_idx, dp, + &min, &max); + } +#else + vp9_minmax_8x8(s + y8_idx * sp + x8_idx, sp, + d + y8_idx * dp + x8_idx, dp, + &min, &max); +#endif + if ((max - min) > minmax_max) + minmax_max = (max - min); + if ((max - min) < minmax_min) + minmax_min = (max - min); + } + } + return (minmax_max - minmax_min); +} + +static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, + int dp, int x8_idx, int y8_idx, v8x8 *vst, +#if CONFIG_VP9_HIGHBITDEPTH + int highbd_flag, +#endif + int pixels_wide, + int pixels_high, + int is_key_frame) { + int k; + for (k = 0; k < 4; k++) { + int x4_idx = x8_idx + ((k & 1) << 2); + int y4_idx = y8_idx + ((k >> 1) << 2); + unsigned int sse = 0; + int sum = 0; + if (x4_idx < pixels_wide && y4_idx < pixels_high) { + int s_avg; + int d_avg = 128; +#if CONFIG_VP9_HIGHBITDEPTH + if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { + s_avg = vp9_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); + if (!is_key_frame) + d_avg = vp9_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); + } else { + s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp); + if (!is_key_frame) + d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp); + } +#else + s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp); + if (!is_key_frame) + d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp); +#endif + sum = s_avg - d_avg; + sse = sum * sum; + } + fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); + } +} + +static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, + int dp, int x16_idx, int y16_idx, v16x16 *vst, +#if 
CONFIG_VP9_HIGHBITDEPTH + int highbd_flag, +#endif + int pixels_wide, + int pixels_high, + int is_key_frame) { + int k; + for (k = 0; k < 4; k++) { + int x8_idx = x16_idx + ((k & 1) << 3); + int y8_idx = y16_idx + ((k >> 1) << 3); + unsigned int sse = 0; + int sum = 0; + if (x8_idx < pixels_wide && y8_idx < pixels_high) { + int s_avg; + int d_avg = 128; +#if CONFIG_VP9_HIGHBITDEPTH + if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { + s_avg = vp9_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); + if (!is_key_frame) + d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); + } else { + s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp); + if (!is_key_frame) + d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp); + } +#else + s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp); + if (!is_key_frame) + d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp); +#endif + sum = s_avg - d_avg; + sse = sum * sum; + } + fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); } } // This function chooses partitioning based on the variance between source and // reconstructed last, where variance is computed for down-sampled inputs. -static void choose_partitioning(VP9_COMP *cpi, +static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *x, int mi_row, int mi_col) { @@ -523,12 +650,14 @@ static void choose_partitioning(VP9_COMP *cpi, int i, j, k, m; v64x64 vt; v16x16 vt2[16]; - int force_split[5]; + int force_split[21]; uint8_t *s; const uint8_t *d; int sp; int dp; int pixels_wide = 64, pixels_high = 64; + int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], + cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]}; // Always use 4x4 partition for key frame. const int is_key_frame = (cm->frame_type == KEY_FRAME); @@ -541,6 +670,11 @@ static void choose_partitioning(VP9_COMP *cpi, const uint8_t *const map = cm->seg.update_map ? 
cpi->segmentation_map : cm->last_frame_seg_map; segment_id = vp9_get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); + + if (cyclic_refresh_segment_id_boosted(segment_id)) { + int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); + set_vbp_thresholds(cpi, thresholds, q); + } } set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); @@ -553,25 +687,28 @@ static void choose_partitioning(VP9_COMP *cpi, s = x->plane[0].src.buf; sp = x->plane[0].src.stride; - if (!is_key_frame) { - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + if (!is_key_frame && !(is_one_pass_cbr_svc(cpi) && + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) { + // In the case of spatial/temporal scalable coding, the assumption here is + // that the temporal reference frame will always be of type LAST_FRAME. + // TODO(marpan): If that assumption is broken, we need to revisit this code. + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; unsigned int uv_sad; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + const YV12_BUFFER_CONFIG *yv12_g = NULL; unsigned int y_sad, y_sad_g; - BLOCK_SIZE bsize; - if (mi_row + 4 < cm->mi_rows && mi_col + 4 < cm->mi_cols) - bsize = BLOCK_64X64; - else if (mi_row + 4 < cm->mi_rows && mi_col + 4 >= cm->mi_cols) - bsize = BLOCK_32X64; - else if (mi_row + 4 >= cm->mi_rows && mi_col + 4 < cm->mi_cols) - bsize = BLOCK_64X32; - else - bsize = BLOCK_32X32; + const BLOCK_SIZE bsize = BLOCK_32X32 + + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows); assert(yv12 != NULL); + if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) { + // For now, GOLDEN will not be used for non-zero spatial layers, since + // it may not be a temporal reference. 
+ yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + } + if (yv12_g && yv12_g != yv12) { vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); @@ -591,7 +728,7 @@ static void choose_partitioning(VP9_COMP *cpi, mbmi->mv[0].as_int = 0; mbmi->interp_filter = BILINEAR; - y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize); + y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); if (y_sad_g < y_sad) { vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); @@ -620,6 +757,19 @@ static void choose_partitioning(VP9_COMP *cpi, d = xd->plane[0].dst.buf; dp = xd->plane[0].dst.stride; + + // If the y_sad is very small, take 64x64 as partition and exit. + // Don't check on boosted segment for now, as 64x64 is suppressed there. + if (segment_id == CR_SEGMENT_ID_BASE && + y_sad < cpi->vbp_threshold_sad) { + const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; + const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; + if (mi_col + block_width / 2 < cm->mi_cols && + mi_row + block_height / 2 < cm->mi_rows) { + set_block_size(cpi, xd, mi_row, mi_col, BLOCK_64X64); + return 0; + } + } } else { d = VP9_VAR_OFFS; dp = 0; @@ -642,6 +792,7 @@ static void choose_partitioning(VP9_COMP *cpi, } // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, + // 5-20 for the 16x16 blocks. force_split[0] = 0; // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances // for splits. 
@@ -653,46 +804,51 @@ static void choose_partitioning(VP9_COMP *cpi, for (j = 0; j < 4; j++) { const int x16_idx = x32_idx + ((j & 1) << 4); const int y16_idx = y32_idx + ((j >> 1) << 4); + const int split_index = 5 + i2 + j; v16x16 *vst = &vt.split[i].split[j]; + force_split[split_index] = 0; variance4x4downsample[i2 + j] = 0; if (!is_key_frame) { - for (k = 0; k < 4; k++) { - int x8_idx = x16_idx + ((k & 1) << 3); - int y8_idx = y16_idx + ((k >> 1) << 3); - unsigned int sse = 0; - int sum = 0; - if (x8_idx < pixels_wide && y8_idx < pixels_high) { - int s_avg, d_avg; + fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - s_avg = vp9_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); - d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); - } else { - s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp); - d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp); - } -#else - s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp); - d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp); + xd->cur_buf->flags, #endif - sum = s_avg - d_avg; - sse = sum * sum; - } - // If variance is based on 8x8 downsampling, we stop here and have - // one sample for 8x8 block (so use 1 for count in fill_variance), - // which of course means variance = 0 for 8x8 block. - fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); - } + pixels_wide, + pixels_high, + is_key_frame); fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); - // For low-resolution, compute the variance based on 8x8 down-sampling, - // and if it is large (above the threshold) we go down for 4x4. - // For key frame we always go down to 4x4. 
- if (low_res) - get_variance(&vt.split[i].split[j].part_variances.none); + get_variance(&vt.split[i].split[j].part_variances.none); + if (vt.split[i].split[j].part_variances.none.variance > + thresholds[2]) { + // 16X16 variance is above threshold for split, so force split to 8x8 + // for this 16x16 block (this also forces splits for upper levels). + force_split[split_index] = 1; + force_split[i + 1] = 1; + force_split[0] = 1; + } else if (vt.split[i].split[j].part_variances.none.variance > + thresholds[1] && + !cyclic_refresh_segment_id_boosted(segment_id)) { + // We have some nominal amount of 16x16 variance (based on average), + // compute the minmax over the 8x8 sub-blocks, and if above threshold, + // force split to 8x8 block for this 16x16 block. + int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx, +#if CONFIG_VP9_HIGHBITDEPTH + xd->cur_buf->flags, +#endif + pixels_wide, pixels_high); + if (minmax > cpi->vbp_threshold_minmax) { + force_split[split_index] = 1; + force_split[i + 1] = 1; + force_split[0] = 1; + } + } } - if (is_key_frame || (low_res && + // TODO(marpan): There is an issue with variance based on 4x4 average in + // svc mode, don't allow it for now. + if (is_key_frame || (low_res && !cpi->use_svc && vt.split[i].split[j].part_variances.none.variance > - (cpi->vbp_threshold << 1))) { + (thresholds[1] << 1))) { + force_split[split_index] = 0; // Go down to 4x4 down-sampling for variance. variance4x4downsample[i2 + j] = 1; for (k = 0; k < 4; k++) { @@ -700,47 +856,18 @@ static void choose_partitioning(VP9_COMP *cpi, int y8_idx = y16_idx + ((k >> 1) << 3); v8x8 *vst2 = is_key_frame ? 
&vst->split[k] : &vt2[i2 + j].split[k]; - for (m = 0; m < 4; m++) { - int x4_idx = x8_idx + ((m & 1) << 2); - int y4_idx = y8_idx + ((m >> 1) << 2); - unsigned int sse = 0; - int sum = 0; - if (x4_idx < pixels_wide && y4_idx < pixels_high) { - int d_avg = 128; + fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2, #if CONFIG_VP9_HIGHBITDEPTH - int s_avg; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - s_avg = vp9_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); - if (cm->frame_type != KEY_FRAME) - d_avg = vp9_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); - } else { - s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp); - if (cm->frame_type != KEY_FRAME) - d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp); - } -#else - int s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp); - if (!is_key_frame) - d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp); + xd->cur_buf->flags, #endif - sum = s_avg - d_avg; - sse = sum * sum; - } - // If variance is based on 4x4 down-sampling, we stop here and have - // one sample for 4x4 block (so use 1 for count in fill_variance), - // which of course means variance = 0 for 4x4 block. - fill_variance(sse, sum, 0, &vst2->split[m].part_variances.none); - } + pixels_wide, + pixels_high, + is_key_frame); } } } } - // No 64x64 blocks on segments other than base (un-boosted) segment, - // so force split. - if (cyclic_refresh_segment_id_boosted(segment_id)) - force_split[0] = 1; - // Fill the rest of the variance tree by summing split partition values. for (i = 0; i < 4; i++) { const int i2 = i << 2; @@ -756,29 +883,32 @@ static void choose_partitioning(VP9_COMP *cpi, fill_variance_tree(&vt.split[i], BLOCK_32X32); // If variance of this 32x32 block is above the threshold, force the block // to split. This also forces a split on the upper (64x64) level. 
- get_variance(&vt.split[i].part_variances.none); - if (vt.split[i].part_variances.none.variance > cpi->vbp_threshold) { - force_split[i + 1] = 1; - force_split[0] = 1; + if (!force_split[i + 1]) { + get_variance(&vt.split[i].part_variances.none); + if (vt.split[i].part_variances.none.variance > thresholds[1]) { + force_split[i + 1] = 1; + force_split[0] = 1; + } } } - if (!force_split[0]) + if (!force_split[0]) { fill_variance_tree(&vt, BLOCK_64X64); + get_variance(&vt.part_variances.none); + } - // Now go through the entire structure, splitting every block size until + // Now go through the entire structure, splitting every block size until // we get to one that's got a variance lower than our threshold. if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || !set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col, - cpi->vbp_threshold_bsize_max, BLOCK_16X16, - force_split[0])) { + thresholds[0], BLOCK_16X16, force_split[0])) { for (i = 0; i < 4; ++i) { const int x32_idx = ((i & 1) << 2); const int y32_idx = ((i >> 1) << 2); const int i2 = i << 2; if (!set_vt_partitioning(cpi, xd, &vt.split[i], BLOCK_32X32, (mi_row + y32_idx), (mi_col + x32_idx), - cpi->vbp_threshold, - BLOCK_16X16, force_split[i + 1])) { + thresholds[1], BLOCK_16X16, + force_split[i + 1])) { for (j = 0; j < 4; ++j) { const int x16_idx = ((j & 1) << 1); const int y16_idx = ((j >> 1) << 1); @@ -791,8 +921,9 @@ static void choose_partitioning(VP9_COMP *cpi, if (!set_vt_partitioning(cpi, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx, mi_col + x32_idx + x16_idx, - cpi->vbp_threshold_16x16, - cpi->vbp_bsize_min, 0)) { + thresholds[2], + cpi->vbp_bsize_min, + force_split[5 + i2 + j])) { for (k = 0; k < 4; ++k) { const int x8_idx = (k & 1); const int y8_idx = (k >> 1); @@ -801,8 +932,7 @@ static void choose_partitioning(VP9_COMP *cpi, BLOCK_8X8, mi_row + y32_idx + y16_idx + y8_idx, mi_col + x32_idx + x16_idx + x8_idx, - cpi->vbp_threshold_bsize_min, - BLOCK_8X8, 0)) { + thresholds[3], 
BLOCK_8X8, 0)) { set_block_size(cpi, xd, (mi_row + y32_idx + y16_idx + y8_idx), (mi_col + x32_idx + x16_idx + x8_idx), @@ -820,6 +950,7 @@ static void choose_partitioning(VP9_COMP *cpi, } } } + return 0; } static void update_state(VP9_COMP *cpi, ThreadData *td, @@ -834,8 +965,8 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; MODE_INFO *mi = &ctx->mic; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; - MODE_INFO *mi_addr = &xd->mi[0]; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi_addr = xd->mi[0]; const struct segmentation *const seg = &cm->seg; const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type]; const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type]; @@ -853,7 +984,6 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, assert(mi->mbmi.sb_type == bsize); *mi_addr = *mi; - mi_addr->src_mi = mi_addr; // If segmentation in use if (seg->enabled) { @@ -867,7 +997,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, // Else for cyclic refresh mode update the segment map, set the segment id // and then update the quantizer. 
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0].src_mi->mbmi, mi_row, + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row, mi_col, bsize, ctx->rate, ctx->dist, x->skip); } @@ -894,7 +1024,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, for (x_idx = 0; x_idx < mi_width; x_idx++) if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { - xd->mi[x_idx + y * mis].src_mi = mi_addr; + xd->mi[x_idx + y * mis] = mi_addr; } if (cpi->oxcf.aq_mode) @@ -914,8 +1044,8 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, } x->skip = ctx->skip; - vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, - sizeof(uint8_t) * ctx->num_4x4_blk); + memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, + sizeof(uint8_t) * ctx->num_4x4_blk); if (!output_enabled) return; @@ -967,10 +1097,10 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { MV_REF *const mv = frame_mv + w; - mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0]; - mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1]; - mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int; - mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int; + mv->ref_frame[0] = mi->mbmi.ref_frame[0]; + mv->ref_frame[1] = mi->mbmi.ref_frame[1]; + mv->mv[0].as_int = mi->mbmi.mv[0].as_int; + mv->mv[1].as_int = mi->mbmi.mv[1].as_int; } } } @@ -993,13 +1123,13 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, RD_COST *rd_cost, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; INTERP_FILTER filter_ref; if (xd->up_available) - filter_ref = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter; + filter_ref = 
xd->mi[-xd->mi_stride]->mbmi.interp_filter; else if (xd->left_available) - filter_ref = xd->mi[-1].src_mi->mbmi.interp_filter; + filter_ref = xd->mi[-1]->mbmi.interp_filter; else filter_ref = EIGHTTAP; @@ -1014,7 +1144,7 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, mbmi->mv[0].as_int = 0; mbmi->interp_filter = filter_ref; - xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = 0; + xd->mi[0]->bmi[0].as_mv[0].as_int = 0; x->skip = 1; vp9_rd_cost_init(rd_cost); @@ -1053,7 +1183,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, x->use_lp32x32fdct = 1; set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - mbmi = &xd->mi[0].src_mi->mbmi; + mbmi = &xd->mi[0]->mbmi; mbmi->sb_type = bsize; for (i = 0; i < MAX_MB_PLANE; ++i) { @@ -1073,13 +1203,15 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { x->source_variance = - high_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize, xd->bd); + vp9_high_get_sby_perpixel_variance(cpi, &x->plane[0].src, + bsize, xd->bd); } else { x->source_variance = - get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); + vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); } #else - x->source_variance = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); + x->source_variance = + vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); #endif // CONFIG_VP9_HIGHBITDEPTH // Save rdmult before it might be changed, so it can be restored later. @@ -1103,8 +1235,9 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, } else if (aq_mode == CYCLIC_REFRESH_AQ) { const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - // If segment 1, use rdmult for that segment. - if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col)) + // If segment is boosted, use rdmult for that segment. 
+ if (cyclic_refresh_segment_id_boosted( + vp9_get_segment_id(cm, map, bsize, mi_row, mi_col))) x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); } @@ -1150,7 +1283,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, static void update_stats(VP9_COMMON *cm, ThreadData *td) { const MACROBLOCK *x = &td->mb; const MACROBLOCKD *const xd = &x->e_mbd; - const MODE_INFO *const mi = xd->mi[0].src_mi; + const MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; @@ -1216,22 +1349,22 @@ static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col, int mi_width = num_8x8_blocks_wide_lookup[bsize]; int mi_height = num_8x8_blocks_high_lookup[bsize]; for (p = 0; p < MAX_MB_PLANE; p++) { - vpx_memcpy( + memcpy( xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), a + num_4x4_blocks_wide * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); - vpx_memcpy( + memcpy( xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), l + num_4x4_blocks_high * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(xd->above_seg_context + mi_col, sa, - sizeof(*xd->above_seg_context) * mi_width); - vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, - sizeof(xd->left_seg_context[0]) * mi_height); + memcpy(xd->above_seg_context + mi_col, sa, + sizeof(*xd->above_seg_context) * mi_width); + memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, + sizeof(xd->left_seg_context[0]) * mi_height); } static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, @@ -1248,22 +1381,22 @@ static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, // buffer the above/left context information of the block in search. 
for (p = 0; p < MAX_MB_PLANE; ++p) { - vpx_memcpy( + memcpy( a + num_4x4_blocks_wide * p, xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); - vpx_memcpy( + memcpy( l + num_4x4_blocks_high * p, xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(sa, xd->above_seg_context + mi_col, - sizeof(*xd->above_seg_context) * mi_width); - vpx_memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), - sizeof(xd->left_seg_context[0]) * mi_height); + memcpy(sa, xd->above_seg_context + mi_col, + sizeof(*xd->above_seg_context) * mi_width); + memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), + sizeof(xd->left_seg_context[0]) * mi_height); } static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, @@ -1380,15 +1513,15 @@ static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in, int bw_in, int row8x8_remaining, int col8x8_remaining, - BLOCK_SIZE bsize, MODE_INFO *mi_8x8) { + BLOCK_SIZE bsize, MODE_INFO **mi_8x8) { int bh = bh_in; int r, c; for (r = 0; r < MI_BLOCK_SIZE; r += bh) { int bw = bw_in; for (c = 0; c < MI_BLOCK_SIZE; c += bw) { const int index = r * mis + c; - mi_8x8[index].src_mi = mi + index; - mi_8x8[index].src_mi->mbmi.sb_type = find_partition_size(bsize, + mi_8x8[index] = mi + index; + mi_8x8[index]->mbmi.sb_type = find_partition_size(bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw); } } @@ -1400,7 +1533,7 @@ static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, // may not be allowed in which case this code attempts to choose the largest // allowable partition. 
static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, - MODE_INFO *mi_8x8, int mi_row, int mi_col, + MODE_INFO **mi_8x8, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; const int mis = cm->mi_stride; @@ -1419,8 +1552,8 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { int index = block_row * mis + block_col; - mi_8x8[index].src_mi = mi_upper_left + index; - mi_8x8[index].src_mi->mbmi.sb_type = bsize; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; } } } else { @@ -1447,7 +1580,7 @@ const struct { static void set_source_var_based_partition(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *const x, - MODE_INFO *mi_8x8, + MODE_INFO **mi_8x8, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; const int mis = cm->mi_stride; @@ -1470,7 +1603,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi, int use32x32 = 0; unsigned int thr = cpi->source_var_thresh; - vpx_memset(d32, 0, 4 * sizeof(diff)); + memset(d32, 0, 4 * sizeof(diff)); for (i = 0; i < 4; i++) { diff *d16[4]; @@ -1484,8 +1617,8 @@ static void set_source_var_based_partition(VP9_COMP *cpi, d16[j] = cpi->source_diff_var + offset + boffset; index = b_mi_row * mis + b_mi_col; - mi_8x8[index].src_mi = mi_upper_left + index; - mi_8x8[index].src_mi->mbmi.sb_type = BLOCK_16X16; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_16X16; // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition // size to further improve quality. 
@@ -1506,8 +1639,8 @@ static void set_source_var_based_partition(VP9_COMP *cpi, d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10); index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col; - mi_8x8[index].src_mi = mi_upper_left + index; - mi_8x8[index].src_mi->mbmi.sb_type = BLOCK_32X32; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_32X32; } } @@ -1518,8 +1651,8 @@ static void set_source_var_based_partition(VP9_COMP *cpi, // Use 64x64 partition if (is_larger_better) { - mi_8x8[0].src_mi = mi_upper_left; - mi_8x8[0].src_mi->mbmi.sb_type = BLOCK_64X64; + mi_8x8[0] = mi_upper_left; + mi_8x8[0]->mbmi.sb_type = BLOCK_64X64; } } } else { // partial in-image SB64 @@ -1536,16 +1669,15 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mi = xd->mi[0].src_mi; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MODE_INFO *const mi = xd->mi[0]; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type]; const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type]; const int x_mis = MIN(bw, cm->mi_cols - mi_col); const int y_mis = MIN(bh, cm->mi_rows - mi_row); - xd->mi[0] = ctx->mic; - xd->mi[0].src_mi = &xd->mi[0]; + *(xd->mi[0]) = ctx->mic; if (seg->enabled && cpi->oxcf.aq_mode) { // For in frame complexity AQ or variance AQ, copy segment_id from @@ -1585,10 +1717,10 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td, MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { MV_REF *const mv = frame_mv + w; - mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0]; - mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1]; - mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int; - mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int; + mv->ref_frame[0] = mi->mbmi.ref_frame[0]; + 
mv->ref_frame[1] = mi->mbmi.ref_frame[1]; + mv->mv[0].as_int = mi->mbmi.mv[0].as_int; + mv->mv[1].as_int = mi->mbmi.mv[1].as_int; } } } @@ -1640,9 +1772,9 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, if (bsize >= BLOCK_8X8) { const int idx_str = xd->mi_stride * mi_row + mi_col; - MODE_INFO *mi_8x8 = cm->mi[idx_str].src_mi; + MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str; ctx = partition_plane_context(xd, mi_row, mi_col, bsize); - subsize = mi_8x8[0].src_mi->mbmi.sb_type; + subsize = mi_8x8[0]->mbmi.sb_type; } else { ctx = 0; subsize = BLOCK_4X4; @@ -1696,7 +1828,7 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, - MODE_INFO *mi_8x8, TOKENEXTRA **tp, + MODE_INFO **mi_8x8, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate, int64_t *dist, @@ -1717,7 +1849,7 @@ static void rd_use_partition(VP9_COMP *cpi, RD_COST last_part_rdc, none_rdc, chosen_rdc; BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; - BLOCK_SIZE bs_type = mi_8x8[0].src_mi->mbmi.sb_type; + BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; int do_partition_search = 1; PICK_MODE_CONTEXT *ctx = &pc_tree->none; @@ -1751,7 +1883,7 @@ static void rd_use_partition(VP9_COMP *cpi, splits_below = 1; for (i = 0; i < 4; i++) { int jj = i >> 1, ii = i & 0x01; - MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss].src_mi; + MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss]; if (this_mi && this_mi->mbmi.sb_type >= sub_subsize) { splits_below = 0; } @@ -1776,7 +1908,7 @@ static void rd_use_partition(VP9_COMP *cpi, } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); - mi_8x8[0].src_mi->mbmi.sb_type = bs_type; + mi_8x8[0]->mbmi.sb_type = bs_type; pc_tree->partitioning = partition; } } @@ -1934,7 +2066,7 @@ static void rd_use_partition(VP9_COMP *cpi, // If last_part is better set the partitioning to that. 
if (last_part_rdc.rdcost < chosen_rdc.rdcost) { - mi_8x8[0].src_mi->mbmi.sb_type = bsize; + mi_8x8[0]->mbmi.sb_type = bsize; if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; chosen_rdc = last_part_rdc; @@ -1986,7 +2118,7 @@ static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { // // The min and max are assumed to have been initialized prior to calling this // function so repeat calls can accumulate a min and max of more than one sb64. -static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO *mi_8x8, +static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size, int bs_hist[BLOCK_SIZES]) { @@ -1998,7 +2130,7 @@ static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO *mi_8x8, // Check the sb_type for each block that belongs to this region. for (i = 0; i < sb_height_in_blocks; ++i) { for (j = 0; j < sb_width_in_blocks; ++j) { - MODE_INFO *mi = mi_8x8[index+j].src_mi; + MODE_INFO *mi = mi_8x8[index+j]; BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0; bs_hist[sb_type]++; *min_block_size = MIN(*min_block_size, sb_type); @@ -2025,15 +2157,14 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size) { VP9_COMMON *const cm = &cpi->common; - MODE_INFO *mi = xd->mi[0].src_mi; - const int left_in_image = xd->left_available && mi[-1].src_mi; - const int above_in_image = xd->up_available && mi[-xd->mi_stride].src_mi; + MODE_INFO **mi = xd->mi; + const int left_in_image = xd->left_available && mi[-1]; + const int above_in_image = xd->up_available && mi[-xd->mi_stride]; const int row8x8_remaining = tile->mi_row_end - mi_row; const int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; BLOCK_SIZE min_size = BLOCK_4X4; BLOCK_SIZE max_size = BLOCK_64X64; - int i = 0; int bs_hist[BLOCK_SIZES] = {0}; // Trap case where we do not have a prediction. 
@@ -2046,54 +2177,27 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, // passed in values for min and max as a starting point. // Find the min and max partition used in previous frame at this location if (cm->frame_type != KEY_FRAME) { - MODE_INFO *prev_mi = - cm->prev_mip + cm->mi_stride + 1 + mi_row * xd->mi_stride + mi_col; - + MODE_INFO **prev_mi = + &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist); } // Find the min and max partition sizes used in the left SB64 if (left_in_image) { - MODE_INFO *left_sb64_mi = mi[-MI_BLOCK_SIZE].src_mi; + MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE]; get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size, bs_hist); } // Find the min and max partition sizes used in the above SB64. if (above_in_image) { - MODE_INFO *above_sb64_mi = mi[-xd->mi_stride * MI_BLOCK_SIZE].src_mi; + MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE]; get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size, bs_hist); } - // adjust observed min and max + // Adjust observed min and max for "relaxed" auto partition case. 
if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { min_size = min_partition_size[min_size]; max_size = max_partition_size[max_size]; - } else if (cpi->sf.auto_min_max_partition_size == - CONSTRAIN_NEIGHBORING_MIN_MAX) { - // adjust the search range based on the histogram of the observed - // partition sizes from left, above the previous co-located blocks - int sum = 0; - int first_moment = 0; - int second_moment = 0; - int var_unnormalized = 0; - - for (i = 0; i < BLOCK_SIZES; i++) { - sum += bs_hist[i]; - first_moment += bs_hist[i] * i; - second_moment += bs_hist[i] * i * i; - } - - // if variance is small enough, - // adjust the range around its mean size, which gives a tighter range - var_unnormalized = second_moment - first_moment * first_moment / sum; - if (var_unnormalized <= 4 * sum) { - int mean = first_moment / sum; - min_size = min_partition_size[mean]; - max_size = max_partition_size[mean]; - } else { - min_size = min_partition_size[min_size]; - max_size = max_partition_size[max_size]; - } } } @@ -2101,7 +2205,7 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining, &bh, &bw); - min_size = MIN(min_size, max_size); + min_size = MIN(cpi->sf.rd_auto_partition_min_limit, MIN(min_size, max_size)); // When use_square_partition_only is true, make sure at least one square // partition is allowed by selecting the next smaller square size as @@ -2121,10 +2225,9 @@ static void auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size) { VP9_COMMON *const cm = &cpi->common; - MODE_INFO *mi_8x8 = xd->mi; - const int left_in_image = xd->left_available && mi_8x8[-1].src_mi; - const int above_in_image = xd->up_available && - mi_8x8[-xd->mi_stride].src_mi; + MODE_INFO **mi_8x8 = xd->mi; + const int left_in_image = xd->left_available && mi_8x8[-1]; + const int above_in_image = 
xd->up_available && mi_8x8[-xd->mi_stride]; int row8x8_remaining = tile->mi_row_end - mi_row; int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; @@ -2137,15 +2240,15 @@ static void auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, if (search_range_ctrl && (left_in_image || above_in_image || cm->frame_type != KEY_FRAME)) { int block; - MODE_INFO *mi; + MODE_INFO **mi; BLOCK_SIZE sb_type; // Find the min and max partition sizes used in the left SB64. if (left_in_image) { MODE_INFO *cur_mi; - mi = mi_8x8[-1].src_mi; + mi = &mi_8x8[-1]; for (block = 0; block < MI_BLOCK_SIZE; ++block) { - cur_mi = mi[block * xd->mi_stride].src_mi; + cur_mi = mi[block * xd->mi_stride]; sb_type = cur_mi ? cur_mi->mbmi.sb_type : 0; min_size = MIN(min_size, sb_type); max_size = MAX(max_size, sb_type); @@ -2153,9 +2256,9 @@ static void auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, } // Find the min and max partition sizes used in the above SB64. if (above_in_image) { - mi = mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE].src_mi; + mi = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE]; for (block = 0; block < MI_BLOCK_SIZE; ++block) { - sb_type = mi[block].src_mi ? mi[block].src_mi->mbmi.sb_type : 0; + sb_type = mi[block] ? mi[block]->mbmi.sb_type : 0; min_size = MIN(min_size, sb_type); max_size = MAX(max_size, sb_type); } @@ -2186,9 +2289,7 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *mi; const int idx_str = cm->mi_stride * mi_row + mi_col; - MODE_INFO *prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi; - - + MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str]; BLOCK_SIZE bs, min_size, max_size; min_size = BLOCK_64X64; @@ -2197,7 +2298,7 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, if (prev_mi) { for (idy = 0; idy < mi_height; ++idy) { for (idx = 0; idx < mi_width; ++idx) { - mi = prev_mi[idy * cm->mi_stride + idx].src_mi; + mi = prev_mi[idy * cm->mi_stride + idx]; bs = mi ? 
mi->mbmi.sb_type : bsize; min_size = MIN(min_size, bs); max_size = MAX(max_size, bs); @@ -2207,7 +2308,7 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, if (xd->left_available) { for (idy = 0; idy < mi_height; ++idy) { - mi = xd->mi[idy * cm->mi_stride - 1].src_mi; + mi = xd->mi[idy * cm->mi_stride - 1]; bs = mi ? mi->mbmi.sb_type : bsize; min_size = MIN(min_size, bs); max_size = MAX(max_size, bs); @@ -2216,7 +2317,7 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, if (xd->up_available) { for (idx = 0; idx < mi_width; ++idx) { - mi = xd->mi[idx - cm->mi_stride].src_mi; + mi = xd->mi[idx - cm->mi_stride]; bs = mi ? mi->mbmi.sb_type : bsize; min_size = MIN(min_size, bs); max_size = MAX(max_size, bs); @@ -2233,11 +2334,11 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, } static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { - vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); + memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); } static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { - vpx_memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); + memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); } #if CONFIG_FP_MB_STATS @@ -2712,8 +2813,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi, int mi_col; // Initialize the left context for the new SB row - vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); - vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); + memset(&xd->left_context, 0, sizeof(xd->left_context)); + memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end; @@ -2726,7 +2827,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, int seg_skip = 0; const int idx_str = cm->mi_stride * mi_row + mi_col; - MODE_INFO *mi = cm->mi + idx_str; + MODE_INFO **mi = cm->mi_grid_visible + idx_str; if (sf->adaptive_pred_interp_filter) { for (i = 0; i 
< 64; ++i) @@ -2797,11 +2898,11 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(xd->above_context[0], 0, - sizeof(*xd->above_context[0]) * - 2 * aligned_mi_cols * MAX_MB_PLANE); - vpx_memset(xd->above_seg_context, 0, - sizeof(*xd->above_seg_context) * aligned_mi_cols); + memset(xd->above_context[0], 0, + sizeof(*xd->above_context[0]) * + 2 * aligned_mi_cols * MAX_MB_PLANE); + memset(xd->above_seg_context, 0, + sizeof(*xd->above_seg_context) * aligned_mi_cols); } static int check_dual_ref_flags(VP9_COMP *cpi) { @@ -2818,12 +2919,12 @@ static int check_dual_ref_flags(VP9_COMP *cpi) { static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) { int mi_row, mi_col; const int mis = cm->mi_stride; - MODE_INFO *mi_ptr = cm->mi; + MODE_INFO **mi_ptr = cm->mi_grid_visible; for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { - if (mi_ptr[mi_col].src_mi->mbmi.tx_size > max_tx_size) - mi_ptr[mi_col].src_mi->mbmi.tx_size = max_tx_size; + if (mi_ptr[mi_col]->mbmi.tx_size > max_tx_size) + mi_ptr[mi_col]->mbmi.tx_size = max_tx_size; } } } @@ -2873,11 +2974,11 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi; set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - mbmi = &xd->mi[0].src_mi->mbmi; + mbmi = &xd->mi[0]->mbmi; mbmi->sb_type = bsize; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) - if (mbmi->segment_id) + if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); if (cm->frame_type == KEY_FRAME) @@ -2917,27 +3018,27 @@ static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, switch (partition) { case PARTITION_NONE: set_mode_info_offsets(cm, xd, mi_row, mi_col); - *(xd->mi[0].src_mi) = pc_tree->none.mic; + *(xd->mi[0]) = 
pc_tree->none.mic; duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); break; case PARTITION_VERT: set_mode_info_offsets(cm, xd, mi_row, mi_col); - *(xd->mi[0].src_mi) = pc_tree->vertical[0].mic; + *(xd->mi[0]) = pc_tree->vertical[0].mic; duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize); if (mi_col + hbs < cm->mi_cols) { set_mode_info_offsets(cm, xd, mi_row, mi_col + hbs); - *(xd->mi[0].src_mi) = pc_tree->vertical[1].mic; + *(xd->mi[0]) = pc_tree->vertical[1].mic; duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize); } break; case PARTITION_HORZ: set_mode_info_offsets(cm, xd, mi_row, mi_col); - *(xd->mi[0].src_mi) = pc_tree->horizontal[0].mic; + *(xd->mi[0]) = pc_tree->horizontal[0].mic; duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize); if (mi_row + hbs < cm->mi_rows) { set_mode_info_offsets(cm, xd, mi_row + hbs, mi_col); - *(xd->mi[0].src_mi) = pc_tree->horizontal[1].mic; + *(xd->mi[0]) = pc_tree->horizontal[1].mic; duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize); } break; @@ -3037,7 +3138,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (partition_none_allowed) { nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx); - ctx->mic.mbmi = xd->mi[0].src_mi->mbmi; + ctx->mic.mbmi = xd->mi[0]->mbmi; ctx->skip_txfm[0] = x->skip_txfm[0]; ctx->skip = x->skip; @@ -3119,7 +3220,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->horizontal[0]); - pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; @@ -3130,7 +3231,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, &this_rdc, subsize, &pc_tree->horizontal[1]); - pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->horizontal[1].mic.mbmi = 
xd->mi[0]->mbmi; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; @@ -3162,7 +3263,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->vertical[0]); - pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; @@ -3172,7 +3273,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc, subsize, &pc_tree->vertical[1]); - pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; @@ -3224,7 +3325,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, - MODE_INFO *mi, + MODE_INFO **mi, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled, @@ -3243,7 +3344,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - subsize = (bsize >= BLOCK_8X8) ? mi[0].src_mi->mbmi.sb_type : BLOCK_4X4; + subsize = (bsize >= BLOCK_8X8) ? 
mi[0]->mbmi.sb_type : BLOCK_4X4; partition = partition_lookup[bsl][subsize]; if (bsize == BLOCK_32X32 && partition != PARTITION_NONE && @@ -3263,7 +3364,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, pc_tree->none.pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->none); - pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->none.mic.mbmi = xd->mi[0]->mbmi; pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; pc_tree->none.skip = x->skip; break; @@ -3271,14 +3372,14 @@ static void nonrd_select_partition(VP9_COMP *cpi, pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->vertical[0]); - pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; if (mi_col + hbs < cm->mi_cols) { pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &this_rdc, subsize, &pc_tree->vertical[1]); - pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && @@ -3292,14 +3393,14 @@ static void nonrd_select_partition(VP9_COMP *cpi, pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->horizontal[0]); - pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; if (mi_row + hbs < cm->mi_rows) { pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1]); - 
pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && @@ -3353,7 +3454,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, - MODE_INFO *mi, + MODE_INFO **mi, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled, @@ -3370,7 +3471,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - subsize = (bsize >= BLOCK_8X8) ? mi[0].src_mi->mbmi.sb_type : BLOCK_4X4; + subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4; partition = partition_lookup[bsl][subsize]; if (output_enabled && bsize != BLOCK_4X4) { @@ -3383,7 +3484,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->none.pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, subsize, &pc_tree->none); - pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->none.mic.mbmi = xd->mi[0]->mbmi; pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; pc_tree->none.skip = x->skip; encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, @@ -3393,7 +3494,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, subsize, &pc_tree->vertical[0]); - pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, @@ -3402,7 +3503,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, 
dummy_cost, subsize, &pc_tree->vertical[1]); - pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs, @@ -3413,7 +3514,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, subsize, &pc_tree->horizontal[0]); - pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, @@ -3423,7 +3524,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost, subsize, &pc_tree->horizontal[1]); - pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col, @@ -3474,8 +3575,8 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, int mi_col; // Initialize the left context for the new SB row - vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); - vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); + memset(&xd->left_context, 0, sizeof(xd->left_context)); + memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end; @@ -3483,7 +3584,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const struct segmentation *const seg = &cm->seg; RD_COST dummy_rdc; const int idx_str = cm->mi_stride * mi_row + mi_col; - MODE_INFO *mi = cm->mi + idx_str; + 
MODE_INFO **mi = cm->mi_grid_visible + idx_str; PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type; BLOCK_SIZE bsize = BLOCK_64X64; int seg_skip = 0; @@ -3529,7 +3630,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, case REFERENCE_PARTITION: set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && - xd->mi[0].src_mi->mbmi.segment_id) { + xd->mi[0]->mbmi.segment_id) { x->max_partition_size = BLOCK_64X64; x->min_partition_size = BLOCK_8X8; nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, @@ -3563,13 +3664,13 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) { const int cutoff = (MIN(cm->width, cm->height) >= 720) ? (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100) : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100); - DECLARE_ALIGNED_ARRAY(16, int, hist, VAR_HIST_BINS); + DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]); diff *var16 = cpi->source_diff_var; int sum = 0; int i, j; - vpx_memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0])); + memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0])); for (i = 0; i < cm->mb_rows; i++) { for (j = 0; j < cm->mb_cols; j++) { @@ -3577,15 +3678,15 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) { if (cm->use_highbitdepth) { switch (cm->bit_depth) { case VPX_BITS_8: - vp9_highbd_get16x16var(src, src_stride, last_src, last_stride, + vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); break; case VPX_BITS_10: - vp9_highbd_10_get16x16var(src, src_stride, last_src, last_stride, + vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); break; case VPX_BITS_12: - vp9_highbd_12_get16x16var(src, src_stride, last_src, last_stride, + vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); break; default: @@ -3594,11 +3695,11 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) { return -1; } } else { - vp9_get16x16var(src, 
src_stride, last_src, last_stride, + vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); } #else - vp9_get16x16var(src, src_stride, last_src, last_stride, + vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); #endif // CONFIG_VP9_HIGHBITDEPTH var16->var = var16->sse - @@ -3774,8 +3875,8 @@ static void encode_frame_internal(VP9_COMP *cpi) { MACROBLOCKD *const xd = &x->e_mbd; RD_COUNTS *const rdc = &cpi->td.rd_counts; - xd->mi = cm->mi; - xd->mi[0].src_mi = &xd->mi[0]; + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; vp9_zero(*td->counts); vp9_zero(rdc->coef_counts); @@ -3839,6 +3940,9 @@ static void encode_frame_internal(VP9_COMP *cpi) { } vp9_zero(x->zcoeff_blk); + if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0) + cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); + if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION) source_var_based_partition_search_method(cpi); } @@ -4049,8 +4153,8 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *mi_8x8 = xd->mi; - MODE_INFO *mi = mi_8x8; + MODE_INFO **mi_8x8 = xd->mi; + MODE_INFO *mi = mi_8x8[0]; MB_MODE_INFO *mbmi = &mi->mbmi; const int seg_skip = vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); @@ -4064,7 +4168,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, cpi->sf.allow_skip_recode; if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode) - vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); + memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; @@ -4124,7 +4228,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, for (y = 0; y < mi_height; y++) for (x = 0; x < mi_width; x++) if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) - mi_8x8[mis * y + x].src_mi->mbmi.tx_size = tx_size; + mi_8x8[mis * y + x]->mbmi.tx_size = 
tx_size; } ++td->counts->tx.tx_totals[mbmi->tx_size]; ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])]; diff --git a/media/libvpx/vp9/encoder/vp9_encodeframe.h b/media/libvpx/vp9/encoder/vp9_encodeframe.h index 8d545b671c6..6aaa56463b0 100644 --- a/media/libvpx/vp9/encoder/vp9_encodeframe.h +++ b/media/libvpx/vp9/encoder/vp9_encodeframe.h @@ -12,6 +12,8 @@ #ifndef VP9_ENCODER_VP9_ENCODEFRAME_H_ #define VP9_ENCODER_VP9_ENCODEFRAME_H_ +#include "vpx/vpx_integer.h" + #ifdef __cplusplus extern "C" { #endif @@ -38,7 +40,7 @@ void vp9_init_tile_data(struct VP9_COMP *cpi); void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row, int tile_col); -void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int q); +void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q); #ifdef __cplusplus } // extern "C" diff --git a/media/libvpx/vp9/encoder/vp9_encodemb.c b/media/libvpx/vp9/encoder/vp9_encodemb.c index 65e2997931b..2829365e533 100644 --- a/media/libvpx/vp9/encoder/vp9_encodemb.c +++ b/media/libvpx/vp9/encoder/vp9_encodemb.c @@ -13,10 +13,12 @@ #include "./vpx_config.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_scan.h" #include "vp9/common/vp9_systemdependent.h" #include "vp9/encoder/vp9_encodemb.h" @@ -128,7 +130,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *const p = &mb->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; - const int ref = is_inter_block(&xd->mi[0].src_mi->mbmi); + const int ref = is_inter_block(&xd->mi[0]->mbmi); vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; uint8_t token_cache[1024]; @@ -319,8 +321,8 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, UPDATE_RD_COST(); best = rd_cost1 < rd_cost0; final_eob = -1; - vpx_memset(qcoeff, 0, 
sizeof(*qcoeff) * (16 << (tx_size * 2))); - vpx_memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2))); + memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2))); + memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2))); for (i = next; i < eob; i = next) { const int x = tokens[i][best].qc; const int rc = scan[i]; @@ -773,7 +775,7 @@ void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct encode_b_args arg = {x, &ctx, &mbmi->skip}; int plane; @@ -803,7 +805,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, struct encode_b_args* const args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block); @@ -895,7 +897,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, case TX_4X4: tx_type = get_tx_type_4x4(pd->plane_type, xd, block); scan_order = &vp9_scan_orders[TX_4X4][tx_type]; - mode = plane == 0 ? get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode; + mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode; vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? src_stride : dst_stride, @@ -998,7 +1000,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, case TX_4X4: tx_type = get_tx_type_4x4(pd->plane_type, xd, block); scan_order = &vp9_scan_orders[TX_4X4][tx_type]; - mode = plane == 0 ? get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode; + mode = plane == 0 ? 
get_y_mode(xd->mi[0], block) : mbmi->uv_mode; vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? src_stride : dst_stride, @@ -1037,7 +1039,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { const MACROBLOCKD *const xd = &x->e_mbd; - struct encode_b_args arg = {x, NULL, &xd->mi[0].src_mi->mbmi.skip}; + struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip}; vp9_foreach_transformed_block_in_plane(xd, bsize, plane, vp9_encode_block_intra, &arg); diff --git a/media/libvpx/vp9/encoder/vp9_encodemv.c b/media/libvpx/vp9/encoder/vp9_encodemv.c index f2c4efc13e5..22759983ffa 100644 --- a/media/libvpx/vp9/encoder/vp9_encodemv.c +++ b/media/libvpx/vp9/encoder/vp9_encodemv.c @@ -22,7 +22,7 @@ static struct vp9_token mv_class_encodings[MV_CLASSES]; static struct vp9_token mv_fp_encodings[MV_FP_SIZE]; static struct vp9_token mv_class0_encodings[CLASS0_SIZE]; -void vp9_entropy_mv_init() { +void vp9_entropy_mv_init(void) { vp9_tokens_from_tree(mv_joint_encodings, vp9_mv_joint_tree); vp9_tokens_from_tree(mv_class_encodings, vp9_mv_class_tree); vp9_tokens_from_tree(mv_class0_encodings, vp9_mv_class0_tree); @@ -243,7 +243,7 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const int_mv mvs[2], void vp9_update_mv_count(ThreadData *td) { const MACROBLOCKD *xd = &td->mb.e_mbd; - const MODE_INFO *mi = xd->mi[0].src_mi; + const MODE_INFO *mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; if (mbmi->sb_type < BLOCK_8X8) { diff --git a/media/libvpx/vp9/encoder/vp9_encodemv.h b/media/libvpx/vp9/encoder/vp9_encodemv.h index 0ae473749ab..e8ee5ab6641 100644 --- a/media/libvpx/vp9/encoder/vp9_encodemv.h +++ b/media/libvpx/vp9/encoder/vp9_encodemv.h @@ -18,7 +18,7 @@ extern "C" { #endif -void vp9_entropy_mv_init(); +void vp9_entropy_mv_init(void); void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w, 
nmv_context_counts *const counts); diff --git a/media/libvpx/vp9/encoder/vp9_encoder.c b/media/libvpx/vp9/encoder/vp9_encoder.c index 43d4034693d..b79bc00d237 100644 --- a/media/libvpx/vp9/encoder/vp9_encoder.c +++ b/media/libvpx/vp9/encoder/vp9_encoder.c @@ -13,8 +13,11 @@ #include #include "./vpx_config.h" +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx/internal/vpx_psnr.h" +#include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" #include "vp9/common/vp9_alloccommon.h" @@ -109,7 +112,7 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { // Mark all inactive blocks as active. Other segmentation features may be set // so memset cannot be used, instead only inactive blocks should be reset. -void vp9_suppress_active_map(VP9_COMP *cpi) { +static void suppress_active_map(VP9_COMP *cpi) { unsigned char *const seg_map = cpi->segmentation_map; int i; if (cpi->active_map.enabled || cpi->active_map.update) @@ -118,7 +121,7 @@ void vp9_suppress_active_map(VP9_COMP *cpi) { seg_map[i] = AM_SEGMENT_ID_ACTIVE; } -void vp9_apply_active_map(VP9_COMP *cpi) { +static void apply_active_map(VP9_COMP *cpi) { struct segmentation *const seg = &cpi->common.seg; unsigned char *const seg_map = cpi->segmentation_map; const unsigned char *const active_map = cpi->active_map.map; @@ -126,14 +129,25 @@ void vp9_apply_active_map(VP9_COMP *cpi) { assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE); + if (frame_is_intra_only(&cpi->common)) { + cpi->active_map.enabled = 0; + cpi->active_map.update = 1; + } + if (cpi->active_map.update) { if (cpi->active_map.enabled) { for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i) if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i]; vp9_enable_segmentation(seg); vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP); + vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF); + // Setting the data to -MAX_LOOP_FILTER will result in the 
computed loop + // filter level being zero regardless of the value of seg->abs_delta. + vp9_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, + SEG_LVL_ALT_LF, -MAX_LOOP_FILTER); } else { vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP); + vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF); if (seg->enabled) { seg->update_data = 1; seg->update_map = 1; @@ -172,6 +186,33 @@ int vp9_set_active_map(VP9_COMP* cpi, } } +int vp9_get_active_map(VP9_COMP* cpi, + unsigned char* new_map_16x16, + int rows, + int cols) { + if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols && + new_map_16x16) { + unsigned char* const seg_map_8x8 = cpi->segmentation_map; + const int mi_rows = cpi->common.mi_rows; + const int mi_cols = cpi->common.mi_cols; + memset(new_map_16x16, !cpi->active_map.enabled, rows * cols); + if (cpi->active_map.enabled) { + int r, c; + for (r = 0; r < mi_rows; ++r) { + for (c = 0; c < mi_cols; ++c) { + // Cyclic refresh segments are considered active despite not having + // AM_SEGMENT_ID_ACTIVE + new_map_16x16[(r >> 1) * cols + (c >> 1)] |= + seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE; + } + } + } + return 0; + } else { + return -1; + } +} + void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { MACROBLOCK *const mb = &cpi->td.mb; cpi->common.allow_high_precision_mv = allow_high_precision_mv; @@ -212,13 +253,19 @@ static void setup_frame(VP9_COMP *cpi) { static void vp9_enc_setup_mi(VP9_COMMON *cm) { int i; cm->mi = cm->mip + cm->mi_stride + 1; - vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); + memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; // Clear top border row - vpx_memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride); + memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride); // Clear left border column for (i = 1; i < cm->mi_rows + 1; ++i) - vpx_memset(&cm->prev_mip[i * 
cm->mi_stride], 0, sizeof(*cm->prev_mip)); + memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip)); + + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; + + memset(cm->mi_grid_base, 0, + cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); } static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) { @@ -229,6 +276,14 @@ static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) { if (!cm->prev_mip) return 1; cm->mi_alloc_size = mi_size; + + cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO*)); + if (!cm->mi_grid_base) + return 1; + cm->prev_mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO*)); + if (!cm->prev_mi_grid_base) + return 1; + return 0; } @@ -237,10 +292,15 @@ static void vp9_enc_free_mi(VP9_COMMON *cm) { cm->mip = NULL; vpx_free(cm->prev_mip); cm->prev_mip = NULL; + vpx_free(cm->mi_grid_base); + cm->mi_grid_base = NULL; + vpx_free(cm->prev_mi_grid_base); + cm->prev_mi_grid_base = NULL; } static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { // Current mip will be the prev_mip for the next frame. + MODE_INFO **temp_base = cm->prev_mi_grid_base; MODE_INFO *temp = cm->prev_mip; cm->prev_mip = cm->mip; cm->mip = temp; @@ -248,6 +308,11 @@ static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { // Update the upper left visible macroblock ptrs. 
cm->mi = cm->mip + cm->mi_stride + 1; cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; + + cm->prev_mi_grid_base = cm->mi_grid_base; + cm->mi_grid_base = temp_base; + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; } void vp9_initialize_enc(void) { @@ -255,6 +320,8 @@ void vp9_initialize_enc(void) { if (!init_done) { vp9_rtcd(); + vpx_dsp_rtcd(); + vpx_scale_rtcd(); vp9_init_intra_predictors(); vp9_init_me_luts(); vp9_rc_init_minq_luts(); @@ -303,7 +370,10 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->active_map.map); cpi->active_map.map = NULL; - vp9_free_ref_frame_buffers(cm); + vp9_free_ref_frame_buffers(cm->buffer_pool); +#if CONFIG_VP9_POSTPROC + vp9_free_postproc_buffers(cm); +#endif vp9_free_context_buffers(cm); vp9_free_frame_buffer(&cpi->last_frame_uf); @@ -332,11 +402,11 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { for (i = 0; i < MAX_LAG_BUFFERS; ++i) { vp9_free_frame_buffer(&cpi->svc.scaled_frames[i]); } - vpx_memset(&cpi->svc.scaled_frames[0], 0, - MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0])); + memset(&cpi->svc.scaled_frames[0], 0, + MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0])); vp9_free_frame_buffer(&cpi->svc.empty_frame.img); - vpx_memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame)); + memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame)); } static void save_coding_context(VP9_COMP *cpi) { @@ -349,19 +419,19 @@ static void save_coding_context(VP9_COMP *cpi) { // quantizer value is adjusted between loop iterations. 
vp9_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost); - vpx_memcpy(cc->nmvcosts[0], cpi->nmvcosts[0], - MV_VALS * sizeof(*cpi->nmvcosts[0])); - vpx_memcpy(cc->nmvcosts[1], cpi->nmvcosts[1], - MV_VALS * sizeof(*cpi->nmvcosts[1])); - vpx_memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0], - MV_VALS * sizeof(*cpi->nmvcosts_hp[0])); - vpx_memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1], - MV_VALS * sizeof(*cpi->nmvcosts_hp[1])); + memcpy(cc->nmvcosts[0], cpi->nmvcosts[0], + MV_VALS * sizeof(*cpi->nmvcosts[0])); + memcpy(cc->nmvcosts[1], cpi->nmvcosts[1], + MV_VALS * sizeof(*cpi->nmvcosts[1])); + memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0], + MV_VALS * sizeof(*cpi->nmvcosts_hp[0])); + memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1], + MV_VALS * sizeof(*cpi->nmvcosts_hp[1])); vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, - cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); + memcpy(cpi->coding_context.last_frame_seg_map_copy, + cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); @@ -377,20 +447,18 @@ static void restore_coding_context(VP9_COMP *cpi) { // previous call to vp9_save_coding_context. 
vp9_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost); - vpx_memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], - MV_VALS * sizeof(*cc->nmvcosts[0])); - vpx_memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], - MV_VALS * sizeof(*cc->nmvcosts[1])); - vpx_memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0], - MV_VALS * sizeof(*cc->nmvcosts_hp[0])); - vpx_memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1], - MV_VALS * sizeof(*cc->nmvcosts_hp[1])); + memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0])); + memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1])); + memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0], + MV_VALS * sizeof(*cc->nmvcosts_hp[0])); + memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1], + MV_VALS * sizeof(*cc->nmvcosts_hp[1])); vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - vpx_memcpy(cm->last_frame_seg_map, - cpi->coding_context.last_frame_seg_map_copy, - (cm->mi_rows * cm->mi_cols)); + memcpy(cm->last_frame_seg_map, + cpi->coding_context.last_frame_seg_map_copy, + (cm->mi_rows * cm->mi_cols)); vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); @@ -409,7 +477,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { // Disable and clear down for KF if (cm->frame_type == KEY_FRAME) { // Clear down the global segmentation map - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); + memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); seg->update_map = 0; seg->update_data = 0; cpi->static_mb_pct = 0; @@ -422,7 +490,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { } else if (cpi->refresh_alt_ref_frame) { // If this is an alt ref frame // Clear down the global segmentation map - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); + memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); seg->update_map = 0; seg->update_data = 0; cpi->static_mb_pct = 0; @@ -483,7 +551,7 @@ static void 
configure_static_seg_features(VP9_COMP *cpi) { vp9_disable_segmentation(seg); - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); + memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); seg->update_map = 0; seg->update_data = 0; @@ -524,15 +592,15 @@ static void configure_static_seg_features(VP9_COMP *cpi) { static void update_reference_segmentation_map(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - MODE_INFO *mi_8x8_ptr = cm->mi; + MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; uint8_t *cache_ptr = cm->last_frame_seg_map; int row, col; for (row = 0; row < cm->mi_rows; row++) { - MODE_INFO *mi_8x8 = mi_8x8_ptr; + MODE_INFO **mi_8x8 = mi_8x8_ptr; uint8_t *cache = cache_ptr; for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++) - cache[0] = mi_8x8[0].src_mi->mbmi.segment_id; + cache[0] = mi_8x8[0]->mbmi.segment_id; mi_8x8_ptr += cm->mi_stride; cache_ptr += cm->mi_cols; } @@ -618,6 +686,29 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vp9_setup_pc_tree(&cpi->common, &cpi->td); } +void vp9_new_framerate(VP9_COMP *cpi, double framerate) { + cpi->framerate = framerate < 0.1 ? 
30 : framerate; + vp9_rc_update_framerate(cpi); +} + +static void set_tile_limits(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + + int min_log2_tile_cols, max_log2_tile_cols; + vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); + + if (is_two_pass_svc(cpi) && + (cpi->svc.encode_empty_frame_state == ENCODING || + cpi->svc.number_spatial_layers > 1)) { + cm->log2_tile_cols = 0; + cm->log2_tile_rows = 0; + } else { + cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns, + min_log2_tile_cols, max_log2_tile_cols); + cm->log2_tile_rows = cpi->oxcf.tile_rows; + } +} + static void update_frame_size(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; @@ -626,6 +717,8 @@ static void update_frame_size(VP9_COMP *cpi) { vp9_init_context_buffers(cm); init_macroblockd(cm, xd); + set_tile_limits(cpi); + if (is_two_pass_svc(cpi)) { if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer, cm->width, cm->height, @@ -640,22 +733,6 @@ static void update_frame_size(VP9_COMP *cpi) { } } -void vp9_new_framerate(VP9_COMP *cpi, double framerate) { - cpi->framerate = framerate < 0.1 ? 30 : framerate; - vp9_rc_update_framerate(cpi); -} - -static void set_tile_limits(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - - int min_log2_tile_cols, max_log2_tile_cols; - vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); - - cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns, - min_log2_tile_cols, max_log2_tile_cols); - cm->log2_tile_rows = cpi->oxcf.tile_rows; -} - static void init_buffer_indices(VP9_COMP *cpi) { cpi->lst_fb_idx = 0; cpi->gld_fb_idx = 1; @@ -679,6 +756,8 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cm->height = oxcf->height; vp9_alloc_compressor_data(cpi); + cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode; + // Single thread case: use counts in common. 
cpi->td.counts = &cm->counts; @@ -859,61 +938,61 @@ static void fnname##_bits12(const uint8_t *src_ptr, \ sad_array[i] >>= 4; \ } -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad32x16) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad32x16_avg) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad32x16x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad16x32) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad16x32_avg) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad16x32x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad64x32) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad64x32_avg) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad64x32x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad32x64) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad32x64_avg) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad32x64x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad32x32) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad32x32_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad32x32x3) -MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad32x32x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad32x32x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad64x64) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad64x64_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad64x64x3) -MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad64x64x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad64x64x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad16x16) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad16x16_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad16x16x3) -MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad16x16x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad16x16x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad16x8) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad16x8_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad16x8x3) -MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad16x8x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad16x8x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad8x16) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad8x16_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad8x16x3) -MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad8x16x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad8x16x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad8x8) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad8x8_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad8x8x3) 
-MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad8x8x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad8x8x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad8x4) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad8x4_avg) -MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad8x4x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad8x4x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad4x8) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad4x8_avg) -MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad4x8x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad4x8x4d) -MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad4x4) -MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad4x4_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad4x4x3) -MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad4x4x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad4x4x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg) +MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad32x32x3) +MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad32x32x8) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg) +MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad64x64x3) +MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad64x64x8) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg) +MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad16x16x3) +MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad16x16x8) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8) 
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg) +MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad16x8x3) +MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad16x8x8) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg) +MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad8x16x3) +MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad8x16x8) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg) +MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad8x8x3) +MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad8x8x8) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg) +MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad8x4x8) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg) +MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad4x8x8) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg) +MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad4x4x3) +MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad4x4x8) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d) static void highbd_set_var_fns(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; @@ -921,398 +1000,398 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { switch (cm->bit_depth) { case VPX_BITS_8: HIGHBD_BFP(BLOCK_32X16, - vp9_highbd_sad32x16_bits8, - vp9_highbd_sad32x16_avg_bits8, - vp9_highbd_variance32x16, + vpx_highbd_sad32x16_bits8, + vpx_highbd_sad32x16_avg_bits8, + vpx_highbd_8_variance32x16, vp9_highbd_sub_pixel_variance32x16, vp9_highbd_sub_pixel_avg_variance32x16, NULL, NULL, - vp9_highbd_sad32x16x4d_bits8) + vpx_highbd_sad32x16x4d_bits8) HIGHBD_BFP(BLOCK_16X32, - vp9_highbd_sad16x32_bits8, - vp9_highbd_sad16x32_avg_bits8, - vp9_highbd_variance16x32, + vpx_highbd_sad16x32_bits8, + vpx_highbd_sad16x32_avg_bits8, + vpx_highbd_8_variance16x32, 
vp9_highbd_sub_pixel_variance16x32, vp9_highbd_sub_pixel_avg_variance16x32, NULL, NULL, - vp9_highbd_sad16x32x4d_bits8) + vpx_highbd_sad16x32x4d_bits8) HIGHBD_BFP(BLOCK_64X32, - vp9_highbd_sad64x32_bits8, - vp9_highbd_sad64x32_avg_bits8, - vp9_highbd_variance64x32, + vpx_highbd_sad64x32_bits8, + vpx_highbd_sad64x32_avg_bits8, + vpx_highbd_8_variance64x32, vp9_highbd_sub_pixel_variance64x32, vp9_highbd_sub_pixel_avg_variance64x32, NULL, NULL, - vp9_highbd_sad64x32x4d_bits8) + vpx_highbd_sad64x32x4d_bits8) HIGHBD_BFP(BLOCK_32X64, - vp9_highbd_sad32x64_bits8, - vp9_highbd_sad32x64_avg_bits8, - vp9_highbd_variance32x64, + vpx_highbd_sad32x64_bits8, + vpx_highbd_sad32x64_avg_bits8, + vpx_highbd_8_variance32x64, vp9_highbd_sub_pixel_variance32x64, vp9_highbd_sub_pixel_avg_variance32x64, NULL, NULL, - vp9_highbd_sad32x64x4d_bits8) + vpx_highbd_sad32x64x4d_bits8) HIGHBD_BFP(BLOCK_32X32, - vp9_highbd_sad32x32_bits8, - vp9_highbd_sad32x32_avg_bits8, - vp9_highbd_variance32x32, + vpx_highbd_sad32x32_bits8, + vpx_highbd_sad32x32_avg_bits8, + vpx_highbd_8_variance32x32, vp9_highbd_sub_pixel_variance32x32, vp9_highbd_sub_pixel_avg_variance32x32, - vp9_highbd_sad32x32x3_bits8, - vp9_highbd_sad32x32x8_bits8, - vp9_highbd_sad32x32x4d_bits8) + vpx_highbd_sad32x32x3_bits8, + vpx_highbd_sad32x32x8_bits8, + vpx_highbd_sad32x32x4d_bits8) HIGHBD_BFP(BLOCK_64X64, - vp9_highbd_sad64x64_bits8, - vp9_highbd_sad64x64_avg_bits8, - vp9_highbd_variance64x64, + vpx_highbd_sad64x64_bits8, + vpx_highbd_sad64x64_avg_bits8, + vpx_highbd_8_variance64x64, vp9_highbd_sub_pixel_variance64x64, vp9_highbd_sub_pixel_avg_variance64x64, - vp9_highbd_sad64x64x3_bits8, - vp9_highbd_sad64x64x8_bits8, - vp9_highbd_sad64x64x4d_bits8) + vpx_highbd_sad64x64x3_bits8, + vpx_highbd_sad64x64x8_bits8, + vpx_highbd_sad64x64x4d_bits8) HIGHBD_BFP(BLOCK_16X16, - vp9_highbd_sad16x16_bits8, - vp9_highbd_sad16x16_avg_bits8, - vp9_highbd_variance16x16, + vpx_highbd_sad16x16_bits8, + vpx_highbd_sad16x16_avg_bits8, + 
vpx_highbd_8_variance16x16, vp9_highbd_sub_pixel_variance16x16, vp9_highbd_sub_pixel_avg_variance16x16, - vp9_highbd_sad16x16x3_bits8, - vp9_highbd_sad16x16x8_bits8, - vp9_highbd_sad16x16x4d_bits8) + vpx_highbd_sad16x16x3_bits8, + vpx_highbd_sad16x16x8_bits8, + vpx_highbd_sad16x16x4d_bits8) HIGHBD_BFP(BLOCK_16X8, - vp9_highbd_sad16x8_bits8, - vp9_highbd_sad16x8_avg_bits8, - vp9_highbd_variance16x8, + vpx_highbd_sad16x8_bits8, + vpx_highbd_sad16x8_avg_bits8, + vpx_highbd_8_variance16x8, vp9_highbd_sub_pixel_variance16x8, vp9_highbd_sub_pixel_avg_variance16x8, - vp9_highbd_sad16x8x3_bits8, - vp9_highbd_sad16x8x8_bits8, - vp9_highbd_sad16x8x4d_bits8) + vpx_highbd_sad16x8x3_bits8, + vpx_highbd_sad16x8x8_bits8, + vpx_highbd_sad16x8x4d_bits8) HIGHBD_BFP(BLOCK_8X16, - vp9_highbd_sad8x16_bits8, - vp9_highbd_sad8x16_avg_bits8, - vp9_highbd_variance8x16, + vpx_highbd_sad8x16_bits8, + vpx_highbd_sad8x16_avg_bits8, + vpx_highbd_8_variance8x16, vp9_highbd_sub_pixel_variance8x16, vp9_highbd_sub_pixel_avg_variance8x16, - vp9_highbd_sad8x16x3_bits8, - vp9_highbd_sad8x16x8_bits8, - vp9_highbd_sad8x16x4d_bits8) + vpx_highbd_sad8x16x3_bits8, + vpx_highbd_sad8x16x8_bits8, + vpx_highbd_sad8x16x4d_bits8) HIGHBD_BFP(BLOCK_8X8, - vp9_highbd_sad8x8_bits8, - vp9_highbd_sad8x8_avg_bits8, - vp9_highbd_variance8x8, + vpx_highbd_sad8x8_bits8, + vpx_highbd_sad8x8_avg_bits8, + vpx_highbd_8_variance8x8, vp9_highbd_sub_pixel_variance8x8, vp9_highbd_sub_pixel_avg_variance8x8, - vp9_highbd_sad8x8x3_bits8, - vp9_highbd_sad8x8x8_bits8, - vp9_highbd_sad8x8x4d_bits8) + vpx_highbd_sad8x8x3_bits8, + vpx_highbd_sad8x8x8_bits8, + vpx_highbd_sad8x8x4d_bits8) HIGHBD_BFP(BLOCK_8X4, - vp9_highbd_sad8x4_bits8, - vp9_highbd_sad8x4_avg_bits8, - vp9_highbd_variance8x4, + vpx_highbd_sad8x4_bits8, + vpx_highbd_sad8x4_avg_bits8, + vpx_highbd_8_variance8x4, vp9_highbd_sub_pixel_variance8x4, vp9_highbd_sub_pixel_avg_variance8x4, NULL, - vp9_highbd_sad8x4x8_bits8, - vp9_highbd_sad8x4x4d_bits8) + vpx_highbd_sad8x4x8_bits8, 
+ vpx_highbd_sad8x4x4d_bits8) HIGHBD_BFP(BLOCK_4X8, - vp9_highbd_sad4x8_bits8, - vp9_highbd_sad4x8_avg_bits8, - vp9_highbd_variance4x8, + vpx_highbd_sad4x8_bits8, + vpx_highbd_sad4x8_avg_bits8, + vpx_highbd_8_variance4x8, vp9_highbd_sub_pixel_variance4x8, vp9_highbd_sub_pixel_avg_variance4x8, NULL, - vp9_highbd_sad4x8x8_bits8, - vp9_highbd_sad4x8x4d_bits8) + vpx_highbd_sad4x8x8_bits8, + vpx_highbd_sad4x8x4d_bits8) HIGHBD_BFP(BLOCK_4X4, - vp9_highbd_sad4x4_bits8, - vp9_highbd_sad4x4_avg_bits8, - vp9_highbd_variance4x4, + vpx_highbd_sad4x4_bits8, + vpx_highbd_sad4x4_avg_bits8, + vpx_highbd_8_variance4x4, vp9_highbd_sub_pixel_variance4x4, vp9_highbd_sub_pixel_avg_variance4x4, - vp9_highbd_sad4x4x3_bits8, - vp9_highbd_sad4x4x8_bits8, - vp9_highbd_sad4x4x4d_bits8) + vpx_highbd_sad4x4x3_bits8, + vpx_highbd_sad4x4x8_bits8, + vpx_highbd_sad4x4x4d_bits8) break; case VPX_BITS_10: HIGHBD_BFP(BLOCK_32X16, - vp9_highbd_sad32x16_bits10, - vp9_highbd_sad32x16_avg_bits10, - vp9_highbd_10_variance32x16, + vpx_highbd_sad32x16_bits10, + vpx_highbd_sad32x16_avg_bits10, + vpx_highbd_10_variance32x16, vp9_highbd_10_sub_pixel_variance32x16, vp9_highbd_10_sub_pixel_avg_variance32x16, NULL, NULL, - vp9_highbd_sad32x16x4d_bits10) + vpx_highbd_sad32x16x4d_bits10) HIGHBD_BFP(BLOCK_16X32, - vp9_highbd_sad16x32_bits10, - vp9_highbd_sad16x32_avg_bits10, - vp9_highbd_10_variance16x32, + vpx_highbd_sad16x32_bits10, + vpx_highbd_sad16x32_avg_bits10, + vpx_highbd_10_variance16x32, vp9_highbd_10_sub_pixel_variance16x32, vp9_highbd_10_sub_pixel_avg_variance16x32, NULL, NULL, - vp9_highbd_sad16x32x4d_bits10) + vpx_highbd_sad16x32x4d_bits10) HIGHBD_BFP(BLOCK_64X32, - vp9_highbd_sad64x32_bits10, - vp9_highbd_sad64x32_avg_bits10, - vp9_highbd_10_variance64x32, + vpx_highbd_sad64x32_bits10, + vpx_highbd_sad64x32_avg_bits10, + vpx_highbd_10_variance64x32, vp9_highbd_10_sub_pixel_variance64x32, vp9_highbd_10_sub_pixel_avg_variance64x32, NULL, NULL, - vp9_highbd_sad64x32x4d_bits10) + 
vpx_highbd_sad64x32x4d_bits10) HIGHBD_BFP(BLOCK_32X64, - vp9_highbd_sad32x64_bits10, - vp9_highbd_sad32x64_avg_bits10, - vp9_highbd_10_variance32x64, + vpx_highbd_sad32x64_bits10, + vpx_highbd_sad32x64_avg_bits10, + vpx_highbd_10_variance32x64, vp9_highbd_10_sub_pixel_variance32x64, vp9_highbd_10_sub_pixel_avg_variance32x64, NULL, NULL, - vp9_highbd_sad32x64x4d_bits10) + vpx_highbd_sad32x64x4d_bits10) HIGHBD_BFP(BLOCK_32X32, - vp9_highbd_sad32x32_bits10, - vp9_highbd_sad32x32_avg_bits10, - vp9_highbd_10_variance32x32, + vpx_highbd_sad32x32_bits10, + vpx_highbd_sad32x32_avg_bits10, + vpx_highbd_10_variance32x32, vp9_highbd_10_sub_pixel_variance32x32, vp9_highbd_10_sub_pixel_avg_variance32x32, - vp9_highbd_sad32x32x3_bits10, - vp9_highbd_sad32x32x8_bits10, - vp9_highbd_sad32x32x4d_bits10) + vpx_highbd_sad32x32x3_bits10, + vpx_highbd_sad32x32x8_bits10, + vpx_highbd_sad32x32x4d_bits10) HIGHBD_BFP(BLOCK_64X64, - vp9_highbd_sad64x64_bits10, - vp9_highbd_sad64x64_avg_bits10, - vp9_highbd_10_variance64x64, + vpx_highbd_sad64x64_bits10, + vpx_highbd_sad64x64_avg_bits10, + vpx_highbd_10_variance64x64, vp9_highbd_10_sub_pixel_variance64x64, vp9_highbd_10_sub_pixel_avg_variance64x64, - vp9_highbd_sad64x64x3_bits10, - vp9_highbd_sad64x64x8_bits10, - vp9_highbd_sad64x64x4d_bits10) + vpx_highbd_sad64x64x3_bits10, + vpx_highbd_sad64x64x8_bits10, + vpx_highbd_sad64x64x4d_bits10) HIGHBD_BFP(BLOCK_16X16, - vp9_highbd_sad16x16_bits10, - vp9_highbd_sad16x16_avg_bits10, - vp9_highbd_10_variance16x16, + vpx_highbd_sad16x16_bits10, + vpx_highbd_sad16x16_avg_bits10, + vpx_highbd_10_variance16x16, vp9_highbd_10_sub_pixel_variance16x16, vp9_highbd_10_sub_pixel_avg_variance16x16, - vp9_highbd_sad16x16x3_bits10, - vp9_highbd_sad16x16x8_bits10, - vp9_highbd_sad16x16x4d_bits10) + vpx_highbd_sad16x16x3_bits10, + vpx_highbd_sad16x16x8_bits10, + vpx_highbd_sad16x16x4d_bits10) HIGHBD_BFP(BLOCK_16X8, - vp9_highbd_sad16x8_bits10, - vp9_highbd_sad16x8_avg_bits10, - vp9_highbd_10_variance16x8, + 
vpx_highbd_sad16x8_bits10, + vpx_highbd_sad16x8_avg_bits10, + vpx_highbd_10_variance16x8, vp9_highbd_10_sub_pixel_variance16x8, vp9_highbd_10_sub_pixel_avg_variance16x8, - vp9_highbd_sad16x8x3_bits10, - vp9_highbd_sad16x8x8_bits10, - vp9_highbd_sad16x8x4d_bits10) + vpx_highbd_sad16x8x3_bits10, + vpx_highbd_sad16x8x8_bits10, + vpx_highbd_sad16x8x4d_bits10) HIGHBD_BFP(BLOCK_8X16, - vp9_highbd_sad8x16_bits10, - vp9_highbd_sad8x16_avg_bits10, - vp9_highbd_10_variance8x16, + vpx_highbd_sad8x16_bits10, + vpx_highbd_sad8x16_avg_bits10, + vpx_highbd_10_variance8x16, vp9_highbd_10_sub_pixel_variance8x16, vp9_highbd_10_sub_pixel_avg_variance8x16, - vp9_highbd_sad8x16x3_bits10, - vp9_highbd_sad8x16x8_bits10, - vp9_highbd_sad8x16x4d_bits10) + vpx_highbd_sad8x16x3_bits10, + vpx_highbd_sad8x16x8_bits10, + vpx_highbd_sad8x16x4d_bits10) HIGHBD_BFP(BLOCK_8X8, - vp9_highbd_sad8x8_bits10, - vp9_highbd_sad8x8_avg_bits10, - vp9_highbd_10_variance8x8, + vpx_highbd_sad8x8_bits10, + vpx_highbd_sad8x8_avg_bits10, + vpx_highbd_10_variance8x8, vp9_highbd_10_sub_pixel_variance8x8, vp9_highbd_10_sub_pixel_avg_variance8x8, - vp9_highbd_sad8x8x3_bits10, - vp9_highbd_sad8x8x8_bits10, - vp9_highbd_sad8x8x4d_bits10) + vpx_highbd_sad8x8x3_bits10, + vpx_highbd_sad8x8x8_bits10, + vpx_highbd_sad8x8x4d_bits10) HIGHBD_BFP(BLOCK_8X4, - vp9_highbd_sad8x4_bits10, - vp9_highbd_sad8x4_avg_bits10, - vp9_highbd_10_variance8x4, + vpx_highbd_sad8x4_bits10, + vpx_highbd_sad8x4_avg_bits10, + vpx_highbd_10_variance8x4, vp9_highbd_10_sub_pixel_variance8x4, vp9_highbd_10_sub_pixel_avg_variance8x4, NULL, - vp9_highbd_sad8x4x8_bits10, - vp9_highbd_sad8x4x4d_bits10) + vpx_highbd_sad8x4x8_bits10, + vpx_highbd_sad8x4x4d_bits10) HIGHBD_BFP(BLOCK_4X8, - vp9_highbd_sad4x8_bits10, - vp9_highbd_sad4x8_avg_bits10, - vp9_highbd_10_variance4x8, + vpx_highbd_sad4x8_bits10, + vpx_highbd_sad4x8_avg_bits10, + vpx_highbd_10_variance4x8, vp9_highbd_10_sub_pixel_variance4x8, vp9_highbd_10_sub_pixel_avg_variance4x8, NULL, - 
vp9_highbd_sad4x8x8_bits10, - vp9_highbd_sad4x8x4d_bits10) + vpx_highbd_sad4x8x8_bits10, + vpx_highbd_sad4x8x4d_bits10) HIGHBD_BFP(BLOCK_4X4, - vp9_highbd_sad4x4_bits10, - vp9_highbd_sad4x4_avg_bits10, - vp9_highbd_10_variance4x4, + vpx_highbd_sad4x4_bits10, + vpx_highbd_sad4x4_avg_bits10, + vpx_highbd_10_variance4x4, vp9_highbd_10_sub_pixel_variance4x4, vp9_highbd_10_sub_pixel_avg_variance4x4, - vp9_highbd_sad4x4x3_bits10, - vp9_highbd_sad4x4x8_bits10, - vp9_highbd_sad4x4x4d_bits10) + vpx_highbd_sad4x4x3_bits10, + vpx_highbd_sad4x4x8_bits10, + vpx_highbd_sad4x4x4d_bits10) break; case VPX_BITS_12: HIGHBD_BFP(BLOCK_32X16, - vp9_highbd_sad32x16_bits12, - vp9_highbd_sad32x16_avg_bits12, - vp9_highbd_12_variance32x16, + vpx_highbd_sad32x16_bits12, + vpx_highbd_sad32x16_avg_bits12, + vpx_highbd_12_variance32x16, vp9_highbd_12_sub_pixel_variance32x16, vp9_highbd_12_sub_pixel_avg_variance32x16, NULL, NULL, - vp9_highbd_sad32x16x4d_bits12) + vpx_highbd_sad32x16x4d_bits12) HIGHBD_BFP(BLOCK_16X32, - vp9_highbd_sad16x32_bits12, - vp9_highbd_sad16x32_avg_bits12, - vp9_highbd_12_variance16x32, + vpx_highbd_sad16x32_bits12, + vpx_highbd_sad16x32_avg_bits12, + vpx_highbd_12_variance16x32, vp9_highbd_12_sub_pixel_variance16x32, vp9_highbd_12_sub_pixel_avg_variance16x32, NULL, NULL, - vp9_highbd_sad16x32x4d_bits12) + vpx_highbd_sad16x32x4d_bits12) HIGHBD_BFP(BLOCK_64X32, - vp9_highbd_sad64x32_bits12, - vp9_highbd_sad64x32_avg_bits12, - vp9_highbd_12_variance64x32, + vpx_highbd_sad64x32_bits12, + vpx_highbd_sad64x32_avg_bits12, + vpx_highbd_12_variance64x32, vp9_highbd_12_sub_pixel_variance64x32, vp9_highbd_12_sub_pixel_avg_variance64x32, NULL, NULL, - vp9_highbd_sad64x32x4d_bits12) + vpx_highbd_sad64x32x4d_bits12) HIGHBD_BFP(BLOCK_32X64, - vp9_highbd_sad32x64_bits12, - vp9_highbd_sad32x64_avg_bits12, - vp9_highbd_12_variance32x64, + vpx_highbd_sad32x64_bits12, + vpx_highbd_sad32x64_avg_bits12, + vpx_highbd_12_variance32x64, vp9_highbd_12_sub_pixel_variance32x64, 
vp9_highbd_12_sub_pixel_avg_variance32x64, NULL, NULL, - vp9_highbd_sad32x64x4d_bits12) + vpx_highbd_sad32x64x4d_bits12) HIGHBD_BFP(BLOCK_32X32, - vp9_highbd_sad32x32_bits12, - vp9_highbd_sad32x32_avg_bits12, - vp9_highbd_12_variance32x32, + vpx_highbd_sad32x32_bits12, + vpx_highbd_sad32x32_avg_bits12, + vpx_highbd_12_variance32x32, vp9_highbd_12_sub_pixel_variance32x32, vp9_highbd_12_sub_pixel_avg_variance32x32, - vp9_highbd_sad32x32x3_bits12, - vp9_highbd_sad32x32x8_bits12, - vp9_highbd_sad32x32x4d_bits12) + vpx_highbd_sad32x32x3_bits12, + vpx_highbd_sad32x32x8_bits12, + vpx_highbd_sad32x32x4d_bits12) HIGHBD_BFP(BLOCK_64X64, - vp9_highbd_sad64x64_bits12, - vp9_highbd_sad64x64_avg_bits12, - vp9_highbd_12_variance64x64, + vpx_highbd_sad64x64_bits12, + vpx_highbd_sad64x64_avg_bits12, + vpx_highbd_12_variance64x64, vp9_highbd_12_sub_pixel_variance64x64, vp9_highbd_12_sub_pixel_avg_variance64x64, - vp9_highbd_sad64x64x3_bits12, - vp9_highbd_sad64x64x8_bits12, - vp9_highbd_sad64x64x4d_bits12) + vpx_highbd_sad64x64x3_bits12, + vpx_highbd_sad64x64x8_bits12, + vpx_highbd_sad64x64x4d_bits12) HIGHBD_BFP(BLOCK_16X16, - vp9_highbd_sad16x16_bits12, - vp9_highbd_sad16x16_avg_bits12, - vp9_highbd_12_variance16x16, + vpx_highbd_sad16x16_bits12, + vpx_highbd_sad16x16_avg_bits12, + vpx_highbd_12_variance16x16, vp9_highbd_12_sub_pixel_variance16x16, vp9_highbd_12_sub_pixel_avg_variance16x16, - vp9_highbd_sad16x16x3_bits12, - vp9_highbd_sad16x16x8_bits12, - vp9_highbd_sad16x16x4d_bits12) + vpx_highbd_sad16x16x3_bits12, + vpx_highbd_sad16x16x8_bits12, + vpx_highbd_sad16x16x4d_bits12) HIGHBD_BFP(BLOCK_16X8, - vp9_highbd_sad16x8_bits12, - vp9_highbd_sad16x8_avg_bits12, - vp9_highbd_12_variance16x8, + vpx_highbd_sad16x8_bits12, + vpx_highbd_sad16x8_avg_bits12, + vpx_highbd_12_variance16x8, vp9_highbd_12_sub_pixel_variance16x8, vp9_highbd_12_sub_pixel_avg_variance16x8, - vp9_highbd_sad16x8x3_bits12, - vp9_highbd_sad16x8x8_bits12, - vp9_highbd_sad16x8x4d_bits12) + 
vpx_highbd_sad16x8x3_bits12, + vpx_highbd_sad16x8x8_bits12, + vpx_highbd_sad16x8x4d_bits12) HIGHBD_BFP(BLOCK_8X16, - vp9_highbd_sad8x16_bits12, - vp9_highbd_sad8x16_avg_bits12, - vp9_highbd_12_variance8x16, + vpx_highbd_sad8x16_bits12, + vpx_highbd_sad8x16_avg_bits12, + vpx_highbd_12_variance8x16, vp9_highbd_12_sub_pixel_variance8x16, vp9_highbd_12_sub_pixel_avg_variance8x16, - vp9_highbd_sad8x16x3_bits12, - vp9_highbd_sad8x16x8_bits12, - vp9_highbd_sad8x16x4d_bits12) + vpx_highbd_sad8x16x3_bits12, + vpx_highbd_sad8x16x8_bits12, + vpx_highbd_sad8x16x4d_bits12) HIGHBD_BFP(BLOCK_8X8, - vp9_highbd_sad8x8_bits12, - vp9_highbd_sad8x8_avg_bits12, - vp9_highbd_12_variance8x8, + vpx_highbd_sad8x8_bits12, + vpx_highbd_sad8x8_avg_bits12, + vpx_highbd_12_variance8x8, vp9_highbd_12_sub_pixel_variance8x8, vp9_highbd_12_sub_pixel_avg_variance8x8, - vp9_highbd_sad8x8x3_bits12, - vp9_highbd_sad8x8x8_bits12, - vp9_highbd_sad8x8x4d_bits12) + vpx_highbd_sad8x8x3_bits12, + vpx_highbd_sad8x8x8_bits12, + vpx_highbd_sad8x8x4d_bits12) HIGHBD_BFP(BLOCK_8X4, - vp9_highbd_sad8x4_bits12, - vp9_highbd_sad8x4_avg_bits12, - vp9_highbd_12_variance8x4, + vpx_highbd_sad8x4_bits12, + vpx_highbd_sad8x4_avg_bits12, + vpx_highbd_12_variance8x4, vp9_highbd_12_sub_pixel_variance8x4, vp9_highbd_12_sub_pixel_avg_variance8x4, NULL, - vp9_highbd_sad8x4x8_bits12, - vp9_highbd_sad8x4x4d_bits12) + vpx_highbd_sad8x4x8_bits12, + vpx_highbd_sad8x4x4d_bits12) HIGHBD_BFP(BLOCK_4X8, - vp9_highbd_sad4x8_bits12, - vp9_highbd_sad4x8_avg_bits12, - vp9_highbd_12_variance4x8, + vpx_highbd_sad4x8_bits12, + vpx_highbd_sad4x8_avg_bits12, + vpx_highbd_12_variance4x8, vp9_highbd_12_sub_pixel_variance4x8, vp9_highbd_12_sub_pixel_avg_variance4x8, NULL, - vp9_highbd_sad4x8x8_bits12, - vp9_highbd_sad4x8x4d_bits12) + vpx_highbd_sad4x8x8_bits12, + vpx_highbd_sad4x8x4d_bits12) HIGHBD_BFP(BLOCK_4X4, - vp9_highbd_sad4x4_bits12, - vp9_highbd_sad4x4_avg_bits12, - vp9_highbd_12_variance4x4, + vpx_highbd_sad4x4_bits12, + 
vpx_highbd_sad4x4_avg_bits12, + vpx_highbd_12_variance4x4, vp9_highbd_12_sub_pixel_variance4x4, vp9_highbd_12_sub_pixel_avg_variance4x4, - vp9_highbd_sad4x4x3_bits12, - vp9_highbd_sad4x4x8_bits12, - vp9_highbd_sad4x4x4d_bits12) + vpx_highbd_sad4x4x3_bits12, + vpx_highbd_sad4x4x8_bits12, + vpx_highbd_sad4x4x4d_bits12) break; default: @@ -1569,22 +1648,19 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; #if CONFIG_INTERNAL_STATS cpi->b_calculate_ssimg = 0; + cpi->b_calculate_blockiness = 1; + cpi->b_calculate_consistency = 1; + cpi->total_inconsistency = 0; + cpi->psnr.worst = 100.0; + cpi->worst_ssim = 100.0; cpi->count = 0; cpi->bytes = 0; if (cpi->b_calculate_psnr) { - cpi->total_y = 0.0; - cpi->total_u = 0.0; - cpi->total_v = 0.0; - cpi->total = 0.0; cpi->total_sq_error = 0; cpi->total_samples = 0; - cpi->totalp_y = 0.0; - cpi->totalp_u = 0.0; - cpi->totalp_v = 0.0; - cpi->totalp = 0.0; cpi->totalp_sq_error = 0; cpi->totalp_samples = 0; @@ -1596,10 +1672,20 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, } if (cpi->b_calculate_ssimg) { - cpi->total_ssimg_y = 0; - cpi->total_ssimg_u = 0; - cpi->total_ssimg_v = 0; - cpi->total_ssimg_all = 0; + cpi->ssimg.worst= 100.0; + } + cpi->fastssim.worst = 100.0; + + cpi->psnrhvs.worst = 100.0; + + if (cpi->b_calculate_blockiness) { + cpi->total_blockiness = 0; + cpi->worst_blockiness = 0.0; + } + + if (cpi->b_calculate_consistency) { + cpi->ssim_vars = vpx_malloc(sizeof(*cpi->ssim_vars)*720*480); + cpi->worst_consistency = 100.0; } #endif @@ -1722,64 +1808,64 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->fn_ptr[BT].sdx8f = SDX8F; \ cpi->fn_ptr[BT].sdx4df = SDX4DF; - BFP(BLOCK_32X16, vp9_sad32x16, vp9_sad32x16_avg, - vp9_variance32x16, vp9_sub_pixel_variance32x16, - vp9_sub_pixel_avg_variance32x16, NULL, NULL, vp9_sad32x16x4d) + BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, + vpx_variance32x16, vp9_sub_pixel_variance32x16, + 
vp9_sub_pixel_avg_variance32x16, NULL, NULL, vpx_sad32x16x4d) - BFP(BLOCK_16X32, vp9_sad16x32, vp9_sad16x32_avg, - vp9_variance16x32, vp9_sub_pixel_variance16x32, - vp9_sub_pixel_avg_variance16x32, NULL, NULL, vp9_sad16x32x4d) + BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, + vpx_variance16x32, vp9_sub_pixel_variance16x32, + vp9_sub_pixel_avg_variance16x32, NULL, NULL, vpx_sad16x32x4d) - BFP(BLOCK_64X32, vp9_sad64x32, vp9_sad64x32_avg, - vp9_variance64x32, vp9_sub_pixel_variance64x32, - vp9_sub_pixel_avg_variance64x32, NULL, NULL, vp9_sad64x32x4d) + BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, + vpx_variance64x32, vp9_sub_pixel_variance64x32, + vp9_sub_pixel_avg_variance64x32, NULL, NULL, vpx_sad64x32x4d) - BFP(BLOCK_32X64, vp9_sad32x64, vp9_sad32x64_avg, - vp9_variance32x64, vp9_sub_pixel_variance32x64, - vp9_sub_pixel_avg_variance32x64, NULL, NULL, vp9_sad32x64x4d) + BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, + vpx_variance32x64, vp9_sub_pixel_variance32x64, + vp9_sub_pixel_avg_variance32x64, NULL, NULL, vpx_sad32x64x4d) - BFP(BLOCK_32X32, vp9_sad32x32, vp9_sad32x32_avg, - vp9_variance32x32, vp9_sub_pixel_variance32x32, - vp9_sub_pixel_avg_variance32x32, vp9_sad32x32x3, vp9_sad32x32x8, - vp9_sad32x32x4d) + BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, + vpx_variance32x32, vp9_sub_pixel_variance32x32, + vp9_sub_pixel_avg_variance32x32, vpx_sad32x32x3, vpx_sad32x32x8, + vpx_sad32x32x4d) - BFP(BLOCK_64X64, vp9_sad64x64, vp9_sad64x64_avg, - vp9_variance64x64, vp9_sub_pixel_variance64x64, - vp9_sub_pixel_avg_variance64x64, vp9_sad64x64x3, vp9_sad64x64x8, - vp9_sad64x64x4d) + BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, + vpx_variance64x64, vp9_sub_pixel_variance64x64, + vp9_sub_pixel_avg_variance64x64, vpx_sad64x64x3, vpx_sad64x64x8, + vpx_sad64x64x4d) - BFP(BLOCK_16X16, vp9_sad16x16, vp9_sad16x16_avg, - vp9_variance16x16, vp9_sub_pixel_variance16x16, - vp9_sub_pixel_avg_variance16x16, vp9_sad16x16x3, vp9_sad16x16x8, - vp9_sad16x16x4d) + 
BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, + vpx_variance16x16, vp9_sub_pixel_variance16x16, + vp9_sub_pixel_avg_variance16x16, vpx_sad16x16x3, vpx_sad16x16x8, + vpx_sad16x16x4d) - BFP(BLOCK_16X8, vp9_sad16x8, vp9_sad16x8_avg, - vp9_variance16x8, vp9_sub_pixel_variance16x8, + BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, + vpx_variance16x8, vp9_sub_pixel_variance16x8, vp9_sub_pixel_avg_variance16x8, - vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d) + vpx_sad16x8x3, vpx_sad16x8x8, vpx_sad16x8x4d) - BFP(BLOCK_8X16, vp9_sad8x16, vp9_sad8x16_avg, - vp9_variance8x16, vp9_sub_pixel_variance8x16, + BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, + vpx_variance8x16, vp9_sub_pixel_variance8x16, vp9_sub_pixel_avg_variance8x16, - vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d) + vpx_sad8x16x3, vpx_sad8x16x8, vpx_sad8x16x4d) - BFP(BLOCK_8X8, vp9_sad8x8, vp9_sad8x8_avg, - vp9_variance8x8, vp9_sub_pixel_variance8x8, + BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, + vpx_variance8x8, vp9_sub_pixel_variance8x8, vp9_sub_pixel_avg_variance8x8, - vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d) + vpx_sad8x8x3, vpx_sad8x8x8, vpx_sad8x8x4d) - BFP(BLOCK_8X4, vp9_sad8x4, vp9_sad8x4_avg, - vp9_variance8x4, vp9_sub_pixel_variance8x4, - vp9_sub_pixel_avg_variance8x4, NULL, vp9_sad8x4x8, vp9_sad8x4x4d) + BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, + vpx_variance8x4, vp9_sub_pixel_variance8x4, + vp9_sub_pixel_avg_variance8x4, NULL, vpx_sad8x4x8, vpx_sad8x4x4d) - BFP(BLOCK_4X8, vp9_sad4x8, vp9_sad4x8_avg, - vp9_variance4x8, vp9_sub_pixel_variance4x8, - vp9_sub_pixel_avg_variance4x8, NULL, vp9_sad4x8x8, vp9_sad4x8x4d) + BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, + vpx_variance4x8, vp9_sub_pixel_variance4x8, + vp9_sub_pixel_avg_variance4x8, NULL, vpx_sad4x8x8, vpx_sad4x8x4d) - BFP(BLOCK_4X4, vp9_sad4x4, vp9_sad4x4_avg, - vp9_variance4x4, vp9_sub_pixel_variance4x4, + BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, + vpx_variance4x4, vp9_sub_pixel_variance4x4, vp9_sub_pixel_avg_variance4x4, - vp9_sad4x4x3, 
vp9_sad4x4x8, vp9_sad4x4x4d) + vpx_sad4x4x3, vpx_sad4x4x8, vpx_sad4x4x4d) #if CONFIG_VP9_HIGHBITDEPTH highbd_set_var_fns(cpi); @@ -1798,6 +1884,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, return cpi; } +#define SNPRINT(H, T) \ + snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T)) + +#define SNPRINT2(H, T, V) \ + snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) void vp9_remove_compressor(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -1809,11 +1900,11 @@ void vp9_remove_compressor(VP9_COMP *cpi) { if (cpi && (cm->current_video_frame > 0)) { #if CONFIG_INTERNAL_STATS - vp9_clear_system_state(); - // printf("\n8x8-4x4:%d-%d\n", cpi->t8x8_count, cpi->t4x4_count); if (cpi->oxcf.pass != 1) { + char headings[512] = {0}; + char results[512] = {0}; FILE *f = fopen("opsnr.stt", "a"); double time_encoded = (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) / 10000000.000; @@ -1831,25 +1922,50 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vpx_sse_to_psnr((double)cpi->totalp_samples, peak, (double)cpi->totalp_sq_error); const double total_ssim = 100 * pow(cpi->summed_quality / - cpi->summed_weights, 8.0); + cpi->summed_weights, 8.0); const double totalp_ssim = 100 * pow(cpi->summedp_quality / - cpi->summedp_weights, 8.0); + cpi->summedp_weights, 8.0); - fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t" - "VPXSSIM\tVPSSIMP\t Time(ms)\n"); - fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f\n", - dr, cpi->total / cpi->count, total_psnr, - cpi->totalp / cpi->count, totalp_psnr, total_ssim, totalp_ssim, - total_encode_time); - } + snprintf(headings, sizeof(headings), + "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t" + "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t" + "WstPsnr\tWstSsim\tWstFast\tWstHVS"); + snprintf(results, sizeof(results), + "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" + "%7.3f\t%7.3f\t%7.3f\t%7.3f" + "%7.3f\t%7.3f\t%7.3f\t%7.3f", + dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr, + cpi->psnrp.stat[ALL] 
/ cpi->count, totalp_psnr, + total_ssim, totalp_ssim, + cpi->fastssim.stat[ALL] / cpi->count, + cpi->psnrhvs.stat[ALL] / cpi->count, + cpi->psnr.worst, cpi->worst_ssim, cpi->fastssim.worst, + cpi->psnrhvs.worst); - if (cpi->b_calculate_ssimg) { - fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t Time(ms)\n"); - fprintf(f, "%7.2f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr, - cpi->total_ssimg_y / cpi->count, - cpi->total_ssimg_u / cpi->count, - cpi->total_ssimg_v / cpi->count, - cpi->total_ssimg_all / cpi->count, total_encode_time); + if (cpi->b_calculate_blockiness) { + SNPRINT(headings, "\t Block\tWstBlck"); + SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count); + SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness); + } + + if (cpi->b_calculate_consistency) { + double consistency = + vpx_sse_to_psnr((double)cpi->totalp_samples, peak, + (double)cpi->total_inconsistency); + + SNPRINT(headings, "\tConsist\tWstCons"); + SNPRINT2(results, "\t%7.3f", consistency); + SNPRINT2(results, "\t%7.3f", cpi->worst_consistency); + } + + if (cpi->b_calculate_ssimg) { + SNPRINT(headings, "\t SSIMG\tWtSSIMG"); + SNPRINT2(results, "\t%7.3f", cpi->ssimg.stat[ALL] / cpi->count); + SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst); + } + + fprintf(f, "%s\t Time\n", headings); + fprintf(f, "%s\t%8.0f\n", results, total_encode_time); } fclose(f); @@ -1908,6 +2024,10 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #endif vp9_remove_common(cm); + vp9_free_ref_frame_buffers(cm->buffer_pool); +#if CONFIG_VP9_POSTPROC + vp9_free_postproc_buffers(cm); +#endif vpx_free(cpi); #if CONFIG_VP9_TEMPORAL_DENOISING @@ -1936,6 +2056,65 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #endif } +/* TODO(yaowu): The block_variance calls the unoptimized versions of variance() + * and highbd_8_variance(). It should not. 
+ */ +static void encoder_variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + int i, j; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + + a += a_stride; + b += b_stride; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void encoder_highbd_variance64(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, uint64_t *sse, + uint64_t *sum) { + int i, j; + + uint16_t *a = CONVERT_TO_SHORTPTR(a8); + uint16_t *b = CONVERT_TO_SHORTPTR(b8); + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + a += a_stride; + b += b_stride; + } +} + +static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, + unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h, + &sse_long, &sum_long); + *sse = (unsigned int)sse_long; + *sum = (int)sum_long; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + static int64_t get_sse(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height) { @@ -1947,15 +2126,15 @@ static int64_t get_sse(const uint8_t *a, int a_stride, int x, y; if (dw > 0) { - variance(&a[width - dw], a_stride, &b[width - dw], b_stride, - dw, height, &sse, &sum); + encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride, + dw, height, &sse, &sum); total_sse += sse; } if (dh > 0) { - variance(&a[(height - dh) * a_stride], a_stride, - &b[(height - dh) * b_stride], b_stride, - width - dw, dh, &sse, &sum); + encoder_variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); total_sse += sse; } @@ -1963,7 +2142,7 @@ static int64_t get_sse(const 
uint8_t *a, int a_stride, const uint8_t *pa = a; const uint8_t *pb = b; for (x = 0; x < width / 16; ++x) { - vp9_mse16x16(pa, a_stride, pb, b_stride, &sse); + vpx_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; pa += 16; @@ -2008,21 +2187,22 @@ static int64_t highbd_get_sse(const uint8_t *a, int a_stride, unsigned int sse = 0; int sum = 0; if (dw > 0) { - highbd_variance(&a[width - dw], a_stride, &b[width - dw], b_stride, - dw, height, &sse, &sum); + encoder_highbd_8_variance(&a[width - dw], a_stride, + &b[width - dw], b_stride, + dw, height, &sse, &sum); total_sse += sse; } if (dh > 0) { - highbd_variance(&a[(height - dh) * a_stride], a_stride, - &b[(height - dh) * b_stride], b_stride, - width - dw, dh, &sse, &sum); + encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); total_sse += sse; } for (y = 0; y < height / 16; ++y) { const uint8_t *pa = a; const uint8_t *pb = b; for (x = 0; x < width / 16; ++x) { - vp9_highbd_mse16x16(pa, a_stride, pb, b_stride, &sse); + vpx_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; pa += 16; pb += 16; @@ -2147,8 +2327,9 @@ static void generate_psnr_packet(VP9_COMP *cpi) { pkt.data.psnr.psnr[i] = psnr.psnr[i]; } pkt.kind = VPX_CODEC_PSNR_PKT; - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].psnr_pkt = pkt.data.psnr; + if (cpi->use_svc) + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].psnr_pkt = pkt.data.psnr; else vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); } @@ -2493,22 +2674,22 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx); - vpx_memcpy(cpi->interp_filter_selected[ALTREF_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); + memcpy(cpi->interp_filter_selected[ALTREF_FRAME], + cpi->interp_filter_selected[0], + 
sizeof(cpi->interp_filter_selected[0])); } if (cpi->refresh_golden_frame) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); if (!cpi->rc.is_src_frame_alt_ref) - vpx_memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); + memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], + cpi->interp_filter_selected[0], + sizeof(cpi->interp_filter_selected[0])); else - vpx_memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], - cpi->interp_filter_selected[ALTREF_FRAME], - sizeof(cpi->interp_filter_selected[ALTREF_FRAME])); + memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], + cpi->interp_filter_selected[ALTREF_FRAME], + sizeof(cpi->interp_filter_selected[ALTREF_FRAME])); } } @@ -2516,9 +2697,9 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); if (!cpi->rc.is_src_frame_alt_ref) - vpx_memcpy(cpi->interp_filter_selected[LAST_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); + memcpy(cpi->interp_filter_selected[LAST_FRAME], + cpi->interp_filter_selected[0], + sizeof(cpi->interp_filter_selected[0])); } #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { @@ -2598,7 +2779,10 @@ void vp9_scale_references(VP9_COMP *cpi) { #if CONFIG_VP9_HIGHBITDEPTH if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { const int new_fb = get_free_fb(cm); - RefCntBuffer *const new_fb_ptr = &pool->frame_bufs[new_fb]; + RefCntBuffer *new_fb_ptr = NULL; + if (cm->new_fb_idx == INVALID_IDX) + return; + new_fb_ptr = &pool->frame_bufs[new_fb]; cm->cur_frame = &pool->frame_bufs[new_fb]; vp9_realloc_frame_buffer(&pool->frame_bufs[new_fb].buf, cm->width, cm->height, @@ -2610,7 +2794,10 @@ void vp9_scale_references(VP9_COMP *cpi) { #else if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { const int new_fb = get_free_fb(cm); - 
RefCntBuffer *const new_fb_ptr = &pool->frame_bufs[new_fb]; + RefCntBuffer *new_fb_ptr = NULL; + if (cm->new_fb_idx == INVALID_IDX) + return; + new_fb_ptr = &pool->frame_bufs[new_fb]; vp9_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, @@ -2679,19 +2866,25 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); if (cpi->twopass.total_left_stats.coded_error != 0.0) - fprintf(f, "%10u %dx%d %10d %10d %10d %10d" - "%10"PRId64" %10"PRId64" %10"PRId64" %10"PRId64" %10d " - "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" + fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d" + "%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" " + "%10"PRId64" %10"PRId64" %10d " + "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" "%6d %6d %5d %5d %5d " "%10"PRId64" %10.3lf" "%10lf %8u %10"PRId64" %10d %10d\n", cpi->common.current_video_frame, cm->width, cm->height, + cpi->rc.source_alt_ref_pending, + cpi->rc.source_alt_ref_active, cpi->rc.this_frame_target, cpi->rc.projected_frame_size, cpi->rc.projected_frame_size / cpi->common.MBs, (cpi->rc.projected_frame_size - cpi->rc.this_frame_target), cpi->rc.vbr_bits_off_target, + cpi->rc.vbr_bits_off_target_fast, + cpi->twopass.extend_minq, + cpi->twopass.extend_minq_fast, cpi->rc.total_target_vs_actual, (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target), cpi->rc.total_actual_bits, cm->base_qindex, @@ -2820,7 +3013,7 @@ static void init_motion_estimation(VP9_COMP *cpi) { } } -void set_frame_size(VP9_COMP *cpi) { +static void set_frame_size(VP9_COMP *cpi) { int ref_frame; VP9_COMMON *const cm = &cpi->common; VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -2913,11 +3106,11 @@ static void encode_without_recode_loop(VP9_COMP *cpi) { set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); vp9_set_quantizer(cm, q); - vp9_set_vbp_thresholds(cpi, q); + vp9_set_variance_partition_thresholds(cpi, q); setup_frame(cpi); - vp9_suppress_active_map(cpi); + 
suppress_active_map(cpi); // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { @@ -2927,7 +3120,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi) { } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { vp9_cyclic_refresh_setup(cpi); } - vp9_apply_active_map(cpi); + apply_active_map(cpi); // transform / motion compensation build reconstruction frame vp9_encode_frame(cpi); @@ -3281,7 +3474,7 @@ static void set_arf_sign_bias(VP9_COMP *cpi) { cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias; } -int setup_interp_filter_search_mask(VP9_COMP *cpi) { +static int setup_interp_filter_search_mask(VP9_COMP *cpi) { INTERP_FILTER ifilter; int ref_total[MAX_REF_FRAMES] = {0}; MV_REFERENCE_FRAME ref; @@ -3358,34 +3551,41 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } } if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) { - // Use the last frame context for the empty frame. + // Use context 0 for intra only empty frame, but the last frame context + // for other empty frames. + if (cpi->svc.encode_empty_frame_state == ENCODING) { + if (cpi->svc.encode_intra_empty_frame != 0) + cm->frame_context_idx = 0; + else + cm->frame_context_idx = FRAME_CONTEXTS - 1; + } else { cm->frame_context_idx = - (cpi->svc.encode_empty_frame_state == ENCODING) ? FRAME_CONTEXTS - 1 : cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id; + } + + cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode; // The probs will be updated based on the frame type of its previous // frame if frame_parallel_decoding_mode is 0. The type may vary for // the frame after a key frame in base layer since we may drop enhancement // layers. So set frame_parallel_decoding_mode to 1 in this case. 
- if (cpi->svc.number_temporal_layers == 1) { - if (cpi->svc.spatial_layer_id == 0 && - cpi->svc.layer_context[0].last_frame_type == KEY_FRAME) - cm->frame_parallel_decoding_mode = 1; - else - cm->frame_parallel_decoding_mode = 0; - } else if (cpi->svc.spatial_layer_id == 0) { - // Find the 2nd frame in temporal base layer and 1st frame in temporal - // enhancement layers from the key frame. - int i; - for (i = 0; i < cpi->svc.number_temporal_layers; ++i) { - if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) { + if (cm->frame_parallel_decoding_mode == 0) { + if (cpi->svc.number_temporal_layers == 1) { + if (cpi->svc.spatial_layer_id == 0 && + cpi->svc.layer_context[0].last_frame_type == KEY_FRAME) cm->frame_parallel_decoding_mode = 1; - break; + } else if (cpi->svc.spatial_layer_id == 0) { + // Find the 2nd frame in temporal base layer and 1st frame in temporal + // enhancement layers from the key frame. + int i; + for (i = 0; i < cpi->svc.number_temporal_layers; ++i) { + if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) { + cm->frame_parallel_decoding_mode = 1; + break; + } } } - if (i == cpi->svc.number_temporal_layers) - cm->frame_parallel_decoding_mode = 0; } } @@ -3404,8 +3604,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_clear_system_state(); #if CONFIG_INTERNAL_STATS - vpx_memset(cpi->mode_chosen_counts, 0, - MAX_MODES * sizeof(*cpi->mode_chosen_counts)); + memset(cpi->mode_chosen_counts, 0, + MAX_MODES * sizeof(*cpi->mode_chosen_counts)); #endif if (cpi->sf.recode_loop == DISALLOW_RECODE) { @@ -3530,9 +3730,11 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } cm->prev_frame = cm->cur_frame; - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type = - cm->frame_type; + if (cpi->use_svc) + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id].last_frame_type = + cm->frame_type; } static void SvcEncode(VP9_COMP 
*cpi, size_t *size, uint8_t *dest, @@ -3675,8 +3877,8 @@ static int frame_is_reference(const VP9_COMP *cpi) { cm->seg.update_data; } -void adjust_frame_rate(VP9_COMP *cpi, - const struct lookahead_entry *source) { +static void adjust_frame_rate(VP9_COMP *cpi, + const struct lookahead_entry *source) { int64_t this_duration; int step = 0; @@ -3755,6 +3957,21 @@ static void check_src_altref(VP9_COMP *cpi, } } +#if CONFIG_INTERNAL_STATS +extern double vp9_get_blockiness(const unsigned char *img1, int img1_pitch, + const unsigned char *img2, int img2_pitch, + int width, int height); +#endif + +static void adjust_image_stat(double y, double u, double v, double all, + ImageStat *s) { + s->stat[Y] += y; + s->stat[U] += u; + s->stat[V] += v; + s->stat[ALL] += all; + s->worst = MIN(s->worst, all); +} + int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush) { @@ -3778,6 +3995,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif if (oxcf->pass == 2) vp9_restore_layer_context(cpi); + } else if (is_one_pass_cbr_svc(cpi)) { + vp9_one_pass_cbr_svc_start_layer(cpi); } vpx_usec_timer_start(&cmptimer); @@ -3796,9 +4015,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // Normal defaults cm->reset_frame_context = 0; cm->refresh_frame_context = 1; - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_alt_ref_frame = 0; + if (!is_one_pass_cbr_svc(cpi)) { + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 0; + } // Should we encode an arf frame. 
arf_src_index = get_arf_src_index(cpi); @@ -3835,6 +4056,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } cm->show_frame = 0; + cm->intra_only = 0; cpi->refresh_alt_ref_frame = 1; cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 0; @@ -3853,12 +4075,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Read in the source frame. -#if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi)) + if (cpi->use_svc) source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush); else -#endif source = vp9_lookahead_pop(cpi->lookahead, flush); + if (source != NULL) { cm->show_frame = 1; cm->intra_only = 0; @@ -3907,8 +4128,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, adjust_frame_rate(cpi, source); } - if (cpi->svc.number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) { + if (is_one_pass_cbr_svc(cpi)) { vp9_update_temporal_layer_framerate(cpi); vp9_restore_layer_context(cpi); } @@ -3990,11 +4210,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Save layer specific state. 
- if ((cpi->svc.number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) || - ((cpi->svc.number_temporal_layers > 1 || - cpi->svc.number_spatial_layers > 1) && - oxcf->pass == 2)) { + if (is_one_pass_cbr_svc(cpi) || + ((cpi->svc.number_temporal_layers > 1 || + cpi->svc.number_spatial_layers > 1) && + oxcf->pass == 2)) { vp9_save_layer_context(cpi); } @@ -4007,6 +4226,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #if CONFIG_INTERNAL_STATS if (oxcf->pass != 1) { + double samples; cpi->bytes += (int)(*size); if (cm->show_frame) { @@ -4024,12 +4244,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, calc_psnr(orig, recon, &psnr); #endif // CONFIG_VP9_HIGHBITDEPTH - cpi->total += psnr.psnr[0]; - cpi->total_y += psnr.psnr[1]; - cpi->total_u += psnr.psnr[2]; - cpi->total_v += psnr.psnr[3]; + adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3], + psnr.psnr[0], &cpi->psnr); cpi->total_sq_error += psnr.sse[0]; cpi->total_samples += psnr.samples[0]; + samples = psnr.samples[0]; { PSNR_STATS psnr2; @@ -4053,18 +4272,16 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, vp9_clear_system_state(); #if CONFIG_VP9_HIGHBITDEPTH - calc_highbd_psnr(orig, pp, &psnr, cpi->td.mb.e_mbd.bd, + calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); #else calc_psnr(orig, pp, &psnr2); #endif // CONFIG_VP9_HIGHBITDEPTH - cpi->totalp += psnr2.psnr[0]; - cpi->totalp_y += psnr2.psnr[1]; - cpi->totalp_u += psnr2.psnr[2]; - cpi->totalp_v += psnr2.psnr[3]; cpi->totalp_sq_error += psnr2.sse[0]; cpi->totalp_samples += psnr2.samples[0]; + adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3], + psnr2.psnr[0], &cpi->psnrp); #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { @@ -4077,6 +4294,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, frame_ssim2 = vp9_calc_ssim(orig, recon, &weight); #endif // CONFIG_VP9_HIGHBITDEPTH + cpi->worst_ssim= 
MIN(cpi->worst_ssim, frame_ssim2); cpi->summed_quality += frame_ssim2 * weight; cpi->summed_weights += weight; @@ -4104,7 +4322,40 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif } } + if (cpi->b_calculate_blockiness) { +#if CONFIG_VP9_HIGHBITDEPTH + if (!cm->use_highbitdepth) +#endif + { + double frame_blockiness = vp9_get_blockiness( + cpi->Source->y_buffer, cpi->Source->y_stride, + cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride, + cpi->Source->y_width, cpi->Source->y_height); + cpi->worst_blockiness = MAX(cpi->worst_blockiness, frame_blockiness); + cpi->total_blockiness += frame_blockiness; + } + } + if (cpi->b_calculate_consistency) { +#if CONFIG_VP9_HIGHBITDEPTH + if (!cm->use_highbitdepth) +#endif + { + double this_inconsistency = vp9_get_ssim_metrics( + cpi->Source->y_buffer, cpi->Source->y_stride, + cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride, + cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars, + &cpi->metrics, 1); + + const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1); + double consistency = vpx_sse_to_psnr(samples, peak, + (double)cpi->total_inconsistency); + if (consistency > 0.0) + cpi->worst_consistency = MIN(cpi->worst_consistency, + consistency); + cpi->total_inconsistency += this_inconsistency; + } + } if (cpi->b_calculate_ssimg) { double y, u, v, frame_all; @@ -4119,10 +4370,25 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #else frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v); #endif // CONFIG_VP9_HIGHBITDEPTH - cpi->total_ssimg_y += y; - cpi->total_ssimg_u += u; - cpi->total_ssimg_v += v; - cpi->total_ssimg_all += frame_all; + adjust_image_stat(y, u, v, frame_all, &cpi->ssimg); + } +#if CONFIG_VP9_HIGHBITDEPTH + if (!cm->use_highbitdepth) +#endif + { + double y, u, v, frame_all; + frame_all = vp9_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u, + &v); + adjust_image_stat(y, u, v, frame_all, &cpi->fastssim); + /* 
TODO(JBB): add 10/12 bit support */ + } +#if CONFIG_VP9_HIGHBITDEPTH + if (!cm->use_highbitdepth) +#endif + { + double y, u, v, frame_all; + frame_all = vp9_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v); + adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs); } } } @@ -4130,8 +4396,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif if (is_two_pass_svc(cpi)) { - if (cpi->svc.encode_empty_frame_state == ENCODING) + if (cpi->svc.encode_empty_frame_state == ENCODING) { cpi->svc.encode_empty_frame_state = ENCODED; + cpi->svc.encode_intra_empty_frame = 0; + } if (cm->show_frame) { ++cpi->svc.spatial_layer_to_encode; @@ -4141,6 +4409,12 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // May need the empty frame after an visible frame. cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE; } + } else if (is_one_pass_cbr_svc(cpi)) { + if (cm->show_frame) { + ++cpi->svc.spatial_layer_to_encode; + if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers) + cpi->svc.spatial_layer_to_encode = 0; + } } return 0; } diff --git a/media/libvpx/vp9/encoder/vp9_encoder.h b/media/libvpx/vp9/encoder/vp9_encoder.h index 914080c6f2a..6ce4a67cdd1 100644 --- a/media/libvpx/vp9/encoder/vp9_encoder.h +++ b/media/libvpx/vp9/encoder/vp9_encoder.h @@ -34,6 +34,9 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" +#if CONFIG_INTERNAL_STATS +#include "vp9/encoder/vp9_ssim.h" +#endif #include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_tokenize.h" @@ -191,10 +194,10 @@ typedef struct VP9EncoderConfig { int ss_number_layers; // Number of spatial layers. int ts_number_layers; // Number of temporal layers. // Bitrate allocation for spatial layers. 
+ int layer_target_bitrate[VPX_MAX_LAYERS]; int ss_target_bitrate[VPX_SS_MAX_LAYERS]; int ss_enable_auto_arf[VPX_SS_MAX_LAYERS]; // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. - int ts_target_bitrate[VPX_TS_MAX_LAYERS]; int ts_rate_decimator[VPX_TS_MAX_LAYERS]; int enable_auto_arf; @@ -234,6 +237,7 @@ typedef struct VP9EncoderConfig { int use_highbitdepth; #endif vpx_color_space_t color_space; + VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; } VP9EncoderConfig; static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { @@ -272,6 +276,18 @@ typedef struct ActiveMap { unsigned char *map; } ActiveMap; +typedef enum { + Y, + U, + V, + ALL +} STAT_TYPE; + +typedef struct IMAGE_STAT { + double stat[ALL+1]; + double worst; +} ImageStat; + typedef struct VP9_COMP { QUANTS quants; ThreadData td; @@ -388,19 +404,16 @@ typedef struct VP9_COMP { unsigned int mode_chosen_counts[MAX_MODES]; int count; - double total_y; - double total_u; - double total_v; - double total; uint64_t total_sq_error; uint64_t total_samples; + ImageStat psnr; - double totalp_y; - double totalp_u; - double totalp_v; - double totalp; uint64_t totalp_sq_error; uint64_t totalp_samples; + ImageStat psnrp; + + double total_blockiness; + double worst_blockiness; int bytes; double summed_quality; @@ -408,14 +421,21 @@ typedef struct VP9_COMP { double summedp_quality; double summedp_weights; unsigned int tot_recode_hits; + double worst_ssim; - - double total_ssimg_y; - double total_ssimg_u; - double total_ssimg_v; - double total_ssimg_all; + ImageStat ssimg; + ImageStat fastssim; + ImageStat psnrhvs; int b_calculate_ssimg; + int b_calculate_blockiness; + + int b_calculate_consistency; + + double total_inconsistency; + double worst_consistency; + Ssimv *ssim_vars; + Metrics metrics; #endif int b_calculate_psnr; @@ -460,10 +480,11 @@ typedef struct VP9_COMP { int resize_pending; // VAR_BASED_PARTITION thresholds - int64_t vbp_threshold; - int64_t 
vbp_threshold_bsize_min; - int64_t vbp_threshold_bsize_max; - int64_t vbp_threshold_16x16; + // 0 - threshold_64x64; 1 - threshold_32x32; + // 2 - threshold_16x16; 3 - vbp_threshold_8x8; + int64_t vbp_thresholds[4]; + int64_t vbp_threshold_minmax; + int64_t vbp_threshold_sad; BLOCK_SIZE vbp_bsize_min; // Multi-threading @@ -508,6 +529,8 @@ int vp9_update_entropy(VP9_COMP *cpi, int update); int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols); +int vp9_get_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols); + int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode); @@ -589,9 +612,11 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) { - return cpi->use_svc && - ((cpi->svc.number_spatial_layers > 1) || - (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.pass != 0)); + return cpi->use_svc && cpi->oxcf.pass != 0; +} + +static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) { + return (cpi->use_svc && cpi->oxcf.pass == 0); } static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { @@ -620,6 +645,8 @@ static INLINE int *cond_cost_list(const struct VP9_COMP *cpi, int *cost_list) { void vp9_new_framerate(VP9_COMP *cpi, double framerate); +#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) + #ifdef __cplusplus } // extern "C" #endif diff --git a/media/libvpx/vp9/encoder/vp9_ethread.c b/media/libvpx/vp9/encoder/vp9_ethread.c index 46814cdfd65..8700ccdaecd 100644 --- a/media/libvpx/vp9/encoder/vp9_ethread.c +++ b/media/libvpx/vp9/encoder/vp9_ethread.c @@ -124,8 +124,8 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) { thread_data->td->rd_counts = cpi->td.rd_counts; } if (thread_data->td->counts != &cpi->common.counts) { - vpx_memcpy(thread_data->td->counts, &cpi->common.counts, - sizeof(cpi->common.counts)); + 
memcpy(thread_data->td->counts, &cpi->common.counts, + sizeof(cpi->common.counts)); } // Handle use_nonrd_pick_mode case. diff --git a/media/libvpx/vp9/encoder/vp9_extend.c b/media/libvpx/vp9/encoder/vp9_extend.c index a1d7daac465..6e1ed365dab 100644 --- a/media/libvpx/vp9/encoder/vp9_extend.c +++ b/media/libvpx/vp9/encoder/vp9_extend.c @@ -9,6 +9,7 @@ */ #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_extend.h" @@ -27,9 +28,9 @@ static void copy_and_extend_plane(const uint8_t *src, int src_pitch, uint8_t *dst_ptr2 = dst + w; for (i = 0; i < h; i++) { - vpx_memset(dst_ptr1, src_ptr1[0], extend_left); - vpx_memcpy(dst_ptr1 + extend_left, src_ptr1, w); - vpx_memset(dst_ptr2, src_ptr2[0], extend_right); + memset(dst_ptr1, src_ptr1[0], extend_left); + memcpy(dst_ptr1 + extend_left, src_ptr1, w); + memset(dst_ptr2, src_ptr2[0], extend_right); src_ptr1 += src_pitch; src_ptr2 += src_pitch; dst_ptr1 += dst_pitch; @@ -45,12 +46,12 @@ static void copy_and_extend_plane(const uint8_t *src, int src_pitch, linesize = extend_left + extend_right + w; for (i = 0; i < extend_top; i++) { - vpx_memcpy(dst_ptr1, src_ptr1, linesize); + memcpy(dst_ptr1, src_ptr1, linesize); dst_ptr1 += dst_pitch; } for (i = 0; i < extend_bottom; i++) { - vpx_memcpy(dst_ptr2, src_ptr2, linesize); + memcpy(dst_ptr2, src_ptr2, linesize); dst_ptr2 += dst_pitch; } } @@ -73,7 +74,7 @@ static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch, for (i = 0; i < h; i++) { vpx_memset16(dst_ptr1, src_ptr1[0], extend_left); - vpx_memcpy(dst_ptr1 + extend_left, src_ptr1, w * sizeof(uint16_t)); + memcpy(dst_ptr1 + extend_left, src_ptr1, w * sizeof(uint16_t)); vpx_memset16(dst_ptr2, src_ptr2[0], extend_right); src_ptr1 += src_pitch; src_ptr2 += src_pitch; @@ -90,12 +91,12 @@ static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch, linesize = extend_left + extend_right + w; for (i = 0; i < extend_top; i++) { - 
vpx_memcpy(dst_ptr1, src_ptr1, linesize * sizeof(uint16_t)); + memcpy(dst_ptr1, src_ptr1, linesize * sizeof(uint16_t)); dst_ptr1 += dst_pitch; } for (i = 0; i < extend_bottom; i++) { - vpx_memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t)); + memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t)); dst_ptr2 += dst_pitch; } } diff --git a/media/libvpx/vp9/encoder/vp9_fastssim.c b/media/libvpx/vp9/encoder/vp9_fastssim.c new file mode 100644 index 00000000000..f1d408cbe7b --- /dev/null +++ b/media/libvpx/vp9/encoder/vp9_fastssim.c @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + * This code was originally written by: Nathan E. Egge, at the Daala + * project. + */ +#include +#include +#include "./vpx_config.h" +#include "./vp9_rtcd.h" +#include "vp9/encoder/vp9_ssim.h" +/* TODO(jbb): High bit depth version of this code needed */ +typedef struct fs_level fs_level; +typedef struct fs_ctx fs_ctx; + +#define SSIM_C1 (255 * 255 * 0.01 * 0.01) +#define SSIM_C2 (255 * 255 * 0.03 * 0.03) + +#define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b)) +#define FS_MAXI(_a, _b) ((_a) > (_b) ? 
(_a) : (_b)) + +struct fs_level { + uint16_t *im1; + uint16_t *im2; + double *ssim; + int w; + int h; +}; + +struct fs_ctx { + fs_level *level; + int nlevels; + unsigned *col_buf; +}; + +static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) { + unsigned char *data; + size_t data_size; + int lw; + int lh; + int l; + lw = (_w + 1) >> 1; + lh = (_h + 1) >> 1; + data_size = _nlevels * sizeof(fs_level) + + 2 * (lw + 8) * 8 * sizeof(*_ctx->col_buf); + for (l = 0; l < _nlevels; l++) { + size_t im_size; + size_t level_size; + im_size = lw * (size_t) lh; + level_size = 2 * im_size * sizeof(*_ctx->level[l].im1); + level_size += sizeof(*_ctx->level[l].ssim) - 1; + level_size /= sizeof(*_ctx->level[l].ssim); + level_size += im_size; + level_size *= sizeof(*_ctx->level[l].ssim); + data_size += level_size; + lw = (lw + 1) >> 1; + lh = (lh + 1) >> 1; + } + data = (unsigned char *) malloc(data_size); + _ctx->level = (fs_level *) data; + _ctx->nlevels = _nlevels; + data += _nlevels * sizeof(*_ctx->level); + lw = (_w + 1) >> 1; + lh = (_h + 1) >> 1; + for (l = 0; l < _nlevels; l++) { + size_t im_size; + size_t level_size; + _ctx->level[l].w = lw; + _ctx->level[l].h = lh; + im_size = lw * (size_t) lh; + level_size = 2 * im_size * sizeof(*_ctx->level[l].im1); + level_size += sizeof(*_ctx->level[l].ssim) - 1; + level_size /= sizeof(*_ctx->level[l].ssim); + level_size *= sizeof(*_ctx->level[l].ssim); + _ctx->level[l].im1 = (uint16_t *) data; + _ctx->level[l].im2 = _ctx->level[l].im1 + im_size; + data += level_size; + _ctx->level[l].ssim = (double *) data; + data += im_size * sizeof(*_ctx->level[l].ssim); + lw = (lw + 1) >> 1; + lh = (lh + 1) >> 1; + } + _ctx->col_buf = (unsigned *) data; +} + +static void fs_ctx_clear(fs_ctx *_ctx) { + free(_ctx->level); +} + +static void fs_downsample_level(fs_ctx *_ctx, int _l) { + const uint16_t *src1; + const uint16_t *src2; + uint16_t *dst1; + uint16_t *dst2; + int w2; + int h2; + int w; + int h; + int i; + int j; + w = 
_ctx->level[_l].w; + h = _ctx->level[_l].h; + dst1 = _ctx->level[_l].im1; + dst2 = _ctx->level[_l].im2; + w2 = _ctx->level[_l - 1].w; + h2 = _ctx->level[_l - 1].h; + src1 = _ctx->level[_l - 1].im1; + src2 = _ctx->level[_l - 1].im2; + for (j = 0; j < h; j++) { + int j0offs; + int j1offs; + j0offs = 2 * j * w2; + j1offs = FS_MINI(2 * j + 1, h2) * w2; + for (i = 0; i < w; i++) { + int i0; + int i1; + i0 = 2 * i; + i1 = FS_MINI(i0 + 1, w2); + dst1[j * w + i] = src1[j0offs + i0] + src1[j0offs + i1] + + src1[j1offs + i0] + src1[j1offs + i1]; + dst2[j * w + i] = src2[j0offs + i0] + src2[j0offs + i1] + + src2[j1offs + i0] + src2[j1offs + i1]; + } + } +} + +static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1, + int _s1ystride, const unsigned char *_src2, + int _s2ystride, int _w, int _h) { + uint16_t *dst1; + uint16_t *dst2; + int w; + int h; + int i; + int j; + w = _ctx->level[0].w; + h = _ctx->level[0].h; + dst1 = _ctx->level[0].im1; + dst2 = _ctx->level[0].im2; + for (j = 0; j < h; j++) { + int j0; + int j1; + j0 = 2 * j; + j1 = FS_MINI(j0 + 1, _h); + for (i = 0; i < w; i++) { + int i0; + int i1; + i0 = 2 * i; + i1 = FS_MINI(i0 + 1, _w); + dst1[j * w + i] = _src1[j0 * _s1ystride + i0] + + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0] + + _src1[j1 * _s1ystride + i1]; + dst2[j * w + i] = _src2[j0 * _s2ystride + i0] + + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0] + + _src2[j1 * _s2ystride + i1]; + } + } +} + +static void fs_apply_luminance(fs_ctx *_ctx, int _l) { + unsigned *col_sums_x; + unsigned *col_sums_y; + uint16_t *im1; + uint16_t *im2; + double *ssim; + double c1; + int w; + int h; + int j0offs; + int j1offs; + int i; + int j; + w = _ctx->level[_l].w; + h = _ctx->level[_l].h; + col_sums_x = _ctx->col_buf; + col_sums_y = col_sums_x + w; + im1 = _ctx->level[_l].im1; + im2 = _ctx->level[_l].im2; + for (i = 0; i < w; i++) + col_sums_x[i] = 5 * im1[i]; + for (i = 0; i < w; i++) + col_sums_y[i] = 5 * im2[i]; + for (j = 1; j 
< 4; j++) { + j1offs = FS_MINI(j, h - 1) * w; + for (i = 0; i < w; i++) + col_sums_x[i] += im1[j1offs + i]; + for (i = 0; i < w; i++) + col_sums_y[i] += im2[j1offs + i]; + } + ssim = _ctx->level[_l].ssim; + c1 = (double) (SSIM_C1 * 4096 * (1 << 4 * _l)); + for (j = 0; j < h; j++) { + unsigned mux; + unsigned muy; + int i0; + int i1; + mux = 5 * col_sums_x[0]; + muy = 5 * col_sums_y[0]; + for (i = 1; i < 4; i++) { + i1 = FS_MINI(i, w - 1); + mux += col_sums_x[i1]; + muy += col_sums_y[i1]; + } + for (i = 0; i < w; i++) { + ssim[j * w + i] *= (2 * mux * (double) muy + c1) + / (mux * (double) mux + muy * (double) muy + c1); + if (i + 1 < w) { + i0 = FS_MAXI(0, i - 4); + i1 = FS_MINI(i + 4, w - 1); + mux += col_sums_x[i1] - col_sums_x[i0]; + muy += col_sums_x[i1] - col_sums_x[i0]; + } + } + if (j + 1 < h) { + j0offs = FS_MAXI(0, j - 4) * w; + for (i = 0; i < w; i++) + col_sums_x[i] -= im1[j0offs + i]; + for (i = 0; i < w; i++) + col_sums_y[i] -= im2[j0offs + i]; + j1offs = FS_MINI(j + 4, h - 1) * w; + for (i = 0; i < w; i++) + col_sums_x[i] += im1[j1offs + i]; + for (i = 0; i < w; i++) + col_sums_y[i] += im2[j1offs + i]; + } + } +} + +#define FS_COL_SET(_col, _joffs, _ioffs) \ + do { \ + unsigned gx; \ + unsigned gy; \ + gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ + gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ + col_sums_gx2[(_col)] = gx * (double)gx; \ + col_sums_gy2[(_col)] = gy * (double)gy; \ + col_sums_gxgy[(_col)] = gx * (double)gy; \ + } \ + while (0) + +#define FS_COL_ADD(_col, _joffs, _ioffs) \ + do { \ + unsigned gx; \ + unsigned gy; \ + gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ + gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ + col_sums_gx2[(_col)] += gx * (double)gx; \ + col_sums_gy2[(_col)] += gy * (double)gy; \ + col_sums_gxgy[(_col)] += gx * (double)gy; \ + } \ + while (0) + +#define FS_COL_SUB(_col, _joffs, _ioffs) \ + do { \ + unsigned gx; \ + unsigned gy; \ + gx = gx_buf[((j + (_joffs)) 
& 7) * stride + i + (_ioffs)]; \ + gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ + col_sums_gx2[(_col)] -= gx * (double)gx; \ + col_sums_gy2[(_col)] -= gy * (double)gy; \ + col_sums_gxgy[(_col)] -= gx * (double)gy; \ + } \ + while (0) + +#define FS_COL_COPY(_col1, _col2) \ + do { \ + col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)]; \ + col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)]; \ + col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)]; \ + } \ + while (0) + +#define FS_COL_HALVE(_col1, _col2) \ + do { \ + col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 0.5; \ + col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 0.5; \ + col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 0.5; \ + } \ + while (0) + +#define FS_COL_DOUBLE(_col1, _col2) \ + do { \ + col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 2; \ + col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 2; \ + col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 2; \ + } \ + while (0) + +static void fs_calc_structure(fs_ctx *_ctx, int _l) { + uint16_t *im1; + uint16_t *im2; + unsigned *gx_buf; + unsigned *gy_buf; + double *ssim; + double col_sums_gx2[8]; + double col_sums_gy2[8]; + double col_sums_gxgy[8]; + double c2; + int stride; + int w; + int h; + int i; + int j; + w = _ctx->level[_l].w; + h = _ctx->level[_l].h; + im1 = _ctx->level[_l].im1; + im2 = _ctx->level[_l].im2; + ssim = _ctx->level[_l].ssim; + gx_buf = _ctx->col_buf; + stride = w + 8; + gy_buf = gx_buf + 8 * stride; + memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf)); + c2 = SSIM_C2 * (1 << 4 * _l) * 16 * 104; + for (j = 0; j < h + 4; j++) { + if (j < h - 1) { + for (i = 0; i < w - 1; i++) { + unsigned g1; + unsigned g2; + unsigned gx; + unsigned gy; + g1 = abs(im1[(j + 1) * w + i + 1] - im1[j * w + i]); + g2 = abs(im1[(j + 1) * w + i] - im1[j * w + i + 1]); + gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2); + g1 = abs(im2[(j + 1) * w + i + 1] - im2[j * w + i]); + g2 = abs(im2[(j + 1) * w + i] - im2[j * w + i + 1]); + gy = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, 
g2); + gx_buf[(j & 7) * stride + i + 4] = gx; + gy_buf[(j & 7) * stride + i + 4] = gy; + } + } else { + memset(gx_buf + (j & 7) * stride, 0, stride * sizeof(*gx_buf)); + memset(gy_buf + (j & 7) * stride, 0, stride * sizeof(*gy_buf)); + } + if (j >= 4) { + int k; + col_sums_gx2[3] = col_sums_gx2[2] = col_sums_gx2[1] = col_sums_gx2[0] = 0; + col_sums_gy2[3] = col_sums_gy2[2] = col_sums_gy2[1] = col_sums_gy2[0] = 0; + col_sums_gxgy[3] = col_sums_gxgy[2] = col_sums_gxgy[1] = + col_sums_gxgy[0] = 0; + for (i = 4; i < 8; i++) { + FS_COL_SET(i, -1, 0); + FS_COL_ADD(i, 0, 0); + for (k = 1; k < 8 - i; k++) { + FS_COL_DOUBLE(i, i); + FS_COL_ADD(i, -k - 1, 0); + FS_COL_ADD(i, k, 0); + } + } + for (i = 0; i < w; i++) { + double mugx2; + double mugy2; + double mugxgy; + mugx2 = col_sums_gx2[0]; + for (k = 1; k < 8; k++) + mugx2 += col_sums_gx2[k]; + mugy2 = col_sums_gy2[0]; + for (k = 1; k < 8; k++) + mugy2 += col_sums_gy2[k]; + mugxgy = col_sums_gxgy[0]; + for (k = 1; k < 8; k++) + mugxgy += col_sums_gxgy[k]; + ssim[(j - 4) * w + i] = (2 * mugxgy + c2) / (mugx2 + mugy2 + c2); + if (i + 1 < w) { + FS_COL_SET(0, -1, 1); + FS_COL_ADD(0, 0, 1); + FS_COL_SUB(2, -3, 2); + FS_COL_SUB(2, 2, 2); + FS_COL_HALVE(1, 2); + FS_COL_SUB(3, -4, 3); + FS_COL_SUB(3, 3, 3); + FS_COL_HALVE(2, 3); + FS_COL_COPY(3, 4); + FS_COL_DOUBLE(4, 5); + FS_COL_ADD(4, -4, 5); + FS_COL_ADD(4, 3, 5); + FS_COL_DOUBLE(5, 6); + FS_COL_ADD(5, -3, 6); + FS_COL_ADD(5, 2, 6); + FS_COL_DOUBLE(6, 7); + FS_COL_ADD(6, -2, 7); + FS_COL_ADD(6, 1, 7); + FS_COL_SET(7, -1, 8); + FS_COL_ADD(7, 0, 8); + } + } + } + } +} + +#define FS_NLEVELS (4) + +/*These weights were derived from the default weights found in Wang's original + Matlab implementation: {0.0448, 0.2856, 0.2363, 0.1333}. 
+ We drop the finest scale and renormalize the rest to sum to 1.*/ + +static const double FS_WEIGHTS[FS_NLEVELS] = {0.2989654541015625, + 0.3141326904296875, 0.2473602294921875, 0.1395416259765625}; + +static double fs_average(fs_ctx *_ctx, int _l) { + double *ssim; + double ret; + int w; + int h; + int i; + int j; + w = _ctx->level[_l].w; + h = _ctx->level[_l].h; + ssim = _ctx->level[_l].ssim; + ret = 0; + for (j = 0; j < h; j++) + for (i = 0; i < w; i++) + ret += ssim[j * w + i]; + return pow(ret / (w * h), FS_WEIGHTS[_l]); +} + +static double calc_ssim(const unsigned char *_src, int _systride, + const unsigned char *_dst, int _dystride, int _w, int _h) { + fs_ctx ctx; + double ret; + int l; + ret = 1; + fs_ctx_init(&ctx, _w, _h, FS_NLEVELS); + fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h); + for (l = 0; l < FS_NLEVELS - 1; l++) { + fs_calc_structure(&ctx, l); + ret *= fs_average(&ctx, l); + fs_downsample_level(&ctx, l + 1); + } + fs_calc_structure(&ctx, l); + fs_apply_luminance(&ctx, l); + ret *= fs_average(&ctx, l); + fs_ctx_clear(&ctx); + return ret; +} + +static double convert_ssim_db(double _ssim, double _weight) { + return 10 * (log10(_weight) - log10(_weight - _ssim)); +} + +double vp9_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *ssim_y, double *ssim_u, double *ssim_v) { + double ssimv; + vp9_clear_system_state(); + + *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer, + dest->y_stride, source->y_crop_width, + source->y_crop_height); + + *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer, + dest->uv_stride, source->uv_crop_width, + source->uv_crop_height); + + *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer, + dest->uv_stride, source->uv_crop_width, + source->uv_crop_height); + ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v)); + + return convert_ssim_db(ssimv, 1.0); +} diff --git a/media/libvpx/vp9/encoder/vp9_firstpass.c 
b/media/libvpx/vp9/encoder/vp9_firstpass.c index 3f28e0ff346..856a6655c7a 100644 --- a/media/libvpx/vp9/encoder/vp9_firstpass.c +++ b/media/libvpx/vp9/encoder/vp9_firstpass.c @@ -12,9 +12,11 @@ #include #include +#include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vpx_scale/vpx_scale.h" #include "vpx_scale/yv12config.h" @@ -38,7 +40,7 @@ #define OUTPUT_FPF 0 #define ARF_STATS_OUTPUT 0 -#define GROUP_ADAPTIVE_MAXQ 0 +#define GROUP_ADAPTIVE_MAXQ 1 #define BOOST_BREAKOUT 12.5 #define BOOST_FACTOR 12.5 @@ -51,7 +53,6 @@ #define KF_MAX_BOOST 128.0 #define MIN_ARF_GF_BOOST 240 #define MIN_DECAY_FACTOR 0.01 -#define MIN_GF_INTERVAL 4 #define MIN_KF_BOOST 300 #define NEW_MV_MODE_PENALTY 32 #define SVC_FACTOR_PT_LOW 0.45 @@ -61,12 +62,9 @@ #define RC_FACTOR_MAX 1.75 -#define INTRA_WEIGHT_EXPERIMENT 0 -#if INTRA_WEIGHT_EXPERIMENT #define NCOUNT_INTRA_THRESH 8192 #define NCOUNT_INTRA_FACTOR 3 #define NCOUNT_FRAME_II_THRESH 5.0 -#endif #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? 
(x) - 0.000001 : (x) + 0.000001) @@ -270,13 +268,13 @@ void vp9_end_first_pass(VP9_COMP *cpi) { static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { switch (bsize) { case BLOCK_8X8: - return vp9_mse8x8; + return vpx_mse8x8; case BLOCK_16X8: - return vp9_mse16x8; + return vpx_mse16x8; case BLOCK_8X16: - return vp9_mse8x16; + return vpx_mse8x16; default: - return vp9_mse16x16; + return vpx_mse16x16; } } @@ -296,37 +294,37 @@ static vp9_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize, default: switch (bsize) { case BLOCK_8X8: - return vp9_highbd_mse8x8; + return vpx_highbd_8_mse8x8; case BLOCK_16X8: - return vp9_highbd_mse16x8; + return vpx_highbd_8_mse16x8; case BLOCK_8X16: - return vp9_highbd_mse8x16; + return vpx_highbd_8_mse8x16; default: - return vp9_highbd_mse16x16; + return vpx_highbd_8_mse16x16; } break; case 10: switch (bsize) { case BLOCK_8X8: - return vp9_highbd_10_mse8x8; + return vpx_highbd_10_mse8x8; case BLOCK_16X8: - return vp9_highbd_10_mse16x8; + return vpx_highbd_10_mse16x8; case BLOCK_8X16: - return vp9_highbd_10_mse8x16; + return vpx_highbd_10_mse8x16; default: - return vp9_highbd_10_mse16x16; + return vpx_highbd_10_mse16x16; } break; case 12: switch (bsize) { case BLOCK_8X8: - return vp9_highbd_12_mse8x8; + return vpx_highbd_12_mse8x8; case BLOCK_16X8: - return vp9_highbd_12_mse16x8; + return vpx_highbd_12_mse16x8; case BLOCK_8X16: - return vp9_highbd_12_mse8x16; + return vpx_highbd_12_mse8x16; default: - return vp9_highbd_12_mse16x16; + return vpx_highbd_12_mse16x16; } break; } @@ -361,7 +359,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, MV tmp_mv = {0, 0}; MV ref_mv_full = {ref_mv->row >> 3, ref_mv->col >> 3}; int num00, tmp_err, n; - const BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY; @@ -570,8 +568,8 @@ void vp9_first_pass(VP9_COMP 
*cpi, const struct lookahead_entry *source) { vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); } - xd->mi = cm->mi; - xd->mi[0].src_mi = &xd->mi[0]; + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; vp9_frame_init_quantizer(cpi); @@ -624,8 +622,8 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); - xd->mi[0].src_mi->mbmi.sb_type = bsize; - xd->mi[0].src_mi->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->mbmi.sb_type = bsize; + xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize], mb_col << 1, num_8x8_blocks_wide_lookup[bsize], @@ -633,11 +631,11 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { // Do intra 16x16 prediction. x->skip_encode = 0; - xd->mi[0].src_mi->mbmi.mode = DC_PRED; - xd->mi[0].src_mi->mbmi.tx_size = use_dc_pred ? + xd->mi[0]->mbmi.mode = DC_PRED; + xd->mi[0]->mbmi.tx_size = use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4; vp9_encode_intra_block_plane(x, bsize, 0); - this_error = vp9_get_mb_ss(x->plane[0].src_diff); + this_error = vpx_get_mb_ss(x->plane[0].src_diff); #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { switch (cm->bit_depth) { @@ -832,7 +830,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { // Keep a count of cases where the inter and intra were very close // and very low. This helps with scene cut detection for example in // cropped clips with black bars at the sides or top and bottom. 
-#if INTRA_WEIGHT_EXPERIMENT if (((this_error - intrapenalty) * 9 <= motion_error * 10) && (this_error < (2 * intrapenalty))) { neutral_count += 1.0; @@ -843,21 +840,15 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { neutral_count += (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error); } -#else - if (((this_error - intrapenalty) * 9 <= motion_error * 10) && - (this_error < (2 * intrapenalty))) { - neutral_count += 1.0; - } -#endif mv.row *= 8; mv.col *= 8; this_error = motion_error; - xd->mi[0].src_mi->mbmi.mode = NEWMV; - xd->mi[0].src_mi->mbmi.mv[0].as_mv = mv; - xd->mi[0].src_mi->mbmi.tx_size = TX_4X4; - xd->mi[0].src_mi->mbmi.ref_frame[0] = LAST_FRAME; - xd->mi[0].src_mi->mbmi.ref_frame[1] = NONE; + xd->mi[0]->mbmi.mode = NEWMV; + xd->mi[0]->mbmi.mv[0].as_mv = mv; + xd->mi[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; + xd->mi[0]->mbmi.ref_frame[1] = NONE; vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize); vp9_encode_sby_pass1(x, bsize); sum_mvr += mv.row; @@ -1257,8 +1248,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) { twopass->modified_error_left = modified_error_total; } - // Reset the vbr bits off target counter + // Reset the vbr bits off target counters cpi->rc.vbr_bits_off_target = 0; + cpi->rc.vbr_bits_off_target_fast = 0; cpi->rc.rate_error_estimate = 0; @@ -1291,11 +1283,10 @@ static double get_sr_decay_rate(const VP9_COMP *cpi, frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2); modified_pct_inter = frame->pcnt_inter; -#if INTRA_WEIGHT_EXPERIMENT if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) < - (double)NCOUNT_FRAME_II_THRESH) + (double)NCOUNT_FRAME_II_THRESH) { modified_pct_inter = frame->pcnt_inter - frame->pcnt_neutral; -#endif + } modified_pcnt_intra = 100 * (1.0 - modified_pct_inter); @@ -1334,14 +1325,17 @@ static double get_prediction_decay_rate(const VP9_COMP *cpi, // Function to test for a condition where a complex transition 
is followed // by a static section. For example in slide shows where there is a fade // between slides. This is to help with more optimal kf and gf positioning. -static int detect_transition_to_still(const TWO_PASS *twopass, +static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, int still_interval, double loop_decay_rate, double last_decay_rate) { + TWO_PASS *const twopass = &cpi->twopass; + RATE_CONTROL *const rc = &cpi->rc; + // Break clause to detect very still sections after motion // For example a static image after a fade or other transition // instead of a clean scene cut. - if (frame_interval > MIN_GF_INTERVAL && + if (frame_interval > rc->min_gf_interval && loop_decay_rate >= 0.999 && last_decay_rate < 0.9) { int j; @@ -1704,7 +1698,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1; // Allocate bits to the other frames in the group. - for (i = 0; i < rc->baseline_gf_interval - 1; ++i) { + for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) { int arf_idx = 0; if (EOF == input_stats(twopass, &frame_stats)) break; @@ -1849,7 +1843,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int int_lbq = (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex, cpi->common.bit_depth)); - active_min_gf_interval = MIN_GF_INTERVAL + MIN(2, int_max_q / 200); + active_min_gf_interval = rc->min_gf_interval + MIN(2, int_max_q / 200); if (active_min_gf_interval > rc->max_gf_interval) active_min_gf_interval = rc->max_gf_interval; @@ -1863,6 +1857,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { active_max_gf_interval = 12 + MIN(4, (int_lbq / 6)); if (active_max_gf_interval > rc->max_gf_interval) active_max_gf_interval = rc->max_gf_interval; + if (active_max_gf_interval < active_min_gf_interval) + active_max_gf_interval = active_min_gf_interval; } } @@ -1903,7 +1899,7 @@ static void 
define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. - if (detect_transition_to_still(twopass, i, 5, loop_decay_rate, + if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, last_loop_decay_rate)) { allow_alt_ref = 0; break; @@ -1940,8 +1936,26 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Was the group length constrained by the requirement for a new KF? rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0; + // Should we use the alternate reference frame. + if (allow_alt_ref && + (i < cpi->oxcf.lag_in_frames) && + (i >= rc->min_gf_interval)) { + // Calculate the boost for alt ref. + rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, + &b_boost); + rc->source_alt_ref_pending = 1; + + // Test to see if multi arf is appropriate. + cpi->multi_arf_enabled = + (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) && + (zero_motion_accumulator < 0.995)) ? 1 : 0; + } else { + rc->gfu_boost = MAX((int)boost_score, MIN_ARF_GF_BOOST); + rc->source_alt_ref_pending = 0; + } + // Set the interval until the next gf. - if (is_key_frame || rc->source_alt_ref_active) + if (is_key_frame || rc->source_alt_ref_pending) rc->baseline_gf_interval = i - 1; else rc->baseline_gf_interval = i; @@ -1966,24 +1980,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { rc->frames_till_gf_update_due = rc->baseline_gf_interval; - // Should we use the alternate reference frame. - if (allow_alt_ref && - (i < cpi->oxcf.lag_in_frames) && - (i >= MIN_GF_INTERVAL)) { - // Calculate the boost for alt ref. - rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, - &b_boost); - rc->source_alt_ref_pending = 1; - - // Test to see if multi arf is appropriate. 
- cpi->multi_arf_enabled = - (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) && - (zero_motion_accumulator < 0.995)) ? 1 : 0; - } else { - rc->gfu_boost = MAX((int)boost_score, MIN_ARF_GF_BOOST); - rc->source_alt_ref_pending = 0; - } - // Reset the file position. reset_fpf_position(twopass, start_pos); @@ -2058,29 +2054,61 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } } -// TODO(PGW) Re-examine the use of II ration in this code in the light of# -// changes elsewhere +// Threshold for use of the lagging second reference frame. High second ref +// usage may point to a transient event like a flash or occlusion rather than +// a real scene cut. +#define SECOND_REF_USEAGE_THRESH 0.1 +// Minimum % intra coding observed in first pass (1.0 = 100%) +#define MIN_INTRA_LEVEL 0.25 +// Minimum ratio between the % of intra coding and inter coding in the first +// pass after discounting neutral blocks (discounting neutral blocks in this +// way helps catch scene cuts in clips with very flat areas or letter box +// format clips with image padding. +#define INTRA_VS_INTER_THRESH 2.0 +// Hard threshold where the first pass chooses intra for almost all blocks. +// In such a case even if the frame is not a scene cut coding a key frame +// may be a good option. +#define VERY_LOW_INTER_THRESH 0.05 +// Maximum threshold for the relative ratio of intra error score vs best +// inter error score. +#define KF_II_ERR_THRESHOLD 2.5 +// In real scene cuts there is almost always a sharp change in the intra +// or inter error score. +#define ERR_CHANGE_THRESHOLD 0.4 +// For real scene cuts we expect an improvment in the intra inter error +// ratio in the next frame. 
+#define II_IMPROVEMENT_THRESHOLD 3.5 #define KF_II_MAX 128.0 + static int test_candidate_kf(TWO_PASS *twopass, const FIRSTPASS_STATS *last_frame, const FIRSTPASS_STATS *this_frame, const FIRSTPASS_STATS *next_frame) { int is_viable_kf = 0; + double pcnt_intra = 1.0 - this_frame->pcnt_inter; + double modified_pcnt_inter = + this_frame->pcnt_inter - this_frame->pcnt_neutral; // Does the frame satisfy the primary criteria of a key frame? + // See above for an explanation of the test criteria. // If so, then examine how well it predicts subsequent frames. - if ((this_frame->pcnt_second_ref < 0.10) && - (next_frame->pcnt_second_ref < 0.10) && - ((this_frame->pcnt_inter < 0.05) || - (((this_frame->pcnt_inter - this_frame->pcnt_neutral) < 0.35) && + if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && + (next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && + ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) || + ((pcnt_intra > MIN_INTRA_LEVEL) && + (pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) && ((this_frame->intra_error / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < 2.5) && + DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < + KF_II_ERR_THRESHOLD) && ((fabs(last_frame->coded_error - this_frame->coded_error) / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > 0.40) || + DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > + ERR_CHANGE_THRESHOLD) || (fabs(last_frame->intra_error - this_frame->intra_error) / - DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > 0.40) || + DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > + ERR_CHANGE_THRESHOLD) || ((next_frame->intra_error / - DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) { + DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > + II_IMPROVEMENT_THRESHOLD))))) { int i; const FIRSTPASS_STATS *start_pos = twopass->stats_in; FIRSTPASS_STATS local_next_frame = *next_frame; @@ -2216,7 +2244,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Special check for 
transition or high motion followed by a // static scene. - if (detect_transition_to_still(twopass, i, cpi->oxcf.key_freq - i, + if (detect_transition_to_still(cpi, i, cpi->oxcf.key_freq - i, loop_decay_rate, decay_accumulator)) break; @@ -2388,7 +2416,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } // Define the reference buffers that will be updated post encode. -void configure_buffer_updates(VP9_COMP *cpi) { +static void configure_buffer_updates(VP9_COMP *cpi) { TWO_PASS *const twopass = &cpi->twopass; cpi->rc.is_src_frame_alt_ref = 0; @@ -2435,7 +2463,7 @@ void configure_buffer_updates(VP9_COMP *cpi) { } } -int is_skippable_frame(const VP9_COMP *cpi) { +static int is_skippable_frame(const VP9_COMP *cpi) { // If the current frame does not have non-zero motion vector detected in the // first pass, and so do its previous and forward frames, then this frame // can be skipped for partition check, and the partition size is assigned @@ -2555,9 +2583,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); lc->frames_from_key_frame = 0; - // Reset the empty frame resolution since we have a key frame. - cpi->svc.empty_frame_width = cm->width; - cpi->svc.empty_frame_height = cm->height; + // Encode an intra only empty frame since we have a key frame. + cpi->svc.encode_intra_empty_frame = 1; } } else { cm->frame_type = INTER_FRAME; @@ -2623,6 +2650,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } #define MINQ_ADJ_LIMIT 48 +#define MINQ_ADJ_LIMIT_CQ 20 +#define HIGH_UNDERSHOOT_RATIO 2 void vp9_twopass_postencode_update(VP9_COMP *cpi) { TWO_PASS *const twopass = &cpi->twopass; RATE_CONTROL *const rc = &cpi->rc; @@ -2662,7 +2691,7 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { const int maxq_adj_limit = rc->worst_quality - twopass->active_worst_quality; const int minq_adj_limit = - (cpi->oxcf.rc_mode == VPX_CQ) ? 
0 : MINQ_ADJ_LIMIT; + (cpi->oxcf.rc_mode == VPX_CQ ? MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT); // Undershoot. if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) { @@ -2689,5 +2718,32 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit); twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit); + + // If there is a big and undexpected undershoot then feed the extra + // bits back in quickly. One situation where this may happen is if a + // frame is unexpectedly almost perfectly predicted by the ARF or GF + // but not very well predcited by the previous frame. + if (!frame_is_kf_gf_arf(cpi) && !cpi->rc.is_src_frame_alt_ref) { + int fast_extra_thresh = rc->base_frame_target / HIGH_UNDERSHOOT_RATIO; + if (rc->projected_frame_size < fast_extra_thresh) { + rc->vbr_bits_off_target_fast += + fast_extra_thresh - rc->projected_frame_size; + rc->vbr_bits_off_target_fast = + MIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth)); + + // Fast adaptation of minQ if necessary to use up the extra bits. 
+ if (rc->avg_frame_bandwidth) { + twopass->extend_minq_fast = + (int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth); + } + twopass->extend_minq_fast = MIN(twopass->extend_minq_fast, + minq_adj_limit - twopass->extend_minq); + } else if (rc->vbr_bits_off_target_fast) { + twopass->extend_minq_fast = MIN(twopass->extend_minq_fast, + minq_adj_limit - twopass->extend_minq); + } else { + twopass->extend_minq_fast = 0; + } + } } } diff --git a/media/libvpx/vp9/encoder/vp9_firstpass.h b/media/libvpx/vp9/encoder/vp9_firstpass.h index 08e7a8bf114..4a0385506db 100644 --- a/media/libvpx/vp9/encoder/vp9_firstpass.h +++ b/media/libvpx/vp9/encoder/vp9_firstpass.h @@ -122,6 +122,7 @@ typedef struct { int baseline_active_worst_quality; int extend_minq; int extend_maxq; + int extend_minq_fast; GF_GROUP gf_group; } TWO_PASS; @@ -135,6 +136,7 @@ void vp9_end_first_pass(struct VP9_COMP *cpi); void vp9_init_second_pass(struct VP9_COMP *cpi); void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi); +void vp9_twopass_postencode_update(struct VP9_COMP *cpi); // Post encode update of the rate control parameters for 2-pass void vp9_twopass_postencode_update(struct VP9_COMP *cpi); diff --git a/media/libvpx/vp9/encoder/vp9_mbgraph.c b/media/libvpx/vp9/encoder/vp9_mbgraph.c index b3a8df924fe..d5eeb9cc546 100644 --- a/media/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/media/libvpx/vp9/encoder/vp9_mbgraph.c @@ -10,6 +10,9 @@ #include +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" + #include "vpx_mem/vpx_mem.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_mcomp.h" @@ -63,8 +66,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, &distortion, &sse, NULL, 0, 0); } - xd->mi[0].src_mi->mbmi.mode = NEWMV; - xd->mi[0].src_mi->mbmi.mv[0].as_mv = *dst_mv; + xd->mi[0]->mbmi.mode = NEWMV; + xd->mi[0]->mbmi.mv[0].as_mv = *dst_mv; vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); @@ -74,8 +77,8 @@ static unsigned int 
do_16x16_motion_iteration(VP9_COMP *cpi, x->mv_row_min = tmp_row_min; x->mv_row_max = tmp_row_max; - return vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride); + return vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].dst.buf, xd->plane[0].dst.stride); } static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, @@ -87,7 +90,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction - err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, + err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); dst_mv->as_int = 0; @@ -123,7 +126,7 @@ static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) { // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction - err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, + err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); dst_mv->as_int = 0; @@ -141,12 +144,12 @@ static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) { for (mode = DC_PRED; mode <= TM_PRED; mode++) { unsigned int err; - xd->mi[0].src_mi->mbmi.mode = mode; + xd->mi[0]->mbmi.mode = mode; vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode, x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, 0, 0, 0); - err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, + err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride); // find best @@ -247,7 +250,7 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, xd->plane[0].dst.stride = buf->y_stride; xd->plane[0].pre[0].stride = buf->y_stride; xd->plane[1].dst.stride = buf->uv_stride; - xd->mi[0].src_mi = 
&mi_local; + xd->mi[0] = &mi_local; mi_local.mbmi.sb_type = BLOCK_16X16; mi_local.mbmi.ref_frame[0] = LAST_FRAME; mi_local.mbmi.ref_frame[1] = NONE; @@ -389,9 +392,8 @@ void vp9_update_mbgraph_stats(VP9_COMP *cpi) { cpi->mbgraph_n_frames = n_frames; for (i = 0; i < n_frames; i++) { MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; - vpx_memset(frame_stats->mb_stats, 0, - cm->mb_rows * cm->mb_cols * - sizeof(*cpi->mbgraph_stats[i].mb_stats)); + memset(frame_stats->mb_stats, 0, + cm->mb_rows * cm->mb_cols * sizeof(*cpi->mbgraph_stats[i].mb_stats)); } // do motion search to find contribution of each reference to data diff --git a/media/libvpx/vp9/encoder/vp9_mcomp.c b/media/libvpx/vp9/encoder/vp9_mcomp.c index 9602eb56869..234272697c8 100644 --- a/media/libvpx/vp9/encoder/vp9_mcomp.c +++ b/media/libvpx/vp9/encoder/vp9_mcomp.c @@ -13,10 +13,13 @@ #include #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_mcomp.h" @@ -159,9 +162,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { error_per_bit + 4096) >> 13 : 0) -// convert motion vector component to offset for svf calc +// convert motion vector component to offset for sv[a]f calc static INLINE int sp(int x) { - return (x & 7) << 1; + return x & 7; } static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { @@ -301,14 +304,14 @@ static INLINE unsigned int setup_center_error(const MACROBLOCKD *xd, #if CONFIG_VP9_HIGHBITDEPTH if (second_pred != NULL) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64); - vp9_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, + DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); + vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, y_stride); 
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); } else { - DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); - vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); + DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } } else { @@ -319,8 +322,8 @@ static INLINE unsigned int setup_center_error(const MACROBLOCKD *xd, #else (void) xd; if (second_pred != NULL) { - DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); - vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); + DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } else { besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); @@ -676,16 +679,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, tc = bc + search_step[idx].col; if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); - int row_offset = (tr & 0x07) << 1; - int col_offset = (tc & 0x07) << 1; MV this_mv; this_mv.row = tr; this_mv.col = tc; if (second_pred == NULL) - thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset, + thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse); else - thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset, + thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse, second_pred); cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -706,14 +707,12 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, tr = br + (cost_array[2] < cost_array[3] ? 
-hstep : hstep); if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); - int row_offset = (tr & 0x07) << 1; - int col_offset = (tc & 0x07) << 1; MV this_mv = {tr, tc}; if (second_pred == NULL) - thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset, + thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse); else - thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset, + thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse, second_pred); cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -1788,8 +1787,11 @@ static const MV search_pos[4] = { }; unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize, + int mi_row, int mi_col) { MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; DECLARE_ALIGNED(16, int16_t, hbuf[128]); DECLARE_ALIGNED(16, int16_t, vbuf[128]); DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); @@ -1802,16 +1804,38 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, const int src_stride = x->plane[0].src.stride; const int ref_stride = xd->plane[0].pre[0].stride; uint8_t const *ref_buf, *src_buf; - MV *tmp_mv = &xd->mi[0].src_mi->mbmi.mv[0].as_mv; + MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv; unsigned int best_sad, tmp_sad, this_sad[4]; MV this_mv; const int norm_factor = 3 + (bw >> 5); + const YV12_BUFFER_CONFIG *scaled_ref_frame = + vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]); + + if (scaled_ref_frame) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. 
+ for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[i] = xd->plane[i].pre[0]; + vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); + } #if CONFIG_VP9_HIGHBITDEPTH - tmp_mv->row = 0; - tmp_mv->col = 0; - return cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, - xd->plane[0].pre[0].buf, ref_stride); + { + unsigned int this_sad; + tmp_mv->row = 0; + tmp_mv->col = 0; + this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, + xd->plane[0].pre[0].buf, ref_stride); + + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[0] = backup_yv12[i]; + } + return this_sad; + } #endif // Set up prediction 1-D reference set @@ -1889,6 +1913,12 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, tmp_mv->row *= 8; tmp_mv->col *= 8; + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[0] = backup_yv12[i]; + } + return best_sad; } @@ -2017,7 +2047,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, if (fn_ptr->sdx3f != NULL) { while ((c + 2) < col_max) { int i; - unsigned int sads[3]; + DECLARE_ALIGNED(16, uint32_t, sads[3]); fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, sads); @@ -2082,7 +2112,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, if (fn_ptr->sdx8f != NULL) { while ((c + 7) < col_max) { int i; - unsigned int sads[8]; + DECLARE_ALIGNED(16, uint32_t, sads[8]); fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride, sads); @@ -2106,7 +2136,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, if (fn_ptr->sdx3f != NULL) { while ((c + 2) < col_max) { int i; - unsigned int sads[3]; + DECLARE_ALIGNED(16, uint32_t, sads[3]); fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, sads); diff --git a/media/libvpx/vp9/encoder/vp9_mcomp.h b/media/libvpx/vp9/encoder/vp9_mcomp.h index dd8a4607942..99c1afa28ff 100644 --- 
a/media/libvpx/vp9/encoder/vp9_mcomp.h +++ b/media/libvpx/vp9/encoder/vp9_mcomp.h @@ -83,7 +83,8 @@ int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x, // Perform integral projection based motion estimation. unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize); + BLOCK_SIZE bsize, + int mi_row, int mi_col); typedef int (integer_mv_pattern_search_fn) ( const MACROBLOCK *x, diff --git a/media/libvpx/vp9/encoder/vp9_picklpf.c b/media/libvpx/vp9/encoder/vp9_picklpf.c index a95f0f46d5c..8e191038514 100644 --- a/media/libvpx/vp9/encoder/vp9_picklpf.c +++ b/media/libvpx/vp9/encoder/vp9_picklpf.c @@ -14,6 +14,7 @@ #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" @@ -81,7 +82,7 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int64_t ss_err[MAX_LOOP_FILTER + 1]; // Set each entry to -1 - vpx_memset(ss_err, 0xFF, sizeof(ss_err)); + memset(ss_err, 0xFF, sizeof(ss_err)); // Make a copy of the unfiltered / processed recon buffer vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); diff --git a/media/libvpx/vp9/encoder/vp9_pickmode.c b/media/libvpx/vp9/encoder/vp9_pickmode.c index 6dfb9eedde1..2c78831832e 100644 --- a/media/libvpx/vp9/encoder/vp9_pickmode.c +++ b/media/libvpx/vp9/encoder/vp9_pickmode.c @@ -14,15 +14,20 @@ #include #include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_mvref_common.h" +#include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_scan.h" +#include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_pickmode.h" #include "vp9/encoder/vp9_ratectrl.h" @@ -49,7 +54,7 @@ 
static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd, int const_motion = 0; // Blank the reference vector list - vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); + memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); // The nearest 2 blocks are treated differently // if the size < 8x8 we get the mv from the bmi substructure, @@ -58,7 +63,7 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd, const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride].src_mi; + xd->mi_stride]; const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; // Keep counts for entropy encoding. context_counter += mode_2_counter[candidate->mode]; @@ -79,7 +84,7 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd, const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride].src_mi->mbmi; + xd->mi_stride]->mbmi; different_ref_found = 1; if (candidate->ref_frame[0] == ref_frame) @@ -95,7 +100,7 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd, const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row - * xd->mi_stride].src_mi->mbmi; + * xd->mi_stride]->mbmi; // If the candidate is INTRA we don't want to consider its mv. 
IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias, @@ -120,7 +125,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv *tmp_mv, int *rate_mv, int64_t best_rd_sofar) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; const int step_param = cpi->sf.mv.fullpel_search_step_param; const int sadpb = x->sadperbit16; @@ -188,6 +193,8 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0); + *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } if (scaled_ref_frame) { @@ -198,6 +205,248 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, return rv; } +static void block_variance(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + int w, int h, unsigned int *sse, int *sum, + int block_size, unsigned int *sse8x8, + int *sum8x8, unsigned int *var8x8) { + int i, j, k = 0; + + *sse = 0; + *sum = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += block_size) { + vpx_get8x8var(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, + &sse8x8[k], &sum8x8[k]); + *sse += sse8x8[k]; + *sum += sum8x8[k]; + var8x8[k] = sse8x8[k] - (((unsigned int)sum8x8[k] * sum8x8[k]) >> 6); + k++; + } + } +} + +static void calculate_variance(int bw, int bh, TX_SIZE tx_size, + unsigned int *sse_i, int *sum_i, + unsigned int *var_o, unsigned int *sse_o, + int *sum_o) { + const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size]; + const int nw = 1 << (bw - b_width_log2_lookup[unit_size]); + const int nh = 1 << (bh - b_height_log2_lookup[unit_size]); + int i, j, k = 0; + + for (i = 0; i < nh; i += 2) { + for (j = 0; j < nw; j += 2) { + sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] + + sse_i[(i + 1) * nw + j] + 
sse_i[(i + 1) * nw + j + 1]; + sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] + + sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1]; + var_o[k] = sse_o[k] - (((unsigned int)sum_o[k] * sum_o[k]) >> + (b_width_log2_lookup[unit_size] + + b_height_log2_lookup[unit_size] + 6)); + k++; + } + } +} + +static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, + MACROBLOCK *x, MACROBLOCKD *xd, + int *out_rate_sum, int64_t *out_dist_sum, + unsigned int *var_y, unsigned int *sse_y, + int mi_row, int mi_col, int *early_term) { + // Note our transform coeffs are 8 times an orthogonal transform. + // Hence quantizer step is also 8 times. To get effective quantizer + // we need to divide by 8 before sending to modeling function. + unsigned int sse; + int rate; + int64_t dist; + struct macroblock_plane *const p = &x->plane[0]; + struct macroblockd_plane *const pd = &xd->plane[0]; + const uint32_t dc_quant = pd->dequant[0]; + const uint32_t ac_quant = pd->dequant[1]; + const int64_t dc_thr = dc_quant * dc_quant >> 6; + const int64_t ac_thr = ac_quant * ac_quant >> 6; + unsigned int var; + int sum; + int skip_dc = 0; + + const int bw = b_width_log2_lookup[bsize]; + const int bh = b_height_log2_lookup[bsize]; + const int num8x8 = 1 << (bw + bh - 2); + unsigned int sse8x8[64] = {0}; + int sum8x8[64] = {0}; + unsigned int var8x8[64] = {0}; + TX_SIZE tx_size; + int i, k; + + // Calculate variance for whole partition, and also save 8x8 blocks' variance + // to be used in following transform skipping test. 
+ block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, + 4 << bw, 4 << bh, &sse, &sum, 8, sse8x8, sum8x8, var8x8); + var = sse - (((int64_t)sum * sum) >> (bw + bh + 4)); + + *var_y = var; + *sse_y = sse; + + if (cpi->common.tx_mode == TX_MODE_SELECT) { + if (sse > (var << 2)) + tx_size = MIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + else + tx_size = TX_8X8; + + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id)) + tx_size = TX_8X8; + else if (tx_size > TX_16X16) + tx_size = TX_16X16; + } else { + tx_size = MIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + } + + assert(tx_size >= TX_8X8); + xd->mi[0]->mbmi.tx_size = tx_size; + + // Evaluate if the partition block is a skippable block in Y plane. + { + unsigned int sse16x16[16] = {0}; + int sum16x16[16] = {0}; + unsigned int var16x16[16] = {0}; + const int num16x16 = num8x8 >> 2; + + unsigned int sse32x32[4] = {0}; + int sum32x32[4] = {0}; + unsigned int var32x32[4] = {0}; + const int num32x32 = num8x8 >> 4; + + int ac_test = 1; + int dc_test = 1; + const int num = (tx_size == TX_8X8) ? num8x8 : + ((tx_size == TX_16X16) ? num16x16 : num32x32); + const unsigned int *sse_tx = (tx_size == TX_8X8) ? sse8x8 : + ((tx_size == TX_16X16) ? sse16x16 : sse32x32); + const unsigned int *var_tx = (tx_size == TX_8X8) ? var8x8 : + ((tx_size == TX_16X16) ? var16x16 : var32x32); + + // Calculate variance if tx_size > TX_8X8 + if (tx_size >= TX_16X16) + calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16, + sum16x16); + if (tx_size == TX_32X32) + calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32, + sse32x32, sum32x32); + + // Skipping test + x->skip_txfm[0] = 0; + for (k = 0; k < num; k++) + // Check if all ac coefficients can be quantized to zero. 
+ if (!(var_tx[k] < ac_thr || var == 0)) { + ac_test = 0; + break; + } + + for (k = 0; k < num; k++) + // Check if dc coefficient can be quantized to zero. + if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) { + dc_test = 0; + break; + } + + if (ac_test) { + x->skip_txfm[0] = 2; + + if (dc_test) + x->skip_txfm[0] = 1; + } else if (dc_test) { + skip_dc = 1; + } + } + + if (x->skip_txfm[0] == 1) { + int skip_uv[2] = {0}; + unsigned int var_uv[2]; + unsigned int sse_uv[2]; + + *out_rate_sum = 0; + *out_dist_sum = sse << 4; + + // Transform skipping test in UV planes. + for (i = 1; i <= 2; i++) { + struct macroblock_plane *const p = &x->plane[i]; + struct macroblockd_plane *const pd = &xd->plane[i]; + const TX_SIZE uv_tx_size = get_uv_tx_size(&xd->mi[0]->mbmi, pd); + const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size]; + const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd); + const int uv_bw = b_width_log2_lookup[uv_bsize]; + const int uv_bh = b_height_log2_lookup[uv_bsize]; + const int sf = (uv_bw - b_width_log2_lookup[unit_size]) + + (uv_bh - b_height_log2_lookup[unit_size]); + const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf); + const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf); + int j = i - 1; + + vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i); + var_uv[j] = cpi->fn_ptr[uv_bsize].vf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, &sse_uv[j]); + + if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) && + (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j])) + skip_uv[j] = 1; + else + break; + } + + // If the transform in YUV planes are skippable, the mode search checks + // fewer inter modes and doesn't check intra modes. 
+ if (skip_uv[0] & skip_uv[1]) { + *early_term = 1; + } + + return; + } + + if (!skip_dc) { +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], + dc_quant >> (xd->bd - 5), &rate, &dist); + } else { + vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], + dc_quant >> 3, &rate, &dist); + } +#else + vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], + dc_quant >> 3, &rate, &dist); +#endif // CONFIG_VP9_HIGHBITDEPTH + } + + if (!skip_dc) { + *out_rate_sum = rate >> 1; + *out_dist_sum = dist << 3; + } else { + *out_rate_sum = 0; + *out_dist_sum = (sse - var) << 4; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], + ac_quant >> (xd->bd - 5), &rate, &dist); + } else { + vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], + ac_quant >> 3, &rate, &dist); + } +#else + vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], + ac_quant >> 3, &rate, &dist); +#endif // CONFIG_VP9_HIGHBITDEPTH + + *out_rate_sum += rate; + *out_dist_sum += dist << 4; +} static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, @@ -224,21 +473,19 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, if (cpi->common.tx_mode == TX_MODE_SELECT) { if (sse > (var << 2)) - xd->mi[0].src_mi->mbmi.tx_size = + xd->mi[0]->mbmi.tx_size = MIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); else - xd->mi[0].src_mi->mbmi.tx_size = TX_8X8; + xd->mi[0]->mbmi.tx_size = TX_8X8; - if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) { - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && - cyclic_refresh_segment_id_boosted(xd->mi[0].src_mi->mbmi.segment_id)) - xd->mi[0].src_mi->mbmi.tx_size = TX_8X8; - else if (xd->mi[0].src_mi->mbmi.tx_size > TX_16X16) - xd->mi[0].src_mi->mbmi.tx_size = 
TX_16X16; - } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id)) + xd->mi[0]->mbmi.tx_size = TX_8X8; + else if (xd->mi[0]->mbmi.tx_size > TX_16X16) + xd->mi[0]->mbmi.tx_size = TX_16X16; } else { - xd->mi[0].src_mi->mbmi.tx_size = + xd->mi[0]->mbmi.tx_size = MIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); } @@ -246,7 +493,7 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, // Evaluate if the partition block is a skippable block in Y plane. { const BLOCK_SIZE unit_size = - txsize_to_bsize[xd->mi[0].src_mi->mbmi.tx_size]; + txsize_to_bsize[xd->mi[0]->mbmi.tx_size]; const unsigned int num_blk_log2 = (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) + (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]); @@ -312,6 +559,132 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, *out_dist_sum += dist << 4; } +#if CONFIG_VP9_HIGHBITDEPTH +static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, + int *skippable, int64_t *sse, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size) { + MACROBLOCKD *xd = &x->e_mbd; + unsigned int var_y, sse_y; + (void)plane; + (void)tx_size; + model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y); + *sse = INT_MAX; + *skippable = 0; + return; +} +#else +static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, + int *skippable, int64_t *sse, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size) { + MACROBLOCKD *xd = &x->e_mbd; + const struct macroblockd_plane *pd = &xd->plane[plane]; + const struct macroblock_plane *const p = &x->plane[plane]; + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; + const int step = 1 << (tx_size << 1); + const int block_step = (1 << tx_size); + int block = 0, r, c; + int shift = tx_size == TX_32X32 ? 
0 : 2; + const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : + xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : + xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + int eob_cost = 0; + + (void)cpi; + vp9_subtract_plane(x, bsize, plane); + *skippable = 1; + // Keep track of the row and column of the blocks we use so that we know + // if we are in the unrestricted motion border. + for (r = 0; r < max_blocks_high; r += block_step) { + for (c = 0; c < num_4x4_w; c += block_step) { + if (c < max_blocks_wide) { + const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + uint16_t *const eob = &p->eobs[block]; + const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize]; + const int16_t *src_diff; + src_diff = &p->src_diff[(r * diff_stride + c) << 2]; + + switch (tx_size) { + case TX_32X32: + vp9_fdct32x32_rd(src_diff, coeff, diff_stride); + vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, + p->round_fp, p->quant_fp, p->quant_shift, + qcoeff, dqcoeff, pd->dequant, eob, + scan_order->scan, scan_order->iscan); + break; + case TX_16X16: + vp9_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff); + vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, + scan_order->scan, scan_order->iscan); + break; + case TX_8X8: + vp9_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff); + vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, + scan_order->scan, scan_order->iscan); + break; + case TX_4X4: + x->fwd_txm4x4(src_diff, coeff, diff_stride); + vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, + 
p->quant_fp, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, + scan_order->scan, scan_order->iscan); + break; + default: + assert(0); + break; + } + *skippable &= (*eob == 0); + eob_cost += 1; + } + block += step; + } + } + + if (*skippable && *sse < INT64_MAX) { + *rate = 0; + *dist = (*sse << 6) >> shift; + *sse = *dist; + return; + } + + block = 0; + *rate = 0; + *dist = 0; + *sse = (*sse << 6) >> shift; + for (r = 0; r < max_blocks_high; r += block_step) { + for (c = 0; c < num_4x4_w; c += block_step) { + if (c < max_blocks_wide) { + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + uint16_t *const eob = &p->eobs[block]; + + if (*eob == 1) + *rate += (int)abs(qcoeff[0]); + else if (*eob > 1) + *rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4); + + *dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift; + } + block += step; + } + } + + if (*skippable == 0) { + *rate <<= 10; + *rate += (eob_cost << 8); + } +} +#endif + static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, int *out_rate_sum, int64_t *out_dist_sum, @@ -402,7 +775,7 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, struct buf_2d yv12_mb[][MAX_MB_PLANE], int *rate, int64_t *dist) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); unsigned int var = var_y, sse = sse_y; @@ -518,7 +891,9 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, int i, j; int rate; int64_t dist; - unsigned int var_y, sse_y; + int64_t this_sse = INT64_MAX; + int is_skippable; + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); assert(plane == 0); (void) plane; @@ -533,8 +908,13 @@ static void estimate_block_intra(int plane, int block, 
BLOCK_SIZE plane_bsize, x->skip_encode ? src_stride : dst_stride, pd->dst.buf, dst_stride, i, j, 0); - // This procedure assumes zero offset from p->src.buf and pd->dst.buf. - model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y); + + // TODO(jingning): This needs further refactoring. + block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0, + bsize_tx, MIN(tx_size, TX_16X16)); + x->skip_txfm[0] = is_skippable; + rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable); + p->src.buf = src_buf_base; pd->dst.buf = dst_buf_base; args->rate += rate; @@ -542,7 +922,7 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, } static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = { - {THR_DC, THR_H_PRED, THR_V_PRED, THR_TM}, + {THR_DC, THR_V_PRED, THR_H_PRED, THR_TM}, {THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV}, {THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG}, }; @@ -551,20 +931,54 @@ static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED, TM_PRED }; +static int mode_offset(const PREDICTION_MODE mode) { + if (mode >= NEARESTMV) { + return INTER_OFFSET(mode); + } else { + switch (mode) { + case DC_PRED: + return 0; + case V_PRED: + return 1; + case H_PRED: + return 2; + case TM_PRED: + return 3; + default: + return -1; + } + } +} + +static INLINE void update_thresh_freq_fact(VP9_COMP *cpi, + TileDataEnc *tile_data, + BLOCK_SIZE bsize, + MV_REFERENCE_FRAME ref_frame, + THR_MODES best_mode_idx, + PREDICTION_MODE mode) { + THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)]; + int *freq_fact = &tile_data->thresh_freq_fact[bsize][thr_mode_idx]; + if (thr_mode_idx == best_mode_idx) + *freq_fact -= (*freq_fact >> 4); + else + *freq_fact = MIN(*freq_fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); +} + void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { MACROBLOCKD *const xd = &x->e_mbd; - 
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; RD_COST this_rdc, best_rdc; PREDICTION_MODE this_mode; struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 }; const TX_SIZE intra_tx_size = MIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - MODE_INFO *const mic = xd->mi[0].src_mi; + MODE_INFO *const mic = xd->mi[0]; int *bmode_costs; - const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi; - const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL; + const MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; + const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL; const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); bmode_costs = cpi->y_mode_costs[A][L]; @@ -576,7 +990,7 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, mbmi->ref_frame[0] = INTRA_FRAME; mbmi->mv[0].as_int = INVALID_MV; mbmi->uv_mode = DC_PRED; - vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); + memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); // Change the limit of this loop to add other intra prediction // mode tests. 
@@ -602,9 +1016,23 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, *rd_cost = best_rdc; } -static const int ref_frame_cost[MAX_REF_FRAMES] = { - 1235, 229, 530, 615, -}; +static void init_ref_frame_cost(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + int ref_frame_cost[MAX_REF_FRAMES]) { + vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd); + vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd); + vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd); + + ref_frame_cost[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); + ref_frame_cost[LAST_FRAME] = ref_frame_cost[GOLDEN_FRAME] = + ref_frame_cost[ALTREF_FRAME] = vp9_cost_bit(intra_inter_p, 1); + + ref_frame_cost[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0); + ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1); + ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1); + ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0); + ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1); +} typedef struct { MV_REFERENCE_FRAME ref_frame; @@ -630,9 +1058,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; + SPEED_FEATURES *const sf = &cpi->sf; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; struct macroblockd_plane *const pd = &xd->plane[0]; PREDICTION_MODE best_mode = ZEROMV; MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; @@ -649,9 +1078,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, unsigned int var_y = UINT_MAX; unsigned int sse_y = UINT_MAX; // Reduce the intra cost penalty for small blocks (<=16x16). - const int reduction_fac = - (cpi->sf.partition_search_type == VAR_BASED_PARTITION && - bsize <= BLOCK_16X16) ? 
2 : 0; + const int reduction_fac = (bsize <= BLOCK_16X16) ? + ((bsize <= BLOCK_8X8) ? 4 : 2) : 0; const int intra_cost_penalty = vp9_get_intra_cost_penalty( cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac; const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv, @@ -670,9 +1098,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // process. // tmp[3] points to dst buffer, and the other 3 point to allocated buffers. PRED_BUFFER tmp[4]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64); + DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]); #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED_ARRAY(16, uint16_t, pred_buf_16, 3 * 64 * 64); + DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]); #endif struct buf_2d orig_dst = pd->dst; PRED_BUFFER *best_pred = NULL; @@ -682,6 +1110,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int ref_frame_skip_mask = 0; int idx; int best_pred_sad = INT_MAX; + int best_early_term = 0; + int ref_frame_cost[MAX_REF_FRAMES]; + + init_ref_frame_cost(cm, xd, ref_frame_cost); if (reuse_inter_pred) { int i; @@ -706,9 +1138,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->skip = 0; if (xd->up_available) - filter_ref = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter; + filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter; else if (xd->left_available) - filter_ref = xd->mi[-1].src_mi->mbmi.interp_filter; + filter_ref = xd->mi[-1]->mbmi.interp_filter; else filter_ref = cm->interp_filter; @@ -726,7 +1158,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #endif if (cpi->rc.frames_since_golden == 0) { - cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); usable_ref_frame = LAST_FRAME; } else { usable_ref_frame = GOLDEN_FRAME; @@ -747,11 +1178,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, sf, sf); if (cm->use_prev_frame_mvs) - vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0].src_mi, ref_frame, + vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame, candidates, 
mi_row, mi_col, NULL, NULL); else const_motion[ref_frame] = mv_refs_rt(cm, xd, tile_info, - xd->mi[0].src_mi, + xd->mi[0], ref_frame, candidates, mi_row, mi_col); @@ -773,6 +1204,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int mode_index; int i; PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode; + int64_t this_sse; + int is_skippable; + int this_early_term = 0; + if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue; @@ -783,7 +1218,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, continue; i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME; - if (cpi->ref_frame_flags & flag_list[i]) + if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking) if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1)) ref_frame_skip_mask |= (1 << ref_frame); if (ref_frame_skip_mask & (1 << ref_frame)) @@ -811,7 +1246,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (bsize < BLOCK_16X16) continue; - tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize); + tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) continue; @@ -850,6 +1285,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_pred_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride, pre_buf, pre_stride); + x->pred_mv_sad[LAST_FRAME] = best_pred_sad; } if (this_mode != NEARESTMV && @@ -885,7 +1321,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, INTERP_FILTER best_filter = SWITCHABLE, filter; PRED_BUFFER *current_pred = this_mode_pred; - for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) { + for (filter = EIGHTTAP; filter <= EIGHTTAP_SMOOTH; ++filter) { int64_t cost; mbmi->interp_filter = filter; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); @@ -924,17 +1360,55 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, var_y = pf_var[best_filter]; sse_y = pf_sse[best_filter]; x->skip_txfm[0] = skip_txfm; + if 
(reuse_inter_pred) { + pd->dst.buf = this_mode_pred->data; + pd->dst.stride = this_mode_pred->stride; + } } else { mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, - &var_y, &sse_y); - this_rdc.rate += - cm->interp_filter == SWITCHABLE ? - vp9_get_switchable_rate(cpi, xd) : 0; + + // For large partition blocks, extra testing is done. + if (bsize > BLOCK_32X32 && + !cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id) && + cm->base_qindex) { + model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate, + &this_rdc.dist, &var_y, &sse_y, mi_row, mi_col, + &this_early_term); + } else { + model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, + &var_y, &sse_y); + } + } + + if (!this_early_term) { + this_sse = (int64_t)sse_y; + block_yrd(cpi, x, &this_rdc.rate, &this_rdc.dist, &is_skippable, + &this_sse, 0, bsize, MIN(mbmi->tx_size, TX_16X16)); + x->skip_txfm[0] = is_skippable; + if (is_skippable) { + this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); + } else { + if (RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist) < + RDCOST(x->rdmult, x->rddiv, 0, this_sse)) { + this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + } else { + this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); + this_rdc.dist = this_sse; + x->skip_txfm[0] = 1; + } + } + + if (cm->interp_filter == SWITCHABLE) { + if ((mbmi->mv[0].as_mv.row | mbmi->mv[0].as_mv.col) & 0x07) + this_rdc.rate += vp9_get_switchable_rate(cpi, xd); + } + } else { + this_rdc.rate += cm->interp_filter == SWITCHABLE ? 
+ vp9_get_switchable_rate(cpi, xd) : 0; + this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); } - // chroma component rate-distortion cost modeling if (x->color_sensitivity[0] || x->color_sensitivity[1]) { int uv_rate = 0; int64_t uv_dist = 0; @@ -942,7 +1416,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1); if (x->color_sensitivity[1]) vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2); - model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist, &var_y, &sse_y); + model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist, + &var_y, &sse_y); this_rdc.rate += uv_rate; this_rdc.dist += uv_dist; } @@ -981,6 +1456,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_tx_size = mbmi->tx_size; best_ref_frame = ref_frame; best_mode_skip_txfm = x->skip_txfm[0]; + best_early_term = this_early_term; if (reuse_inter_pred) { free_pred_buffer(best_pred); @@ -993,6 +1469,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (x->skip) break; + + // If early termination flag is 1 and at least 2 modes are checked, + // the mode search is terminated. 
+ if (best_early_term && idx > 0) { + x->skip = 1; + break; + } } mbmi->mode = best_mode; @@ -1000,7 +1483,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = best_tx_size; mbmi->ref_frame[0] = best_ref_frame; mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; - xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; x->skip_txfm[0] = best_mode_skip_txfm; // Perform intra prediction search, if the best SAD is above a certain @@ -1039,8 +1522,18 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < 4; ++i) { const PREDICTION_MODE this_mode = intra_mode_list[i]; - if (!((1 << this_mode) & cpi->sf.intra_y_mode_mask[intra_tx_size])) + THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)]; + int mode_rd_thresh = rd_threshes[mode_index]; + + if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize])) continue; + + if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, + rd_thresh_freq_fact[mode_index])) + continue; + + mbmi->mode = this_mode; + mbmi->ref_frame[0] = INTRA_FRAME; args.mode = this_mode; args.rate = 0; args.dist = 0; @@ -1057,17 +1550,17 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (this_rdc.rdcost < best_rdc.rdcost) { best_rdc = this_rdc; - mbmi->mode = this_mode; + best_mode = this_mode; best_intra_tx_size = mbmi->tx_size; - mbmi->ref_frame[0] = INTRA_FRAME; + best_ref_frame = INTRA_FRAME; mbmi->uv_mode = this_mode; mbmi->mv[0].as_int = INVALID_MV; + best_mode_skip_txfm = x->skip_txfm[0]; } } // Reset mb_mode_info to the best inter mode. 
- if (mbmi->ref_frame[0] != INTRA_FRAME) { - x->skip_txfm[0] = best_mode_skip_txfm; + if (best_ref_frame != INTRA_FRAME) { mbmi->tx_size = best_tx_size; } else { mbmi->tx_size = best_intra_tx_size; @@ -1075,6 +1568,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } pd->dst = orig_dst; + mbmi->mode = best_mode; + mbmi->ref_frame[0] = best_ref_frame; + x->skip_txfm[0] = best_mode_skip_txfm; if (reuse_inter_pred && best_pred != NULL) { if (best_pred->data != orig_dst.buf && is_inter_mode(mbmi->mode)) { @@ -1096,20 +1592,27 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cpi->sf.adaptive_rd_thresh) { - THR_MODES best_mode_idx = is_inter_block(mbmi) ? - mode_idx[best_ref_frame][INTER_OFFSET(mbmi->mode)] : - mode_idx[INTRA_FRAME][mbmi->mode]; - PREDICTION_MODE this_mode; - for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { - if (best_ref_frame != ref_frame) continue; - for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { - THR_MODES thr_mode_idx = mode_idx[ref_frame][INTER_OFFSET(this_mode)]; - int *freq_fact = &tile_data->thresh_freq_fact[bsize][thr_mode_idx]; - if (thr_mode_idx == best_mode_idx) - *freq_fact -= (*freq_fact >> 4); - else - *freq_fact = MIN(*freq_fact + RD_THRESH_INC, - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); + THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mbmi->mode)]; + + if (best_ref_frame == INTRA_FRAME) { + // Only consider the modes that are included in the intra_mode_list. + int intra_modes = sizeof(intra_mode_list)/sizeof(PREDICTION_MODE); + int i; + + // TODO(yunqingwang): Check intra mode mask and only update freq_fact + // for those valid modes. 
+ for (i = 0; i < intra_modes; i++) { + update_thresh_freq_fact(cpi, tile_data, bsize, INTRA_FRAME, + best_mode_idx, intra_mode_list[i]); + } + } else { + for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { + PREDICTION_MODE this_mode; + if (best_ref_frame != ref_frame) continue; + for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { + update_thresh_freq_fact(cpi, tile_data, bsize, ref_frame, + best_mode_idx, this_mode); + } } } } @@ -1125,7 +1628,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, TileInfo *const tile_info = &tile_data->tile_info; SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; MV_REFERENCE_FRAME ref_frame, second_ref_frame = NONE; MV_REFERENCE_FRAME best_ref_frame = NONE; @@ -1154,7 +1657,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, &cm->frame_refs[ref_frame - 1].sf; vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); - vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0].src_mi, ref_frame, + vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame, candidates, mi_row, mi_col, NULL, NULL); vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, @@ -1214,7 +1717,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, const struct buf_2d orig_src = p->src; const struct buf_2d orig_dst = pd->dst; struct buf_2d orig_pre[2]; - vpx_memcpy(orig_pre, xd->plane[0].pre, sizeof(orig_pre)); + memcpy(orig_pre, xd->plane[0].pre, sizeof(orig_pre)); // set buffer pointers for sub8x8 motion search. 
p->src.buf = @@ -1233,7 +1736,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int b_rate = 0; - xd->mi[0].bmi[i].as_mv[0].as_int = b_mv[this_mode].as_int; + xd->mi[0]->bmi[i].as_mv[0].as_int = b_mv[this_mode].as_int; if (this_mode == NEWMV) { const int step_param = cpi->sf.mv.fullpel_search_step_param; @@ -1250,8 +1753,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.row = b_mv[NEARESTMV].as_mv.row >> 3; mvp_full.col = b_mv[NEARESTMV].as_mv.col >> 3; } else { - mvp_full.row = xd->mi[0].bmi[0].as_mv[0].as_mv.row >> 3; - mvp_full.col = xd->mi[0].bmi[0].as_mv[0].as_mv.col >> 3; + mvp_full.row = xd->mi[0]->bmi[0].as_mv[0].as_mv.row >> 3; + mvp_full.col = xd->mi[0]->bmi[0].as_mv[0].as_mv.col >> 3; } vp9_set_mv_search_range(x, &mbmi->ref_mvs[0]->as_mv); @@ -1293,7 +1796,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, &dummy_dist, &x->pred_sse[ref_frame], NULL, 0, 0); - xd->mi[0].bmi[i].as_mv[0].as_mv = tmp_mv; + xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv; } else { b_rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]] [INTER_OFFSET(this_mode)]; @@ -1303,7 +1806,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride, pd->dst.buf, pd->dst.stride, - &xd->mi[0].bmi[i].as_mv[0].as_mv, + &xd->mi[0]->bmi[i].as_mv[0].as_mv, &xd->block_refs[0]->sf, 4 * num_4x4_blocks_wide, 4 * num_4x4_blocks_high, 0, @@ -1315,7 +1818,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, #endif vp9_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride, pd->dst.buf, pd->dst.stride, - &xd->mi[0].bmi[i].as_mv[0].as_mv, + &xd->mi[0]->bmi[i].as_mv[0].as_mv, &xd->block_refs[0]->sf, 4 * num_4x4_blocks_wide, 4 * num_4x4_blocks_high, 0, @@ -1337,7 +1840,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if 
(this_rdc.rdcost < b_best_rd) { b_best_rd = this_rdc.rdcost; bsi[ref_frame][i].as_mode = this_mode; - bsi[ref_frame][i].as_mv[0].as_mv = xd->mi[0].bmi[i].as_mv[0].as_mv; + bsi[ref_frame][i].as_mv[0].as_mv = xd->mi[0]->bmi[i].as_mv[0].as_mv; } } // mode search @@ -1347,11 +1850,11 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, pd->dst = orig_dst; this_rd += b_best_rd; - xd->mi[0].bmi[i] = bsi[ref_frame][i]; + xd->mi[0]->bmi[i] = bsi[ref_frame][i]; if (num_4x4_blocks_wide > 1) - xd->mi[0].bmi[i + 1] = xd->mi[0].bmi[i]; + xd->mi[0]->bmi[i + 1] = xd->mi[0]->bmi[i]; if (num_4x4_blocks_high > 1) - xd->mi[0].bmi[i + 2] = xd->mi[0].bmi[i]; + xd->mi[0]->bmi[i + 2] = xd->mi[0]->bmi[i]; } } // loop through sub8x8 blocks @@ -1366,15 +1869,15 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { const int block = idy * 2 + idx; - xd->mi[0].bmi[block] = bsi[best_ref_frame][block]; + xd->mi[0]->bmi[block] = bsi[best_ref_frame][block]; if (num_4x4_blocks_wide > 1) - xd->mi[0].bmi[block + 1] = bsi[best_ref_frame][block]; + xd->mi[0]->bmi[block + 1] = bsi[best_ref_frame][block]; if (num_4x4_blocks_high > 1) - xd->mi[0].bmi[block + 2] = bsi[best_ref_frame][block]; + xd->mi[0]->bmi[block + 2] = bsi[best_ref_frame][block]; } } - mbmi->mode = xd->mi[0].bmi[3].as_mode; - ctx->mic = *(xd->mi[0].src_mi); + mbmi->mode = xd->mi[0]->bmi[3].as_mode; + ctx->mic = *(xd->mi[0]); ctx->skip_txfm[0] = 0; ctx->skip = 0; // Dummy assignment for speed -5. No effect in speed -6. diff --git a/media/libvpx/vp9/encoder/vp9_psnrhvs.c b/media/libvpx/vp9/encoder/vp9_psnrhvs.c new file mode 100644 index 00000000000..e10e0284c58 --- /dev/null +++ b/media/libvpx/vp9/encoder/vp9_psnrhvs.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + * This code was originally written by: Gregory Maxwell, at the Daala + * project. + */ +#include +#include +#include + +#include "./vpx_config.h" +#include "./vp9_rtcd.h" +#include "vp9/encoder/vp9_ssim.h" + +#if !defined(M_PI) +# define M_PI (3.141592653589793238462643) +#endif +#include + +void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, int xstride) { + (void) xstride; + vp9_fdct8x8_c(x, y, ystride); +} + +/* Normalized inverse quantization matrix for 8x8 DCT at the point of + * transparency. This is not the JPEG based matrix from the paper, + this one gives a slightly higher MOS agreement.*/ +float csf_y[8][8] = {{1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, + 1.00227514334, 0.678296995242, 0.466224900598, 0.3265091542}, {2.2901594831, + 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963, 0.868920337363, + 0.61280991668, 0.436405793551}, {2.08509755623, 2.04793073064, + 1.34329019223, 1.09205635862, 0.875748795257, 0.670882927016, + 0.501731932449, 0.372504254596}, {1.48366094411, 1.68731108984, + 1.09205635862, 0.772819797575, 0.605636379554, 0.48309405692, + 0.380429446972, 0.295774038565}, {1.00227514334, 1.2305666963, + 0.875748795257, 0.605636379554, 0.448996256676, 0.352889268808, + 0.283006984131, 0.226951348204}, {0.678296995242, 0.868920337363, + 0.670882927016, 0.48309405692, 0.352889268808, 0.27032073436, + 0.215017739696, 0.17408067321}, {0.466224900598, 0.61280991668, + 0.501731932449, 0.380429446972, 0.283006984131, 0.215017739696, + 0.168869545842, 0.136153931001}, {0.3265091542, 0.436405793551, + 0.372504254596, 0.295774038565, 0.226951348204, 0.17408067321, + 0.136153931001, 
0.109083846276}}; +float csf_cb420[8][8] = { + {1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788, + 0.898018824055, 0.74725392039, 0.615105596242}, {2.46074210438, + 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972, + 1.17428548929, 0.996404342439, 0.830890433625}, {1.18284184739, + 1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362, + 0.960060382087, 0.849823426169, 0.731221236837}, {1.14982565193, + 1.38190029285, 1.02624506078, 0.861317501629, 0.801821139099, + 0.751437590932, 0.685398513368, 0.608694761374}, {1.05017074788, + 1.33100189972, 1.03145147362, 0.801821139099, 0.676555426187, + 0.605503172737, 0.55002013668, 0.495804539034}, {0.898018824055, + 1.17428548929, 0.960060382087, 0.751437590932, 0.605503172737, + 0.514674450957, 0.454353482512, 0.407050308965}, {0.74725392039, + 0.996404342439, 0.849823426169, 0.685398513368, 0.55002013668, + 0.454353482512, 0.389234902883, 0.342353999733}, {0.615105596242, + 0.830890433625, 0.731221236837, 0.608694761374, 0.495804539034, + 0.407050308965, 0.342353999733, 0.295530605237}}; +float csf_cr420[8][8] = { + {2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469, + 0.867069376285, 0.721500455585, 0.593906509971}, {2.62502345193, + 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198, + 1.13381474809, 0.962064122248, 0.802254508198}, {1.26180942886, + 1.17180569821, 0.944981930573, 0.990876405848, 0.995903384143, + 0.926972725286, 0.820534991409, 0.706020324706}, {1.11019789803, + 1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195, + 0.725539939514, 0.661776842059, 0.587716619023}, {1.01397751469, + 1.28513006198, 0.995903384143, 0.77418706195, 0.653238524286, + 0.584635025748, 0.531064164893, 0.478717061273}, {0.867069376285, + 1.13381474809, 0.926972725286, 0.725539939514, 0.584635025748, + 0.496936637883, 0.438694579826, 0.393021669543}, {0.721500455585, + 0.962064122248, 0.820534991409, 0.661776842059, 0.531064164893, + 
0.438694579826, 0.375820256136, 0.330555063063}, {0.593906509971, + 0.802254508198, 0.706020324706, 0.587716619023, 0.478717061273, + 0.393021669543, 0.330555063063, 0.285345396658}}; + +static double convert_score_db(double _score, double _weight) { + return 10 * (log10(255 * 255) - log10(_weight * _score)); +} + +static double calc_psnrhvs(const unsigned char *_src, int _systride, + const unsigned char *_dst, int _dystride, + double _par, int _w, int _h, int _step, + float _csf[8][8]) { + float ret; + int16_t dct_s[8 * 8], dct_d[8 * 8]; + tran_low_t dct_s_coef[8 * 8], dct_d_coef[8 * 8]; + float mask[8][8]; + int pixels; + int x; + int y; + (void) _par; + ret = pixels = 0; + /*In the PSNR-HVS-M paper[1] the authors describe the construction of + their masking table as "we have used the quantization table for the + color component Y of JPEG [6] that has been also obtained on the + basis of CSF. Note that the values in quantization table JPEG have + been normalized and then squared." Their CSF matrix (from PSNR-HVS) + was also constructed from the JPEG matrices. I can not find any obvious + scheme of normalizing to produce their table, but if I multiply their + CSF by 0.38857 and square the result I get their masking table. + I have no idea where this constant comes from, but deviating from it + too greatly hurts MOS agreement. 
+ + [1] Nikolay Ponomarenko, Flavia Silvestri, Karen Egiazarian, Marco Carli, + Jaakko Astola, Vladimir Lukin, "On between-coefficient contrast masking + of DCT basis functions", CD-ROM Proceedings of the Third + International Workshop on Video Processing and Quality Metrics for Consumer + Electronics VPQM-07, Scottsdale, Arizona, USA, 25-26 January, 2007, 4 p.*/ + for (x = 0; x < 8; x++) + for (y = 0; y < 8; y++) + mask[x][y] = (_csf[x][y] * 0.3885746225901003) + * (_csf[x][y] * 0.3885746225901003); + for (y = 0; y < _h - 7; y += _step) { + for (x = 0; x < _w - 7; x += _step) { + int i; + int j; + float s_means[4]; + float d_means[4]; + float s_vars[4]; + float d_vars[4]; + float s_gmean = 0; + float d_gmean = 0; + float s_gvar = 0; + float d_gvar = 0; + float s_mask = 0; + float d_mask = 0; + for (i = 0; i < 4; i++) + s_means[i] = d_means[i] = s_vars[i] = d_vars[i] = 0; + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + int sub = ((i & 12) >> 2) + ((j & 12) >> 1); + dct_s[i * 8 + j] = _src[(y + i) * _systride + (j + x)]; + dct_d[i * 8 + j] = _dst[(y + i) * _dystride + (j + x)]; + s_gmean += dct_s[i * 8 + j]; + d_gmean += dct_d[i * 8 + j]; + s_means[sub] += dct_s[i * 8 + j]; + d_means[sub] += dct_d[i * 8 + j]; + } + } + s_gmean /= 64.f; + d_gmean /= 64.f; + for (i = 0; i < 4; i++) + s_means[i] /= 16.f; + for (i = 0; i < 4; i++) + d_means[i] /= 16.f; + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + int sub = ((i & 12) >> 2) + ((j & 12) >> 1); + s_gvar += (dct_s[i * 8 + j] - s_gmean) * (dct_s[i * 8 + j] - s_gmean); + d_gvar += (dct_d[i * 8 + j] - d_gmean) * (dct_d[i * 8 + j] - d_gmean); + s_vars[sub] += (dct_s[i * 8 + j] - s_means[sub]) + * (dct_s[i * 8 + j] - s_means[sub]); + d_vars[sub] += (dct_d[i * 8 + j] - d_means[sub]) + * (dct_d[i * 8 + j] - d_means[sub]); + } + } + s_gvar *= 1 / 63.f * 64; + d_gvar *= 1 / 63.f * 64; + for (i = 0; i < 4; i++) + s_vars[i] *= 1 / 15.f * 16; + for (i = 0; i < 4; i++) + d_vars[i] *= 1 / 15.f * 16; + if (s_gvar > 
0) + s_gvar = (s_vars[0] + s_vars[1] + s_vars[2] + s_vars[3]) / s_gvar; + if (d_gvar > 0) + d_gvar = (d_vars[0] + d_vars[1] + d_vars[2] + d_vars[3]) / d_gvar; + od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8); + od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8); + for (i = 0; i < 8; i++) + for (j = (i == 0); j < 8; j++) + s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j]; + for (i = 0; i < 8; i++) + for (j = (i == 0); j < 8; j++) + d_mask += dct_d_coef[i * 8 + j] * dct_d_coef[i * 8 + j] * mask[i][j]; + s_mask = sqrt(s_mask * s_gvar) / 32.f; + d_mask = sqrt(d_mask * d_gvar) / 32.f; + if (d_mask > s_mask) + s_mask = d_mask; + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + float err; + err = fabs(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]); + if (i != 0 || j != 0) + err = err < s_mask / mask[i][j] ? 0 : err - s_mask / mask[i][j]; + ret += (err * _csf[i][j]) * (err * _csf[i][j]); + pixels++; + } + } + } + } + ret /= pixels; + return ret; +} +double vp9_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *y_psnrhvs, double *u_psnrhvs, double *v_psnrhvs) { + double psnrhvs; + double par = 1.0; + int step = 7; + vp9_clear_system_state(); + *y_psnrhvs = calc_psnrhvs(source->y_buffer, source->y_stride, dest->y_buffer, + dest->y_stride, par, source->y_crop_width, + source->y_crop_height, step, csf_y); + + *u_psnrhvs = calc_psnrhvs(source->u_buffer, source->uv_stride, dest->u_buffer, + dest->uv_stride, par, source->uv_crop_width, + source->uv_crop_height, step, csf_cb420); + + *v_psnrhvs = calc_psnrhvs(source->v_buffer, source->uv_stride, dest->v_buffer, + dest->uv_stride, par, source->uv_crop_width, + source->uv_crop_height, step, csf_cr420); + psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs)); + + return convert_score_db(psnrhvs, 1.0); +} diff --git a/media/libvpx/vp9/encoder/vp9_quantize.c b/media/libvpx/vp9/encoder/vp9_quantize.c index 29b54b07ce0..db5460b6cca 100644 --- a/media/libvpx/vp9/encoder/vp9_quantize.c +++ 
b/media/libvpx/vp9/encoder/vp9_quantize.c @@ -11,6 +11,7 @@ #include #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_seg_common.h" @@ -30,8 +31,8 @@ void vp9_quantize_dc(const tran_low_t *coeff_ptr, const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int tmp, eob = -1; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); @@ -52,8 +53,8 @@ void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { int eob = -1; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { const int rc = 0; @@ -84,8 +85,8 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int tmp, eob = -1; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { @@ -112,8 +113,8 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, const int n_coeffs = 1024; int eob = -1; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { const int rc = 0; @@ -148,8 +149,8 @@ void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t 
n_coeffs, (void)quant_shift_ptr; (void)iscan; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { // Quantization pass: All coefficients with index >= zero_flag are @@ -195,8 +196,8 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, (void)quant_shift_ptr; (void)iscan; - vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); if (!skip_block) { // Quantization pass: All coefficients with index >= zero_flag are @@ -238,8 +239,8 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, (void)quant_shift_ptr; (void)iscan; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { for (i = 0; i < n_coeffs; i++) { @@ -281,8 +282,8 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, (void)quant_shift_ptr; (void)iscan; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { for (i = 0; i < n_coeffs; i++) { @@ -321,8 +322,8 @@ void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; (void)iscan; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 
0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { // Pre-scan pass @@ -373,8 +374,8 @@ void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; (void)iscan; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { // Pre-scan pass @@ -431,8 +432,8 @@ void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int i, eob = -1; (void)iscan; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { // Pre-scan pass @@ -490,8 +491,8 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr, int i, eob = -1; (void)iscan; - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { // Pre-scan pass @@ -646,7 +647,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { const VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; QUANTS *const quants = &cpi->quants; - const int segment_id = xd->mi[0].src_mi->mbmi.segment_id; + const int segment_id = xd->mi[0]->mbmi.segment_id; const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); int i; diff --git a/media/libvpx/vp9/encoder/vp9_ratectrl.c b/media/libvpx/vp9/encoder/vp9_ratectrl.c index 7783f7bdc03..32682fe7480 100644 --- a/media/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/media/libvpx/vp9/encoder/vp9_ratectrl.c @@ 
-16,6 +16,7 @@ #include #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/encoder/vp9_aq_cyclicrefresh.h" @@ -136,7 +137,7 @@ static void init_minq_luts(int *kf_low_m, int *kf_high_m, } } -void vp9_rc_init_minq_luts() { +void vp9_rc_init_minq_luts(void) { init_minq_luts(kf_low_motion_minq_8, kf_high_motion_minq_8, arfgf_low_motion_minq_8, arfgf_high_motion_minq_8, inter_minq_8, rtc_minq_8, VPX_BITS_8); @@ -233,13 +234,16 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { return target; } -// Update the buffer level for higher layers, given the encoded current layer. +// Update the buffer level for higher temporal layers, given the encoded current +// temporal layer. static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { - int temporal_layer = 0; + int i = 0; int current_temporal_layer = svc->temporal_layer_id; - for (temporal_layer = current_temporal_layer + 1; - temporal_layer < svc->number_temporal_layers; ++temporal_layer) { - LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer]; + for (i = current_temporal_layer + 1; + i < svc->number_temporal_layers; ++i) { + const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, + svc->number_temporal_layers); + LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size); @@ -267,7 +271,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); rc->buffer_level = rc->bits_off_target; - if (cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR) { + if (is_one_pass_cbr_svc(cpi)) { update_layer_buffer_level(&cpi->svc, encoded_frame_size); } } @@ -491,7 +495,10 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, i = active_best_quality; do { - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && 
cm->seg.enabled) { + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cm->seg.enabled && + cpi->svc.temporal_layer_id == 0 && + cpi->svc.spatial_layer_id == 0) { bits_per_mb_at_this_q = (int)vp9_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor); } else { @@ -1057,10 +1064,12 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, if (frame_is_intra_only(cm) || (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { - active_best_quality -= cpi->twopass.extend_minq; + active_best_quality -= + (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast); active_worst_quality += (cpi->twopass.extend_maxq / 2); } else { - active_best_quality -= cpi->twopass.extend_minq / 2; + active_best_quality -= + (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2; active_worst_quality += cpi->twopass.extend_maxq; } } @@ -1203,11 +1212,9 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { // this frame refreshes means next frames don't unless specified by user rc->frames_since_golden = 0; - if (cpi->oxcf.pass == 2) { - if (!rc->source_alt_ref_pending && - cpi->twopass.gf_group.rf_level[0] == GF_ARF_STD) - rc->source_alt_ref_active = 0; - } else if (!rc->source_alt_ref_pending) { + // If we are not using alt ref in the up and coming group clear the arf + // active flag. + if (!rc->source_alt_ref_pending) { rc->source_alt_ref_active = 0; } @@ -1414,13 +1421,14 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { } else { target = rc->avg_frame_bandwidth; } - if (svc->number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) { + if (is_one_pass_cbr_svc(cpi)) { // Note that for layers, avg_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). 
- int current_temporal_layer = svc->temporal_layer_id; - const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer]; + int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id, + svc->temporal_layer_id, svc->number_temporal_layers); + const LAYER_CONTEXT *lc = &svc->layer_context[layer]; target = lc->avg_frame_size; min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); } @@ -1455,7 +1463,9 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { if (svc->number_temporal_layers > 1 && oxcf->rc_mode == VPX_CBR) { // Use the layer framerate for temporal layers CBR mode. - const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id]; + const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, + svc->temporal_layer_id, svc->number_temporal_layers); + const LAYER_CONTEXT *lc = &svc->layer_context[layer]; framerate = lc->framerate; } kf_boost = MAX(kf_boost, (int)(2 * framerate - 16)); @@ -1468,10 +1478,27 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { return vp9_rc_clamp_iframe_target_size(cpi, target); } +// Reset information needed to set proper reference frames and buffer updates +// for temporal layering. This is called when a key frame is encoded. 
+static void reset_temporal_layer_to_zero(VP9_COMP *cpi) { + int sl; + LAYER_CONTEXT *lc = NULL; + cpi->svc.temporal_layer_id = 0; + + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers]; + lc->current_video_frame_in_layer = 0; + lc->frames_from_key_frame = 0; + } +} + void vp9_rc_get_svc_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target = rc->avg_frame_bandwidth; + const int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, + cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers); + if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) || (cpi->oxcf.auto_key && (rc->frames_since_key % @@ -1480,30 +1507,39 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { rc->source_alt_ref_active = 0; if (is_two_pass_svc(cpi)) { - cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1; + cpi->svc.layer_context[layer].is_key_frame = 1; cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); - } - - if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) { + } else if (is_one_pass_cbr_svc(cpi)) { + cpi->svc.layer_context[layer].is_key_frame = 1; + reset_temporal_layer_to_zero(cpi); + cpi->ref_frame_flags &= + (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); + // Assumption here is that LAST_FRAME is being updated for a keyframe. + // Thus no change in update flags. 
target = calc_iframe_target_size_one_pass_cbr(cpi); } } else { cm->frame_type = INTER_FRAME; - if (is_two_pass_svc(cpi)) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; if (cpi->svc.spatial_layer_id == 0) { lc->is_key_frame = 0; } else { - lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; + lc->is_key_frame = + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame; if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG); } cpi->ref_frame_flags &= (~VP9_ALT_FLAG); - } - - if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) { + } else if (is_one_pass_cbr_svc(cpi)) { + LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; + if (cpi->svc.spatial_layer_id == 0) { + lc->is_key_frame = 0; + } else { + lc->is_key_frame = + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame; + } target = calc_pframe_target_size_one_pass_cbr(cpi); } } @@ -1609,11 +1645,21 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, return target_index - qindex; } -void vp9_rc_set_gf_max_interval(const VP9_COMP *const cpi, - RATE_CONTROL *const rc) { +#define MIN_GF_INTERVAL 4 +#define MAX_GF_INTERVAL 16 +void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi, + RATE_CONTROL *const rc) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; - // Set Maximum gf/arf interval - rc->max_gf_interval = 16; + + // Set a minimum interval. + rc->min_gf_interval = + MIN(MAX_GF_INTERVAL, MAX(MIN_GF_INTERVAL, (int)(cpi->framerate * 0.125))); + + // Set Maximum gf/arf interval. + rc->max_gf_interval = + MIN(MAX_GF_INTERVAL, (int)(cpi->framerate * 0.75)); + // Round up to next even number if odd. 
+ rc->max_gf_interval += (rc->max_gf_interval & 0x01); // Extended interval for genuinely static scenes rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2; @@ -1625,6 +1671,9 @@ void vp9_rc_set_gf_max_interval(const VP9_COMP *const cpi, if (rc->max_gf_interval > rc->static_scene_max_gf_interval) rc->max_gf_interval = rc->static_scene_max_gf_interval; + + // Clamp min to max + rc->min_gf_interval = MIN(rc->min_gf_interval, rc->max_gf_interval); } void vp9_rc_update_framerate(VP9_COMP *cpi) { @@ -1651,14 +1700,14 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) { rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits); - vp9_rc_set_gf_max_interval(cpi, rc); + vp9_rc_set_gf_interval_range(cpi, rc); } #define VBR_PCT_ADJUSTMENT_LIMIT 50 // For VBR...adjustment to the frame target based on error from previous frames -static void vbr_rate_correction(VP9_COMP *cpi, - int *this_frame_target, - int64_t vbr_bits_off_target) { +static void vbr_rate_correction(VP9_COMP *cpi, int *this_frame_target) { + RATE_CONTROL *const rc = &cpi->rc; + int64_t vbr_bits_off_target = rc->vbr_bits_off_target; int max_delta; double position_factor = 1.0; @@ -1682,6 +1731,20 @@ static void vbr_rate_correction(VP9_COMP *cpi, (vbr_bits_off_target < -max_delta) ? max_delta : (int)-vbr_bits_off_target; } + + // Fast redistribution of bits arising from massive local undershoot. + // Dont do it for kf,arf,gf or overlay frames. 
+ if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref && + rc->vbr_bits_off_target_fast) { + int one_frame_bits = MAX(rc->avg_frame_bandwidth, *this_frame_target); + int fast_extra_bits; + fast_extra_bits = + (int)MIN(rc->vbr_bits_off_target_fast, one_frame_bits); + fast_extra_bits = (int)MIN(fast_extra_bits, + MAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8)); + *this_frame_target += (int)fast_extra_bits; + rc->vbr_bits_off_target_fast -= fast_extra_bits; + } } void vp9_set_target_rate(VP9_COMP *cpi) { @@ -1690,6 +1753,6 @@ void vp9_set_target_rate(VP9_COMP *cpi) { // Correction to rate target based on prior over or under shoot. if (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.rc_mode == VPX_CQ) - vbr_rate_correction(cpi, &target_rate, rc->vbr_bits_off_target); + vbr_rate_correction(cpi, &target_rate); vp9_rc_set_frame_target(cpi, target_rate); } diff --git a/media/libvpx/vp9/encoder/vp9_ratectrl.h b/media/libvpx/vp9/encoder/vp9_ratectrl.h index 705796a8de6..e12d200be83 100644 --- a/media/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/media/libvpx/vp9/encoder/vp9_ratectrl.h @@ -73,6 +73,7 @@ typedef struct { int frames_since_golden; int frames_till_gf_update_due; + int min_gf_interval; int max_gf_interval; int static_scene_max_gf_interval; int baseline_gf_interval; @@ -99,6 +100,7 @@ typedef struct { int64_t buffer_level; int64_t bits_off_target; int64_t vbr_bits_off_target; + int64_t vbr_bits_off_target_fast; int decimation_factor; int decimation_count; @@ -151,7 +153,7 @@ int vp9_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs, double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth); -void vp9_rc_init_minq_luts(); +void vp9_rc_init_minq_luts(void); // Generally at the high level, the following flow is expected // to be enforced for rate control: @@ -238,8 +240,8 @@ int vp9_frame_type_qdelta(const struct VP9_COMP *cpi, int rf_level, int q); void vp9_rc_update_framerate(struct VP9_COMP *cpi); -void vp9_rc_set_gf_max_interval(const 
struct VP9_COMP *const cpi, - RATE_CONTROL *const rc); +void vp9_rc_set_gf_interval_range(const struct VP9_COMP *const cpi, + RATE_CONTROL *const rc); void vp9_set_target_rate(struct VP9_COMP *cpi); diff --git a/media/libvpx/vp9/encoder/vp9_rd.c b/media/libvpx/vp9/encoder/vp9_rd.c index ae5a01226f7..bbcbfe9293a 100644 --- a/media/libvpx/vp9/encoder/vp9_rd.c +++ b/media/libvpx/vp9/encoder/vp9_rd.c @@ -15,6 +15,7 @@ #include "./vp9_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" @@ -128,7 +129,7 @@ static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range, } } -void vp9_init_me_luts() { +void vp9_init_me_luts(void) { init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE, VPX_BITS_8); #if CONFIG_VP9_HIGHBITDEPTH @@ -419,8 +420,8 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, int i; switch (tx_size) { case TX_4X4: - vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); - vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h); + memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); + memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h); break; case TX_8X8: for (i = 0; i < num_4x4_w; i += 2) @@ -450,13 +451,14 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; int i; int zero_seen = 0; int best_index = 0; int best_sad = INT_MAX; int this_sad = INT_MAX; int max_mv = 0; + int near_same_nearest; uint8_t *src_y_ptr = x->plane[0].src.buf; uint8_t *ref_y_ptr; const int num_mv_refs = MAX_MV_REF_CANDIDATES + @@ -469,23 +471,27 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, pred_mv[2] = x->pred_mv[ref_frame]; assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0]))); + near_same_nearest = + 
mbmi->ref_mvs[ref_frame][0].as_int == mbmi->ref_mvs[ref_frame][1].as_int; // Get the sad for each candidate reference mv. for (i = 0; i < num_mv_refs; ++i) { const MV *this_mv = &pred_mv[i]; + int fp_row, fp_col; - max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3); - if (is_zero_mv(this_mv) && zero_seen) + if (i == 1 && near_same_nearest) continue; + fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3; + fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3; + max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3); - zero_seen |= is_zero_mv(this_mv); - - ref_y_ptr = - &ref_y_buffer[ref_y_stride * (this_mv->row >> 3) + (this_mv->col >> 3)]; + if (fp_row ==0 && fp_col == 0 && zero_seen) + continue; + zero_seen |= (fp_row ==0 && fp_col == 0); + ref_y_ptr =&ref_y_buffer[ref_y_stride * fp_row + fp_col]; // Find sad for current vector. this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride); - // Note if it is the best so far. 
if (this_sad < best_sad) { best_sad = this_sad; @@ -545,7 +551,7 @@ YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, } int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) { - const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx = vp9_get_pred_context_switchable_interp(xd); return SWITCHABLE_INTERP_RATE_FACTOR * cpi->switchable_interp_costs[ctx][mbmi->interp_filter]; @@ -611,8 +617,7 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { {2000, 2000, 2000, 4000, 4000, 2000}}; RD_OPT *const rd = &cpi->rd; const int idx = cpi->oxcf.mode == BEST; - vpx_memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], - sizeof(thresh_mult[idx])); + memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx])); } void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh, diff --git a/media/libvpx/vp9/encoder/vp9_rd.h b/media/libvpx/vp9/encoder/vp9_rd.h index 4d247342b0a..7ba2568fe68 100644 --- a/media/libvpx/vp9/encoder/vp9_rd.h +++ b/media/libvpx/vp9/encoder/vp9_rd.h @@ -150,7 +150,7 @@ int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi, int ref_frame); -void vp9_init_me_luts(); +void vp9_init_me_luts(void); void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, diff --git a/media/libvpx/vp9/encoder/vp9_rdopt.c b/media/libvpx/vp9/encoder/vp9_rdopt.c index f0c00610967..9fa258c611e 100644 --- a/media/libvpx/vp9/encoder/vp9_rdopt.c +++ b/media/libvpx/vp9/encoder/vp9_rdopt.c @@ -14,6 +14,7 @@ #include "./vp9_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" @@ -24,6 +25,7 @@ #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_scan.h" 
#include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_systemdependent.h" @@ -164,7 +166,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int i; int64_t rate_sum = 0; int64_t dist_sum = 0; - const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0]; + const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; unsigned int var = 0; unsigned int sum_sse = 0; @@ -292,6 +294,18 @@ int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, return error; } +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, + int block_size) { + int i; + int64_t error = 0; + + for (i = 0; i < block_size; i++) { + const int diff = coeff[i] - dqcoeff[i]; + error += diff * diff; + } + + return error; +} #if CONFIG_VP9_HIGHBITDEPTH int64_t vp9_highbd_block_error_c(const tran_low_t *coeff, @@ -335,7 +349,7 @@ static int cost_coeffs(MACROBLOCK *x, const int16_t *scan, const int16_t *nb, int use_fast_coef_costing) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const struct macroblock_plane *p = &x->plane[plane]; const struct macroblockd_plane *pd = &xd->plane[plane]; const PLANE_TYPE type = pd->plane_type; @@ -440,7 +454,7 @@ static void dist_block(int plane, int block, TX_SIZE tx_size, #endif // CONFIG_VP9_HIGHBITDEPTH args->sse = this_sse >> shift; - if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) { + if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) { // TODO(jingning): tune the model to better capture the distortion. 
int64_t p = (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> (shift + 2); @@ -470,7 +484,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; int64_t rd1, rd2, rd; if (args->skip) @@ -576,7 +590,7 @@ static void txfm_rd_in_plane(MACROBLOCK *x, args.use_fast_coef_costing = use_fast_coef_casting; if (plane == 0) - xd->mi[0].src_mi->mbmi.tx_size = tx_size; + xd->mi[0]->mbmi.tx_size = tx_size; vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); @@ -606,7 +620,7 @@ static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; mbmi->tx_size = MIN(max_tx_size, largest_tx_size); @@ -626,7 +640,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); int r[TX_SIZES][2], s[TX_SIZES]; int64_t d[TX_SIZES], sse[TX_SIZES]; @@ -713,10 +727,10 @@ static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t sse; int64_t *ret_sse = psse ? 
psse : &sse; - assert(bs == xd->mi[0].src_mi->mbmi.sb_type); + assert(bs == xd->mi[0]->mbmi.sb_type); if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) { - vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); + memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd, bs); } else { @@ -778,9 +792,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, assert(ib < 4); - vpx_memcpy(ta, a, sizeof(ta)); - vpx_memcpy(tl, l, sizeof(tl)); - xd->mi[0].src_mi->mbmi.tx_size = TX_4X4; + memcpy(ta, a, sizeof(ta)); + memcpy(tl, l, sizeof(tl)); + xd->mi[0]->mbmi.tx_size = TX_4X4; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -800,8 +814,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, continue; } - vpx_memcpy(tempa, ta, sizeof(ta)); - vpx_memcpy(templ, tl, sizeof(tl)); + memcpy(tempa, ta, sizeof(ta)); + memcpy(templ, tl, sizeof(tl)); for (idy = 0; idy < num_4x4_blocks_high; ++idy) { for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { @@ -812,7 +826,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, block, p->src_diff); tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); - xd->mi[0].src_mi->bmi[block].as_mode = mode; + xd->mi[0]->bmi[block].as_mode = mode; vp9_predict_intra_block(xd, block, 1, TX_4X4, mode, x->skip_encode ? 
src : dst, @@ -861,12 +875,12 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; - vpx_memcpy(a, tempa, sizeof(tempa)); - vpx_memcpy(l, templ, sizeof(templ)); + memcpy(a, tempa, sizeof(tempa)); + memcpy(l, templ, sizeof(templ)); for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) { - vpx_memcpy(best_dst16 + idy * 8, - CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride), - num_4x4_blocks_wide * 4 * sizeof(uint16_t)); + memcpy(best_dst16 + idy * 8, + CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride), + num_4x4_blocks_wide * 4 * sizeof(uint16_t)); } } next_highbd: @@ -876,9 +890,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, return best_rd; for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) { - vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride), - best_dst16 + idy * 8, - num_4x4_blocks_wide * 4 * sizeof(uint16_t)); + memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride), + best_dst16 + idy * 8, + num_4x4_blocks_wide * 4 * sizeof(uint16_t)); } return best_rd; @@ -901,8 +915,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, continue; } - vpx_memcpy(tempa, ta, sizeof(ta)); - vpx_memcpy(templ, tl, sizeof(tl)); + memcpy(tempa, ta, sizeof(ta)); + memcpy(templ, tl, sizeof(tl)); for (idy = 0; idy < num_4x4_blocks_high; ++idy) { for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { @@ -912,7 +926,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int16_t *const src_diff = vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); - xd->mi[0].src_mi->bmi[block].as_mode = mode; + xd->mi[0]->bmi[block].as_mode = mode; vp9_predict_intra_block(xd, block, 1, TX_4X4, mode, x->skip_encode ? 
src : dst, @@ -959,11 +973,11 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; - vpx_memcpy(a, tempa, sizeof(tempa)); - vpx_memcpy(l, templ, sizeof(templ)); + memcpy(a, tempa, sizeof(tempa)); + memcpy(l, templ, sizeof(templ)); for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) - vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride, - num_4x4_blocks_wide * 4); + memcpy(best_dst + idy * 8, dst_init + idy * dst_stride, + num_4x4_blocks_wide * 4); } next: {} @@ -973,8 +987,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, return best_rd; for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) - vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8, - num_4x4_blocks_wide * 4); + memcpy(dst_init + idy * dst_stride, best_dst + idy * 8, + num_4x4_blocks_wide * 4); return best_rd; } @@ -985,10 +999,10 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, int64_t best_rd) { int i, j; const MACROBLOCKD *const xd = &mb->e_mbd; - MODE_INFO *const mic = xd->mi[0].src_mi; + MODE_INFO *const mic = xd->mi[0]; const MODE_INFO *above_mi = xd->above_mi; const MODE_INFO *left_mi = xd->left_mi; - const BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; @@ -999,8 +1013,8 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, ENTROPY_CONTEXT t_above[4], t_left[4]; const int *bmode_costs = cpi->mbmode_cost; - vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); - vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); + memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); + memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); // Pick modes for each sub-block (of size 4x4, 4x8, or 
8x4) in an 8x8 block. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { @@ -1056,7 +1070,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, PREDICTION_MODE mode; PREDICTION_MODE mode_selected = DC_PRED; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mic = xd->mi[0].src_mi; + MODE_INFO *const mic = xd->mi[0]; int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd; TX_SIZE best_tx = TX_4X4; @@ -1072,7 +1086,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < TX_MODES; i++) tx_cache[i] = INT64_MAX; - vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); + memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); /* Y Search for intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { int64_t local_tx_cache[TX_MODES]; @@ -1131,7 +1145,7 @@ static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int64_t *sse, BLOCK_SIZE bsize, int64_t ref_best_rd) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]); int plane; int pnrate = 0, pnskip = 1; @@ -1189,12 +1203,12 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, int this_rate_tokenonly, this_rate, s; int64_t this_distortion, this_sse; - vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); + memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); for (mode = DC_PRED; mode <= TM_PRED; ++mode) { if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; - xd->mi[0].src_mi->mbmi.uv_mode = mode; + xd->mi[0]->mbmi.uv_mode = mode; if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd)) @@ -1215,7 +1229,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - xd->mi[0].src_mi->mbmi.uv_mode = mode_selected; + xd->mi[0]->mbmi.uv_mode = mode_selected; return best_rd; } @@ -1226,8 +1240,8 @@ static 
int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, const VP9_COMMON *cm = &cpi->common; int64_t unused; - x->e_mbd.mi[0].src_mi->mbmi.uv_mode = DC_PRED; - vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); + x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED; + memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused, bsize, INT64_MAX); *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED]; @@ -1252,7 +1266,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size); } - *mode_uv = x->e_mbd.mi[0].src_mi->mbmi.uv_mode; + *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode; } static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode, @@ -1267,7 +1281,7 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i, int_mv seg_mvs[MAX_REF_FRAMES], int_mv *best_ref_mv[2], const int *mvjcost, int *mvcost[2]) { - MODE_INFO *const mic = xd->mi[0].src_mi; + MODE_INFO *const mic = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mic->mbmi; int thismvcost = 0; int idx, idy; @@ -1309,8 +1323,7 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i, for (idy = 0; idy < num_4x4_blocks_high; ++idy) for (idx = 0; idx < num_4x4_blocks_wide; ++idx) - vpx_memmove(&mic->bmi[i + idy * 2 + idx], - &mic->bmi[i], sizeof(mic->bmi[i])); + memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i])); return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) + thismvcost; @@ -1329,7 +1342,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCKD *xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; - MODE_INFO *const mi = xd->mi[0].src_mi; + MODE_INFO *const mi = xd->mi[0]; const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); const int width = 4 * 
num_4x4_blocks_wide_lookup[plane_bsize]; const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; @@ -1470,7 +1483,7 @@ static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) { } static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { - MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; @@ -1486,7 +1499,7 @@ static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, struct buf_2d orig_pre[2]) { - MB_MODE_INFO *mbmi = &x->e_mbd.mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi; x->plane[0].src = orig_src; x->e_mbd.plane[0].pre[0] = orig_pre[0]; if (has_second_ref(mbmi)) @@ -1544,18 +1557,11 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int refs[2] = {mbmi->ref_frame[0], mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]}; int_mv ref_mv[2]; int ite, ref; - // Prediction buffer from second frame. -#if CONFIG_VP9_HIGHBITDEPTH - uint8_t *second_pred; - uint8_t *second_pred_alloc; -#else - uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); -#endif // CONFIG_VP9_HIGHBITDEPTH const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter); struct scale_factors sf; @@ -1566,14 +1572,13 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]), vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1]) }; + + // Prediction buffer from second frame. 
#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint16_t)); - second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc); - } else { - second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint8_t)); - second_pred = second_pred_alloc; - } + DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]); + uint8_t *second_pred; +#else + DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]); #endif // CONFIG_VP9_HIGHBITDEPTH for (ref = 0; ref < 2; ++ref) { @@ -1628,6 +1633,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // Get the prediction block from the 'other' reference frame. #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16); vp9_highbd_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, @@ -1637,6 +1643,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd); } else { + second_pred = (uint8_t *)second_pred_alloc_16; vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, @@ -1722,12 +1729,6 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &mbmi->ref_mvs[refs[ref]][0].as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } - -#if CONFIG_VP9_HIGHBITDEPTH - vpx_free(second_pred_alloc); -#else - vpx_free(second_pred); -#endif // CONFIG_VP9_HIGHBITDEPTH } static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, @@ -1745,7 +1746,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, int i; BEST_SEG_INFO *bsi = bsi_buf + filter_idx; MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi[0].src_mi; + MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO *mbmi = &mi->mbmi; int mode_idx; int k, br = 0, idx, idy; @@ -1777,8 +1778,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < 4; i++) bsi->modes[i] = ZEROMV; - 
vpx_memcpy(t_above, pd->above_context, sizeof(t_above)); - vpx_memcpy(t_left, pd->left_context, sizeof(t_left)); + memcpy(t_above, pd->above_context, sizeof(t_above)); + memcpy(t_left, pd->left_context, sizeof(t_left)); // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on @@ -1820,11 +1821,11 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, this_mode, mbmi->ref_frame)) continue; - vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre)); - vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above, - sizeof(bsi->rdstat[i][mode_idx].ta)); - vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left, - sizeof(bsi->rdstat[i][mode_idx].tl)); + memcpy(orig_pre, pd->pre, sizeof(orig_pre)); + memcpy(bsi->rdstat[i][mode_idx].ta, t_above, + sizeof(bsi->rdstat[i][mode_idx].ta)); + memcpy(bsi->rdstat[i][mode_idx].tl, t_left, + sizeof(bsi->rdstat[i][mode_idx].tl)); // motion search for newmv (single predictor case only) if (!has_second_rf && this_mode == NEWMV && @@ -2000,8 +2001,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!subpelmv && have_ref && ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { - vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx], - sizeof(SEG_RDSTAT)); + memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx], + sizeof(SEG_RDSTAT)); if (num_4x4_blocks_wide > 1) bsi->rdstat[i + 1][mode_idx].eobs = ref_bsi->rdstat[i + 1][mode_idx].eobs; @@ -2053,8 +2054,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, } mode_idx = INTER_OFFSET(mode_selected); - vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); - vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); + memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); + memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); set_and_cost_bmi_mvs(cpi, xd, i, mode_selected, mode_mv[mode_selected], frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost, @@ -2121,8 
+2122,8 @@ static void estimate_ref_frame_costs(const VP9_COMMON *cm, int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME); if (seg_ref_active) { - vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); - vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); + memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); + memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); *comp_mode_p = 128; } else { vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd); @@ -2186,14 +2187,14 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, ctx->skip = x->skip; ctx->skippable = skippable; ctx->best_mode_index = mode_index; - ctx->mic = *xd->mi[0].src_mi; + ctx->mic = *xd->mi[0]; ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE]; ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT]; - vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff)); - vpx_memcpy(ctx->best_filter_diff, best_filter_diff, - sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS); + memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff)); + memcpy(ctx->best_filter_diff, best_filter_diff, + sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS); } static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, @@ -2207,7 +2208,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const VP9_COMMON *cm = &cpi->common; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mi = xd->mi[0].src_mi; + MODE_INFO *const mi = xd->mi[0]; int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; @@ -2240,7 +2241,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; 
const VP9_COMMON *cm = &cpi->common; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; int bestsme = INT_MAX; int step_param; @@ -2412,7 +2413,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t filter_cache[]) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_comp_pred = has_second_ref(mbmi); const int this_mode = mbmi->mode; int_mv *frame_mv = mode_mv[this_mode]; @@ -2421,11 +2422,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; int_mv cur_mv[2]; #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE * 64 * 64); - DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE * 64 * 64); + DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]); uint8_t *tmp_buf; #else - DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64); + DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]); #endif // CONFIG_VP9_HIGHBITDEPTH int pred_exists = 0; int intpel_mv; @@ -2451,16 +2451,16 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16); } else { - tmp_buf = tmp_buf8; + tmp_buf = (uint8_t *)tmp_buf16; } #endif // CONFIG_VP9_HIGHBITDEPTH if (pred_filter_search) { INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE; if (xd->up_available) - af = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter; + af = xd->mi[-xd->mi_stride]->mbmi.interp_filter; if (xd->left_available) - lf = xd->mi[-1].src_mi->mbmi.interp_filter; + lf = xd->mi[-1]->mbmi.interp_filter; if ((this_mode != NEWMV) || (af == lf)) best_filter = af; @@ -2505,7 +2505,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, return INT64_MAX; 
frame_mv[refs[0]].as_int = - xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = tmp_mv.as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; single_newmv[refs[0]].as_int = tmp_mv.as_int; // Estimate the rate implications of a new mv but discount this @@ -2660,8 +2660,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, skip_txfm_sb = tmp_skip_sb; skip_sse_sb = tmp_skip_sse; - vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); - vpx_memcpy(bsse, x->bsse, sizeof(bsse)); + memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); + memcpy(bsse, x->bsse, sizeof(bsse)); } } restore_dst_buf(xd, orig_dst, orig_dst_stride); @@ -2691,8 +2691,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); - vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); - vpx_memcpy(bsse, x->bsse, sizeof(bsse)); + memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); + memcpy(bsse, x->bsse, sizeof(bsse)); } if (!is_comp_pred) @@ -2702,7 +2702,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (is_comp_pred) if (single_skippable[this_mode][refs[0]] && single_skippable[this_mode][refs[1]]) - vpx_memset(skip_txfm, 1, sizeof(skip_txfm)); + memset(skip_txfm, 1, sizeof(skip_txfm)); if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { // if current pred_error modeled rd is substantially more than the best @@ -2716,8 +2716,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cm->interp_filter == SWITCHABLE) *rate2 += rs; - vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); - vpx_memcpy(x->bsse, bsse, sizeof(bsse)); + memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); + memcpy(x->bsse, bsse, sizeof(bsse)); if (!skip_txfm_sb) { int skippable_y, skippable_uv; @@ -2783,8 +2783,8 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE max_uv_tx_size; x->skip_encode = 0; 
ctx->skip = 0; - xd->mi[0].src_mi->mbmi.ref_frame[0] = INTRA_FRAME; - xd->mi[0].src_mi->mbmi.ref_frame[1] = NONE; + xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->mbmi.ref_frame[1] = NONE; if (bsize >= BLOCK_8X8) { if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, @@ -2801,7 +2801,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, return; } } - max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize, + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize, pd[1].subsampling_x, pd[1].subsampling_y); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, @@ -2827,10 +2827,69 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - ctx->mic = *xd->mi[0].src_mi; + ctx->mic = *xd->mi[0]; rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); } +// This function is designed to apply a bias or adjustment to an rd value based +// on the relative variance of the source and reconstruction. +#define LOW_VAR_THRESH 16 +#define VLOW_ADJ_MAX 25 +#define VHIGH_ADJ_MAX 8 +static void rd_variance_adjustment(VP9_COMP *cpi, + MACROBLOCK *x, + BLOCK_SIZE bsize, + int64_t *this_rd, + MV_REFERENCE_FRAME ref_frame, + unsigned int source_variance) { + MACROBLOCKD *const xd = &x->e_mbd; + unsigned int recon_variance; + unsigned int absvar_diff = 0; + int64_t var_error = 0; + int64_t var_factor = 0; + + if (*this_rd == INT64_MAX) + return; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + recon_variance = + vp9_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize, xd->bd); + } else { + recon_variance = + vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize); + } +#else + recon_variance = + vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize); +#endif // CONFIG_VP9_HIGHBITDEPTH + + if ((source_variance + recon_variance) > LOW_VAR_THRESH) { + absvar_diff = (source_variance > recon_variance) + ? 
(source_variance - recon_variance) + : (recon_variance - source_variance); + + var_error = (200 * source_variance * recon_variance) / + ((source_variance * source_variance) + + (recon_variance * recon_variance)); + var_error = 100 - var_error; + } + + // Source variance above a threshold and ref frame is intra. + // This case is targeted mainly at discouraging intra modes that give rise + // to a predictor with a low spatial complexity compared to the source. + if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) && + (source_variance > recon_variance)) { + var_factor = MIN(absvar_diff, MIN(VLOW_ADJ_MAX, var_error)); + // A second possible case of interest is where the source variance + // is very low and we wish to discourage false texture or motion trails. + } else if ((source_variance < (LOW_VAR_THRESH >> 1)) && + (recon_variance > source_variance)) { + var_factor = MIN(absvar_diff, MIN(VHIGH_ADJ_MAX, var_error)); + } + *this_rd += (*this_rd * var_factor) / 100; +} + void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x, @@ -2843,7 +2902,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, RD_OPT *const rd_opt = &cpi->rd; SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; @@ -3092,10 +3151,10 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, ref_mv.as_int = INVALID_MV; if ((mi_row - 1) >= tile_info->mi_row_start) { - ref_mv = xd->mi[-xd->mi_stride].src_mi->mbmi.mv[0]; - rf = xd->mi[-xd->mi_stride].src_mi->mbmi.ref_frame[0]; + ref_mv = xd->mi[-xd->mi_stride]->mbmi.mv[0]; + rf = xd->mi[-xd->mi_stride]->mbmi.ref_frame[0]; for (i = 0; i < mi_width; ++i) { - ref_mbmi = &xd->mi[-xd->mi_stride + i].src_mi->mbmi; + ref_mbmi = &xd->mi[-xd->mi_stride + i]->mbmi; const_motion &= 
(ref_mv.as_int == ref_mbmi->mv[0].as_int) && (ref_frame == ref_mbmi->ref_frame[0]); skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]); @@ -3104,11 +3163,11 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, if ((mi_col - 1) >= tile_info->mi_col_start) { if (ref_mv.as_int == INVALID_MV) - ref_mv = xd->mi[-1].src_mi->mbmi.mv[0]; + ref_mv = xd->mi[-1]->mbmi.mv[0]; if (rf == NONE) - rf = xd->mi[-1].src_mi->mbmi.ref_frame[0]; + rf = xd->mi[-1]->mbmi.ref_frame[0]; for (i = 0; i < mi_height; ++i) { - ref_mbmi = &xd->mi[i * xd->mi_stride - 1].src_mi->mbmi; + ref_mbmi = &xd->mi[i * xd->mi_stride - 1]->mbmi; const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) && (ref_frame == ref_mbmi->ref_frame[0]); skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]); @@ -3208,7 +3267,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; struct macroblockd_plane *const pd = &xd->plane[1]; - vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); + memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize, tx_cache, best_rd); if (rate_y == INT_MAX) @@ -3287,6 +3346,11 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); } + // Apply an adjustment to the rd value based on the similarity of the + // source variance and reconstructed variance. 
+ rd_variance_adjustment(cpi, x, bsize, &this_rd, + ref_frame, x->source_variance); + if (ref_frame == INTRA_FRAME) { // Keep record of best intra rd if (this_rd < best_intra_rd) { @@ -3327,8 +3391,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); - vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], - sizeof(uint8_t) * ctx->num_4x4_blk); + memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], + sizeof(uint8_t) * ctx->num_4x4_blk); // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history @@ -3517,7 +3581,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, if (!x->skip && !x->select_tx_size) { int has_high_freq_coeff = 0; int plane; - int max_plane = is_inter_block(&xd->mi[0].src_mi->mbmi) + int max_plane = is_inter_block(&xd->mi[0]->mbmi) ? MAX_MB_PLANE : 1; for (plane = 0; plane < max_plane; ++plane) { x->plane[plane].eobs = ctx->eobs_pbuf[plane][1]; @@ -3547,7 +3611,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; unsigned char segment_id = mbmi->segment_id; const int comp_pred = 0; int i; @@ -3653,7 +3717,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, RD_OPT *const rd_opt = &cpi->rd; SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; @@ -3688,7 +3752,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; - vpx_memset(x->zcoeff_blk[TX_4X4], 
0, 4); + memset(x->zcoeff_blk[TX_4X4], 0, 4); vp9_zero(best_mbmode); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) @@ -3944,7 +4008,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, tmp_best_skippable = skippable; tmp_best_mbmode = *mbmi; for (i = 0; i < 4; i++) { - tmp_best_bmodes[i] = xd->mi[0].src_mi->bmi[i]; + tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; } pred_exists = 1; @@ -3988,7 +4052,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, skippable = tmp_best_skippable; *mbmi = tmp_best_mbmode; for (i = 0; i < 4; i++) - xd->mi[0].src_mi->bmi[i] = tmp_best_bmodes[i]; + xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; } rate2 += rate; @@ -4012,7 +4076,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, // then dont bother looking at UV vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8); - vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); + memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, &uv_sse, BLOCK_8X8, tmp_best_rdu)) continue; @@ -4093,11 +4157,11 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, best_skip2 = this_skip2; if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); - vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], - sizeof(uint8_t) * ctx->num_4x4_blk); + memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], + sizeof(uint8_t) * ctx->num_4x4_blk); for (i = 0; i < 4; i++) - best_bmodes[i] = xd->mi[0].src_mi->bmi[i]; + best_bmodes[i] = xd->mi[0]->bmi[i]; // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history @@ -4215,14 +4279,13 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, x->skip |= best_skip2; if (!is_inter_block(&best_mbmode)) { for (i = 0; i < 4; i++) - xd->mi[0].src_mi->bmi[i].as_mode = best_bmodes[i].as_mode; + xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode; } else { for (i = 0; i < 4; ++i) - 
vpx_memcpy(&xd->mi[0].src_mi->bmi[i], &best_bmodes[i], - sizeof(b_mode_info)); + memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); - mbmi->mv[0].as_int = xd->mi[0].src_mi->bmi[3].as_mv[0].as_int; - mbmi->mv[1].as_int = xd->mi[0].src_mi->bmi[3].as_mv[1].as_int; + mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; + mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; } for (i = 0; i < REFERENCE_MODES; ++i) { diff --git a/media/libvpx/vp9/encoder/vp9_rdopt.h b/media/libvpx/vp9/encoder/vp9_rdopt.h index 7bbc3c89a06..459b0324bcf 100644 --- a/media/libvpx/vp9/encoder/vp9_rdopt.h +++ b/media/libvpx/vp9/encoder/vp9_rdopt.h @@ -29,6 +29,15 @@ void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x, struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd); +unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs); +#if CONFIG_VP9_HIGHBITDEPTH +unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs, int bd); +#endif + void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x, diff --git a/media/libvpx/vp9/encoder/vp9_resize.c b/media/libvpx/vp9/encoder/vp9_resize.c index 2ebdff291d6..bca5b132698 100644 --- a/media/libvpx/vp9/encoder/vp9_resize.c +++ b/media/libvpx/vp9/encoder/vp9_resize.c @@ -15,6 +15,7 @@ #include #include +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_resize.h" @@ -427,7 +428,7 @@ static int get_down2_length(int length, int steps) { return length; } -int get_down2_steps(int in_length, int out_length) { +static int get_down2_steps(int in_length, int out_length) { int steps = 0; int proj_in_length; while ((proj_in_length = get_down2_length(in_length, 1)) >= out_length) { diff --git a/media/libvpx/vp9/encoder/vp9_segmentation.c b/media/libvpx/vp9/encoder/vp9_segmentation.c index c9874f7c67c..9b15072e98e 
100644 --- a/media/libvpx/vp9/encoder/vp9_segmentation.c +++ b/media/libvpx/vp9/encoder/vp9_segmentation.c @@ -36,7 +36,7 @@ void vp9_set_segment_data(struct segmentation *seg, unsigned char abs_delta) { seg->abs_delta = abs_delta; - vpx_memcpy(seg->feature_data, feature_data, sizeof(seg->feature_data)); + memcpy(seg->feature_data, feature_data, sizeof(seg->feature_data)); } void vp9_disable_segfeature(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id) { @@ -107,7 +107,7 @@ static int cost_segmap(int *segcounts, vp9_prob *probs) { } static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd, - const TileInfo *tile, MODE_INFO *mi, + const TileInfo *tile, MODE_INFO **mi, int *no_pred_segcounts, int (*temporal_predictor_count)[2], int *t_unpred_seg_counts, @@ -118,7 +118,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd, return; xd->mi = mi; - segment_id = xd->mi[0].src_mi->mbmi.segment_id; + segment_id = xd->mi[0]->mbmi.segment_id; set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); @@ -127,7 +127,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd, // Temporal prediction not allowed on key frames if (cm->frame_type != KEY_FRAME) { - const BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; // Test to see if the segment id matches the predicted value. 
const int pred_segment_id = vp9_get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col); @@ -136,7 +136,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd, // Store the prediction status for this mb and update counts // as appropriate - xd->mi[0].src_mi->mbmi.seg_id_predicted = pred_flag; + xd->mi[0]->mbmi.seg_id_predicted = pred_flag; temporal_predictor_count[pred_context][pred_flag]++; // Update the "unpredicted" segment count @@ -146,7 +146,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd, } static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd, - const TileInfo *tile, MODE_INFO *mi, + const TileInfo *tile, MODE_INFO **mi, int *no_pred_segcounts, int (*temporal_predictor_count)[2], int *t_unpred_seg_counts, @@ -159,8 +159,8 @@ static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - bw = num_8x8_blocks_wide_lookup[mi[0].src_mi->mbmi.sb_type]; - bh = num_8x8_blocks_high_lookup[mi[0].src_mi->mbmi.sb_type]; + bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type]; + bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type]; if (bw == bs && bh == bs) { count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count, @@ -213,20 +213,20 @@ void vp9_choose_segmap_coding_method(VP9_COMMON *cm, MACROBLOCKD *xd) { // Set default state for the segment tree probabilities and the // temporal coding probabilities - vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); - vpx_memset(seg->pred_probs, 255, sizeof(seg->pred_probs)); + memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); + memset(seg->pred_probs, 255, sizeof(seg->pred_probs)); // First of all generate stats regarding how well the last segment map // predicts this one for (tile_col = 0; tile_col < 1 << cm->log2_tile_cols; tile_col++) { TileInfo tile; - MODE_INFO *mi_ptr; + MODE_INFO **mi_ptr; vp9_tile_init(&tile, cm, 0, tile_col); - mi_ptr = cm->mi + tile.mi_col_start; + mi_ptr = 
cm->mi_grid_visible + tile.mi_col_start; for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * cm->mi_stride) { - MODE_INFO *mi = mi_ptr; + MODE_INFO **mi = mi_ptr; for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += 8, mi += 8) count_segs_sb(cm, xd, &tile, mi, no_pred_segcounts, @@ -263,11 +263,11 @@ void vp9_choose_segmap_coding_method(VP9_COMMON *cm, MACROBLOCKD *xd) { // Now choose which coding method to use. if (t_pred_cost < no_pred_cost) { seg->temporal_update = 1; - vpx_memcpy(seg->tree_probs, t_pred_tree, sizeof(t_pred_tree)); - vpx_memcpy(seg->pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); + memcpy(seg->tree_probs, t_pred_tree, sizeof(t_pred_tree)); + memcpy(seg->pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); } else { seg->temporal_update = 0; - vpx_memcpy(seg->tree_probs, no_pred_tree, sizeof(no_pred_tree)); + memcpy(seg->tree_probs, no_pred_tree, sizeof(no_pred_tree)); } } @@ -276,6 +276,6 @@ void vp9_reset_segment_features(struct segmentation *seg) { seg->enabled = 0; seg->update_map = 0; seg->update_data = 0; - vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); + memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); vp9_clearall_segfeatures(seg); } diff --git a/media/libvpx/vp9/encoder/vp9_skin_detection.c b/media/libvpx/vp9/encoder/vp9_skin_detection.c index 08ebcf8f1c8..1cb0662834e 100644 --- a/media/libvpx/vp9/encoder/vp9_skin_detection.c +++ b/media/libvpx/vp9/encoder/vp9_skin_detection.c @@ -60,14 +60,14 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { const int src_ystride = cpi->Source->y_stride; const int src_uvstride = cpi->Source->uv_stride; YV12_BUFFER_CONFIG skinmap; - vpx_memset(&skinmap, 0, sizeof(YV12_BUFFER_CONFIG)); + memset(&skinmap, 0, sizeof(YV12_BUFFER_CONFIG)); if (vp9_alloc_frame_buffer(&skinmap, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment)) { vp9_free_frame_buffer(&skinmap); return; } - 
vpx_memset(skinmap.buffer_alloc, 128, skinmap.frame_size); + memset(skinmap.buffer_alloc, 128, skinmap.frame_size); y = skinmap.y_buffer; // Loop through 8x8 blocks and set skin map based on center pixel of block. // Set y to white for skin block, otherwise set to source with gray scale. diff --git a/media/libvpx/vp9/encoder/vp9_speed_features.c b/media/libvpx/vp9/encoder/vp9_speed_features.c index 3515b6e7530..4b206ba7bd3 100644 --- a/media/libvpx/vp9/encoder/vp9_speed_features.c +++ b/media/libvpx/vp9/encoder/vp9_speed_features.c @@ -72,6 +72,29 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMMON *cm, } } +// Sets a partition size down to which the auto partition code will always +// search (can go lower), based on the image dimensions. The logic here +// is that the extent to which ringing artefacts are offensive, depends +// partly on the screen area over which they propagate. Propagation is +// limited by transform block size but the screen area taken up by a given block +// size will be larger for a small image format stretched to full screen. +static BLOCK_SIZE set_partition_min_limit(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + unsigned int screen_area = (cm->width * cm->height); + + // Select block size based on image format size.
+ if (screen_area < 1280 * 720) { + // Formats smaller in area than 720P + return BLOCK_4X4; + } else if (screen_area < 1920 * 1080) { + // Format >= 720P and < 1080P + return BLOCK_8X8; + } else { + // Formats 1080P and up + return BLOCK_16X16; + } +} + static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, SPEED_FEATURES *sf, int speed) { const int boosted = frame_is_boosted(cpi); @@ -115,8 +138,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, FLAG_SKIP_INTRA_LOWVAR; sf->disable_filter_search_var_thresh = 100; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX; - + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->rd_auto_partition_min_limit = set_partition_min_limit(cpi); sf->allow_partition_search_skip = 1; } @@ -237,8 +260,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, FLAG_SKIP_INTRA_LOWVAR; sf->adaptive_pred_interp_filter = 2; - // Reference masking is not supported in dynamic scaling mode. - sf->reference_masking = cpi->oxcf.resize_mode != RESIZE_DYNAMIC ? 1 : 0; + // Disable reference masking if using spatial scaling since + // pred_mv_sad will not be set (since vp9_mv_pred will not + // be called). + // TODO(marpan/agrange): Fix this condition. + sf->reference_masking = (cpi->oxcf.resize_mode != RESIZE_DYNAMIC && + cpi->svc.number_spatial_layers == 1) ? 1 : 0; sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -301,7 +328,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, (frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1); sf->max_delta_qindex = is_keyframe ? 
20 : 15; sf->partition_search_type = REFERENCE_PARTITION; - sf->use_nonrd_pick_mode = !is_keyframe; + sf->use_nonrd_pick_mode = 1; sf->allow_skip_recode = 0; sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; @@ -314,15 +341,20 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->coeff_prob_appx_step = 4; sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH; + sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8; if (!is_keyframe) { int i; if (content == VP9E_CONTENT_SCREEN) { - for (i = 0; i < TX_SIZES; ++i) - sf->intra_y_mode_mask[i] = INTRA_DC_TM_H_V; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V; } else { - for (i = 0; i < TX_SIZES; i++) - sf->intra_y_mode_mask[i] = INTRA_DC; + for (i = 0; i < BLOCK_SIZES; ++i) + if (i >= BLOCK_16X16) + sf->intra_y_mode_bsize_mask[i] = INTRA_DC; + else + // Use H and V intra mode for block sizes <= 16X16. + sf->intra_y_mode_bsize_mask[i] = INTRA_DC_H_V; } } } @@ -333,7 +365,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, // Turn on this to use non-RD key frame coding mode. sf->use_nonrd_pick_mode = 1; sf->mv.search_method = NSTEP; - sf->tx_size_search_method = is_keyframe ? 
USE_LARGESTALL : USE_TX_8X8; sf->mv.reduce_first_step_size = 1; sf->skip_encode_sb = 0; } @@ -416,6 +447,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; sf->auto_min_max_partition_size = NOT_IN_USE; + sf->rd_auto_partition_min_limit = BLOCK_4X4; sf->default_max_partition_size = BLOCK_64X64; sf->default_min_partition_size = BLOCK_4X4; sf->adjust_partitioning_from_last_frame = 0; diff --git a/media/libvpx/vp9/encoder/vp9_speed_features.h b/media/libvpx/vp9/encoder/vp9_speed_features.h index 8722d9cec87..8575638d9a7 100644 --- a/media/libvpx/vp9/encoder/vp9_speed_features.h +++ b/media/libvpx/vp9/encoder/vp9_speed_features.h @@ -101,8 +101,7 @@ typedef enum { typedef enum { NOT_IN_USE = 0, RELAXED_NEIGHBORING_MIN_MAX = 1, - CONSTRAIN_NEIGHBORING_MIN_MAX = 2, - STRICT_NEIGHBORING_MIN_MAX = 3 + STRICT_NEIGHBORING_MIN_MAX = 2 } AUTO_MIN_MAX_MODE; typedef enum { @@ -272,6 +271,9 @@ typedef struct SPEED_FEATURES { // Sets min and max partition sizes for this 64x64 region based on the // same 64x64 in last encoded frame, and the left and above neighbor. AUTO_MIN_MAX_MODE auto_min_max_partition_size; + // Ensures the rd based auto partition search will always + // go down at least to the specified level. + BLOCK_SIZE rd_auto_partition_min_limit; // Min and max partition size we enable (block_size) as per auto // min max, but also used by adjust partitioning, and pick_partitioning. @@ -340,6 +342,10 @@ typedef struct SPEED_FEATURES { int intra_y_mode_mask[TX_SIZES]; int intra_uv_mode_mask[TX_SIZES]; + // These bit masks allow you to enable or disable intra modes for each + // prediction block size separately. + int intra_y_mode_bsize_mask[BLOCK_SIZES]; + // This variable enables an early break out of mode testing if the model for // rd built from the prediction signal indicates a value that's much // higher than the best rd we've seen so far. 
diff --git a/media/libvpx/vp9/encoder/vp9_ssim.h b/media/libvpx/vp9/encoder/vp9_ssim.h index e75623b2545..10f14c4d268 100644 --- a/media/libvpx/vp9/encoder/vp9_ssim.h +++ b/media/libvpx/vp9/encoder/vp9_ssim.h @@ -17,12 +17,64 @@ extern "C" { #include "vpx_scale/yv12config.h" +// metrics used for calculating ssim, ssim2, dssim, and ssimc +typedef struct { + // source sum ( over 8x8 region ) + uint64_t sum_s; + + // reference sum (over 8x8 region ) + uint64_t sum_r; + + // source sum squared ( over 8x8 region ) + uint64_t sum_sq_s; + + // reference sum squared (over 8x8 region ) + uint64_t sum_sq_r; + + // sum of source times reference (over 8x8 region) + uint64_t sum_sxr; + + // calculated ssim score between source and reference + double ssim; +} Ssimv; + +// metrics collected on a frame basis +typedef struct { + // ssim consistency error metric ( see code for explanation ) + double ssimc; + + // standard ssim + double ssim; + + // revised ssim ( see code for explanation) + double ssim2; + + // ssim restated as an error metric like sse + double dssim; + + // dssim converted to decibels + double dssimd; + + // ssimc converted to decibels + double ssimcd; +} Metrics; + +double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2, + int img2_pitch, int width, int height, Ssimv *sv2, + Metrics *m, int do_inconsistency); + double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, double *weight); double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, double *ssim_y, double *ssim_u, double *ssim_v); +double vp9_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *ssim_y, double *ssim_u, double *ssim_v); + +double vp9_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *ssim_y, double *ssim_u, double *ssim_v); + #if CONFIG_VP9_HIGHBITDEPTH double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, diff --git a/media/libvpx/vp9/encoder/vp9_subexp.c 
b/media/libvpx/vp9/encoder/vp9_subexp.c index 180dadde7d7..b345b162cdb 100644 --- a/media/libvpx/vp9/encoder/vp9_subexp.c +++ b/media/libvpx/vp9/encoder/vp9_subexp.c @@ -12,6 +12,7 @@ #include "vp9/common/vp9_entropy.h" #include "vp9/encoder/vp9_cost.h" +#include "vp9/encoder/vp9_subexp.h" #include "vp9/encoder/vp9_writer.h" #define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd))) @@ -146,7 +147,7 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct, int newp; vp9_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES]; vp9_model_to_full_probs(oldp, oldplist); - vpx_memcpy(newplist, oldp, sizeof(vp9_prob) * UNCONSTRAINED_NODES); + memcpy(newplist, oldp, sizeof(vp9_prob) * UNCONSTRAINED_NODES); for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i) old_b += cost_branch256(ct + 2 * i, oldplist[i]); old_b += cost_branch256(ct + 2 * PIVOT_NODE, oldplist[PIVOT_NODE]); diff --git a/media/libvpx/vp9/encoder/vp9_subexp.h b/media/libvpx/vp9/encoder/vp9_subexp.h index ac54893cf45..6fbb747e7d3 100644 --- a/media/libvpx/vp9/encoder/vp9_subexp.h +++ b/media/libvpx/vp9/encoder/vp9_subexp.h @@ -16,11 +16,15 @@ extern "C" { #endif -void vp9_write_prob_diff_update(vp9_writer *w, +#include "vp9/common/vp9_prob.h" + +struct vp9_writer; + +void vp9_write_prob_diff_update(struct vp9_writer *w, vp9_prob newp, vp9_prob oldp); -void vp9_cond_prob_diff_update(vp9_writer *w, vp9_prob *oldp, - unsigned int *ct); +void vp9_cond_prob_diff_update(struct vp9_writer *w, vp9_prob *oldp, + const unsigned int ct[2]); int vp9_prob_diff_update_savings_search(const unsigned int *ct, vp9_prob oldp, vp9_prob *bestp, diff --git a/media/libvpx/vp9/encoder/vp9_svc_layercontext.c b/media/libvpx/vp9/encoder/vp9_svc_layercontext.c index bf9cad00a79..cb1b0df4c9a 100644 --- a/media/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/media/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -15,89 +15,85 @@ #include "vp9/encoder/vp9_extend.h" #define SMALL_FRAME_FB_IDX 7 
+#define SMALL_FRAME_WIDTH 16 +#define SMALL_FRAME_HEIGHT 16 void vp9_init_layer_context(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - int layer; - int layer_end; + int sl, tl; int alt_ref_idx = svc->number_spatial_layers; svc->spatial_layer_id = 0; svc->temporal_layer_id = 0; - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - layer_end = svc->number_temporal_layers; - } else { - layer_end = svc->number_spatial_layers; - - if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { - if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img, - cpi->common.width, cpi->common.height, - cpi->common.subsampling_x, - cpi->common.subsampling_y, + if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { + if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img, + SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT, + cpi->common.subsampling_x, + cpi->common.subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cpi->common.use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cpi->common.byte_alignment, NULL, NULL, NULL)) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate empty frame for multiple frame " - "contexts"); + vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, + "Failed to allocate empty frame for multiple frame " + "contexts"); - vpx_memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80, - cpi->svc.empty_frame.img.buffer_alloc_sz); - cpi->svc.empty_frame_width = cpi->common.width; - cpi->svc.empty_frame_height = cpi->common.height; - } + memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80, + cpi->svc.empty_frame.img.buffer_alloc_sz); } - for (layer = 0; layer < layer_end; ++layer) { - LAYER_CONTEXT *const lc = &svc->layer_context[layer]; - RATE_CONTROL *const lrc = &lc->rc; - int i; - lc->current_video_frame_in_layer = 0; - lc->layer_size = 0; - lc->frames_from_key_frame = 0; - lc->last_frame_type = FRAME_TYPES; - lrc->ni_av_qi = oxcf->worst_allowed_q; - 
lrc->total_actual_bits = 0; - lrc->total_target_vs_actual = 0; - lrc->ni_tot_qi = 0; - lrc->tot_q = 0.0; - lrc->avg_q = 0.0; - lrc->ni_frames = 0; - lrc->decimation_count = 0; - lrc->decimation_factor = 0; + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + int i; + lc->current_video_frame_in_layer = 0; + lc->layer_size = 0; + lc->frames_from_key_frame = 0; + lc->last_frame_type = FRAME_TYPES; + lrc->ni_av_qi = oxcf->worst_allowed_q; + lrc->total_actual_bits = 0; + lrc->total_target_vs_actual = 0; + lrc->ni_tot_qi = 0; + lrc->tot_q = 0.0; + lrc->avg_q = 0.0; + lrc->ni_frames = 0; + lrc->decimation_count = 0; + lrc->decimation_factor = 0; - for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { - lrc->rate_correction_factors[i] = 1.0; - } + for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { + lrc->rate_correction_factors[i] = 1.0; + } - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; - lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; - lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; - lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; - } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; - lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q; - lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; - lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + - oxcf->best_allowed_q) / 2; - lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + + if (cpi->oxcf.rc_mode == VPX_CBR) { + lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; + lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; + } else { + lc->target_bandwidth = 
oxcf->layer_target_bitrate[layer]; + lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q; + lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; + lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2; - if (oxcf->ss_enable_auto_arf[layer]) - lc->alt_ref_idx = alt_ref_idx++; - else - lc->alt_ref_idx = INVALID_IDX; - lc->gold_ref_idx = INVALID_IDX; - } + lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + if (oxcf->ss_enable_auto_arf[sl]) + lc->alt_ref_idx = alt_ref_idx++; + else + lc->alt_ref_idx = INVALID_IDX; + lc->gold_ref_idx = INVALID_IDX; + } - lrc->buffer_level = oxcf->starting_buffer_level_ms * - lc->target_bandwidth / 1000; - lrc->bits_off_target = lrc->buffer_level; + lrc->buffer_level = oxcf->starting_buffer_level_ms * + lc->target_bandwidth / 1000; + lrc->bits_off_target = lrc->buffer_level; + } } // Still have extra buffer for base layer golden frame @@ -112,53 +108,99 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const RATE_CONTROL *const rc = &cpi->rc; - int layer; - int layer_end; + int sl, tl, layer = 0, spatial_layer_target; float bitrate_alloc = 1.0; - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - layer_end = svc->number_temporal_layers; + if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { + spatial_layer_target = 0; + + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); + svc->layer_context[layer].target_bandwidth = + oxcf->layer_target_bitrate[layer]; + } + + layer = LAYER_IDS_TO_IDX(sl, ((oxcf->ts_number_layers - 1) < 0 ? 
+ 0 : (oxcf->ts_number_layers - 1)), oxcf->ts_number_layers); + spatial_layer_target = + svc->layer_context[layer].target_bandwidth = + oxcf->layer_target_bitrate[layer]; + + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + LAYER_CONTEXT *const lc = + &svc->layer_context[sl * oxcf->ts_number_layers + tl]; + RATE_CONTROL *const lrc = &lc->rc; + + lc->spatial_layer_target_bandwidth = spatial_layer_target; + bitrate_alloc = (float)lc->target_bandwidth / spatial_layer_target; + lrc->starting_buffer_level = + (int64_t)(rc->starting_buffer_level * bitrate_alloc); + lrc->optimal_buffer_level = + (int64_t)(rc->optimal_buffer_level * bitrate_alloc); + lrc->maximum_buffer_size = + (int64_t)(rc->maximum_buffer_size * bitrate_alloc); + lrc->bits_off_target = + MIN(lrc->bits_off_target, lrc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); + lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl]; + lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = rc->max_frame_bandwidth; + lrc->worst_quality = rc->worst_quality; + lrc->best_quality = rc->best_quality; + } + } } else { - layer_end = svc->number_spatial_layers; - } - - for (layer = 0; layer < layer_end; ++layer) { - LAYER_CONTEXT *const lc = &svc->layer_context[layer]; - RATE_CONTROL *const lrc = &lc->rc; + int layer_end; + float bitrate_alloc = 1.0; if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; + layer_end = svc->number_temporal_layers; } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; + layer_end = svc->number_spatial_layers; } - bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; - // Update buffer-related quantities. 
- lrc->starting_buffer_level = - (int64_t)(rc->starting_buffer_level * bitrate_alloc); - lrc->optimal_buffer_level = - (int64_t)(rc->optimal_buffer_level * bitrate_alloc); - lrc->maximum_buffer_size = - (int64_t)(rc->maximum_buffer_size * bitrate_alloc); - lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); - lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); - // Update framerate-related quantities. - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; - } else { - lc->framerate = cpi->framerate; + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + + lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; + + bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; + // Update buffer-related quantities. + lrc->starting_buffer_level = + (int64_t)(rc->starting_buffer_level * bitrate_alloc); + lrc->optimal_buffer_level = + (int64_t)(rc->optimal_buffer_level * bitrate_alloc); + lrc->maximum_buffer_size = + (int64_t)(rc->maximum_buffer_size * bitrate_alloc); + lrc->bits_off_target = MIN(lrc->bits_off_target, + lrc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); + // Update framerate-related quantities. + if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { + lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; + } else { + lc->framerate = cpi->framerate; + } + lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = rc->max_frame_bandwidth; + // Update qp-related quantities. + lrc->worst_quality = rc->worst_quality; + lrc->best_quality = rc->best_quality; } - lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); - lrc->max_frame_bandwidth = rc->max_frame_bandwidth; - // Update qp-related quantities. 
- lrc->worst_quality = rc->worst_quality; - lrc->best_quality = rc->best_quality; } } static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) { - return (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? - &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : - &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + if (is_one_pass_cbr_svc(cpi)) + return &cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id]; + else + return (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.rc_mode == VPX_CBR) ? + &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : + &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; } void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { @@ -166,18 +208,22 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; LAYER_CONTEXT *const lc = get_layer_context(cpi); RATE_CONTROL *const lrc = &lc->rc; - const int layer = svc->temporal_layer_id; + // Index into spatial+temporal arrays. + const int st_idx = svc->spatial_layer_id * svc->number_temporal_layers + + svc->temporal_layer_id; + const int tl = svc->temporal_layer_id; - lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; + lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl]; lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; // Update the average layer frame size (non-cumulative per-frame-bw). 
- if (layer == 0) { + if (tl == 0) { lc->avg_frame_size = lrc->avg_frame_bandwidth; } else { const double prev_layer_framerate = - cpi->framerate / oxcf->ts_rate_decimator[layer - 1]; - const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1]; + cpi->framerate / oxcf->ts_rate_decimator[tl - 1]; + const int prev_layer_target_bandwidth = + oxcf->layer_target_bitrate[st_idx - 1]; lc->avg_frame_size = (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / (lc->framerate - prev_layer_framerate)); @@ -195,7 +241,7 @@ void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { oxcf->two_pass_vbrmin_section / 100); lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) / 100); - vp9_rc_set_gf_max_interval(cpi, lrc); + vp9_rc_set_gf_interval_range(cpi, lrc); } void vp9_restore_layer_context(VP9_COMP *const cpi) { @@ -243,9 +289,8 @@ void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { LAYER_CONTEXT *const lc = - (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? 
- &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : - &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + &cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers]; ++lc->current_video_frame_in_layer; ++lc->frames_from_key_frame; } @@ -253,10 +298,11 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) { return is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0 && - cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame; + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id].is_key_frame; } -#if CONFIG_SPATIAL_SVC static void get_layer_resolution(const int width_org, const int height_org, const int num, const int den, int *width_out, int *height_out) { @@ -276,6 +322,201 @@ static void get_layer_resolution(const int width_org, const int height_org, *height_out = h; } +// The function sets proper ref_frame_flags, buffer indices, and buffer update +// variables for temporal layering mode 3 - that does 0-2-1-2 temporal layering +// scheme. +static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { + int frame_num_within_temporal_struct = 0; + int spatial_id, temporal_id; + spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + frame_num_within_temporal_struct = + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].current_video_frame_in_layer % 4; + temporal_id = cpi->svc.temporal_layer_id = + (frame_num_within_temporal_struct & 1) ? 
2 : + (frame_num_within_temporal_struct >> 1); + cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame = + cpi->ext_refresh_alt_ref_frame = 0; + if (!temporal_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_last_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (cpi->svc.layer_context[temporal_id].is_key_frame) { + // base layer is a key frame. + cpi->ref_frame_flags = VP9_GOLD_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else if (temporal_id == 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else { + if (frame_num_within_temporal_struct == 1) { + // the first tl2 picture + if (!spatial_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (spatial_id < cpi->svc.number_spatial_layers - 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } else { // Top layer + cpi->ext_refresh_frame_flags_pending = 0; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else { + // The second tl2 picture + if (!spatial_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG; + cpi->ext_refresh_last_frame = 1; + } else if (spatial_id < cpi->svc.number_spatial_layers - 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + cpi->ext_refresh_last_frame = 1; + } else { // top layer + cpi->ext_refresh_frame_flags_pending = 0; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } + } + if (temporal_id == 0) { + cpi->lst_fb_idx = spatial_id; + if (spatial_id) + cpi->gld_fb_idx = spatial_id - 1; + else + cpi->gld_fb_idx = 0; + cpi->alt_fb_idx = 
0; + } else if (temporal_id == 1) { + cpi->lst_fb_idx = spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + } else if (frame_num_within_temporal_struct == 1) { + cpi->lst_fb_idx = spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + } else { + cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = 0; + } +} + +// The function sets proper ref_frame_flags, buffer indices, and buffer update +// variables for temporal layering mode 2 - that does 0-1-0-1 temporal layering +// scheme. +static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) { + int spatial_id, temporal_id; + spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + temporal_id = cpi->svc.temporal_layer_id = + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].current_video_frame_in_layer & 1; + cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame = + cpi->ext_refresh_alt_ref_frame = 0; + if (!temporal_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_last_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (cpi->svc.layer_context[temporal_id].is_key_frame) { + // base layer is a key frame. 
+ cpi->ref_frame_flags = VP9_GOLD_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else if (temporal_id == 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } + + if (temporal_id == 0) { + cpi->lst_fb_idx = spatial_id; + if (spatial_id) + cpi->gld_fb_idx = spatial_id - 1; + else + cpi->gld_fb_idx = 0; + cpi->alt_fb_idx = 0; + } else if (temporal_id == 1) { + cpi->lst_fb_idx = spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + } +} + +// The function sets proper ref_frame_flags, buffer indices, and buffer update +// variables for temporal layering mode 0 - that has no temporal layering. +static void set_flags_and_fb_idx_for_temporal_mode_noLayering( + VP9_COMP *const cpi) { + int spatial_id; + spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + cpi->ext_refresh_last_frame = + cpi->ext_refresh_golden_frame = cpi->ext_refresh_alt_ref_frame = 0; + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_last_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (cpi->svc.layer_context[0].is_key_frame) { + cpi->ref_frame_flags = VP9_GOLD_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + cpi->lst_fb_idx = spatial_id; + if (spatial_id) + cpi->gld_fb_idx = spatial_id - 1; + else + cpi->gld_fb_idx = 0; +} + +int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { + int width = 0, height = 0; + LAYER_CONTEXT *lc = NULL; + + if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { + set_flags_and_fb_idx_for_temporal_mode3(cpi); + } else if (cpi->svc.temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { + 
set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi); + } else if (cpi->svc.temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_0101) { + set_flags_and_fb_idx_for_temporal_mode2(cpi); + } else if (cpi->svc.temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + // VP9E_TEMPORAL_LAYERING_MODE_BYPASS : + // if the code goes here, it means the encoder will be relying on the + // flags from outside for layering. + // However, since when spatial+temporal layering is used, the buffer indices + // cannot be derived automatically, the bypass mode will only work when the + // number of spatial layers equals 1. + assert(cpi->svc.number_spatial_layers == 1); + } + + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id]; + + get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height, + lc->scaling_factor_num, lc->scaling_factor_den, + &width, &height); + + if (vp9_set_size_literal(cpi, width, height) != 0) + return VPX_CODEC_INVALID_PARAM; + + return 0; +} + +#if CONFIG_SPATIAL_SVC int vp9_svc_start_frame(VP9_COMP *const cpi) { int width = 0, height = 0; LAYER_CONTEXT *lc; @@ -362,20 +603,11 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) { cpi->lst_fb_idx = cpi->gld_fb_idx = cpi->alt_fb_idx = SMALL_FRAME_FB_IDX; - // Gradually make the empty frame smaller to save bits. Make it half of - // its previous size because of the scaling factor restriction. 
- cpi->svc.empty_frame_width >>= 1; - cpi->svc.empty_frame_width = (cpi->svc.empty_frame_width + 1) & ~1; - if (cpi->svc.empty_frame_width < 16) - cpi->svc.empty_frame_width = 16; + if (cpi->svc.encode_intra_empty_frame != 0) + cpi->common.intra_only = 1; - cpi->svc.empty_frame_height >>= 1; - cpi->svc.empty_frame_height = (cpi->svc.empty_frame_height + 1) & ~1; - if (cpi->svc.empty_frame_height < 16) - cpi->svc.empty_frame_height = 16; - - width = cpi->svc.empty_frame_width; - height = cpi->svc.empty_frame_height; + width = SMALL_FRAME_WIDTH; + height = SMALL_FRAME_HEIGHT; } } } @@ -395,11 +627,12 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) { return 0; } +#endif + struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, struct lookahead_ctx *ctx, int drain) { struct lookahead_entry *buf = NULL; - if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = vp9_lookahead_peek(ctx, 0); if (buf != NULL) { @@ -409,7 +642,5 @@ struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, } } } - return buf; } -#endif diff --git a/media/libvpx/vp9/encoder/vp9_svc_layercontext.h b/media/libvpx/vp9/encoder/vp9_svc_layercontext.h index e9645ce9f24..b6a5ea54835 100644 --- a/media/libvpx/vp9/encoder/vp9_svc_layercontext.h +++ b/media/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -22,6 +22,7 @@ extern "C" { typedef struct { RATE_CONTROL rc; int target_bandwidth; + int spatial_layer_target_bandwidth; // Target for the spatial layer. double framerate; int avg_frame_size; int max_q; @@ -57,17 +58,18 @@ typedef struct { NEED_TO_ENCODE }encode_empty_frame_state; struct lookahead_entry empty_frame; - int empty_frame_width; - int empty_frame_height; + int encode_intra_empty_frame; // Store scaled source frames to be used for temporal filter to generate // a alt ref frame. YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS]; // Layer context used for rate control in one pass temporal CBR mode or - // two pass spatial mode. 
Defined for temporal or spatial layers for now. - // Does not support temporal combined with spatial RC. - LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)]; + // two pass spatial mode. + LAYER_CONTEXT layer_context[VPX_MAX_LAYERS]; + // Indicates what sort of temporal layering is used. + // Currently, this only works for CBR mode. + VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; } SVC; struct VP9_COMP; @@ -111,6 +113,8 @@ struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi, // Start a frame and initialize svc parameters int vp9_svc_start_frame(struct VP9_COMP *const cpi); +int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/media/libvpx/vp9/encoder/vp9_temporal_filter.c b/media/libvpx/vp9/encoder/vp9_temporal_filter.c index 424cc0843b4..24b6203cb66 100644 --- a/media/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/media/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -23,7 +23,9 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_segmentation.h" +#include "vp9/encoder/vp9_temporal_filter.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" #include "vpx_scale/vpx_scale.h" @@ -44,7 +46,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, const int which_mv = 0; const MV mv = { mv_row, mv_col }; const InterpKernel *const kernel = - vp9_get_interp_kernel(xd->mi[0].src_mi->mbmi.interp_filter); + vp9_get_interp_kernel(xd->mi[0]->mbmi.interp_filter); enum mv_precision mv_precision_uv; int uv_stride; @@ -109,7 +111,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, kernel, mv_precision_uv, x, y); } -void vp9_temporal_filter_init() { +void vp9_temporal_filter_init(void) { int i; fixed_divide[0] = 0; @@ -225,7 +227,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, MV best_ref_mv1 = {0, 0}; MV best_ref_mv1_full; /* full-pixel 
value of best_ref_mv1 */ - MV *ref_mv = &x->e_mbd.mi[0].src_mi->bmi[0].as_mv[0].as_mv; + MV *ref_mv = &x->e_mbd.mi[0]->bmi[0].as_mv[0].as_mv; // Save input state struct buf_2d src = x->plane[0].src; @@ -280,17 +282,17 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4; int mb_y_offset = 0; int mb_uv_offset = 0; - DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16 * 16 * 3); - DECLARE_ALIGNED_ARRAY(16, uint16_t, count, 16 * 16 * 3); + DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 * 3]); + DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]); MACROBLOCKD *mbd = &cpi->td.mb.e_mbd; YV12_BUFFER_CONFIG *f = frames[alt_ref_index]; uint8_t *dst1, *dst2; #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED_ARRAY(16, uint16_t, predictor16, 16 * 16 * 3); - DECLARE_ALIGNED_ARRAY(16, uint8_t, predictor8, 16 * 16 * 3); + DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]); + DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]); uint8_t *predictor; #else - DECLARE_ALIGNED_ARRAY(16, uint8_t, predictor, 16 * 16 * 3); + DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]); #endif const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y; const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x; @@ -329,8 +331,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, int i, j, k; int stride; - vpx_memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0])); - vpx_memset(count, 0, 16 * 16 * 3 * sizeof(count[0])); + memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0])); + memset(count, 0, 16 * 16 * 3 * sizeof(count[0])); cpi->td.mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND)); cpi->td.mb.mv_col_max = ((mb_cols - 1 - mb_col) * 16) @@ -343,8 +345,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (frames[frame] == NULL) continue; - mbd->mi[0].src_mi->bmi[0].as_mv[0].as_mv.row = 0; - mbd->mi[0].src_mi->bmi[0].as_mv[0].as_mv.col = 0; + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0; + 
mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0; if (frame == alt_ref_index) { filter_weight = 2; @@ -370,8 +372,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride, mb_uv_width, mb_uv_height, - mbd->mi[0].src_mi->bmi[0].as_mv[0].as_mv.row, - mbd->mi[0].src_mi->bmi[0].as_mv[0].as_mv.col, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale, mb_col * 16, mb_row * 16); @@ -680,7 +682,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { if (frames_to_blur > 0) { // Setup scaling factors. Scaling on each of the arnr frames is not // supported. - if (is_two_pass_svc(cpi)) { + if (cpi->use_svc) { // In spatial svc the scaling factors might be less then 1/2. // So we will use non-normative scaling. int frame_used = 0; @@ -722,8 +724,8 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { } } cm->mi = cm->mip + cm->mi_stride + 1; - xd->mi = cm->mi; - xd->mi[0].src_mi = &xd->mi[0]; + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; } else { // ARF is produced at the native frame size and resized when coded. 
#if CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/vp9/encoder/vp9_temporal_filter.h b/media/libvpx/vp9/encoder/vp9_temporal_filter.h index a971e0ae365..f537b8870a6 100644 --- a/media/libvpx/vp9/encoder/vp9_temporal_filter.h +++ b/media/libvpx/vp9/encoder/vp9_temporal_filter.h @@ -15,7 +15,7 @@ extern "C" { #endif -void vp9_temporal_filter_init(); +void vp9_temporal_filter_init(void); void vp9_temporal_filter(VP9_COMP *cpi, int distance); #ifdef __cplusplus diff --git a/media/libvpx/vp9/encoder/vp9_tokenize.c b/media/libvpx/vp9/encoder/vp9_tokenize.c index 4c89953565d..35920313ad3 100644 --- a/media/libvpx/vp9/encoder/vp9_tokenize.c +++ b/media/libvpx/vp9/encoder/vp9_tokenize.c @@ -17,6 +17,7 @@ #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_scan.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/encoder/vp9_cost.h" @@ -65,18 +66,6 @@ const vp9_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = { -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE }; -// Unconstrained Node Tree -const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { - 2, 6, // 0 = LOW_VAL - -TWO_TOKEN, 4, // 1 = TWO - -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE - 8, 10, // 3 = HIGH_LOW - -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE - 12, 14, // 5 = CAT_THREEFOUR - -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE - -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE -}; - static const vp9_tree_index cat1[2] = {0, 0}; static const vp9_tree_index cat2[4] = {2, 2, 0, 0}; static const vp9_tree_index cat3[6] = {2, 2, 4, 4, 0, 0}; @@ -509,7 +498,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, uint8_t token_cache[32 * 32]; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; int pt; /* near block/prev token context index */ int c; TOKENEXTRA *t = *tp; /* store 
tokens starting here */ @@ -624,8 +613,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; - TOKENEXTRA *t_backup = *t; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx = vp9_get_skip_context(xd); const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); @@ -634,8 +622,6 @@ void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, if (!dry_run) td->counts->skip[ctx][1] += skip_inc; reset_skip_context(xd, bsize); - if (dry_run) - *t = t_backup; return; } @@ -644,6 +630,5 @@ void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, vp9_foreach_transformed_block(xd, bsize, tokenize_b, &arg); } else { vp9_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg); - *t = t_backup; } } diff --git a/media/libvpx/vp9/encoder/vp9_variance.c b/media/libvpx/vp9/encoder/vp9_variance.c index fea5f3351cf..c571b7c9545 100644 --- a/media/libvpx/vp9/encoder/vp9_variance.c +++ b/media/libvpx/vp9/encoder/vp9_variance.c @@ -9,6 +9,7 @@ */ #include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" @@ -18,25 +19,16 @@ #include "vp9/encoder/vp9_variance.h" -void variance(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int w, int h, unsigned int *sse, int *sum) { - int i, j; - - *sum = 0; - *sse = 0; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const int diff = a[j] - b[j]; - *sum += diff; - *sse += diff * diff; - } - - a += a_stride; - b += b_stride; - } -} +static uint8_t bilinear_filters[8][2] = { + { 128, 0, }, + { 112, 16, }, + { 96, 32, }, + { 80, 48, }, + { 64, 64, }, + { 48, 80, }, + { 32, 96, }, + { 16, 112, }, +}; // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal // or vertical direction to produce the filtered output 
block. Used to implement @@ -52,7 +44,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { + const uint8_t *vp9_filter) { unsigned int i, j; for (i = 0; i < output_height; i++) { @@ -84,7 +76,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { + const uint8_t *vp9_filter) { unsigned int i, j; for (i = 0; i < output_height; i++) { @@ -100,25 +92,6 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, } } -unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { - unsigned int i, sum = 0; - - for (i = 0; i < 256; ++i) { - sum += src_ptr[i] * src_ptr[i]; - } - - return sum; -} - -#define VAR(W, H) \ -unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} - #define SUBPIX_VAR(W, H) \ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ const uint8_t *src, int src_stride, \ @@ -129,11 +102,11 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ uint8_t temp2[H * W]; \ \ var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ - BILINEAR_FILTERS_2TAP(xoffset)); \ + bilinear_filters[xoffset]); \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ + return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ } #define SUBPIX_AVG_VAR(W, H) \ @@ -145,185 +118,58 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ const uint8_t *second_pred) { \ uint16_t fdata3[(H + 1) * W]; \ uint8_t temp2[H * W]; \ - 
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, H * W); \ + DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ \ var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ - BILINEAR_FILTERS_2TAP(xoffset)); \ + bilinear_filters[xoffset]); \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ + vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ \ - return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ + return vpx_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ } -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); -} - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); -} - -unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum); - return *sse; -} - -unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum); - return *sse; -} - -unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum); - return *sse; -} - -unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum); - return *sse; -} - -VAR(4, 4) SUBPIX_VAR(4, 4) 
SUBPIX_AVG_VAR(4, 4) -VAR(4, 8) SUBPIX_VAR(4, 8) SUBPIX_AVG_VAR(4, 8) -VAR(8, 4) SUBPIX_VAR(8, 4) SUBPIX_AVG_VAR(8, 4) -VAR(8, 8) SUBPIX_VAR(8, 8) SUBPIX_AVG_VAR(8, 8) -VAR(8, 16) SUBPIX_VAR(8, 16) SUBPIX_AVG_VAR(8, 16) -VAR(16, 8) SUBPIX_VAR(16, 8) SUBPIX_AVG_VAR(16, 8) -VAR(16, 16) SUBPIX_VAR(16, 16) SUBPIX_AVG_VAR(16, 16) -VAR(16, 32) SUBPIX_VAR(16, 32) SUBPIX_AVG_VAR(16, 32) -VAR(32, 16) SUBPIX_VAR(32, 16) SUBPIX_AVG_VAR(32, 16) -VAR(32, 32) SUBPIX_VAR(32, 32) SUBPIX_AVG_VAR(32, 32) -VAR(32, 64) SUBPIX_VAR(32, 64) SUBPIX_AVG_VAR(32, 64) -VAR(64, 32) SUBPIX_VAR(64, 32) SUBPIX_AVG_VAR(64, 32) -VAR(64, 64) SUBPIX_VAR(64, 64) SUBPIX_AVG_VAR(64, 64) -void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int ref_stride) { - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - const int tmp = pred[j] + ref[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} - #if CONFIG_VP9_HIGHBITDEPTH -void highbd_variance64(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, uint64_t *sse, - uint64_t *sum) { - int i, j; - - uint16_t *a = CONVERT_TO_SHORTPTR(a8); - uint16_t *b = CONVERT_TO_SHORTPTR(b8); - *sum = 0; - *sse = 0; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const int diff = a[j] - b[j]; - *sum += diff; - *sse += diff * diff; - } - a += a_stride; - b += b_stride; - } -} - -void highbd_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sse = (unsigned int)sse_long; - *sum = (int)sum_long; -} - -void highbd_10_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long 
= 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2); - *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4); -} - -void highbd_12_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); - *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8); -} - static void highbd_var_filter_block2d_bil_first_pass( const uint8_t *src_ptr8, uint16_t *output_ptr, @@ -331,7 +177,7 @@ static void highbd_var_filter_block2d_bil_first_pass( int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { + const uint8_t *vp9_filter) { unsigned int i, j; uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); for (i = 0; i < output_height; i++) { @@ -357,7 +203,7 @@ static void highbd_var_filter_block2d_bil_second_pass( unsigned int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { + const uint8_t *vp9_filter) { unsigned int i, j; for (i = 0; i < output_height; i++) { @@ -374,35 +220,6 @@ static void highbd_var_filter_block2d_bil_second_pass( } } -#define HIGHBD_VAR(W, H) \ -unsigned int vp9_highbd_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} \ -\ -unsigned int vp9_highbd_10_variance##W##x##H##_c(const uint8_t *a, \ - int a_stride, \ - const uint8_t *b, \ - int b_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} \ -\ -unsigned int vp9_highbd_12_variance##W##x##H##_c(const 
uint8_t *a, \ - int a_stride, \ - const uint8_t *b, \ - int b_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} - #define HIGHBD_SUBPIX_VAR(W, H) \ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \ const uint8_t *src, int src_stride, \ @@ -413,11 +230,11 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \ uint16_t temp2[H * W]; \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ + return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ dst_stride, sse); \ } \ \ @@ -430,11 +247,11 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \ uint16_t temp2[H * W]; \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ + return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, sse); \ } \ \ @@ -447,11 +264,11 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \ uint16_t temp2[H * W]; \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - return 
vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ + return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, sse); \ } @@ -464,17 +281,17 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \ const uint8_t *second_pred) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ - DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ + vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ \ - return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ + return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ dst_stride, sse); \ } \ \ @@ -486,17 +303,17 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ const uint8_t *second_pred) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ - DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ + vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ \ - return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ + return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ W, dst, dst_stride, sse); \ } \ \ @@ -508,144 +325,56 
@@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ const uint8_t *second_pred) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ - DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ + vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ \ - return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ + return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ W, dst, dst_stride, sse); \ } -#define HIGHBD_GET_VAR(S) \ -void vp9_highbd_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - highbd_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} \ -\ -void vp9_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} \ -\ -void vp9_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} - -#define HIGHBD_MSE(W, H) \ -unsigned int vp9_highbd_mse##W##x##H##_c(const uint8_t *src, \ - int src_stride, \ - const uint8_t *ref, \ - int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} \ -\ -unsigned int vp9_highbd_10_mse##W##x##H##_c(const uint8_t *src, \ - int src_stride, \ - const 
uint8_t *ref, \ - int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} \ -\ -unsigned int vp9_highbd_12_mse##W##x##H##_c(const uint8_t *src, \ - int src_stride, \ - const uint8_t *ref, \ - int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} - -HIGHBD_GET_VAR(8) -HIGHBD_GET_VAR(16) - -HIGHBD_MSE(16, 16) -HIGHBD_MSE(16, 8) -HIGHBD_MSE(8, 16) -HIGHBD_MSE(8, 8) - -HIGHBD_VAR(4, 4) HIGHBD_SUBPIX_VAR(4, 4) HIGHBD_SUBPIX_AVG_VAR(4, 4) -HIGHBD_VAR(4, 8) HIGHBD_SUBPIX_VAR(4, 8) HIGHBD_SUBPIX_AVG_VAR(4, 8) -HIGHBD_VAR(8, 4) HIGHBD_SUBPIX_VAR(8, 4) HIGHBD_SUBPIX_AVG_VAR(8, 4) -HIGHBD_VAR(8, 8) HIGHBD_SUBPIX_VAR(8, 8) HIGHBD_SUBPIX_AVG_VAR(8, 8) -HIGHBD_VAR(8, 16) HIGHBD_SUBPIX_VAR(8, 16) HIGHBD_SUBPIX_AVG_VAR(8, 16) -HIGHBD_VAR(16, 8) HIGHBD_SUBPIX_VAR(16, 8) HIGHBD_SUBPIX_AVG_VAR(16, 8) -HIGHBD_VAR(16, 16) HIGHBD_SUBPIX_VAR(16, 16) HIGHBD_SUBPIX_AVG_VAR(16, 16) -HIGHBD_VAR(16, 32) HIGHBD_SUBPIX_VAR(16, 32) HIGHBD_SUBPIX_AVG_VAR(16, 32) -HIGHBD_VAR(32, 16) HIGHBD_SUBPIX_VAR(32, 16) HIGHBD_SUBPIX_AVG_VAR(32, 16) -HIGHBD_VAR(32, 32) HIGHBD_SUBPIX_VAR(32, 32) HIGHBD_SUBPIX_AVG_VAR(32, 32) -HIGHBD_VAR(32, 64) HIGHBD_SUBPIX_VAR(32, 64) HIGHBD_SUBPIX_AVG_VAR(32, 64) -HIGHBD_VAR(64, 32) HIGHBD_SUBPIX_VAR(64, 32) HIGHBD_SUBPIX_AVG_VAR(64, 32) -HIGHBD_VAR(64, 64) HIGHBD_SUBPIX_VAR(64, 64) HIGHBD_SUBPIX_AVG_VAR(64, 64) - -void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, - int width, int height, const uint8_t *ref8, - int ref_stride) { - int i, j; - uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - const int tmp = pred[j] + ref[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} #endif // 
CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/vp9/encoder/vp9_variance.h b/media/libvpx/vp9/encoder/vp9_variance.h index 53148f23c56..0a8739510f4 100644 --- a/media/libvpx/vp9/encoder/vp9_variance.h +++ b/media/libvpx/vp9/encoder/vp9_variance.h @@ -12,33 +12,12 @@ #define VP9_ENCODER_VP9_VARIANCE_H_ #include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif -void variance(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int w, int h, - unsigned int *sse, int *sum); - -#if CONFIG_VP9_HIGHBITDEPTH -void highbd_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, - unsigned int *sse, int *sum); - -void highbd_10_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, - unsigned int *sse, int *sum); - -void highbd_12_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, - unsigned int *sse, int *sum); -#endif - typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -95,15 +74,6 @@ typedef struct vp9_variance_vtable { vp9_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; -void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int ref_stride); - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred, - int width, int height, - const uint8_t *ref, int ref_stride); -#endif - #ifdef __cplusplus } // extern "C" #endif diff --git a/media/libvpx/vp9/encoder/vp9_writer.h b/media/libvpx/vp9/encoder/vp9_writer.h index 9d161f95cf6..e347ea41441 100644 --- a/media/libvpx/vp9/encoder/vp9_writer.h +++ b/media/libvpx/vp9/encoder/vp9_writer.h @@ -19,7 +19,7 @@ extern "C" { #endif -typedef struct { +typedef struct vp9_writer { unsigned int lowvalue; unsigned int range; int count; diff --git a/media/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c 
b/media/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c index 4e80b255e88..56a91ed2d71 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c @@ -9,8 +9,87 @@ */ #include + +#include "./vp9_rtcd.h" #include "vpx_ports/mem.h" +void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, + int *min, int *max) { + __m128i u0, s0, d0, diff, maxabsdiff, minabsdiff, negdiff, absdiff0, absdiff; + u0 = _mm_setzero_si128(); + // Row 0 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff0 = _mm_max_epi16(diff, negdiff); + // Row 1 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(absdiff0, absdiff); + minabsdiff = _mm_min_epi16(absdiff0, absdiff); + // Row 2 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 2 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + // Row 3 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 3 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + // Row 4 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0); + d0 = 
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 4 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + // Row 5 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 5 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + // Row 6 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 6 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + // Row 7 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 7 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + + maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_si128(maxabsdiff, 8)); + maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_epi64(maxabsdiff, 32)); + maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_epi64(maxabsdiff, 16)); + *max = _mm_extract_epi16(maxabsdiff, 0); + + minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_si128(minabsdiff, 8)); + minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_epi64(minabsdiff, 32)); + minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_epi64(minabsdiff, 16)); + *min = _mm_extract_epi16(minabsdiff, 0); +} unsigned int vp9_avg_8x8_sse2(const 
uint8_t *s, int p) { __m128i s0, s1, u0; @@ -57,6 +136,179 @@ unsigned int vp9_avg_4x4_sse2(const uint8_t *s, int p) { return (avg + 8) >> 4; } +static void hadamard_col8_sse2(__m128i *in, int iter) { + __m128i a0 = in[0]; + __m128i a1 = in[1]; + __m128i a2 = in[2]; + __m128i a3 = in[3]; + __m128i a4 = in[4]; + __m128i a5 = in[5]; + __m128i a6 = in[6]; + __m128i a7 = in[7]; + + __m128i b0 = _mm_add_epi16(a0, a1); + __m128i b1 = _mm_sub_epi16(a0, a1); + __m128i b2 = _mm_add_epi16(a2, a3); + __m128i b3 = _mm_sub_epi16(a2, a3); + __m128i b4 = _mm_add_epi16(a4, a5); + __m128i b5 = _mm_sub_epi16(a4, a5); + __m128i b6 = _mm_add_epi16(a6, a7); + __m128i b7 = _mm_sub_epi16(a6, a7); + + a0 = _mm_add_epi16(b0, b2); + a1 = _mm_add_epi16(b1, b3); + a2 = _mm_sub_epi16(b0, b2); + a3 = _mm_sub_epi16(b1, b3); + a4 = _mm_add_epi16(b4, b6); + a5 = _mm_add_epi16(b5, b7); + a6 = _mm_sub_epi16(b4, b6); + a7 = _mm_sub_epi16(b5, b7); + + if (iter == 0) { + b0 = _mm_add_epi16(a0, a4); + b7 = _mm_add_epi16(a1, a5); + b3 = _mm_add_epi16(a2, a6); + b4 = _mm_add_epi16(a3, a7); + b2 = _mm_sub_epi16(a0, a4); + b6 = _mm_sub_epi16(a1, a5); + b1 = _mm_sub_epi16(a2, a6); + b5 = _mm_sub_epi16(a3, a7); + + a0 = _mm_unpacklo_epi16(b0, b1); + a1 = _mm_unpacklo_epi16(b2, b3); + a2 = _mm_unpackhi_epi16(b0, b1); + a3 = _mm_unpackhi_epi16(b2, b3); + a4 = _mm_unpacklo_epi16(b4, b5); + a5 = _mm_unpacklo_epi16(b6, b7); + a6 = _mm_unpackhi_epi16(b4, b5); + a7 = _mm_unpackhi_epi16(b6, b7); + + b0 = _mm_unpacklo_epi32(a0, a1); + b1 = _mm_unpacklo_epi32(a4, a5); + b2 = _mm_unpackhi_epi32(a0, a1); + b3 = _mm_unpackhi_epi32(a4, a5); + b4 = _mm_unpacklo_epi32(a2, a3); + b5 = _mm_unpacklo_epi32(a6, a7); + b6 = _mm_unpackhi_epi32(a2, a3); + b7 = _mm_unpackhi_epi32(a6, a7); + + in[0] = _mm_unpacklo_epi64(b0, b1); + in[1] = _mm_unpackhi_epi64(b0, b1); + in[2] = _mm_unpacklo_epi64(b2, b3); + in[3] = _mm_unpackhi_epi64(b2, b3); + in[4] = _mm_unpacklo_epi64(b4, b5); + in[5] = _mm_unpackhi_epi64(b4, b5); + in[6] = 
_mm_unpacklo_epi64(b6, b7); + in[7] = _mm_unpackhi_epi64(b6, b7); + } else { + in[0] = _mm_add_epi16(a0, a4); + in[7] = _mm_add_epi16(a1, a5); + in[3] = _mm_add_epi16(a2, a6); + in[4] = _mm_add_epi16(a3, a7); + in[2] = _mm_sub_epi16(a0, a4); + in[6] = _mm_sub_epi16(a1, a5); + in[1] = _mm_sub_epi16(a2, a6); + in[5] = _mm_sub_epi16(a3, a7); + } +} + +void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, + int16_t *coeff) { + __m128i src[8]; + src[0] = _mm_load_si128((const __m128i *)src_diff); + src[1] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); + src[2] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); + src[3] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); + src[4] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); + src[5] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); + src[6] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); + src[7] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); + + hadamard_col8_sse2(src, 0); + hadamard_col8_sse2(src, 1); + + _mm_store_si128((__m128i *)coeff, src[0]); + coeff += 8; + _mm_store_si128((__m128i *)coeff, src[1]); + coeff += 8; + _mm_store_si128((__m128i *)coeff, src[2]); + coeff += 8; + _mm_store_si128((__m128i *)coeff, src[3]); + coeff += 8; + _mm_store_si128((__m128i *)coeff, src[4]); + coeff += 8; + _mm_store_si128((__m128i *)coeff, src[5]); + coeff += 8; + _mm_store_si128((__m128i *)coeff, src[6]); + coeff += 8; + _mm_store_si128((__m128i *)coeff, src[7]); +} + +void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, + int16_t *coeff) { + int idx; + for (idx = 0; idx < 4; ++idx) { + int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + + (idx & 0x01) * 8; + vp9_hadamard_8x8_sse2(src_ptr, src_stride, coeff + idx * 64); + } + + for (idx = 0; idx < 64; idx += 8) { + __m128i coeff0 = _mm_load_si128((const __m128i *)coeff); + __m128i coeff1 = _mm_load_si128((const __m128i *)(coeff + 64)); + __m128i 
coeff2 = _mm_load_si128((const __m128i *)(coeff + 128)); + __m128i coeff3 = _mm_load_si128((const __m128i *)(coeff + 192)); + + __m128i b0 = _mm_add_epi16(coeff0, coeff1); + __m128i b1 = _mm_sub_epi16(coeff0, coeff1); + __m128i b2 = _mm_add_epi16(coeff2, coeff3); + __m128i b3 = _mm_sub_epi16(coeff2, coeff3); + + coeff0 = _mm_add_epi16(b0, b2); + coeff1 = _mm_add_epi16(b1, b3); + coeff0 = _mm_srai_epi16(coeff0, 1); + coeff1 = _mm_srai_epi16(coeff1, 1); + _mm_store_si128((__m128i *)coeff, coeff0); + _mm_store_si128((__m128i *)(coeff + 64), coeff1); + + coeff2 = _mm_sub_epi16(b0, b2); + coeff3 = _mm_sub_epi16(b1, b3); + coeff2 = _mm_srai_epi16(coeff2, 1); + coeff3 = _mm_srai_epi16(coeff3, 1); + _mm_store_si128((__m128i *)(coeff + 128), coeff2); + _mm_store_si128((__m128i *)(coeff + 192), coeff3); + + coeff += 8; + } +} + +int16_t vp9_satd_sse2(const int16_t *coeff, int length) { + int i; + __m128i sum = _mm_load_si128((const __m128i *)coeff); + __m128i sign = _mm_srai_epi16(sum, 15); + __m128i val = _mm_xor_si128(sum, sign); + sum = _mm_sub_epi16(val, sign); + coeff += 8; + + for (i = 8; i < length; i += 8) { + __m128i src_line = _mm_load_si128((const __m128i *)coeff); + sign = _mm_srai_epi16(src_line, 15); + val = _mm_xor_si128(src_line, sign); + val = _mm_sub_epi16(val, sign); + sum = _mm_add_epi16(sum, val); + coeff += 8; + } + + val = _mm_srli_si128(sum, 8); + sum = _mm_add_epi16(sum, val); + val = _mm_srli_epi64(sum, 32); + sum = _mm_add_epi16(sum, val); + val = _mm_srli_epi32(sum, 16); + sum = _mm_add_epi16(sum, val); + + return _mm_extract_epi16(sum, 0); +} + void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref, const int ref_stride, const int height) { int idx; diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h similarity index 99% rename from media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c rename to media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h index 66827ad8037..ae6bfe5fa2d 
100644 --- a/media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h @@ -9,6 +9,8 @@ */ #include // AVX2 + +#include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" // for cospi constants #include "vpx_ports/mem.h" diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c b/media/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h similarity index 99% rename from media/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c rename to media/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h index 099993aa6a0..003ebd13fe3 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h @@ -9,6 +9,8 @@ */ #include // SSE2 + +#include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" // for cospi constants #include "vp9/encoder/x86/vp9_dct_sse2.h" #include "vp9/encoder/vp9_dct.h" diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c index 3a19f52746c..8f3b61ad86d 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c @@ -15,12 +15,12 @@ #define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2 #define FDCT32x32_HIGH_PRECISION 0 -#include "vp9/encoder/x86/vp9_dct32x32_avx2.c" +#include "vp9/encoder/x86/vp9_dct32x32_avx2_impl.h" #undef FDCT32x32_2D_AVX2 #undef FDCT32x32_HIGH_PRECISION #define FDCT32x32_2D_AVX2 vp9_fdct32x32_avx2 #define FDCT32x32_HIGH_PRECISION 1 -#include "vp9/encoder/x86/vp9_dct32x32_avx2.c" // NOLINT +#include "vp9/encoder/x86/vp9_dct32x32_avx2_impl.h" // NOLINT #undef FDCT32x32_2D_AVX2 #undef FDCT32x32_HIGH_PRECISION diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct_sse2.c b/media/libvpx/vp9/encoder/x86/vp9_dct_sse2.c index 564b7955e5b..cff4fcbdce0 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_dct_sse2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_dct_sse2.c @@ -10,6 +10,8 @@ #include #include // SSE2 + +#include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" // for cospi 
constants #include "vp9/encoder/vp9_dct.h" #include "vp9/encoder/x86/vp9_dct_sse2.h" @@ -96,7 +98,7 @@ static INLINE void transpose_4x4(__m128i *res) { res[3] = _mm_unpackhi_epi64(res[2], res[2]); } -void fdct4_sse2(__m128i *in) { +static void fdct4_sse2(__m128i *in) { const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); @@ -129,7 +131,7 @@ void fdct4_sse2(__m128i *in) { transpose_4x4(in); } -void fadst4_sse2(__m128i *in) { +static void fadst4_sse2(__m128i *in) { const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9); const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9); const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9); @@ -831,7 +833,7 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { // 07 17 27 37 47 57 67 77 } -void fdct8_sse2(__m128i *in) { +static void fdct8_sse2(__m128i *in) { // constants const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); @@ -971,7 +973,7 @@ void fdct8_sse2(__m128i *in) { array_transpose_8x8(in, in); } -void fadst8_sse2(__m128i *in) { +static void fadst8_sse2(__m128i *in) { // Constants const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); @@ -1353,7 +1355,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) { right_shift_8x8(res1 + 8, 2); } -void fdct16_8col(__m128i *in) { +static void fdct16_8col(__m128i *in) { // perform 16x16 1-D DCT for 8 columns __m128i i[8], s[8], p[8], t[8], u[16], v[16]; const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); @@ -1675,7 +1677,7 @@ void fdct16_8col(__m128i *in) { in[15] = _mm_packs_epi32(v[14], v[15]); } -void fadst16_8col(__m128i *in) { 
+static void fadst16_8col(__m128i *in) { // perform 16x16 1-D ADST for 8 columns __m128i s[16], x[16], u[32], v[32]; const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); @@ -2145,13 +2147,13 @@ void fadst16_8col(__m128i *in) { in[15] = _mm_sub_epi16(kZero, s[1]); } -void fdct16_sse2(__m128i *in0, __m128i *in1) { +static void fdct16_sse2(__m128i *in0, __m128i *in1) { fdct16_8col(in0); fdct16_8col(in1); array_transpose_16x16(in0, in1); } -void fadst16_sse2(__m128i *in0, __m128i *in1) { +static void fadst16_sse2(__m128i *in0, __m128i *in1) { fadst16_8col(in0); fadst16_8col(in1); array_transpose_16x16(in0, in1); @@ -2334,7 +2336,7 @@ void vp9_highbd_fht8x8_sse2(const int16_t *input, tran_low_t *output, } } -void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output, +void vp9_highbd_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type) { if (tx_type == DCT_DCT) { vp9_highbd_fdct16x16_sse2(input, output, stride); @@ -2368,8 +2370,8 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output, /* * The DCTnxn functions are defined using the macros below. The main code for - * them is in separate files (vp9/encoder/x86/vp9_dct_impl_sse2.c & - * vp9/encoder/x86/vp9_dct32x32_sse2.c) which are used by both the 8 bit code + * them is in separate files (vp9/encoder/x86/vp9_dct_sse2_impl.h & + * vp9/encoder/x86/vp9_dct32x32_sse2_impl.h) which are used by both the 8 bit code * and the high bit depth code. 
*/ @@ -2378,20 +2380,20 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output, #define FDCT4x4_2D vp9_fdct4x4_sse2 #define FDCT8x8_2D vp9_fdct8x8_sse2 #define FDCT16x16_2D vp9_fdct16x16_sse2 -#include "vp9/encoder/x86/vp9_dct_impl_sse2.c" +#include "vp9/encoder/x86/vp9_dct_sse2_impl.h" #undef FDCT4x4_2D #undef FDCT8x8_2D #undef FDCT16x16_2D #define FDCT32x32_2D vp9_fdct32x32_rd_sse2 #define FDCT32x32_HIGH_PRECISION 0 -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION #define FDCT32x32_2D vp9_fdct32x32_sse2 #define FDCT32x32_HIGH_PRECISION 1 -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION @@ -2405,20 +2407,20 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output, #define FDCT4x4_2D vp9_highbd_fdct4x4_sse2 #define FDCT8x8_2D vp9_highbd_fdct8x8_sse2 #define FDCT16x16_2D vp9_highbd_fdct16x16_sse2 -#include "vp9/encoder/x86/vp9_dct_impl_sse2.c" // NOLINT +#include "vp9/encoder/x86/vp9_dct_sse2_impl.h" // NOLINT #undef FDCT4x4_2D #undef FDCT8x8_2D #undef FDCT16x16_2D #define FDCT32x32_2D vp9_highbd_fdct32x32_rd_sse2 #define FDCT32x32_HIGH_PRECISION 0 -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2 #define FDCT32x32_HIGH_PRECISION 1 -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c b/media/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h similarity index 99% rename from media/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c rename to media/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h 
index 12fa747e859..11bf5a25e62 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h @@ -9,6 +9,8 @@ */ #include // SSE2 + +#include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" // for cospi constants #include "vp9/encoder/vp9_dct.h" #include "vp9/encoder/x86/vp9_dct_sse2.h" @@ -578,7 +580,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { // in normal/row positions). int pass; // We need an intermediate buffer between passes. - DECLARE_ALIGNED_ARRAY(16, int16_t, intermediate, 256); + DECLARE_ALIGNED(16, int16_t, intermediate[256]); const int16_t *in = input; int16_t *out0 = intermediate; tran_low_t *out1 = output; diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c b/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c index bdc75e99371..96038fee16b 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c +++ b/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c @@ -15,6 +15,8 @@ #include #endif #include // SSSE3 + +#include "./vp9_rtcd.h" #include "vp9/common/x86/vp9_idct_intrin_sse2.h" void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, @@ -293,7 +295,8 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, if (!skip_block) { __m128i eob; - __m128i round, quant, dequant; + __m128i round, quant, dequant, thr; + int16_t nzflag; { __m128i coeff0, coeff1; @@ -368,6 +371,7 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, // AC only loop index = 2; + thr = _mm_srai_epi16(dequant, 1); while (n_coeffs < 0) { __m128i coeff0, coeff1; { @@ -387,28 +391,39 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - qcoeff0 = _mm_adds_epi16(qcoeff0, round); - qcoeff1 = _mm_adds_epi16(qcoeff1, round); - qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); - qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); + nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) | + 
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr)); - // Reinsert signs - qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); - qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); + if (nzflag) { + qcoeff0 = _mm_adds_epi16(qcoeff0, round); + qcoeff1 = _mm_adds_epi16(qcoeff1, round); + qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); + qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); - _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0); - _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); + // Reinsert signs + qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); + qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); + qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); + qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - coeff0 = _mm_mullo_epi16(qcoeff0, dequant); - coeff1 = _mm_mullo_epi16(qcoeff1, dequant); + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0); + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); - _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); - _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); + coeff0 = _mm_mullo_epi16(qcoeff0, dequant); + coeff1 = _mm_mullo_epi16(qcoeff1, dequant); + + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); + } else { + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); + + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); + } } - { + if (nzflag) { // Scan for eob __m128i zero_coeff0, zero_coeff1; __m128i nzero_coeff0, nzero_coeff1; diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm index 28458dcdd52..3a29aba6f27 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm +++ 
b/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm @@ -178,5 +178,78 @@ cglobal fdct8x8, 3, 5, 13, input, output, stride mova [outputq + 96], m6 mova [outputq + 112], m7 + RET + +%macro HMD8_1D 0 + psubw m8, m0, m1 + psubw m9, m2, m3 + paddw m0, m1 + paddw m2, m3 + SWAP 1, 8 + SWAP 3, 9 + psubw m8, m4, m5 + psubw m9, m6, m7 + paddw m4, m5 + paddw m6, m7 + SWAP 5, 8 + SWAP 7, 9 + + psubw m8, m0, m2 + psubw m9, m1, m3 + paddw m0, m2 + paddw m1, m3 + SWAP 2, 8 + SWAP 3, 9 + psubw m8, m4, m6 + psubw m9, m5, m7 + paddw m4, m6 + paddw m5, m7 + SWAP 6, 8 + SWAP 7, 9 + + psubw m8, m0, m4 + psubw m9, m1, m5 + paddw m0, m4 + paddw m1, m5 + SWAP 4, 8 + SWAP 5, 9 + psubw m8, m2, m6 + psubw m9, m3, m7 + paddw m2, m6 + paddw m3, m7 + SWAP 6, 8 + SWAP 7, 9 +%endmacro + +INIT_XMM ssse3 +cglobal hadamard_8x8, 3, 5, 10, input, stride, output + lea r3, [2 * strideq] + lea r4, [4 * strideq] + + mova m0, [inputq] + mova m1, [inputq + r3] + lea inputq, [inputq + r4] + mova m2, [inputq] + mova m3, [inputq + r3] + lea inputq, [inputq + r4] + mova m4, [inputq] + mova m5, [inputq + r3] + lea inputq, [inputq + r4] + mova m6, [inputq] + mova m7, [inputq + r3] + + HMD8_1D + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + HMD8_1D + + mova [outputq + 0], m0 + mova [outputq + 16], m1 + mova [outputq + 32], m2 + mova [outputq + 48], m3 + mova [outputq + 64], m4 + mova [outputq + 80], m5 + mova [outputq + 96], m6 + mova [outputq + 112], m7 + RET %endif diff --git a/media/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c b/media/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c index 4ddee7b74c0..bf7c7af7707 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c @@ -150,38 +150,37 @@ static int vp9_denoiser_NxM_sse2_small( const uint8_t shift = (width == 4) ? 
2 : 1; for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { - vpx_memcpy(sig_buffer[r], sig, width); - vpx_memcpy(sig_buffer[r] + width, sig + sig_stride, width); - vpx_memcpy(mc_running_buffer[r], mc_running_avg_y, width); - vpx_memcpy(mc_running_buffer[r] + width, - mc_running_avg_y + mc_avg_y_stride, width); - vpx_memcpy(running_buffer[r], running_avg_y, width); - vpx_memcpy(running_buffer[r] + width, - running_avg_y + avg_y_stride, width); + memcpy(sig_buffer[r], sig, width); + memcpy(sig_buffer[r] + width, sig + sig_stride, width); + memcpy(mc_running_buffer[r], mc_running_avg_y, width); + memcpy(mc_running_buffer[r] + width, + mc_running_avg_y + mc_avg_y_stride, width); + memcpy(running_buffer[r], running_avg_y, width); + memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width); if (width == 4) { - vpx_memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width); - vpx_memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width); - vpx_memcpy(mc_running_buffer[r] + width * 2, - mc_running_avg_y + mc_avg_y_stride * 2, width); - vpx_memcpy(mc_running_buffer[r] + width * 3, - mc_running_avg_y + mc_avg_y_stride * 3, width); - vpx_memcpy(running_buffer[r] + width * 2, - running_avg_y + avg_y_stride * 2, width); - vpx_memcpy(running_buffer[r] + width * 3, - running_avg_y + avg_y_stride * 3, width); + memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width); + memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width); + memcpy(mc_running_buffer[r] + width * 2, + mc_running_avg_y + mc_avg_y_stride * 2, width); + memcpy(mc_running_buffer[r] + width * 3, + mc_running_avg_y + mc_avg_y_stride * 3, width); + memcpy(running_buffer[r] + width * 2, + running_avg_y + avg_y_stride * 2, width); + memcpy(running_buffer[r] + width * 3, + running_avg_y + avg_y_stride * 3, width); } acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], mc_running_buffer[r], running_buffer[r], &k_0, &k_4, &k_8, &k_16, &l3, &l32, &l21, acc_diff); - 
vpx_memcpy(running_avg_y, running_buffer[r], width); - vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width); + memcpy(running_avg_y, running_buffer[r], width); + memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width); if (width == 4) { - vpx_memcpy(running_avg_y + avg_y_stride * 2, - running_buffer[r] + width * 2, width); - vpx_memcpy(running_avg_y + avg_y_stride * 3, - running_buffer[r] + width * 3, width); + memcpy(running_avg_y + avg_y_stride * 2, + running_buffer[r] + width * 2, width); + memcpy(running_avg_y + avg_y_stride * 3, + running_buffer[r] + width * 3, width); } // Update pointers for next iteration. sig += (sig_stride << shift); @@ -213,14 +212,14 @@ static int vp9_denoiser_NxM_sse2_small( acc_diff = vp9_denoiser_adj_16x1_sse2( sig_buffer[r], mc_running_buffer[r], running_buffer[r], k_0, k_delta, acc_diff); - vpx_memcpy(running_avg_y, running_buffer[r], width); - vpx_memcpy(running_avg_y + avg_y_stride, - running_buffer[r] + width, width); + memcpy(running_avg_y, running_buffer[r], width); + memcpy(running_avg_y + avg_y_stride, + running_buffer[r] + width, width); if (width == 4) { - vpx_memcpy(running_avg_y + avg_y_stride * 2, - running_buffer[r] + width * 2, width); - vpx_memcpy(running_avg_y + avg_y_stride * 3, - running_buffer[r] + width * 3, width); + memcpy(running_avg_y + avg_y_stride * 2, + running_buffer[r] + width * 2, width); + memcpy(running_avg_y + avg_y_stride * 3, + running_buffer[r] + width * 3, width); } // Update pointers for next iteration. 
running_avg_y += (avg_y_stride << shift); diff --git a/media/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c index c67490fad34..dfebaab0ac6 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c @@ -9,8 +9,9 @@ */ #include // AVX2 -#include "vpx/vpx_integer.h" +#include "./vp9_rtcd.h" +#include "vpx/vpx_integer.h" int64_t vp9_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff, diff --git a/media/libvpx/vp9/encoder/x86/vp9_error_sse2.asm b/media/libvpx/vp9/encoder/x86/vp9_error_sse2.asm index 1126fdb6164..56373e897c9 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_error_sse2.asm +++ b/media/libvpx/vp9/encoder/x86/vp9_error_sse2.asm @@ -72,3 +72,49 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz movd edx, m5 %endif RET + +; Compute the sum of squared difference between two int16_t vectors. +; int64_t vp9_block_error_fp(int16_t *coeff, int16_t *dqcoeff, +; intptr_t block_size) + +INIT_XMM sse2 +cglobal block_error_fp, 3, 3, 6, uqc, dqc, size + pxor m4, m4 ; sse accumulator + pxor m5, m5 ; dedicated zero register + lea uqcq, [uqcq+sizeq*2] + lea dqcq, [dqcq+sizeq*2] + neg sizeq +.loop: + mova m2, [uqcq+sizeq*2] + mova m0, [dqcq+sizeq*2] + mova m3, [uqcq+sizeq*2+mmsize] + mova m1, [dqcq+sizeq*2+mmsize] + psubw m0, m2 + psubw m1, m3 + ; individual errors are max. 
15bit+sign, so squares are 30bit, and + ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit) + pmaddwd m0, m0 + pmaddwd m1, m1 + ; accumulate in 64bit + punpckldq m3, m0, m5 + punpckhdq m0, m5 + paddq m4, m3 + punpckldq m3, m1, m5 + paddq m4, m0 + punpckhdq m1, m5 + paddq m4, m3 + paddq m4, m1 + add sizeq, mmsize + jl .loop + + ; accumulate horizontally and store in return value + movhlps m5, m4 + paddq m4, m5 +%if ARCH_X86_64 + movq rax, m4 +%else + pshufd m5, m4, 0x1 + movd eax, m4 + movd edx, m5 +%endif + RET diff --git a/media/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/media/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c index 0bce9c321e6..cbdd1c93e1c 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c @@ -10,6 +10,7 @@ #include +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #if CONFIG_VP9_HIGHBITDEPTH @@ -44,8 +45,8 @@ void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, (void)scan; - vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); if (!skip_block) { // Pre-scan pass @@ -132,8 +133,8 @@ void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); - vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { // Pre-scan pass diff --git a/media/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm b/media/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm index 987729f962c..4594bb1aabd 100644 --- 
a/media/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm +++ b/media/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm @@ -14,35 +14,19 @@ SECTION_RODATA pw_8: times 8 dw 8 bilin_filter_m_sse2: times 8 dw 16 times 8 dw 0 - times 8 dw 15 - times 8 dw 1 times 8 dw 14 times 8 dw 2 - times 8 dw 13 - times 8 dw 3 times 8 dw 12 times 8 dw 4 - times 8 dw 11 - times 8 dw 5 times 8 dw 10 times 8 dw 6 - times 8 dw 9 - times 8 dw 7 times 16 dw 8 - times 8 dw 7 - times 8 dw 9 times 8 dw 6 times 8 dw 10 - times 8 dw 5 - times 8 dw 11 times 8 dw 4 times 8 dw 12 - times 8 dw 3 - times 8 dw 13 times 8 dw 2 times 8 dw 14 - times 8 dw 1 - times 8 dw 15 SECTION .text diff --git a/media/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c b/media/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c index 4bc3e7e2d15..29b7b278217 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c @@ -13,237 +13,6 @@ #include "vp9/encoder/vp9_variance.h" #include "vpx_ports/mem.h" -typedef uint32_t (*high_variance_fn_t) (const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - uint32_t *sse, int *sum); - -uint32_t vp9_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - uint32_t *sse, int *sum); - -uint32_t vp9_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - uint32_t *sse, int *sum); - -static void highbd_variance_sse2(const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - int w, int h, uint32_t *sse, int *sum, - high_variance_fn_t var_fn, int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += block_size) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(src + src_stride * i + j, src_stride, - ref + ref_stride * i + j, ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } - } -} - -static void 
highbd_10_variance_sse2(const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - int w, int h, uint32_t *sse, int *sum, - high_variance_fn_t var_fn, int block_size) { - int i, j; - uint64_t sse_long = 0; - int64_t sum_long = 0; - - for (i = 0; i < h; i += block_size) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(src + src_stride * i + j, src_stride, - ref + ref_stride * i + j, ref_stride, &sse0, &sum0); - sse_long += sse0; - sum_long += sum0; - } - } - *sum = ROUND_POWER_OF_TWO(sum_long, 2); - *sse = ROUND_POWER_OF_TWO(sse_long, 4); -} - -static void highbd_12_variance_sse2(const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - int w, int h, uint32_t *sse, int *sum, - high_variance_fn_t var_fn, int block_size) { - int i, j; - uint64_t sse_long = 0; - int64_t sum_long = 0; - - for (i = 0; i < h; i += block_size) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(src + src_stride * i + j, src_stride, - ref + ref_stride * i + j, ref_stride, &sse0, &sum0); - sse_long += sse0; - sum_long += sum0; - } - } - *sum = ROUND_POWER_OF_TWO(sum_long, 4); - *sse = ROUND_POWER_OF_TWO(sse_long, 8); -} - - -#define HIGH_GET_VAR(S) \ -void vp9_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, \ - uint32_t *sse, int *sum) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ - sse, sum); \ -} \ -\ -void vp9_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, \ - uint32_t *sse, int *sum) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ - sse, sum); \ - *sum = ROUND_POWER_OF_TWO(*sum, 2); \ - *sse = ROUND_POWER_OF_TWO(*sse, 
4); \ -} \ -\ -void vp9_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, \ - uint32_t *sse, int *sum) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ - sse, sum); \ - *sum = ROUND_POWER_OF_TWO(*sum, 4); \ - *sse = ROUND_POWER_OF_TWO(*sse, 8); \ -} - -HIGH_GET_VAR(16); -HIGH_GET_VAR(8); - -#undef HIGH_GET_VAR - -#define VAR_FN(w, h, block_size, shift) \ -uint32_t vp9_highbd_variance##w##x##h##_sse2( \ - const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ - int sum; \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - highbd_variance_sse2(src, src_stride, ref, ref_stride, w, h, sse, &sum, \ - vp9_highbd_calc##block_size##x##block_size##var_sse2, \ - block_size); \ - return *sse - (((int64_t)sum * sum) >> shift); \ -} \ -\ -uint32_t vp9_highbd_10_variance##w##x##h##_sse2( \ - const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ - int sum; \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - highbd_10_variance_sse2( \ - src, src_stride, ref, ref_stride, w, h, sse, &sum, \ - vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ - return *sse - (((int64_t)sum * sum) >> shift); \ -} \ -\ -uint32_t vp9_highbd_12_variance##w##x##h##_sse2( \ - const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ - int sum; \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - highbd_12_variance_sse2( \ - src, src_stride, ref, ref_stride, w, h, sse, &sum, \ - vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ - return *sse - (((int64_t)sum * sum) >> shift); \ -} - -VAR_FN(64, 64, 16, 12); -VAR_FN(64, 32, 16, 11); 
-VAR_FN(32, 64, 16, 11); -VAR_FN(32, 32, 16, 10); -VAR_FN(32, 16, 16, 9); -VAR_FN(16, 32, 16, 9); -VAR_FN(16, 16, 16, 8); -VAR_FN(16, 8, 8, 7); -VAR_FN(8, 16, 8, 7); -VAR_FN(8, 8, 8, 6); - -#undef VAR_FN - -unsigned int vp9_highbd_mse16x16_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, - sse, &sum, vp9_highbd_calc16x16var_sse2, 16); - return *sse; -} - -unsigned int vp9_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, - sse, &sum, vp9_highbd_calc16x16var_sse2, 16); - return *sse; -} - -unsigned int vp9_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, - sse, &sum, vp9_highbd_calc16x16var_sse2, 16); - return *sse; -} - -unsigned int vp9_highbd_mse8x8_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, - sse, &sum, vp9_highbd_calc8x8var_sse2, 8); - return *sse; -} - -unsigned int vp9_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 
8, 8, - sse, &sum, vp9_highbd_calc8x8var_sse2, 8); - return *sse; -} - -unsigned int vp9_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, - sse, &sum, vp9_highbd_calc8x8var_sse2, 8); - return *sse; -} - #define DECL(w, opt) \ int vp9_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \ ptrdiff_t src_stride, \ diff --git a/media/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/media/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c index 679c66e30b8..71fdfd71624 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c @@ -11,6 +11,7 @@ #include #include +#include "./vp9_rtcd.h" #include "vpx/vpx_integer.h" void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, @@ -230,6 +231,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, const int16_t* scan_ptr, const int16_t* iscan_ptr) { __m128i zero; + __m128i thr; + int16_t nzflag; (void)scan_ptr; (void)zbin_ptr; (void)quant_shift_ptr; @@ -316,6 +319,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, n_coeffs += 8 * 2; } + thr = _mm_srai_epi16(dequant, 1); + // AC only loop while (n_coeffs < 0) { __m128i coeff0, coeff1; @@ -335,28 +340,39 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - qcoeff0 = _mm_adds_epi16(qcoeff0, round); - qcoeff1 = _mm_adds_epi16(qcoeff1, round); - qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); - qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); + nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) | + _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr)); - // Reinsert signs - qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); - qcoeff1 = 
_mm_xor_si128(qtmp1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); + if (nzflag) { + qcoeff0 = _mm_adds_epi16(qcoeff0, round); + qcoeff1 = _mm_adds_epi16(qcoeff1, round); + qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); + qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); - _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0); - _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); + // Reinsert signs + qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); + qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); + qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); + qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - coeff0 = _mm_mullo_epi16(qcoeff0, dequant); - coeff1 = _mm_mullo_epi16(qcoeff1, dequant); + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0); + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); - _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); - _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); + coeff0 = _mm_mullo_epi16(qcoeff0, dequant); + coeff1 = _mm_mullo_epi16(qcoeff1, dequant); + + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); + } else { + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); + + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); + } } - { + if (nzflag) { // Scan for eob __m128i zero_coeff0, zero_coeff1; __m128i nzero_coeff0, nzero_coeff1; diff --git a/media/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/media/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm index c35eb3603ce..449d52b22e7 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm +++ b/media/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm @@ -282,6 +282,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, 
round, quant, \ psignw m8, m9 psignw m13, m10 psrlw m0, m3, 2 +%else + psrlw m0, m3, 1 %endif mova [r4q+ncoeffq*2+ 0], m8 mova [r4q+ncoeffq*2+16], m13 @@ -302,7 +304,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] pabsw m6, m9 ; m6 = abs(m9) pabsw m11, m10 ; m11 = abs(m10) -%ifidn %1, fp_32x32 + pcmpgtw m7, m6, m0 pcmpgtw m12, m11, m0 pmovmskb r6d, m7 @@ -310,7 +312,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ or r6, r2 jz .skip_iter -%endif + pcmpeqw m7, m7 paddsw m6, m1 ; m6 += round @@ -348,7 +350,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ add ncoeffq, mmsize jl .ac_only_loop -%ifidn %1, fp_32x32 jmp .accumulate_eob .skip_iter: mova [r3q+ncoeffq*2+ 0], m5 @@ -357,7 +358,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ mova [r4q+ncoeffq*2+16], m5 add ncoeffq, mmsize jl .ac_only_loop -%endif .accumulate_eob: ; horizontally accumulate/max eobs and write into [eob] memory pointer diff --git a/media/libvpx/vp9/encoder/x86/vp9_sad_ssse3.asm b/media/libvpx/vp9/encoder/x86/vp9_sad_ssse3.asm deleted file mode 100644 index 0cb35424ed6..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_sad_ssse3.asm +++ /dev/null @@ -1,370 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -%macro PROCESS_16X2X3 1 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm5, XMMWORD PTR [rdi] - lddqu xmm6, XMMWORD PTR [rdi+1] - lddqu xmm7, XMMWORD PTR [rdi+2] - - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm1, XMMWORD PTR [rdi] - lddqu xmm2, XMMWORD PTR [rdi+1] - lddqu xmm3, XMMWORD PTR [rdi+2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [rsi+rax] - lddqu xmm1, XMMWORD PTR [rdi+rdx] - lddqu xmm2, XMMWORD PTR [rdi+rdx+1] - lddqu xmm3, XMMWORD PTR [rdi+rdx+2] - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endmacro - -%macro PROCESS_16X2X3_OFFSET 2 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - movdqa xmm4, XMMWORD PTR [rdi] - movdqa xmm7, XMMWORD PTR [rdi+16] - - movdqa xmm5, xmm7 - palignr xmm5, xmm4, %2 - - movdqa xmm6, xmm7 - palignr xmm6, xmm4, (%2+1) - - palignr xmm7, xmm4, (%2+2) - - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [rsi] - movdqa xmm4, XMMWORD PTR [rdi] - movdqa xmm3, XMMWORD PTR [rdi+16] - - movdqa xmm1, xmm3 - palignr xmm1, xmm4, %2 - - movdqa xmm2, xmm3 - palignr xmm2, xmm4, (%2+1) - - palignr xmm3, xmm4, (%2+2) - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [rsi+rax] - movdqa xmm4, XMMWORD PTR [rdi+rdx] - movdqa xmm3, XMMWORD PTR [rdi+rdx+16] - - movdqa xmm1, xmm3 - palignr xmm1, xmm4, %2 - - movdqa xmm2, xmm3 - palignr xmm2, xmm4, (%2+1) - - palignr xmm3, xmm4, (%2+2) - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 
-%endmacro - -%macro PROCESS_16X16X3_OFFSET 2 -%2_aligned_by_%1: - - sub rdi, %1 - - PROCESS_16X2X3_OFFSET 1, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - - jmp %2_store_off - -%endmacro - -%macro PROCESS_16X8X3_OFFSET 2 -%2_aligned_by_%1: - - sub rdi, %1 - - PROCESS_16X2X3_OFFSET 1, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - - jmp %2_store_off - -%endmacro - -;void int vp9_sad16x16x3_ssse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad16x16x3_ssse3) PRIVATE -sym(vp9_sad16x16x3_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - push rcx - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - mov rdx, 0xf - and rdx, rdi - - jmp .vp9_sad16x16x3_ssse3_skiptable -.vp9_sad16x16x3_ssse3_jumptable: - dd .vp9_sad16x16x3_ssse3_aligned_by_0 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_1 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_2 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_3 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_4 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_5 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_6 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_7 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_8 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_9 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_10 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_11 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_12 - 
.vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_13 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_14 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_15 - .vp9_sad16x16x3_ssse3_do_jump -.vp9_sad16x16x3_ssse3_skiptable: - - call .vp9_sad16x16x3_ssse3_do_jump -.vp9_sad16x16x3_ssse3_do_jump: - pop rcx ; get the address of do_jump - mov rax, .vp9_sad16x16x3_ssse3_jumptable - .vp9_sad16x16x3_ssse3_do_jump - add rax, rcx ; get the absolute address of vp9_sad16x16x3_ssse3_jumptable - - movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable - add rcx, rax - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - jmp rcx - - PROCESS_16X16X3_OFFSET 0, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 1, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 2, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 3, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 4, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 5, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 6, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 7, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 8, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 9, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 10, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 11, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 12, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 13, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 14, .vp9_sad16x16x3_ssse3 - -.vp9_sad16x16x3_ssse3_aligned_by_15: - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - -.vp9_sad16x16x3_ssse3_store_off: - mov rdi, arg(4) ;Results - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rdi], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rdi+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rdi+8], xmm0 - - ; 
begin epilog - pop rcx - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void int vp9_sad16x8x3_ssse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad16x8x3_ssse3) PRIVATE -sym(vp9_sad16x8x3_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - push rcx - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - mov rdx, 0xf - and rdx, rdi - - jmp .vp9_sad16x8x3_ssse3_skiptable -.vp9_sad16x8x3_ssse3_jumptable: - dd .vp9_sad16x8x3_ssse3_aligned_by_0 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_1 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_2 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_3 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_4 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_5 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_6 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_7 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_8 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_9 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_10 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_11 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_12 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_13 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_14 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_15 - .vp9_sad16x8x3_ssse3_do_jump -.vp9_sad16x8x3_ssse3_skiptable: - - call .vp9_sad16x8x3_ssse3_do_jump -.vp9_sad16x8x3_ssse3_do_jump: - pop rcx ; get the address of do_jump - mov rax, .vp9_sad16x8x3_ssse3_jumptable - .vp9_sad16x8x3_ssse3_do_jump - add rax, rcx ; get the absolute address of vp9_sad16x8x3_ssse3_jumptable - - movsxd 
rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable - add rcx, rax - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - jmp rcx - - PROCESS_16X8X3_OFFSET 0, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 1, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 2, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 3, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 4, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 5, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 6, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 7, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 8, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 9, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 10, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 11, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 12, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 13, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 14, .vp9_sad16x8x3_ssse3 - -.vp9_sad16x8x3_ssse3_aligned_by_15: - - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - -.vp9_sad16x8x3_ssse3_store_off: - mov rdi, arg(4) ;Results - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rdi], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rdi+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rdi+8], xmm0 - - ; begin epilog - pop rcx - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/media/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm b/media/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm index 06b8b034a5e..292cf34d1a2 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm +++ b/media/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm @@ -14,52 +14,28 @@ SECTION_RODATA pw_8: times 8 dw 8 bilin_filter_m_sse2: times 8 dw 16 times 8 dw 0 - times 8 dw 15 - times 8 dw 1 times 8 dw 14 times 8 dw 2 - times 8 dw 13 - times 8 dw 3 times 8 dw 12 times 8 dw 4 - times 8 dw 11 - times 8 dw 5 times 8 dw 10 times 8 dw 6 - times 8 dw 9 - times 
8 dw 7 times 16 dw 8 - times 8 dw 7 - times 8 dw 9 times 8 dw 6 times 8 dw 10 - times 8 dw 5 - times 8 dw 11 times 8 dw 4 times 8 dw 12 - times 8 dw 3 - times 8 dw 13 times 8 dw 2 times 8 dw 14 - times 8 dw 1 - times 8 dw 15 bilin_filter_m_ssse3: times 8 db 16, 0 - times 8 db 15, 1 times 8 db 14, 2 - times 8 db 13, 3 times 8 db 12, 4 - times 8 db 11, 5 times 8 db 10, 6 - times 8 db 9, 7 times 16 db 8 - times 8 db 7, 9 times 8 db 6, 10 - times 8 db 5, 11 times 8 db 4, 12 - times 8 db 3, 13 times 8 db 2, 14 - times 8 db 1, 15 SECTION .text diff --git a/media/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c index a441cadaf70..b1c79752076 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c @@ -9,42 +9,28 @@ */ #include // AVX2 + +#include "./vp9_rtcd.h" #include "vpx_ports/mem.h" #include "vp9/encoder/vp9_variance.h" DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, - 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, - 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, - 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, - 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, - 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, - 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, - 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, - 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 8, 8, 8, 8, 8, 8, 
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, - 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, - 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, - 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, - 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, - 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, - 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, - 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15 }; #define FILTER_SRC(filter) \ diff --git a/media/libvpx/vp9/encoder/x86/vp9_variance_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_variance_avx2.c index ea09b959e12..8cd071de5e2 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_variance_avx2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_variance_avx2.c @@ -7,23 +7,12 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vp9/encoder/vp9_variance.h" #include "vpx_ports/mem.h" -typedef void (*get_var_avx2)(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum); - -void vp9_get16x16var_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum); - -void vp9_get32x32var_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum); - unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride, int x_offset, int y_offset, const uint8_t *dst, int dst_stride, @@ -41,81 +30,6 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src, int height, unsigned int *sseptr); -static void variance_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - int w, int h, unsigned int *sse, int *sum, - get_var_avx2 var_fn, int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += 16) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(&src[src_stride * i + j], src_stride, - &ref[ref_stride * i + j], ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } - } -} - - -unsigned int vp9_variance16x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 16, 16, - sse, &sum, vp9_get16x16var_avx2, 16); - return *sse - (((unsigned int)sum * sum) >> 8); -} - -unsigned int vp9_mse16x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - vp9_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse; -} - -unsigned int vp9_variance32x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 
32, 16, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 9); -} - -unsigned int vp9_variance32x32_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 32, 32, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 10); -} - -unsigned int vp9_variance64x64_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 64, 64, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 12); -} - -unsigned int vp9_variance64x32_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 64, 32, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 11); -} - unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src, int src_stride, int x_offset, diff --git a/media/libvpx/vp9/encoder/x86/vp9_variance_sse2.c b/media/libvpx/vp9/encoder/x86/vp9_variance_sse2.c index 8490bbbdc2e..961efe34ee6 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_variance_sse2.c +++ b/media/libvpx/vp9/encoder/x86/vp9_variance_sse2.c @@ -10,310 +10,12 @@ #include // SSE2 +#include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vp9/encoder/vp9_variance.h" #include "vpx_ports/mem.h" -typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse, int *sum); - -unsigned int vp9_get_mb_ss_sse2(const int16_t *src) { - __m128i vsum = _mm_setzero_si128(); - int i; - - for (i = 0; i < 32; ++i) { - const __m128i v = _mm_loadu_si128((const __m128i *)src); - vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); - src += 8; - } - - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi32(vsum, 
_mm_srli_si128(vsum, 4)); - return _mm_cvtsi128_si32(vsum); -} - -#define READ64(p, stride, i) \ - _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \ - _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride))) - -unsigned int vp9_get4x4var_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - const __m128i zero = _mm_setzero_si128(); - const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero); - const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero); - const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero); - const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); - - // sum - __m128i vsum = _mm_add_epi16(diff0, diff1); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); - *sum = (int16_t)_mm_extract_epi16(vsum, 0); - - // sse - vsum = _mm_add_epi32(_mm_madd_epi16(diff0, diff0), - _mm_madd_epi16(diff1, diff1)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); - *sse = _mm_cvtsi128_si32(vsum); - - return 0; -} - -unsigned int vp9_get8x8var_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - const __m128i zero = _mm_setzero_si128(); - __m128i vsum = _mm_setzero_si128(); - __m128i vsse = _mm_setzero_si128(); - int i; - - for (i = 0; i < 8; i += 2) { - const __m128i src0 = _mm_unpacklo_epi8(_mm_loadl_epi64( - (const __m128i *)(src + i * src_stride)), zero); - const __m128i ref0 = _mm_unpacklo_epi8(_mm_loadl_epi64( - (const __m128i *)(ref + i * ref_stride)), zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); - - const __m128i src1 = _mm_unpacklo_epi8(_mm_loadl_epi64( - (const 
__m128i *)(src + (i + 1) * src_stride)), zero); - const __m128i ref1 = _mm_unpacklo_epi8(_mm_loadl_epi64( - (const __m128i *)(ref + (i + 1) * ref_stride)), zero); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); - - vsum = _mm_add_epi16(vsum, diff0); - vsum = _mm_add_epi16(vsum, diff1); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); - } - - // sum - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); - *sum = (int16_t)_mm_extract_epi16(vsum, 0); - - // sse - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); - *sse = _mm_cvtsi128_si32(vsse); - - return 0; -} - -unsigned int vp9_get16x16var_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - const __m128i zero = _mm_setzero_si128(); - __m128i vsum = _mm_setzero_si128(); - __m128i vsse = _mm_setzero_si128(); - int i; - - for (i = 0; i < 16; ++i) { - const __m128i s = _mm_loadu_si128((const __m128i *)src); - const __m128i r = _mm_loadu_si128((const __m128i *)ref); - - const __m128i src0 = _mm_unpacklo_epi8(s, zero); - const __m128i ref0 = _mm_unpacklo_epi8(r, zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); - - const __m128i src1 = _mm_unpackhi_epi8(s, zero); - const __m128i ref1 = _mm_unpackhi_epi8(r, zero); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); - - vsum = _mm_add_epi16(vsum, diff0); - vsum = _mm_add_epi16(vsum, diff1); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); - - src += src_stride; - ref += ref_stride; - } - - // sum - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); - *sum = (int16_t)_mm_extract_epi16(vsum, 0) + - (int16_t)_mm_extract_epi16(vsum, 1); - - // 
sse - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); - *sse = _mm_cvtsi128_si32(vsse); - - return 0; -} - - -static void variance_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - int w, int h, unsigned int *sse, int *sum, - variance_fn_t var_fn, int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += block_size) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(src + src_stride * i + j, src_stride, - ref + ref_stride * i + j, ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } - } -} - -unsigned int vp9_variance4x4_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse) { - int sum; - vp9_get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse - (((unsigned int)sum * sum) >> 4); -} - -unsigned int vp9_variance8x4_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 8, 4, - sse, &sum, vp9_get4x4var_sse2, 4); - return *sse - (((unsigned int)sum * sum) >> 5); -} - -unsigned int vp9_variance4x8_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 4, 8, - sse, &sum, vp9_get4x4var_sse2, 4); - return *sse - (((unsigned int)sum * sum) >> 5); -} - -unsigned int vp9_variance8x8_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse) { - int sum; - vp9_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse - (((unsigned int)sum * sum) >> 6); -} - -unsigned int vp9_variance16x8_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, 
ref_stride, 16, 8, - sse, &sum, vp9_get8x8var_sse2, 8); - return *sse - (((unsigned int)sum * sum) >> 7); -} - -unsigned int vp9_variance8x16_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 8, 16, - sse, &sum, vp9_get8x8var_sse2, 8); - return *sse - (((unsigned int)sum * sum) >> 7); -} - -unsigned int vp9_variance16x16_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse) { - int sum; - vp9_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse - (((unsigned int)sum * sum) >> 8); -} - -unsigned int vp9_variance32x32_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 32, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 10); -} - -unsigned int vp9_variance32x16_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 16, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 9); -} - -unsigned int vp9_variance16x32_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 16, 32, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 9); -} - -unsigned int vp9_variance64x64_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 64, 64, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 12); -} - -unsigned int vp9_variance64x32_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int 
*sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 64, 32, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 11); -} - -unsigned int vp9_variance32x64_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 64, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 11); -} - -unsigned int vp9_mse8x8_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - vp9_variance8x8_sse2(src, src_stride, ref, ref_stride, sse); - return *sse; -} - -unsigned int vp9_mse8x16_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - vp9_variance8x16_sse2(src, src_stride, ref, ref_stride, sse); - return *sse; -} - -unsigned int vp9_mse16x8_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - vp9_variance16x8_sse2(src, src_stride, ref, ref_stride, sse); - return *sse; -} - -unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); - return *sse; -} - // The 2 unused parameters are place holders for PIC enabled build. 
#define DECL(w, opt) \ int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ diff --git a/media/libvpx/vp9/vp9_cx_iface.c b/media/libvpx/vp9/vp9_cx_iface.c index 0ce37aae7d1..9462be9faf1 100644 --- a/media/libvpx/vp9/vp9_cx_iface.c +++ b/media/libvpx/vp9/vp9_cx_iface.c @@ -176,15 +176,23 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); + if (cfg->ss_number_layers * cfg->ts_number_layers > VPX_MAX_LAYERS) + ERROR("ss_number_layers * ts_number_layers is out of range"); if (cfg->ts_number_layers > 1) { - unsigned int i; - for (i = 1; i < cfg->ts_number_layers; ++i) - if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i - 1]) + unsigned int sl, tl; + for (sl = 1; sl < cfg->ss_number_layers; ++sl) { + for (tl = 1; tl < cfg->ts_number_layers; ++tl) { + const int layer = + LAYER_IDS_TO_IDX(sl, tl, cfg->ts_number_layers); + if (cfg->layer_target_bitrate[layer] < + cfg->layer_target_bitrate[layer - 1]) ERROR("ts_target_bitrate entries are not increasing"); + } + } RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers - 1], 1, 1); - for (i = cfg->ts_number_layers - 2; i > 0; --i) - if (cfg->ts_rate_decimator[i - 1] != 2 * cfg->ts_rate_decimator[i]) + for (tl = cfg->ts_number_layers - 2; tl > 0; --tl) + if (cfg->ts_rate_decimator[tl - 1] != 2 * cfg->ts_rate_decimator[tl]) ERROR("ts_rate_decimator factors are not powers of 2"); } @@ -360,6 +368,7 @@ static vpx_codec_err_t set_encoder_config( const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { const int is_vbr = cfg->rc_end_usage == VPX_VBR; + int sl, tl; oxcf->profile = cfg->g_profile; oxcf->max_threads = (int)cfg->g_threads; oxcf->width = cfg->g_w; @@ -460,35 +469,33 @@ static vpx_codec_err_t set_encoder_config( oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost; oxcf->ss_number_layers = cfg->ss_number_layers; + oxcf->ts_number_layers = 
cfg->ts_number_layers; + oxcf->temporal_layering_mode = (enum vp9e_temporal_layering_mode) + cfg->temporal_layering_mode; - if (oxcf->ss_number_layers > 1) { - int i; - for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { - oxcf->ss_target_bitrate[i] = 1000 * cfg->ss_target_bitrate[i]; + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { #if CONFIG_SPATIAL_SVC - oxcf->ss_enable_auto_arf[i] = cfg->ss_enable_auto_alt_ref[i]; + oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl]; #endif + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + oxcf->layer_target_bitrate[sl * oxcf->ts_number_layers + tl] = + 1000 * cfg->layer_target_bitrate[sl * oxcf->ts_number_layers + tl]; } - } else if (oxcf->ss_number_layers == 1) { + } + if (oxcf->ss_number_layers == 1 && oxcf->pass != 0) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; #if CONFIG_SPATIAL_SVC oxcf->ss_enable_auto_arf[0] = extra_cfg->enable_auto_alt_ref; #endif } - - oxcf->ts_number_layers = cfg->ts_number_layers; - if (oxcf->ts_number_layers > 1) { - int i; - for (i = 0; i < VPX_TS_MAX_LAYERS; ++i) { - oxcf->ts_target_bitrate[i] = 1000 * cfg->ts_target_bitrate[i]; - oxcf->ts_rate_decimator[i] = cfg->ts_rate_decimator[i]; + for (tl = 0; tl < VPX_TS_MAX_LAYERS; ++tl) { + oxcf->ts_rate_decimator[tl] = cfg->ts_rate_decimator[tl] ? 
+ cfg->ts_rate_decimator[tl] : 1; } } else if (oxcf->ts_number_layers == 1) { - oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth; oxcf->ts_rate_decimator[0] = 1; } - /* printf("Current VP9 Settings: \n"); printf("target_bandwidth: %d\n", oxcf->target_bandwidth); @@ -902,11 +909,12 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi, unsigned int lib_flags) { vpx_codec_frame_flags_t flags = lib_flags << 16; - if (lib_flags & FRAMEFLAGS_KEY -#if CONFIG_SPATIAL_SVC - || (is_two_pass_svc(cpi) && cpi->svc.layer_context[0].is_key_frame) -#endif - ) + if (lib_flags & FRAMEFLAGS_KEY || + (cpi->use_svc && + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id].is_key_frame) + ) flags |= VPX_FRAME_IS_KEY; if (cpi->droppable) @@ -1022,16 +1030,15 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_cx_pkt_t pkt; #if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].layer_size += size; + if (cpi->use_svc) + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].layer_size += size; #endif // Pack invisible frames with the next visible frame - if (!cpi->common.show_frame -#if CONFIG_SPATIAL_SVC - || (is_two_pass_svc(cpi) && - cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1) -#endif + if (!cpi->common.show_frame || + (cpi->use_svc && + cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1) ) { if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; @@ -1089,24 +1096,27 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, pkt.data.frame.partition_id = -1; if(ctx->output_cx_pkt_cb.output_cx_pkt) - ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, ctx->output_cx_pkt_cb.user_priv); + ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, + ctx->output_cx_pkt_cb.user_priv); else vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); cx_data += size; cx_data_sz -= size; 
+#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) #if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi) && !ctx->output_cx_pkt_cb.output_cx_pkt) { + if (cpi->use_svc && !ctx->output_cx_pkt_cb.output_cx_pkt) { vpx_codec_cx_pkt_t pkt_sizes, pkt_psnr; - int i; + int sl; vp9_zero(pkt_sizes); vp9_zero(pkt_psnr); pkt_sizes.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES; pkt_psnr.kind = VPX_CODEC_SPATIAL_SVC_LAYER_PSNR; - for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[i]; - pkt_sizes.data.layer_sizes[i] = lc->layer_size; - pkt_psnr.data.layer_psnr[i] = lc->psnr_pkt; + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + LAYER_CONTEXT *lc = + &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers]; + pkt_sizes.data.layer_sizes[sl] = lc->layer_size; + pkt_psnr.data.layer_psnr[sl] = lc->psnr_pkt; lc->layer_size = 0; } @@ -1115,6 +1125,12 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_psnr); } #endif +#endif + if (is_one_pass_cbr_svc(cpi) && + (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { + // Encoded all spatial layers; exit loop. 
+ break; + } } } } @@ -1260,6 +1276,21 @@ static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx, } } +static vpx_codec_err_t ctrl_get_active_map(vpx_codec_alg_priv_t *ctx, + va_list args) { + vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *); + + if (map) { + if (!vp9_get_active_map(ctx->cpi, map->active_map, + (int)map->rows, (int)map->cols)) + return VPX_CODEC_OK; + else + return VPX_CODEC_INVALID_PARAM; + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *); @@ -1277,16 +1308,20 @@ static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) { int data = va_arg(args, int); const vpx_codec_enc_cfg_t *cfg = &ctx->cfg; + // Both one-pass and two-pass RC are supported now. + // User setting this has to make sure of the following. + // In two-pass setting: either (but not both) + // cfg->ss_number_layers > 1, or cfg->ts_number_layers > 1 + // In one-pass setting: + // either or both cfg->ss_number_layers > 1, or cfg->ts_number_layers > 1 vp9_set_svc(ctx->cpi, data); - // CBR or two pass mode for SVC with both temporal and spatial layers - // not yet supported. 
+ if (data == 1 && - (cfg->rc_end_usage == VPX_CBR || - cfg->g_pass == VPX_RC_FIRST_PASS || + (cfg->g_pass == VPX_RC_FIRST_PASS || cfg->g_pass == VPX_RC_LAST_PASS) && - cfg->ss_number_layers > 1 && - cfg->ts_number_layers > 1) { + cfg->ss_number_layers > 1 && + cfg->ts_number_layers > 1) { return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; @@ -1298,9 +1333,7 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) svc->spatial_layer_id = data->spatial_layer_id; -#endif svc->temporal_layer_id = data->temporal_layer_id; // Checks on valid layer_id input. if (svc->temporal_layer_id < 0 || @@ -1320,9 +1353,7 @@ static vpx_codec_err_t ctrl_get_svc_layer_id(vpx_codec_alg_priv_t *ctx, VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) data->spatial_layer_id = svc->spatial_layer_id; -#endif data->temporal_layer_id = svc->temporal_layer_id; return VPX_CODEC_OK; @@ -1332,15 +1363,21 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_extra_cfg_t *const params = va_arg(args, vpx_svc_extra_cfg_t *); - int i; + int sl, tl; - for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[i]; - - lc->max_q = params->max_quantizers[i]; - lc->min_q = params->min_quantizers[i]; - lc->scaling_factor_num = params->scaling_factor_num[i]; - lc->scaling_factor_den = params->scaling_factor_den[i]; + // Number of temporal layers and number of spatial layers have to be set + // properly before calling this control function. 
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + for (tl = 0; tl < cpi->svc.number_temporal_layers; ++tl) { + const int layer = + LAYER_IDS_TO_IDX(sl, tl, cpi->svc.number_temporal_layers); + LAYER_CONTEXT *lc = + &cpi->svc.layer_context[layer]; + lc->max_q = params->max_quantizers[sl]; + lc->min_q = params->min_quantizers[sl]; + lc->scaling_factor_num = params->scaling_factor_num[sl]; + lc->scaling_factor_den = params->scaling_factor_den[sl]; + } } return VPX_CODEC_OK; @@ -1401,10 +1438,8 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP9E_SET_AQ_MODE, ctrl_set_aq_mode}, {VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost}, {VP9E_SET_SVC, ctrl_set_svc}, -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) {VP9E_SET_SVC_PARAMETERS, ctrl_set_svc_parameters}, {VP9E_REGISTER_CX_CALLBACK, ctrl_register_cx_callback}, -#endif {VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id}, {VP9E_SET_TUNE_CONTENT, ctrl_set_tune_content}, {VP9E_SET_COLOR_SPACE, ctrl_set_color_space}, @@ -1414,9 +1449,8 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer}, {VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64}, {VP9_GET_REFERENCE, ctrl_get_reference}, -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) {VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id}, -#endif + {VP9E_GET_ACTIVEMAP, ctrl_get_active_map}, { -1, NULL}, }; @@ -1479,6 +1513,8 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { {0}, // ts_rate_decimator 0, // ts_periodicity {0}, // ts_layer_id + {0}, // layer_taget_bitrate + 0 // temporal_layering_mode } }, }; diff --git a/media/libvpx/vp9/vp9_dx_iface.c b/media/libvpx/vp9/vp9_dx_iface.c index 9e4c1a5c4b6..4080d64c170 100644 --- a/media/libvpx/vp9/vp9_dx_iface.c +++ b/media/libvpx/vp9/vp9_dx_iface.c @@ -55,6 +55,7 @@ struct vpx_codec_alg_priv { int invert_tile_order; int last_show_frame; // Index of last output frame. 
int byte_alignment; + int skip_loop_filter; // Frame parallel related. int frame_parallel_decode; // frame-based threading. @@ -116,6 +117,9 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { (FrameWorkerData *)worker->data1; vp9_get_worker_interface()->end(worker); vp9_remove_common(&frame_worker_data->pbi->common); +#if CONFIG_VP9_POSTPROC + vp9_free_postproc_buffers(&frame_worker_data->pbi->common); +#endif vp9_decoder_remove(frame_worker_data->pbi); vpx_free(frame_worker_data->scratch_buffer); #if CONFIG_MULTITHREAD @@ -129,8 +133,10 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { #endif } - if (ctx->buffer_pool) + if (ctx->buffer_pool) { + vp9_free_ref_frame_buffers(ctx->buffer_pool); vp9_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers); + } vpx_free(ctx->frame_workers); vpx_free(ctx->buffer_pool); @@ -280,6 +286,7 @@ static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { cm->new_fb_idx = INVALID_IDX; cm->byte_alignment = ctx->byte_alignment; + cm->skip_loop_filter = ctx->skip_loop_filter; if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { pool->get_fb_cb = ctx->get_ext_fb_cb; @@ -457,7 +464,6 @@ static INLINE void check_resync(vpx_codec_alg_priv_t *const ctx, static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, const uint8_t **data, unsigned int data_sz, void *user_priv, int64_t deadline) { - vp9_ppflags_t flags = {0, 0, 0}; const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); (void)deadline; @@ -523,7 +529,7 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, frame_worker_data->scratch_buffer_size = data_sz; } frame_worker_data->data_size = data_sz; - vpx_memcpy(frame_worker_data->scratch_buffer, *data, data_sz); + memcpy(frame_worker_data->scratch_buffer, *data, data_sz); frame_worker_data->frame_decoded = 0; frame_worker_data->frame_context_ready = 0; @@ -542,9 +548,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, 
winterface->launch(worker); } - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) - set_ppflags(ctx, &flags); - return VPX_CODEC_OK; } @@ -750,6 +753,8 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, (FrameWorkerData *)worker->data1; ctx->next_output_worker_id = (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; + if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) + set_ppflags(ctx, &flags); // Wait for the frame from worker thread. if (winterface->sync(worker)) { // Check if worker has received any frames. @@ -934,7 +939,8 @@ static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, frame_worker_data->pbi->common.buffer_pool->frame_bufs; if (frame_worker_data->pbi->common.frame_to_show == NULL) return VPX_CODEC_ERROR; - *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted; + if (ctx->last_show_frame >= 0) + *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted; return VPX_CODEC_OK; } else { return VPX_CODEC_ERROR; @@ -1055,6 +1061,19 @@ static vpx_codec_err_t ctrl_set_byte_alignment(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx, + va_list args) { + ctx->skip_loop_filter = va_arg(args, int); + + if (ctx->frame_workers) { + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + frame_worker_data->pbi->common.skip_loop_filter = ctx->skip_loop_filter; + } + + return VPX_CODEC_OK; +} + static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { {VP8_COPY_REFERENCE, ctrl_copy_reference}, @@ -1068,6 +1087,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { {VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order}, {VPXD_SET_DECRYPTOR, ctrl_set_decryptor}, {VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment}, + {VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter}, // Getters {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, diff --git 
a/media/libvpx/vp9/vp9_iface_common.h b/media/libvpx/vp9/vp9_iface_common.h index e585aa14725..58bb7d5d648 100644 --- a/media/libvpx/vp9/vp9_iface_common.h +++ b/media/libvpx/vp9/vp9_iface_common.h @@ -10,6 +10,8 @@ #ifndef VP9_VP9_IFACE_COMMON_H_ #define VP9_VP9_IFACE_COMMON_H_ +#include "vpx_ports/mem.h" + static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, void *user_priv) { /** vpx_img_wrap() doesn't allow specifying independent strides for diff --git a/media/libvpx/vp9_rtcd_armv7-android-gcc.h b/media/libvpx/vp9_rtcd_armv7-android-gcc.h index 9fe585d160f..dbec16dbb5a 100644 --- a/media/libvpx/vp9_rtcd_armv7-android-gcc.h +++ b/media/libvpx/vp9_rtcd_armv7-android-gcc.h @@ -39,6 +39,9 @@ RTCD_EXTERN unsigned int (*vp9_avg_8x8)(const uint8_t *, int p); int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); #define vp9_block_error vp9_block_error_c +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +#define vp9_block_error_fp vp9_block_error_fp_c + void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -144,7 +147,8 @@ void vp9_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab #define vp9_d63_predictor_8x8 vp9_d63_predictor_8x8_c void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 
vp9_dc_128_predictor_16x16_c +void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c @@ -153,10 +157,12 @@ void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c @@ -165,10 +171,12 @@ void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void 
vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_c +void vp9_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_c @@ -177,10 +185,12 @@ void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abo #define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_c void vp9_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_c +void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c @@ -189,7 +199,8 @@ void vp9_dc_top_predictor_4x4_c(uint8_t 
*dst, ptrdiff_t y_stride, const uint8_t #define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); #define vp9_diamond_search_sad vp9_diamond_search_sad_c @@ -245,17 +256,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void 
vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -272,6 +272,12 @@ void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +#define vp9_hadamard_16x16 vp9_hadamard_16x16_c + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +#define vp9_hadamard_8x8 vp9_hadamard_8x8_c + void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -383,17 +389,8 @@ void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, co void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); RTCD_EXTERN void (*vp9_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, 
unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_c +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +#define vp9_minmax_8x8 vp9_minmax_8x8_c void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define vp9_quantize_b vp9_quantize_b_c @@ -408,177 +405,8 @@ RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coef void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x16_avg vp9_sad16x16_avg_c - -void vp9_sad16x16x3_c(const uint8_t *src_ptr, 
int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad16x16x3 vp9_sad16x16x3_c - -void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x16x8 vp9_sad16x16x8_c - -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x32 vp9_sad16x32_c - -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x32_avg vp9_sad16x32_avg_c - -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x32x4d vp9_sad16x32x4d_c - -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x8 vp9_sad16x8_c - -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x8_avg vp9_sad16x8_avg_c - -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad16x8x3 vp9_sad16x8x3_c - -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x8x4d vp9_sad16x8x4d_c - -void vp9_sad16x8x8_c(const uint8_t *src_ptr, int 
src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x8x8 vp9_sad16x8x8_c - -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x16 vp9_sad32x16_c - -unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x16_avg vp9_sad32x16_avg_c - -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x16x4d vp9_sad32x16x4d_c - -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x32_avg vp9_sad32x32_avg_c - -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x3 vp9_sad32x32x3_c - -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad32x32x8 vp9_sad32x32x8_c - -unsigned int 
vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x64 vp9_sad32x64_c - -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x64_avg vp9_sad32x64_avg_c - -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x64x4d vp9_sad32x64x4d_c - -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x4 vp9_sad4x4_c - -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x4_avg vp9_sad4x4_avg_c - -void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad4x4x3 vp9_sad4x4x3_c - -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x4x4d vp9_sad4x4x4d_c - -void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x4x8 vp9_sad4x4x8_c - -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x8 vp9_sad4x8_c - -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x8_avg vp9_sad4x8_avg_c - -void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x8x4d vp9_sad4x8x4d_c - -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define 
vp9_sad4x8x8 vp9_sad4x8x8_c - -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad64x32 vp9_sad64x32_c - -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad64x32_avg vp9_sad64x32_avg_c - -void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad64x32x4d vp9_sad64x32x4d_c - -unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad64x64_avg vp9_sad64x64_avg_c - -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x3 vp9_sad64x64x3_c - -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad64x64x8 vp9_sad64x64x8_c - -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); 
-#define vp9_sad8x16 vp9_sad8x16_c - -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x16_avg vp9_sad8x16_avg_c - -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad8x16x3 vp9_sad8x16x3_c - -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x16x4d vp9_sad8x16x4d_c - -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x16x8 vp9_sad8x16x8_c - -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x4 vp9_sad8x4_c - -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x4_avg vp9_sad8x4_avg_c - -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x4x4d vp9_sad8x4x4d_c - -void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x4x8 vp9_sad8x4x8_c - -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x8_avg vp9_sad8x8_avg_c - -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad8x8x3 vp9_sad8x8x3_c - -void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x8x4d vp9_sad8x8x4d_c - -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x8x8 vp9_sad8x8x8_c +int16_t vp9_satd_c(const int16_t *coeff, int length); +#define vp9_satd vp9_satd_c unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); #define vp9_sub_pixel_avg_variance16x16 vp9_sub_pixel_avg_variance16x16_c @@ -701,51 +529,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_neon(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_c @@ -779,16 +562,28 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp9_convolve_avg = vp9_convolve_avg_neon; vp9_convolve_copy = vp9_convolve_copy_c; if (flags & HAS_NEON) vp9_convolve_copy = vp9_convolve_copy_neon; + vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_c; + if (flags & HAS_NEON) vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_neon; + vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_c; + if (flags & HAS_NEON) vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_neon; + vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_c; + if (flags & HAS_NEON) vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_neon; + vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_c; + if (flags & HAS_NEON) vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_neon; + vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_c; + if 
(flags & HAS_NEON) vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_neon; + vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c; + if (flags & HAS_NEON) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_neon; + vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_c; + if (flags & HAS_NEON) vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_neon; + vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_c; + if (flags & HAS_NEON) vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_neon; vp9_fdct8x8 = vp9_fdct8x8_c; if (flags & HAS_NEON) vp9_fdct8x8 = vp9_fdct8x8_neon; vp9_fdct8x8_1 = vp9_fdct8x8_1_c; if (flags & HAS_NEON) vp9_fdct8x8_1 = vp9_fdct8x8_1_neon; vp9_fdct8x8_quant = vp9_fdct8x8_quant_c; if (flags & HAS_NEON) vp9_fdct8x8_quant = vp9_fdct8x8_quant_neon; - vp9_get16x16var = vp9_get16x16var_c; - if (flags & HAS_NEON) vp9_get16x16var = vp9_get16x16var_neon; - vp9_get8x8var = vp9_get8x8var_c; - if (flags & HAS_NEON) vp9_get8x8var = vp9_get8x8var_neon; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_NEON) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_neon; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -847,20 +642,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_neon; vp9_quantize_fp = vp9_quantize_fp_c; if (flags & HAS_NEON) vp9_quantize_fp = vp9_quantize_fp_neon; - vp9_sad16x16 = vp9_sad16x16_c; - if (flags & HAS_NEON) vp9_sad16x16 = vp9_sad16x16_neon; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; - if (flags & HAS_NEON) vp9_sad16x16x4d = vp9_sad16x16x4d_neon; - vp9_sad32x32 = vp9_sad32x32_c; - if (flags & HAS_NEON) vp9_sad32x32 = vp9_sad32x32_neon; - vp9_sad32x32x4d = vp9_sad32x32x4d_c; - if (flags & HAS_NEON) vp9_sad32x32x4d = vp9_sad32x32x4d_neon; - vp9_sad64x64 = vp9_sad64x64_c; - if (flags & HAS_NEON) vp9_sad64x64 = vp9_sad64x64_neon; - vp9_sad64x64x4d = vp9_sad64x64x4d_c; - if (flags & HAS_NEON) vp9_sad64x64x4d = vp9_sad64x64x4d_neon; - vp9_sad8x8 = vp9_sad8x8_c; - if (flags & 
HAS_NEON) vp9_sad8x8 = vp9_sad8x8_neon; vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_c; if (flags & HAS_NEON) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_neon; vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_c; @@ -887,18 +668,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_neon; vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_NEON) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_neon; - vp9_variance16x16 = vp9_variance16x16_c; - if (flags & HAS_NEON) vp9_variance16x16 = vp9_variance16x16_neon; - vp9_variance32x32 = vp9_variance32x32_c; - if (flags & HAS_NEON) vp9_variance32x32 = vp9_variance32x32_neon; - vp9_variance32x64 = vp9_variance32x64_c; - if (flags & HAS_NEON) vp9_variance32x64 = vp9_variance32x64_neon; - vp9_variance64x32 = vp9_variance64x32_c; - if (flags & HAS_NEON) vp9_variance64x32 = vp9_variance64x32_neon; - vp9_variance64x64 = vp9_variance64x64_c; - if (flags & HAS_NEON) vp9_variance64x64 = vp9_variance64x64_neon; - vp9_variance8x8 = vp9_variance8x8_c; - if (flags & HAS_NEON) vp9_variance8x8 = vp9_variance8x8_neon; } #endif diff --git a/media/libvpx/vp9_rtcd_generic-gnu.h b/media/libvpx/vp9_rtcd_generic-gnu.h index 9d356e09826..aa8f6676fdb 100644 --- a/media/libvpx/vp9_rtcd_generic-gnu.h +++ b/media/libvpx/vp9_rtcd_generic-gnu.h @@ -38,6 +38,9 @@ unsigned int vp9_avg_8x8_c(const uint8_t *, int p); int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); #define vp9_block_error vp9_block_error_c +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +#define vp9_block_error_fp vp9_block_error_fp_c + void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); #define vp9_convolve8 vp9_convolve8_c @@ -233,15 +236,6 @@ int 
vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get16x16var vp9_get16x16var_c - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_c - -unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c @@ -254,6 +248,12 @@ void vp9_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_8x8 vp9_h_predictor_8x8_c +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +#define vp9_hadamard_16x16 vp9_hadamard_16x16_c + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +#define vp9_hadamard_8x8 vp9_hadamard_8x8_c + void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); #define vp9_idct16x16_10_add vp9_idct16x16_10_add_c @@ -341,17 +341,8 @@ void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const ui void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); #define vp9_lpf_vertical_8_dual vp9_lpf_vertical_8_dual_c -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int 
vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_c +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +#define vp9_minmax_8x8 vp9_minmax_8x8_c void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define vp9_quantize_b vp9_quantize_b_c @@ -365,170 +356,8 @@ void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x16 vp9_sad16x16_c - -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x16_avg vp9_sad16x16_avg_c - -void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define 
vp9_sad16x16x3 vp9_sad16x16x3_c - -void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x16x4d vp9_sad16x16x4d_c - -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x16x8 vp9_sad16x16x8_c - -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x32 vp9_sad16x32_c - -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x32_avg vp9_sad16x32_avg_c - -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x32x4d vp9_sad16x32x4d_c - -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x8 vp9_sad16x8_c - -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x8_avg vp9_sad16x8_avg_c - -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad16x8x3 vp9_sad16x8x3_c - -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x8x4d vp9_sad16x8x4d_c - -void vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x8x8 vp9_sad16x8x8_c - -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x16 vp9_sad32x16_c - -unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, 
const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x16_avg vp9_sad32x16_avg_c - -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x16x4d vp9_sad32x16x4d_c - -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x32 vp9_sad32x32_c - -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x32_avg vp9_sad32x32_avg_c - -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x3 vp9_sad32x32x3_c - -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x4d vp9_sad32x32x4d_c - -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad32x32x8 vp9_sad32x32x8_c - -unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x64 vp9_sad32x64_c - -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x64_avg vp9_sad32x64_avg_c - -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x64x4d vp9_sad32x64x4d_c - -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x4 vp9_sad4x4_c - -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define 
vp9_sad4x4_avg vp9_sad4x4_avg_c - -void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad4x4x3 vp9_sad4x4x3_c - -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x4x4d vp9_sad4x4x4d_c - -void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x4x8 vp9_sad4x4x8_c - -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x8 vp9_sad4x8_c - -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x8_avg vp9_sad4x8_avg_c - -void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x8x4d vp9_sad4x8x4d_c - -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x8x8 vp9_sad4x8x8_c - -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad64x32 vp9_sad64x32_c - -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad64x32_avg vp9_sad64x32_avg_c - -void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad64x32x4d vp9_sad64x32x4d_c - -unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad64x64 vp9_sad64x64_c - -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, const uint8_t *second_pred); -#define vp9_sad64x64_avg vp9_sad64x64_avg_c - -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x3 vp9_sad64x64x3_c - -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x4d vp9_sad64x64x4d_c - -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad64x64x8 vp9_sad64x64x8_c - -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x16 vp9_sad8x16_c - -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x16_avg vp9_sad8x16_avg_c - -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad8x16x3 vp9_sad8x16x3_c - -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x16x4d vp9_sad8x16x4d_c - -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x16x8 vp9_sad8x16x8_c - -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x4 vp9_sad8x4_c - -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x4_avg vp9_sad8x4_avg_c - -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x4x4d vp9_sad8x4x4d_c - -void 
vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x4x8 vp9_sad8x4x8_c - -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x8 vp9_sad8x8_c - -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x8_avg vp9_sad8x8_avg_c - -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad8x8x3 vp9_sad8x8x3_c - -void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x8x4d vp9_sad8x8x4d_c - -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x8x8 vp9_sad8x8x8_c +int16_t vp9_satd_c(const int16_t *coeff, int length); +#define vp9_satd vp9_satd_c unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); #define vp9_sub_pixel_avg_variance16x16 vp9_sub_pixel_avg_variance16x16_c @@ -638,45 +467,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x16 vp9_variance16x16_c - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int 
vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x32 vp9_variance32x32_c - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_c - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x32 vp9_variance64x32_c - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x64 vp9_variance64x64_c - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_c 
- int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_c diff --git a/media/libvpx/vp9_rtcd_x86-darwin9-gcc.h b/media/libvpx/vp9_rtcd_x86-darwin9-gcc.h index cf98ec96490..0725c15ba04 100644 --- a/media/libvpx/vp9_rtcd_x86-darwin9-gcc.h +++ b/media/libvpx/vp9_rtcd_x86-darwin9-gcc.h @@ -42,6 +42,10 @@ int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +RTCD_EXTERN int64_t (*vp9_block_error_fp)(const int16_t *coeff, const int16_t *dqcoeff, int block_size); + void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -171,28 +175,36 @@ void vp9_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*vp9_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c +void 
vp9_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c +void vp9_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c +void vp9_dc_128_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t 
*left); -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c +void vp9_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c +void vp9_dc_left_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void vp9_dc_left_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -211,16 +223,20 @@ void vp9_dc_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a RTCD_EXTERN void (*vp9_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t 
*left); void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c +void vp9_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c +void vp9_dc_top_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); #define vp9_diamond_search_sad vp9_diamond_search_sad_c @@ -292,19 +308,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride); -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, 
int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -321,9 +324,16 @@ void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_16x16)(int16_t const *src_diff, int src_stride, int16_t *coeff); + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void 
vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_8x8)(int16_t const *src_diff, int src_stride, int16_t *coeff); + void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_10_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -332,7 +342,6 @@ RTCD_EXTERN void (*vp9_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_256_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -438,22 +447,9 @@ void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, co void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); RTCD_EXTERN void (*vp9_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int 
vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +RTCD_EXTERN void (*vp9_minmax_8x8)(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t 
*quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void vp9_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -469,228 +465,9 @@ RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coef void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad16x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_sse3(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x16x8 vp9_sad16x16x8_c - -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad16x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const 
ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad16x8_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void 
vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x8x8 vp9_sad16x8x8_c - -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x3 vp9_sad32x32x3_c - -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad32x32x8 vp9_sad32x32x8_c - -unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x4_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x4_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad4x4_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad4x4x3_c(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x4x8 vp9_sad4x4x8_c - -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x8_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x8_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad4x8_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int 
ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x8x8 vp9_sad4x8x8_c - -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int 
vp9_sad64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x3 vp9_sad64x64x3_c - -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad64x64x8 vp9_sad64x64x8_c - -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int 
vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad8x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x16x8 vp9_sad8x16x8_c - -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad8x4)(const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x4_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad8x4_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x4x8 vp9_sad8x4x8_c - -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad8x8_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); 
-void vp9_sad8x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x8x8 vp9_sad8x8x8_c +int16_t vp9_satd_c(const int16_t *coeff, int length); +int16_t vp9_satd_sse2(const int16_t *coeff, int length); +RTCD_EXTERN int16_t (*vp9_satd)(const int16_t *coeff, int length); unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -865,63 +642,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, 
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); RTCD_EXTERN int (*vp9_vector_var)(int16_t const *ref, int16_t const *src, const int bwl); @@ -943,6 +663,8 @@ static void setup_rtcd_internal(void) vp9_block_error = vp9_block_error_c; if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2; if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2; + vp9_block_error_fp = vp9_block_error_fp_c; + if (flags & HAS_SSE2) vp9_block_error_fp = vp9_block_error_fp_sse2; vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; @@ -998,6 +720,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; + vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_c; + if (flags & HAS_SSE2) vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_sse2; + vp9_dc_128_predictor_32x32 = vp9_dc_128_predictor_32x32_c; + if (flags & HAS_SSE2) vp9_dc_128_predictor_32x32 = vp9_dc_128_predictor_32x32_sse2; + vp9_dc_128_predictor_4x4 = vp9_dc_128_predictor_4x4_c; + if (flags & HAS_SSE) vp9_dc_128_predictor_4x4 = vp9_dc_128_predictor_4x4_sse; + vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_c; + if (flags & HAS_SSE) vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_sse; + vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_c; + if (flags & HAS_SSE2) vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_sse2; + vp9_dc_left_predictor_32x32 = vp9_dc_left_predictor_32x32_c; + if (flags & HAS_SSE2) vp9_dc_left_predictor_32x32 = vp9_dc_left_predictor_32x32_sse2; + vp9_dc_left_predictor_4x4 = vp9_dc_left_predictor_4x4_c; + if (flags & HAS_SSE) 
vp9_dc_left_predictor_4x4 = vp9_dc_left_predictor_4x4_sse; + vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_c; + if (flags & HAS_SSE) vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_sse; vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_c; if (flags & HAS_SSE2) vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_sse2; vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_c; @@ -1006,6 +744,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE) vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_sse; vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c; if (flags & HAS_SSE) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_sse; + vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_c; + if (flags & HAS_SSE2) vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_sse2; + vp9_dc_top_predictor_32x32 = vp9_dc_top_predictor_32x32_c; + if (flags & HAS_SSE2) vp9_dc_top_predictor_32x32 = vp9_dc_top_predictor_32x32_sse2; + vp9_dc_top_predictor_4x4 = vp9_dc_top_predictor_4x4_c; + if (flags & HAS_SSE) vp9_dc_top_predictor_4x4 = vp9_dc_top_predictor_4x4_sse; + vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_c; + if (flags & HAS_SSE) vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_sse; vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; vp9_fdct16x16_1 = vp9_fdct16x16_1_c; @@ -1040,13 +786,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; vp9_fwht4x4 = vp9_fwht4x4_c; if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx; - vp9_get16x16var = vp9_get16x16var_c; - if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; - vp9_get8x8var = vp9_get8x8var_c; - if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2; - vp9_get_mb_ss = vp9_get_mb_ss_c; - if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = 
vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -1055,14 +794,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; + vp9_hadamard_16x16 = vp9_hadamard_16x16_c; + if (flags & HAS_SSE2) vp9_hadamard_16x16 = vp9_hadamard_16x16_sse2; + vp9_hadamard_8x8 = vp9_hadamard_8x8_c; + if (flags & HAS_SSE2) vp9_hadamard_8x8 = vp9_hadamard_8x8_sse2; vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_SSE2) vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3; vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_SSE2) vp9_idct16x16_1_add = vp9_idct16x16_1_add_sse2; vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_SSE2) vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3; vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_sse2; vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; @@ -1112,121 +853,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2; vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2; - vp9_mse16x16 = vp9_mse16x16_c; - if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; - vp9_mse16x8 = vp9_mse16x8_c; - if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2; - vp9_mse8x16 = vp9_mse8x16_c; - if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2; - vp9_mse8x8 = vp9_mse8x8_c; - if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2; + vp9_minmax_8x8 = vp9_minmax_8x8_c; + if (flags & HAS_SSE2) vp9_minmax_8x8 = vp9_minmax_8x8_sse2; 
vp9_quantize_b = vp9_quantize_b_c; if (flags & HAS_SSE2) vp9_quantize_b = vp9_quantize_b_sse2; vp9_quantize_fp = vp9_quantize_fp_c; if (flags & HAS_SSE2) vp9_quantize_fp = vp9_quantize_fp_sse2; - vp9_sad16x16 = vp9_sad16x16_c; - if (flags & HAS_SSE2) vp9_sad16x16 = vp9_sad16x16_sse2; - vp9_sad16x16_avg = vp9_sad16x16_avg_c; - if (flags & HAS_SSE2) vp9_sad16x16_avg = vp9_sad16x16_avg_sse2; - vp9_sad16x16x3 = vp9_sad16x16x3_c; - if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; - if (flags & HAS_SSE2) vp9_sad16x16x4d = vp9_sad16x16x4d_sse2; - vp9_sad16x32 = vp9_sad16x32_c; - if (flags & HAS_SSE2) vp9_sad16x32 = vp9_sad16x32_sse2; - vp9_sad16x32_avg = vp9_sad16x32_avg_c; - if (flags & HAS_SSE2) vp9_sad16x32_avg = vp9_sad16x32_avg_sse2; - vp9_sad16x32x4d = vp9_sad16x32x4d_c; - if (flags & HAS_SSE2) vp9_sad16x32x4d = vp9_sad16x32x4d_sse2; - vp9_sad16x8 = vp9_sad16x8_c; - if (flags & HAS_SSE2) vp9_sad16x8 = vp9_sad16x8_sse2; - vp9_sad16x8_avg = vp9_sad16x8_avg_c; - if (flags & HAS_SSE2) vp9_sad16x8_avg = vp9_sad16x8_avg_sse2; - vp9_sad16x8x3 = vp9_sad16x8x3_c; - if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad16x8x4d = vp9_sad16x8x4d_c; - if (flags & HAS_SSE2) vp9_sad16x8x4d = vp9_sad16x8x4d_sse2; - vp9_sad32x16 = vp9_sad32x16_c; - if (flags & HAS_SSE2) vp9_sad32x16 = vp9_sad32x16_sse2; - if (flags & HAS_AVX2) vp9_sad32x16 = vp9_sad32x16_avx2; - vp9_sad32x16_avg = vp9_sad32x16_avg_c; - if (flags & HAS_SSE2) vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x16_avg = vp9_sad32x16_avg_avx2; - vp9_sad32x16x4d = vp9_sad32x16x4d_c; - if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2; - vp9_sad32x32 = vp9_sad32x32_c; - if (flags & HAS_SSE2) vp9_sad32x32 = vp9_sad32x32_sse2; - if (flags & HAS_AVX2) vp9_sad32x32 = vp9_sad32x32_avx2; - vp9_sad32x32_avg = 
vp9_sad32x32_avg_c; - if (flags & HAS_SSE2) vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x32_avg = vp9_sad32x32_avg_avx2; - vp9_sad32x32x4d = vp9_sad32x32x4d_c; - if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2; - vp9_sad32x64 = vp9_sad32x64_c; - if (flags & HAS_SSE2) vp9_sad32x64 = vp9_sad32x64_sse2; - if (flags & HAS_AVX2) vp9_sad32x64 = vp9_sad32x64_avx2; - vp9_sad32x64_avg = vp9_sad32x64_avg_c; - if (flags & HAS_SSE2) vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x64_avg = vp9_sad32x64_avg_avx2; - vp9_sad32x64x4d = vp9_sad32x64x4d_c; - if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2; - vp9_sad4x4 = vp9_sad4x4_c; - if (flags & HAS_SSE) vp9_sad4x4 = vp9_sad4x4_sse; - vp9_sad4x4_avg = vp9_sad4x4_avg_c; - if (flags & HAS_SSE) vp9_sad4x4_avg = vp9_sad4x4_avg_sse; - vp9_sad4x4x3 = vp9_sad4x4x3_c; - if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad4x4x4d = vp9_sad4x4x4d_c; - if (flags & HAS_SSE) vp9_sad4x4x4d = vp9_sad4x4x4d_sse; - vp9_sad4x8 = vp9_sad4x8_c; - if (flags & HAS_SSE) vp9_sad4x8 = vp9_sad4x8_sse; - vp9_sad4x8_avg = vp9_sad4x8_avg_c; - if (flags & HAS_SSE) vp9_sad4x8_avg = vp9_sad4x8_avg_sse; - vp9_sad4x8x4d = vp9_sad4x8x4d_c; - if (flags & HAS_SSE) vp9_sad4x8x4d = vp9_sad4x8x4d_sse; - vp9_sad64x32 = vp9_sad64x32_c; - if (flags & HAS_SSE2) vp9_sad64x32 = vp9_sad64x32_sse2; - if (flags & HAS_AVX2) vp9_sad64x32 = vp9_sad64x32_avx2; - vp9_sad64x32_avg = vp9_sad64x32_avg_c; - if (flags & HAS_SSE2) vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x32_avg = vp9_sad64x32_avg_avx2; - vp9_sad64x32x4d = vp9_sad64x32x4d_c; - if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2; - vp9_sad64x64 = vp9_sad64x64_c; - if (flags & HAS_SSE2) vp9_sad64x64 = vp9_sad64x64_sse2; - if (flags & HAS_AVX2) vp9_sad64x64 = vp9_sad64x64_avx2; - vp9_sad64x64_avg = vp9_sad64x64_avg_c; - if 
(flags & HAS_SSE2) vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x64_avg = vp9_sad64x64_avg_avx2; - vp9_sad64x64x4d = vp9_sad64x64x4d_c; - if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - if (flags & HAS_AVX2) vp9_sad64x64x4d = vp9_sad64x64x4d_avx2; - vp9_sad8x16 = vp9_sad8x16_c; - if (flags & HAS_SSE2) vp9_sad8x16 = vp9_sad8x16_sse2; - vp9_sad8x16_avg = vp9_sad8x16_avg_c; - if (flags & HAS_SSE2) vp9_sad8x16_avg = vp9_sad8x16_avg_sse2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; - if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x16x4d = vp9_sad8x16x4d_c; - if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2; - vp9_sad8x4 = vp9_sad8x4_c; - if (flags & HAS_SSE2) vp9_sad8x4 = vp9_sad8x4_sse2; - vp9_sad8x4_avg = vp9_sad8x4_avg_c; - if (flags & HAS_SSE2) vp9_sad8x4_avg = vp9_sad8x4_avg_sse2; - vp9_sad8x4x4d = vp9_sad8x4x4d_c; - if (flags & HAS_SSE2) vp9_sad8x4x4d = vp9_sad8x4x4d_sse2; - vp9_sad8x8 = vp9_sad8x8_c; - if (flags & HAS_SSE2) vp9_sad8x8 = vp9_sad8x8_sse2; - vp9_sad8x8_avg = vp9_sad8x8_avg_c; - if (flags & HAS_SSE2) vp9_sad8x8_avg = vp9_sad8x8_avg_sse2; - vp9_sad8x8x3 = vp9_sad8x8x3_c; - if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - vp9_sad8x8x4d = vp9_sad8x8x4d_c; - if (flags & HAS_SSE2) vp9_sad8x8x4d = vp9_sad8x8x4d_sse2; + vp9_satd = vp9_satd_c; + if (flags & HAS_SSE2) vp9_satd = vp9_satd_sse2; vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; @@ -1327,37 +961,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse; vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse; - vp9_variance16x16 = vp9_variance16x16_c; - if (flags & HAS_SSE2) vp9_variance16x16 = 
vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance16x32 = vp9_variance16x32_c; - if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2; - vp9_variance16x8 = vp9_variance16x8_c; - if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2; - vp9_variance32x16 = vp9_variance32x16_c; - if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_c; - if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance32x64 = vp9_variance32x64_c; - if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2; - vp9_variance4x4 = vp9_variance4x4_c; - if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2; - vp9_variance4x8 = vp9_variance4x8_c; - if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2; - vp9_variance64x32 = vp9_variance64x32_c; - if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_c; - if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; - vp9_variance8x16 = vp9_variance8x16_c; - if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2; - vp9_variance8x4 = vp9_variance8x4_c; - if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2; - vp9_variance8x8 = vp9_variance8x8_c; - if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2; vp9_vector_var = vp9_vector_var_c; if (flags & HAS_SSE2) vp9_vector_var = vp9_vector_var_sse2; } diff --git a/media/libvpx/vp9_rtcd_x86-linux-gcc.h b/media/libvpx/vp9_rtcd_x86-linux-gcc.h index 1951a460bef..8b7af70d737 100644 --- a/media/libvpx/vp9_rtcd_x86-linux-gcc.h +++ b/media/libvpx/vp9_rtcd_x86-linux-gcc.h @@ -40,6 +40,10 @@ 
RTCD_EXTERN unsigned int (*vp9_avg_8x8)(const uint8_t *, int p); int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); #define vp9_block_error vp9_block_error_c +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +RTCD_EXTERN int64_t (*vp9_block_error_fp)(const int16_t *coeff, const int16_t *dqcoeff, int block_size); + void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -263,15 +267,6 @@ RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get16x16var vp9_get16x16var_c - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_c - -unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c @@ -284,9 +279,16 @@ void vp9_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t 
y_stride, const uint8_t *abov void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_8x8 vp9_h_predictor_8x8_c +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_16x16)(int16_t const *src_diff, int src_stride, int16_t *coeff); + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_8x8)(int16_t const *src_diff, int src_stride, int16_t *coeff); + void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_10_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -295,7 +297,6 @@ RTCD_EXTERN void (*vp9_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_256_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -400,17 +401,9 @@ void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, co void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t 
*limit1, const uint8_t *thresh1); RTCD_EXTERN void (*vp9_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_c +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +RTCD_EXTERN void (*vp9_minmax_8x8)(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void vp9_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -426,190 +419,9 @@ RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coef 
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x16 vp9_sad16x16_c - -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x16_avg vp9_sad16x16_avg_c - -void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x16x8 vp9_sad16x16x8_c - -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride); -#define vp9_sad16x32 vp9_sad16x32_c - -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x32_avg vp9_sad16x32_avg_c - -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x8 vp9_sad16x8_c - -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x8_avg vp9_sad16x8_avg_c - -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void 
vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x8x8 vp9_sad16x8x8_c - -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x16 vp9_sad32x16_c - -unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x16_avg vp9_sad32x16_avg_c - -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x32 vp9_sad32x32_c - -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x32_avg vp9_sad32x32_avg_c - -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x3 vp9_sad32x32x3_c - -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t 
*sad_array); -#define vp9_sad32x32x8 vp9_sad32x32x8_c - -unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x64 vp9_sad32x64_c - -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x64_avg vp9_sad32x64_avg_c - -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x4 vp9_sad4x4_c - -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x4_avg vp9_sad4x4_avg_c - -void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - 
-void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x4x8 vp9_sad4x4x8_c - -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x8 vp9_sad4x8_c - -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x8_avg vp9_sad4x8_avg_c - -void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x8x8 vp9_sad4x8x8_c - -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad64x32 vp9_sad64x32_c - -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad64x32_avg vp9_sad64x32_avg_c - -void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad64x64 
vp9_sad64x64_c - -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad64x64_avg vp9_sad64x64_avg_c - -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x3 vp9_sad64x64x3_c - -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad64x64x8 vp9_sad64x64x8_c - -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x16 vp9_sad8x16_c - -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x16_avg vp9_sad8x16_avg_c - -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int 
*sad_array); -RTCD_EXTERN void (*vp9_sad8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x16x8 vp9_sad8x16x8_c - -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x4 vp9_sad8x4_c - -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x4_avg vp9_sad8x4_avg_c - -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x4x8 vp9_sad8x4x8_c - -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x8 vp9_sad8x8_c - -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x8_avg vp9_sad8x8_avg_c - -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void 
vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x8x8 vp9_sad8x8x8_c +int16_t vp9_satd_c(const int16_t *coeff, int length); +int16_t vp9_satd_sse2(const int16_t *coeff, int length); +RTCD_EXTERN int16_t (*vp9_satd)(const int16_t *coeff, int length); unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); #define vp9_sub_pixel_avg_variance16x16 vp9_sub_pixel_avg_variance16x16_c @@ -720,45 +532,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x16 vp9_variance16x16_c - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define 
vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x32 vp9_variance32x32_c - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_c - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x32 vp9_variance64x32_c - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x64 vp9_variance64x64_c - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_c - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); RTCD_EXTERN int (*vp9_vector_var)(int16_t const *ref, int16_t const *src, const int bwl); @@ -777,6 +550,8 @@ static void setup_rtcd_internal(void) if 
(flags & HAS_SSE2) vp9_avg_4x4 = vp9_avg_4x4_sse2; vp9_avg_8x8 = vp9_avg_8x8_c; if (flags & HAS_SSE2) vp9_avg_8x8 = vp9_avg_8x8_sse2; + vp9_block_error_fp = vp9_block_error_fp_c; + if (flags & HAS_SSE2) vp9_block_error_fp = vp9_block_error_fp_sse2; vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; @@ -825,14 +600,16 @@ static void setup_rtcd_internal(void) vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; + vp9_hadamard_16x16 = vp9_hadamard_16x16_c; + if (flags & HAS_SSE2) vp9_hadamard_16x16 = vp9_hadamard_16x16_sse2; + vp9_hadamard_8x8 = vp9_hadamard_8x8_c; + if (flags & HAS_SSE2) vp9_hadamard_8x8 = vp9_hadamard_8x8_sse2; vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_SSE2) vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3; vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_SSE2) vp9_idct16x16_1_add = vp9_idct16x16_1_add_sse2; vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_SSE2) vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3; vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_sse2; vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; @@ -881,48 +658,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2; vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2; + vp9_minmax_8x8 = vp9_minmax_8x8_c; + if (flags & HAS_SSE2) vp9_minmax_8x8 = vp9_minmax_8x8_sse2; vp9_quantize_b = vp9_quantize_b_c; if (flags & HAS_SSE2) vp9_quantize_b = vp9_quantize_b_sse2; 
vp9_quantize_fp = vp9_quantize_fp_c; if (flags & HAS_SSE2) vp9_quantize_fp = vp9_quantize_fp_sse2; - vp9_sad16x16x3 = vp9_sad16x16x3_c; - if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; - if (flags & HAS_SSE2) vp9_sad16x16x4d = vp9_sad16x16x4d_sse2; - vp9_sad16x32x4d = vp9_sad16x32x4d_c; - if (flags & HAS_SSE2) vp9_sad16x32x4d = vp9_sad16x32x4d_sse2; - vp9_sad16x8x3 = vp9_sad16x8x3_c; - if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad16x8x4d = vp9_sad16x8x4d_c; - if (flags & HAS_SSE2) vp9_sad16x8x4d = vp9_sad16x8x4d_sse2; - vp9_sad32x16x4d = vp9_sad32x16x4d_c; - if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2; - vp9_sad32x32x4d = vp9_sad32x32x4d_c; - if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - vp9_sad32x64x4d = vp9_sad32x64x4d_c; - if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2; - vp9_sad4x4x3 = vp9_sad4x4x3_c; - if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad4x4x4d = vp9_sad4x4x4d_c; - if (flags & HAS_SSE) vp9_sad4x4x4d = vp9_sad4x4x4d_sse; - vp9_sad4x8x4d = vp9_sad4x8x4d_c; - if (flags & HAS_SSE) vp9_sad4x8x4d = vp9_sad4x8x4d_sse; - vp9_sad64x32x4d = vp9_sad64x32x4d_c; - if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2; - vp9_sad64x64x4d = vp9_sad64x64x4d_c; - if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; - if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x16x4d = vp9_sad8x16x4d_c; - if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2; - vp9_sad8x4x4d = vp9_sad8x4x4d_c; - if (flags & HAS_SSE2) vp9_sad8x4x4d = vp9_sad8x4x4d_sse2; - vp9_sad8x8x3 = vp9_sad8x8x3_c; - if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - vp9_sad8x8x4d = vp9_sad8x8x4d_c; - if (flags & HAS_SSE2) vp9_sad8x8x4d = vp9_sad8x8x4d_sse2; + 
vp9_satd = vp9_satd_c; + if (flags & HAS_SSE2) vp9_satd = vp9_satd_sse2; vp9_temporal_filter_apply = vp9_temporal_filter_apply_c; if (flags & HAS_SSE2) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse2; vp9_vector_var = vp9_vector_var_c; diff --git a/media/libvpx/vp9_rtcd_x86-win32-gcc.h b/media/libvpx/vp9_rtcd_x86-win32-gcc.h index cf98ec96490..0725c15ba04 100644 --- a/media/libvpx/vp9_rtcd_x86-win32-gcc.h +++ b/media/libvpx/vp9_rtcd_x86-win32-gcc.h @@ -42,6 +42,10 @@ int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +RTCD_EXTERN int64_t (*vp9_block_error_fp)(const int16_t *coeff, const int16_t *dqcoeff, int block_size); + void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -171,28 +175,36 @@ void vp9_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*vp9_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t 
*above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c +void vp9_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c +void vp9_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c +void vp9_dc_128_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); 
void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c +void vp9_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c +void vp9_dc_left_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void vp9_dc_left_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -211,16 +223,20 @@ void vp9_dc_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a RTCD_EXTERN void (*vp9_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN 
void (*vp9_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c +void vp9_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c +void vp9_dc_top_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); #define vp9_diamond_search_sad vp9_diamond_search_sad_c @@ -292,19 +308,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride); -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -321,9 +324,16 @@ void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_16x16)(int16_t const *src_diff, int src_stride, 
int16_t *coeff); + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_8x8)(int16_t const *src_diff, int src_stride, int16_t *coeff); + void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_10_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -332,7 +342,6 @@ RTCD_EXTERN void (*vp9_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_256_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -438,22 +447,9 @@ void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, co void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); RTCD_EXTERN void (*vp9_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +RTCD_EXTERN void (*vp9_minmax_8x8)(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, 
const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void vp9_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -469,228 +465,9 @@ RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coef void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad16x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x16x8 vp9_sad16x16x8_c - -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad16x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int 
*sad_array); -void vp9_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad16x8_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x4d)(const uint8_t *src_ptr, int 
src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x8x8 vp9_sad16x8x8_c - -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned 
int vp9_sad32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x3 vp9_sad32x32x3_c - -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad32x32x8 vp9_sad32x32x8_c - -unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int 
vp9_sad32x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x4_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x4_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad4x4_avg)(const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x4x8 vp9_sad4x4x8_c - -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x8_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x8_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad4x8_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void 
vp9_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x8x8 vp9_sad4x8x8_c - -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad64x64_c(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x3 vp9_sad64x64x3_c - -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad64x64x8 vp9_sad64x64x8_c - -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, 
int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad8x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x16x8 vp9_sad8x16x8_c - -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN 
unsigned int (*vp9_sad8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x4_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad8x4_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x4x8 vp9_sad8x4x8_c - -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad8x8_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x8x8 vp9_sad8x8x8_c +int16_t vp9_satd_c(const int16_t *coeff, int length); +int16_t vp9_satd_sse2(const int16_t *coeff, int length); +RTCD_EXTERN int16_t (*vp9_satd)(const int16_t *coeff, int length); unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -865,63 +642,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int 
*sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); 
-unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int 
vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int 
(*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); RTCD_EXTERN int (*vp9_vector_var)(int16_t const *ref, int16_t const *src, const int bwl); @@ -943,6 +663,8 @@ static void setup_rtcd_internal(void) vp9_block_error = vp9_block_error_c; if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2; if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2; + vp9_block_error_fp = vp9_block_error_fp_c; + if (flags & HAS_SSE2) vp9_block_error_fp = vp9_block_error_fp_sse2; vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; @@ -998,6 +720,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; + vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_c; + if (flags & HAS_SSE2) vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_sse2; + vp9_dc_128_predictor_32x32 = vp9_dc_128_predictor_32x32_c; + if (flags & HAS_SSE2) vp9_dc_128_predictor_32x32 = vp9_dc_128_predictor_32x32_sse2; + vp9_dc_128_predictor_4x4 = vp9_dc_128_predictor_4x4_c; + if (flags & HAS_SSE) vp9_dc_128_predictor_4x4 = vp9_dc_128_predictor_4x4_sse; + vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_c; + if (flags & HAS_SSE) vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_sse; + vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_c; + if (flags & HAS_SSE2) vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_sse2; + vp9_dc_left_predictor_32x32 = vp9_dc_left_predictor_32x32_c; + if (flags & HAS_SSE2) vp9_dc_left_predictor_32x32 = vp9_dc_left_predictor_32x32_sse2; + 
vp9_dc_left_predictor_4x4 = vp9_dc_left_predictor_4x4_c; + if (flags & HAS_SSE) vp9_dc_left_predictor_4x4 = vp9_dc_left_predictor_4x4_sse; + vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_c; + if (flags & HAS_SSE) vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_sse; vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_c; if (flags & HAS_SSE2) vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_sse2; vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_c; @@ -1006,6 +744,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE) vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_sse; vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c; if (flags & HAS_SSE) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_sse; + vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_c; + if (flags & HAS_SSE2) vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_sse2; + vp9_dc_top_predictor_32x32 = vp9_dc_top_predictor_32x32_c; + if (flags & HAS_SSE2) vp9_dc_top_predictor_32x32 = vp9_dc_top_predictor_32x32_sse2; + vp9_dc_top_predictor_4x4 = vp9_dc_top_predictor_4x4_c; + if (flags & HAS_SSE) vp9_dc_top_predictor_4x4 = vp9_dc_top_predictor_4x4_sse; + vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_c; + if (flags & HAS_SSE) vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_sse; vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; vp9_fdct16x16_1 = vp9_fdct16x16_1_c; @@ -1040,13 +786,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; vp9_fwht4x4 = vp9_fwht4x4_c; if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx; - vp9_get16x16var = vp9_get16x16var_c; - if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; - vp9_get8x8var = vp9_get8x8var_c; - if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2; - vp9_get_mb_ss = vp9_get_mb_ss_c; - if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; vp9_h_predictor_16x16 = 
vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -1055,14 +794,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; + vp9_hadamard_16x16 = vp9_hadamard_16x16_c; + if (flags & HAS_SSE2) vp9_hadamard_16x16 = vp9_hadamard_16x16_sse2; + vp9_hadamard_8x8 = vp9_hadamard_8x8_c; + if (flags & HAS_SSE2) vp9_hadamard_8x8 = vp9_hadamard_8x8_sse2; vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_SSE2) vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3; vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_SSE2) vp9_idct16x16_1_add = vp9_idct16x16_1_add_sse2; vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_SSE2) vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3; vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_sse2; vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; @@ -1112,121 +853,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2; vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2; - vp9_mse16x16 = vp9_mse16x16_c; - if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; - vp9_mse16x8 = vp9_mse16x8_c; - if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2; - vp9_mse8x16 = vp9_mse8x16_c; - if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2; - vp9_mse8x8 = vp9_mse8x8_c; - if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2; + vp9_minmax_8x8 = 
vp9_minmax_8x8_c; + if (flags & HAS_SSE2) vp9_minmax_8x8 = vp9_minmax_8x8_sse2; vp9_quantize_b = vp9_quantize_b_c; if (flags & HAS_SSE2) vp9_quantize_b = vp9_quantize_b_sse2; vp9_quantize_fp = vp9_quantize_fp_c; if (flags & HAS_SSE2) vp9_quantize_fp = vp9_quantize_fp_sse2; - vp9_sad16x16 = vp9_sad16x16_c; - if (flags & HAS_SSE2) vp9_sad16x16 = vp9_sad16x16_sse2; - vp9_sad16x16_avg = vp9_sad16x16_avg_c; - if (flags & HAS_SSE2) vp9_sad16x16_avg = vp9_sad16x16_avg_sse2; - vp9_sad16x16x3 = vp9_sad16x16x3_c; - if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; - if (flags & HAS_SSE2) vp9_sad16x16x4d = vp9_sad16x16x4d_sse2; - vp9_sad16x32 = vp9_sad16x32_c; - if (flags & HAS_SSE2) vp9_sad16x32 = vp9_sad16x32_sse2; - vp9_sad16x32_avg = vp9_sad16x32_avg_c; - if (flags & HAS_SSE2) vp9_sad16x32_avg = vp9_sad16x32_avg_sse2; - vp9_sad16x32x4d = vp9_sad16x32x4d_c; - if (flags & HAS_SSE2) vp9_sad16x32x4d = vp9_sad16x32x4d_sse2; - vp9_sad16x8 = vp9_sad16x8_c; - if (flags & HAS_SSE2) vp9_sad16x8 = vp9_sad16x8_sse2; - vp9_sad16x8_avg = vp9_sad16x8_avg_c; - if (flags & HAS_SSE2) vp9_sad16x8_avg = vp9_sad16x8_avg_sse2; - vp9_sad16x8x3 = vp9_sad16x8x3_c; - if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad16x8x4d = vp9_sad16x8x4d_c; - if (flags & HAS_SSE2) vp9_sad16x8x4d = vp9_sad16x8x4d_sse2; - vp9_sad32x16 = vp9_sad32x16_c; - if (flags & HAS_SSE2) vp9_sad32x16 = vp9_sad32x16_sse2; - if (flags & HAS_AVX2) vp9_sad32x16 = vp9_sad32x16_avx2; - vp9_sad32x16_avg = vp9_sad32x16_avg_c; - if (flags & HAS_SSE2) vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x16_avg = vp9_sad32x16_avg_avx2; - vp9_sad32x16x4d = vp9_sad32x16x4d_c; - if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2; - vp9_sad32x32 = vp9_sad32x32_c; - if (flags & HAS_SSE2) vp9_sad32x32 = 
vp9_sad32x32_sse2; - if (flags & HAS_AVX2) vp9_sad32x32 = vp9_sad32x32_avx2; - vp9_sad32x32_avg = vp9_sad32x32_avg_c; - if (flags & HAS_SSE2) vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x32_avg = vp9_sad32x32_avg_avx2; - vp9_sad32x32x4d = vp9_sad32x32x4d_c; - if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2; - vp9_sad32x64 = vp9_sad32x64_c; - if (flags & HAS_SSE2) vp9_sad32x64 = vp9_sad32x64_sse2; - if (flags & HAS_AVX2) vp9_sad32x64 = vp9_sad32x64_avx2; - vp9_sad32x64_avg = vp9_sad32x64_avg_c; - if (flags & HAS_SSE2) vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x64_avg = vp9_sad32x64_avg_avx2; - vp9_sad32x64x4d = vp9_sad32x64x4d_c; - if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2; - vp9_sad4x4 = vp9_sad4x4_c; - if (flags & HAS_SSE) vp9_sad4x4 = vp9_sad4x4_sse; - vp9_sad4x4_avg = vp9_sad4x4_avg_c; - if (flags & HAS_SSE) vp9_sad4x4_avg = vp9_sad4x4_avg_sse; - vp9_sad4x4x3 = vp9_sad4x4x3_c; - if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad4x4x4d = vp9_sad4x4x4d_c; - if (flags & HAS_SSE) vp9_sad4x4x4d = vp9_sad4x4x4d_sse; - vp9_sad4x8 = vp9_sad4x8_c; - if (flags & HAS_SSE) vp9_sad4x8 = vp9_sad4x8_sse; - vp9_sad4x8_avg = vp9_sad4x8_avg_c; - if (flags & HAS_SSE) vp9_sad4x8_avg = vp9_sad4x8_avg_sse; - vp9_sad4x8x4d = vp9_sad4x8x4d_c; - if (flags & HAS_SSE) vp9_sad4x8x4d = vp9_sad4x8x4d_sse; - vp9_sad64x32 = vp9_sad64x32_c; - if (flags & HAS_SSE2) vp9_sad64x32 = vp9_sad64x32_sse2; - if (flags & HAS_AVX2) vp9_sad64x32 = vp9_sad64x32_avx2; - vp9_sad64x32_avg = vp9_sad64x32_avg_c; - if (flags & HAS_SSE2) vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x32_avg = vp9_sad64x32_avg_avx2; - vp9_sad64x32x4d = vp9_sad64x32x4d_c; - if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2; - vp9_sad64x64 = vp9_sad64x64_c; - if (flags & HAS_SSE2) vp9_sad64x64 = vp9_sad64x64_sse2; - if 
(flags & HAS_AVX2) vp9_sad64x64 = vp9_sad64x64_avx2; - vp9_sad64x64_avg = vp9_sad64x64_avg_c; - if (flags & HAS_SSE2) vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x64_avg = vp9_sad64x64_avg_avx2; - vp9_sad64x64x4d = vp9_sad64x64x4d_c; - if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - if (flags & HAS_AVX2) vp9_sad64x64x4d = vp9_sad64x64x4d_avx2; - vp9_sad8x16 = vp9_sad8x16_c; - if (flags & HAS_SSE2) vp9_sad8x16 = vp9_sad8x16_sse2; - vp9_sad8x16_avg = vp9_sad8x16_avg_c; - if (flags & HAS_SSE2) vp9_sad8x16_avg = vp9_sad8x16_avg_sse2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; - if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x16x4d = vp9_sad8x16x4d_c; - if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2; - vp9_sad8x4 = vp9_sad8x4_c; - if (flags & HAS_SSE2) vp9_sad8x4 = vp9_sad8x4_sse2; - vp9_sad8x4_avg = vp9_sad8x4_avg_c; - if (flags & HAS_SSE2) vp9_sad8x4_avg = vp9_sad8x4_avg_sse2; - vp9_sad8x4x4d = vp9_sad8x4x4d_c; - if (flags & HAS_SSE2) vp9_sad8x4x4d = vp9_sad8x4x4d_sse2; - vp9_sad8x8 = vp9_sad8x8_c; - if (flags & HAS_SSE2) vp9_sad8x8 = vp9_sad8x8_sse2; - vp9_sad8x8_avg = vp9_sad8x8_avg_c; - if (flags & HAS_SSE2) vp9_sad8x8_avg = vp9_sad8x8_avg_sse2; - vp9_sad8x8x3 = vp9_sad8x8x3_c; - if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - vp9_sad8x8x4d = vp9_sad8x8x4d_c; - if (flags & HAS_SSE2) vp9_sad8x8x4d = vp9_sad8x8x4d_sse2; + vp9_satd = vp9_satd_c; + if (flags & HAS_SSE2) vp9_satd = vp9_satd_sse2; vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; @@ -1327,37 +961,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse; vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse; - 
vp9_variance16x16 = vp9_variance16x16_c; - if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance16x32 = vp9_variance16x32_c; - if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2; - vp9_variance16x8 = vp9_variance16x8_c; - if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2; - vp9_variance32x16 = vp9_variance32x16_c; - if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_c; - if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance32x64 = vp9_variance32x64_c; - if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2; - vp9_variance4x4 = vp9_variance4x4_c; - if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2; - vp9_variance4x8 = vp9_variance4x8_c; - if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2; - vp9_variance64x32 = vp9_variance64x32_c; - if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_c; - if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; - vp9_variance8x16 = vp9_variance8x16_c; - if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2; - vp9_variance8x4 = vp9_variance8x4_c; - if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2; - vp9_variance8x8 = vp9_variance8x8_c; - if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2; vp9_vector_var = vp9_vector_var_c; if (flags & HAS_SSE2) vp9_vector_var = vp9_vector_var_sse2; } diff --git a/media/libvpx/vp9_rtcd_x86-win32-vs12.h b/media/libvpx/vp9_rtcd_x86-win32-vs12.h index cf98ec96490..0725c15ba04 100644 --- 
a/media/libvpx/vp9_rtcd_x86-win32-vs12.h +++ b/media/libvpx/vp9_rtcd_x86-win32-vs12.h @@ -42,6 +42,10 @@ int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +RTCD_EXTERN int64_t (*vp9_block_error_fp)(const int16_t *coeff, const int16_t *dqcoeff, int block_size); + void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -171,28 +175,36 @@ void vp9_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*vp9_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c +void vp9_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_32x32_c(uint8_t 
*dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c +void vp9_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c +void vp9_dc_128_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c +void vp9_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, 
const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c +void vp9_dc_left_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void vp9_dc_left_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -211,16 +223,20 @@ void vp9_dc_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a RTCD_EXTERN void (*vp9_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c +void vp9_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t 
*above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c +void vp9_dc_top_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); #define vp9_diamond_search_sad vp9_diamond_search_sad_c @@ -292,19 +308,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride); -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -321,9 +324,16 @@ void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_16x16)(int16_t const *src_diff, int src_stride, int16_t *coeff); + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_8x8)(int16_t const *src_diff, int src_stride, int16_t *coeff); + void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void 
vp9_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_10_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -332,7 +342,6 @@ RTCD_EXTERN void (*vp9_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_256_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -438,22 +447,9 @@ void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, co void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); RTCD_EXTERN void (*vp9_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int 
vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +RTCD_EXTERN void (*vp9_minmax_8x8)(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void vp9_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t 
*round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -469,228 +465,9 @@ RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coef void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad16x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x3)(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x16x8 vp9_sad16x16x8_c - -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad16x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, 
const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad16x8_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x8x8 vp9_sad16x8x8_c - -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int 
vp9_sad32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); 
-unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x3 vp9_sad32x32x3_c - -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad32x32x8 vp9_sad32x32x8_c - -unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned 
int vp9_sad32x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x4_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x4_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad4x4_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x3)(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x4x8 vp9_sad4x4x8_c - -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x8_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x8_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad4x8_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, 
uint32_t *sad_array); -#define vp9_sad4x8x8 vp9_sad4x8x8_c - -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x64)(const uint8_t *src_ptr, int source_stride, 
const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x3 vp9_sad64x64x3_c - -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad64x64x8 vp9_sad64x64x8_c - -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad8x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x16x8 vp9_sad8x16x8_c - -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x4_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t 
*second_pred); -RTCD_EXTERN unsigned int (*vp9_sad8x4_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x4x8 vp9_sad8x4x8_c - -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad8x8_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x4d_c(const 
uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x8x8 vp9_sad8x8x8_c +int16_t vp9_satd_c(const int16_t *coeff, int length); +int16_t vp9_satd_sse2(const int16_t *coeff, int length); +RTCD_EXTERN int16_t (*vp9_satd)(const int16_t *coeff, int length); unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -865,63 +642,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); RTCD_EXTERN int (*vp9_vector_var)(int16_t const *ref, int16_t const *src, const int bwl); @@ -943,6 +663,8 
@@ static void setup_rtcd_internal(void) vp9_block_error = vp9_block_error_c; if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2; if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2; + vp9_block_error_fp = vp9_block_error_fp_c; + if (flags & HAS_SSE2) vp9_block_error_fp = vp9_block_error_fp_sse2; vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; @@ -998,6 +720,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; + vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_c; + if (flags & HAS_SSE2) vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_sse2; + vp9_dc_128_predictor_32x32 = vp9_dc_128_predictor_32x32_c; + if (flags & HAS_SSE2) vp9_dc_128_predictor_32x32 = vp9_dc_128_predictor_32x32_sse2; + vp9_dc_128_predictor_4x4 = vp9_dc_128_predictor_4x4_c; + if (flags & HAS_SSE) vp9_dc_128_predictor_4x4 = vp9_dc_128_predictor_4x4_sse; + vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_c; + if (flags & HAS_SSE) vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_sse; + vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_c; + if (flags & HAS_SSE2) vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_sse2; + vp9_dc_left_predictor_32x32 = vp9_dc_left_predictor_32x32_c; + if (flags & HAS_SSE2) vp9_dc_left_predictor_32x32 = vp9_dc_left_predictor_32x32_sse2; + vp9_dc_left_predictor_4x4 = vp9_dc_left_predictor_4x4_c; + if (flags & HAS_SSE) vp9_dc_left_predictor_4x4 = vp9_dc_left_predictor_4x4_sse; + vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_c; + if (flags & HAS_SSE) vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_sse; vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_c; if (flags & HAS_SSE2) vp9_dc_predictor_16x16 = 
vp9_dc_predictor_16x16_sse2; vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_c; @@ -1006,6 +744,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE) vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_sse; vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c; if (flags & HAS_SSE) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_sse; + vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_c; + if (flags & HAS_SSE2) vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_sse2; + vp9_dc_top_predictor_32x32 = vp9_dc_top_predictor_32x32_c; + if (flags & HAS_SSE2) vp9_dc_top_predictor_32x32 = vp9_dc_top_predictor_32x32_sse2; + vp9_dc_top_predictor_4x4 = vp9_dc_top_predictor_4x4_c; + if (flags & HAS_SSE) vp9_dc_top_predictor_4x4 = vp9_dc_top_predictor_4x4_sse; + vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_c; + if (flags & HAS_SSE) vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_sse; vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; vp9_fdct16x16_1 = vp9_fdct16x16_1_c; @@ -1040,13 +786,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; vp9_fwht4x4 = vp9_fwht4x4_c; if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx; - vp9_get16x16var = vp9_get16x16var_c; - if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; - vp9_get8x8var = vp9_get8x8var_c; - if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2; - vp9_get_mb_ss = vp9_get_mb_ss_c; - if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -1055,14 +794,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = 
vp9_h_predictor_8x8_ssse3; + vp9_hadamard_16x16 = vp9_hadamard_16x16_c; + if (flags & HAS_SSE2) vp9_hadamard_16x16 = vp9_hadamard_16x16_sse2; + vp9_hadamard_8x8 = vp9_hadamard_8x8_c; + if (flags & HAS_SSE2) vp9_hadamard_8x8 = vp9_hadamard_8x8_sse2; vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_SSE2) vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3; vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_SSE2) vp9_idct16x16_1_add = vp9_idct16x16_1_add_sse2; vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_SSE2) vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3; vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_sse2; vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; @@ -1112,121 +853,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2; vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2; - vp9_mse16x16 = vp9_mse16x16_c; - if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; - vp9_mse16x8 = vp9_mse16x8_c; - if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2; - vp9_mse8x16 = vp9_mse8x16_c; - if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2; - vp9_mse8x8 = vp9_mse8x8_c; - if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2; + vp9_minmax_8x8 = vp9_minmax_8x8_c; + if (flags & HAS_SSE2) vp9_minmax_8x8 = vp9_minmax_8x8_sse2; vp9_quantize_b = vp9_quantize_b_c; if (flags & HAS_SSE2) vp9_quantize_b = vp9_quantize_b_sse2; vp9_quantize_fp = vp9_quantize_fp_c; if (flags & HAS_SSE2) vp9_quantize_fp = vp9_quantize_fp_sse2; - vp9_sad16x16 = vp9_sad16x16_c; - if (flags & HAS_SSE2) vp9_sad16x16 = vp9_sad16x16_sse2; - 
vp9_sad16x16_avg = vp9_sad16x16_avg_c; - if (flags & HAS_SSE2) vp9_sad16x16_avg = vp9_sad16x16_avg_sse2; - vp9_sad16x16x3 = vp9_sad16x16x3_c; - if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; - if (flags & HAS_SSE2) vp9_sad16x16x4d = vp9_sad16x16x4d_sse2; - vp9_sad16x32 = vp9_sad16x32_c; - if (flags & HAS_SSE2) vp9_sad16x32 = vp9_sad16x32_sse2; - vp9_sad16x32_avg = vp9_sad16x32_avg_c; - if (flags & HAS_SSE2) vp9_sad16x32_avg = vp9_sad16x32_avg_sse2; - vp9_sad16x32x4d = vp9_sad16x32x4d_c; - if (flags & HAS_SSE2) vp9_sad16x32x4d = vp9_sad16x32x4d_sse2; - vp9_sad16x8 = vp9_sad16x8_c; - if (flags & HAS_SSE2) vp9_sad16x8 = vp9_sad16x8_sse2; - vp9_sad16x8_avg = vp9_sad16x8_avg_c; - if (flags & HAS_SSE2) vp9_sad16x8_avg = vp9_sad16x8_avg_sse2; - vp9_sad16x8x3 = vp9_sad16x8x3_c; - if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad16x8x4d = vp9_sad16x8x4d_c; - if (flags & HAS_SSE2) vp9_sad16x8x4d = vp9_sad16x8x4d_sse2; - vp9_sad32x16 = vp9_sad32x16_c; - if (flags & HAS_SSE2) vp9_sad32x16 = vp9_sad32x16_sse2; - if (flags & HAS_AVX2) vp9_sad32x16 = vp9_sad32x16_avx2; - vp9_sad32x16_avg = vp9_sad32x16_avg_c; - if (flags & HAS_SSE2) vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x16_avg = vp9_sad32x16_avg_avx2; - vp9_sad32x16x4d = vp9_sad32x16x4d_c; - if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2; - vp9_sad32x32 = vp9_sad32x32_c; - if (flags & HAS_SSE2) vp9_sad32x32 = vp9_sad32x32_sse2; - if (flags & HAS_AVX2) vp9_sad32x32 = vp9_sad32x32_avx2; - vp9_sad32x32_avg = vp9_sad32x32_avg_c; - if (flags & HAS_SSE2) vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x32_avg = vp9_sad32x32_avg_avx2; - vp9_sad32x32x4d = vp9_sad32x32x4d_c; - if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - if (flags & HAS_AVX2) 
vp9_sad32x32x4d = vp9_sad32x32x4d_avx2; - vp9_sad32x64 = vp9_sad32x64_c; - if (flags & HAS_SSE2) vp9_sad32x64 = vp9_sad32x64_sse2; - if (flags & HAS_AVX2) vp9_sad32x64 = vp9_sad32x64_avx2; - vp9_sad32x64_avg = vp9_sad32x64_avg_c; - if (flags & HAS_SSE2) vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x64_avg = vp9_sad32x64_avg_avx2; - vp9_sad32x64x4d = vp9_sad32x64x4d_c; - if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2; - vp9_sad4x4 = vp9_sad4x4_c; - if (flags & HAS_SSE) vp9_sad4x4 = vp9_sad4x4_sse; - vp9_sad4x4_avg = vp9_sad4x4_avg_c; - if (flags & HAS_SSE) vp9_sad4x4_avg = vp9_sad4x4_avg_sse; - vp9_sad4x4x3 = vp9_sad4x4x3_c; - if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad4x4x4d = vp9_sad4x4x4d_c; - if (flags & HAS_SSE) vp9_sad4x4x4d = vp9_sad4x4x4d_sse; - vp9_sad4x8 = vp9_sad4x8_c; - if (flags & HAS_SSE) vp9_sad4x8 = vp9_sad4x8_sse; - vp9_sad4x8_avg = vp9_sad4x8_avg_c; - if (flags & HAS_SSE) vp9_sad4x8_avg = vp9_sad4x8_avg_sse; - vp9_sad4x8x4d = vp9_sad4x8x4d_c; - if (flags & HAS_SSE) vp9_sad4x8x4d = vp9_sad4x8x4d_sse; - vp9_sad64x32 = vp9_sad64x32_c; - if (flags & HAS_SSE2) vp9_sad64x32 = vp9_sad64x32_sse2; - if (flags & HAS_AVX2) vp9_sad64x32 = vp9_sad64x32_avx2; - vp9_sad64x32_avg = vp9_sad64x32_avg_c; - if (flags & HAS_SSE2) vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x32_avg = vp9_sad64x32_avg_avx2; - vp9_sad64x32x4d = vp9_sad64x32x4d_c; - if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2; - vp9_sad64x64 = vp9_sad64x64_c; - if (flags & HAS_SSE2) vp9_sad64x64 = vp9_sad64x64_sse2; - if (flags & HAS_AVX2) vp9_sad64x64 = vp9_sad64x64_avx2; - vp9_sad64x64_avg = vp9_sad64x64_avg_c; - if (flags & HAS_SSE2) vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x64_avg = vp9_sad64x64_avg_avx2; - vp9_sad64x64x4d = vp9_sad64x64x4d_c; - if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - if (flags & HAS_AVX2) vp9_sad64x64x4d = 
vp9_sad64x64x4d_avx2; - vp9_sad8x16 = vp9_sad8x16_c; - if (flags & HAS_SSE2) vp9_sad8x16 = vp9_sad8x16_sse2; - vp9_sad8x16_avg = vp9_sad8x16_avg_c; - if (flags & HAS_SSE2) vp9_sad8x16_avg = vp9_sad8x16_avg_sse2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; - if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x16x4d = vp9_sad8x16x4d_c; - if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2; - vp9_sad8x4 = vp9_sad8x4_c; - if (flags & HAS_SSE2) vp9_sad8x4 = vp9_sad8x4_sse2; - vp9_sad8x4_avg = vp9_sad8x4_avg_c; - if (flags & HAS_SSE2) vp9_sad8x4_avg = vp9_sad8x4_avg_sse2; - vp9_sad8x4x4d = vp9_sad8x4x4d_c; - if (flags & HAS_SSE2) vp9_sad8x4x4d = vp9_sad8x4x4d_sse2; - vp9_sad8x8 = vp9_sad8x8_c; - if (flags & HAS_SSE2) vp9_sad8x8 = vp9_sad8x8_sse2; - vp9_sad8x8_avg = vp9_sad8x8_avg_c; - if (flags & HAS_SSE2) vp9_sad8x8_avg = vp9_sad8x8_avg_sse2; - vp9_sad8x8x3 = vp9_sad8x8x3_c; - if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - vp9_sad8x8x4d = vp9_sad8x8x4d_c; - if (flags & HAS_SSE2) vp9_sad8x8x4d = vp9_sad8x8x4d_sse2; + vp9_satd = vp9_satd_c; + if (flags & HAS_SSE2) vp9_satd = vp9_satd_sse2; vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; @@ -1327,37 +961,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse; vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse; - vp9_variance16x16 = vp9_variance16x16_c; - if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance16x32 = vp9_variance16x32_c; - if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2; - vp9_variance16x8 = vp9_variance16x8_c; - if (flags & HAS_SSE2) vp9_variance16x8 = 
vp9_variance16x8_sse2; - vp9_variance32x16 = vp9_variance32x16_c; - if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_c; - if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance32x64 = vp9_variance32x64_c; - if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2; - vp9_variance4x4 = vp9_variance4x4_c; - if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2; - vp9_variance4x8 = vp9_variance4x8_c; - if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2; - vp9_variance64x32 = vp9_variance64x32_c; - if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_c; - if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; - vp9_variance8x16 = vp9_variance8x16_c; - if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2; - vp9_variance8x4 = vp9_variance8x4_c; - if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2; - vp9_variance8x8 = vp9_variance8x8_c; - if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2; vp9_vector_var = vp9_vector_var_c; if (flags & HAS_SSE2) vp9_vector_var = vp9_vector_var_sse2; } diff --git a/media/libvpx/vp9_rtcd_x86_64-darwin9-gcc.h b/media/libvpx/vp9_rtcd_x86_64-darwin9-gcc.h index 82b03b8acfb..8d75771cf86 100644 --- a/media/libvpx/vp9_rtcd_x86_64-darwin9-gcc.h +++ b/media/libvpx/vp9_rtcd_x86_64-darwin9-gcc.h @@ -42,6 +42,10 @@ int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const 
tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +#define vp9_block_error_fp vp9_block_error_fp_sse2 + void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -171,28 +175,36 @@ void vp9_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*vp9_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c +void vp9_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_sse2 void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c +void vp9_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_sse2 void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_4x4 
vp9_dc_128_predictor_4x4_c +void vp9_dc_128_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_sse void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_sse void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_sse2 void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c +void vp9_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_sse2 void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c +void vp9_dc_left_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_sse void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void vp9_dc_left_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_sse void 
vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -211,16 +223,20 @@ void vp9_dc_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a #define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_sse void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_sse2 void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c +void vp9_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_sse2 void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c +void vp9_dc_top_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_sse void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_sse int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct 
vp9_variance_vtable *fn_ptr, const struct mv *center_mv); #define vp9_diamond_search_sad vp9_diamond_search_sad_c @@ -293,19 +309,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_mmx -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_sse2 - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_sse2 - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -322,10 +325,18 @@ void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, 
const uint8_t *left); +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +#define vp9_hadamard_16x16 vp9_hadamard_16x16_sse2 + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_ssse3(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_8x8)(int16_t const *src_diff, int src_stride, int16_t *coeff); + void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_10_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_10_add vp9_idct16x16_10_add_sse2 void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -333,8 +344,7 @@ void vp9_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_s void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_256_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_256_add vp9_idct16x16_256_add_sse2 void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -441,22 +451,9 @@ void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t 
*blimit0, co void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); #define vp9_lpf_vertical_8_dual vp9_lpf_vertical_8_dual_sse2 -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_sse2 - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_sse2 - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_sse2 +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +#define vp9_minmax_8x8 vp9_minmax_8x8_sse2 void 
vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void vp9_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -476,228 +473,9 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void vp9_quantize_fp_32x32_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); RTCD_EXTERN void (*vp9_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x16 vp9_sad16x16_sse2 - -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, const uint8_t *second_pred); -#define vp9_sad16x16_avg vp9_sad16x16_avg_sse2 - -void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x16x4d vp9_sad16x16x4d_sse2 - -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x16x8 vp9_sad16x16x8_c - -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x32 vp9_sad16x32_sse2 - -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x32_avg vp9_sad16x32_avg_sse2 - -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int 
ref_stride, unsigned int *sad_array); -#define vp9_sad16x32x4d vp9_sad16x32x4d_sse2 - -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x8 vp9_sad16x8_sse2 - -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x8_avg vp9_sad16x8_avg_sse2 - -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x8x4d vp9_sad16x8x4d_sse2 - -void vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x8x8 vp9_sad16x8x8_c - -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_avx2(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x16x4d vp9_sad32x16x4d_sse2 - -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x3 vp9_sad32x32x3_c - -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad32x32x8 vp9_sad32x32x8_c - -unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, 
int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x64x4d vp9_sad32x64x4d_sse2 - -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x4_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x4 vp9_sad4x4_sse - -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x4_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x4_avg vp9_sad4x4_avg_sse - -void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x4x4d vp9_sad4x4x4d_sse - -void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x4x8 
vp9_sad4x4x8_c - -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x8_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x8 vp9_sad4x8_sse - -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x8_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x8_avg vp9_sad4x8_avg_sse - -void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x8x4d vp9_sad4x8x4d_sse - -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x8x8 vp9_sad4x8x8_c - -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t 
*second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad64x32x4d vp9_sad64x32x4d_sse2 - -unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x3 vp9_sad64x64x3_c - -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int 
ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad64x64x8 vp9_sad64x64x8_c - -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x16 vp9_sad8x16_sse2 - -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x16_avg vp9_sad8x16_avg_sse2 - -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x16x4d vp9_sad8x16x4d_sse2 - -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x16x8 
vp9_sad8x16x8_c - -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x4 vp9_sad8x4_sse2 - -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x4_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x4_avg vp9_sad8x4_avg_sse2 - -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x4x4d vp9_sad8x4x4d_sse2 - -void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x4x8 vp9_sad8x4x8_c - -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x8 vp9_sad8x8_sse2 - -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x8_avg vp9_sad8x8_avg_sse2 - -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x3)(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x8x4d vp9_sad8x8x4d_sse2 - -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x8x8 vp9_sad8x8x8_c +int16_t vp9_satd_c(const int16_t *coeff, int length); +int16_t vp9_satd_sse2(const int16_t *coeff, int length); +#define vp9_satd vp9_satd_sse2 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -873,63 +651,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int 
vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_sse2 - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_sse2 - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, 
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_sse2 - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_sse2 - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_sse2 - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_sse2 - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_sse2 - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_sse2 - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_sse2 @@ -1002,8 +723,6 @@ static void setup_rtcd_internal(void) vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -1012,18 +731,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3; - vp9_idct16x16_256_add = 
vp9_idct16x16_256_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3; + vp9_hadamard_8x8 = vp9_hadamard_8x8_sse2; + if (flags & HAS_SSSE3) vp9_hadamard_8x8 = vp9_hadamard_8x8_ssse3; vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3; vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; - vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; vp9_quantize_b = vp9_quantize_b_sse2; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; @@ -1032,42 +747,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_quantize_fp = vp9_quantize_fp_ssse3; vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_c; if (flags & HAS_SSSE3) vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_ssse3; - vp9_sad16x16x3 = vp9_sad16x16x3_c; - if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x8x3 = vp9_sad16x8x3_c; - if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad32x16 = vp9_sad32x16_sse2; - if (flags & HAS_AVX2) vp9_sad32x16 = vp9_sad32x16_avx2; - vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x16_avg = vp9_sad32x16_avg_avx2; - vp9_sad32x32 = vp9_sad32x32_sse2; - if (flags & HAS_AVX2) vp9_sad32x32 = vp9_sad32x32_avx2; - vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x32_avg = vp9_sad32x32_avg_avx2; - vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2; - vp9_sad32x64 = vp9_sad32x64_sse2; - if (flags & HAS_AVX2) vp9_sad32x64 = vp9_sad32x64_avx2; - 
vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x64_avg = vp9_sad32x64_avg_avx2; - vp9_sad4x4x3 = vp9_sad4x4x3_c; - if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad64x32 = vp9_sad64x32_sse2; - if (flags & HAS_AVX2) vp9_sad64x32 = vp9_sad64x32_avx2; - vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x32_avg = vp9_sad64x32_avg_avx2; - vp9_sad64x64 = vp9_sad64x64_sse2; - if (flags & HAS_AVX2) vp9_sad64x64 = vp9_sad64x64_avx2; - vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x64_avg = vp9_sad64x64_avg_avx2; - vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - if (flags & HAS_AVX2) vp9_sad64x64x4d = vp9_sad64x64x4d_avx2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; - if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x8x3 = vp9_sad8x8x3_c; - if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; @@ -1124,16 +803,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; - vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; } 
#endif diff --git a/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h b/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h index be3a56e0e51..34e7f3160e6 100644 --- a/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h +++ b/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h @@ -41,6 +41,10 @@ int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, in int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); #define vp9_block_error vp9_block_error_sse2 +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +#define vp9_block_error_fp vp9_block_error_fp_sse2 + void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -167,28 +171,36 @@ void vp9_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*vp9_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c +void vp9_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_sse2 void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t 
*above, const uint8_t *left); -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c +void vp9_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_sse2 void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c +void vp9_dc_128_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_sse void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_sse void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_sse2 void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c +void vp9_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_sse2 void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c +void vp9_dc_left_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); 
+#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_sse void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void vp9_dc_left_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_sse void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -207,16 +219,20 @@ void vp9_dc_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a #define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_sse void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_sse2 void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c +void vp9_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_sse2 void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c +void vp9_dc_top_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_sse void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define 
vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_sse int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); #define vp9_diamond_search_sad vp9_diamond_search_sad_c @@ -287,18 +303,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_mmx -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get16x16var vp9_get16x16var_sse2 - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_sse2 - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_sse2 - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -315,10 +319,18 @@ void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void 
vp9_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +#define vp9_hadamard_16x16 vp9_hadamard_16x16_sse2 + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_ssse3(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_8x8)(int16_t const *src_diff, int src_stride, int16_t *coeff); + void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_10_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_10_add vp9_idct16x16_10_add_sse2 void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -326,8 +338,7 @@ void vp9_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_s void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_256_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_256_add vp9_idct16x16_256_add_sse2 void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, 
int dest_stride); void vp9_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -433,21 +444,9 @@ void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, co void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); #define vp9_lpf_vertical_8_dual vp9_lpf_vertical_8_dual_sse2 -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_sse2 - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_sse2 - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_sse2 - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_sse2 +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +#define vp9_minmax_8x8 vp9_minmax_8x8_sse2 void vp9_quantize_b_c(const tran_low_t *coeff_ptr, 
intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void vp9_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -467,216 +466,9 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void vp9_quantize_fp_32x32_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); RTCD_EXTERN void (*vp9_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x16 vp9_sad16x16_sse2 - -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define 
vp9_sad16x16_avg vp9_sad16x16_avg_sse2 - -void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x16x4d vp9_sad16x16x4d_sse2 - -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x16x8 vp9_sad16x16x8_c - -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x32 vp9_sad16x32_sse2 - -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x32_avg vp9_sad16x32_avg_sse2 - -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define 
vp9_sad16x32x4d vp9_sad16x32x4d_sse2 - -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x8 vp9_sad16x8_sse2 - -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x8_avg vp9_sad16x8_avg_sse2 - -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x8x4d vp9_sad16x8x4d_sse2 - -void vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x8x8 vp9_sad16x8x8_c - -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x16 vp9_sad32x16_sse2 - -unsigned int vp9_sad32x16_avg_c(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x16_avg vp9_sad32x16_avg_sse2 - -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x16x4d vp9_sad32x16x4d_sse2 - -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x32 vp9_sad32x32_sse2 - -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x32_avg vp9_sad32x32_avg_sse2 - -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x3 vp9_sad32x32x3_c - -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x4d vp9_sad32x32x4d_sse2 - -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad32x32x8 vp9_sad32x32x8_c - -unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad32x64 vp9_sad32x64_sse2 - -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad32x64_avg vp9_sad32x64_avg_sse2 - -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x64x4d vp9_sad32x64x4d_sse2 - -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x4_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x4 vp9_sad4x4_sse - -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x4_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x4_avg vp9_sad4x4_avg_sse - -void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void 
vp9_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x4x4d vp9_sad4x4x4d_sse - -void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x4x8 vp9_sad4x4x8_c - -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x8_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x8 vp9_sad4x8_sse - -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x8_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x8_avg vp9_sad4x8_avg_sse - -void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x8x4d vp9_sad4x8x4d_sse - -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x8x8 vp9_sad4x8x8_c - -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad64x32 vp9_sad64x32_sse2 - -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad64x32_avg 
vp9_sad64x32_avg_sse2 - -void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad64x32x4d vp9_sad64x32x4d_sse2 - -unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad64x64 vp9_sad64x64_sse2 - -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad64x64_avg vp9_sad64x64_avg_sse2 - -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x3 vp9_sad64x64x3_c - -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x4d vp9_sad64x64x4d_sse2 - -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad64x64x8 vp9_sad64x64x8_c - -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x16 vp9_sad8x16_sse2 - -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x16_avg vp9_sad8x16_avg_sse2 - -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x16x4d vp9_sad8x16x4d_sse2 - -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x16x8 vp9_sad8x16x8_c - -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x4 vp9_sad8x4_sse2 - -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x4_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x4_avg vp9_sad8x4_avg_sse2 - -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, 
unsigned int *sad_array); -#define vp9_sad8x4x4d vp9_sad8x4x4d_sse2 - -void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x4x8 vp9_sad8x4x8_c - -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x8 vp9_sad8x8_sse2 - -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x8_avg vp9_sad8x8_avg_sse2 - -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x8x4d vp9_sad8x8x4d_sse2 - -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x8x8 vp9_sad8x8x8_c +int16_t vp9_satd_c(const int16_t *coeff, int length); +int16_t vp9_satd_sse2(const int16_t *coeff, int length); +#define vp9_satd vp9_satd_sse2 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -848,58 +640,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x16 vp9_variance16x16_sse2 - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_sse2 - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_sse2 - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_sse2 - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); 
-unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x32 vp9_variance32x32_sse2 - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_sse2 - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_sse2 - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_sse2 - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x32 vp9_variance64x32_sse2 - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x64 vp9_variance64x64_sse2 - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_sse2 - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_sse2 - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_sse2 - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_sse2 @@ -971,10 +711,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3; + vp9_hadamard_8x8 = vp9_hadamard_8x8_sse2; + if (flags & HAS_SSSE3) vp9_hadamard_8x8 = vp9_hadamard_8x8_ssse3; vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3; vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; @@ -987,18 +725,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_quantize_fp = vp9_quantize_fp_ssse3; vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_c; if (flags & HAS_SSSE3) vp9_quantize_fp_32x32 = 
vp9_quantize_fp_32x32_ssse3; - vp9_sad16x16x3 = vp9_sad16x16x3_c; - if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x8x3 = vp9_sad16x8x3_c; - if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad4x4x3 = vp9_sad4x4x3_c; - if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad8x16x3 = vp9_sad8x16x3_c; - if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x8x3 = vp9_sad8x8x3_c; - if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; diff --git a/media/libvpx/vp9_rtcd_x86_64-win64-gcc.h b/media/libvpx/vp9_rtcd_x86_64-win64-gcc.h index 82b03b8acfb..8d75771cf86 100644 --- a/media/libvpx/vp9_rtcd_x86_64-win64-gcc.h +++ b/media/libvpx/vp9_rtcd_x86_64-win64-gcc.h @@ -42,6 +42,10 @@ int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +#define vp9_block_error_fp vp9_block_error_fp_sse2 + void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t 
*filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -171,28 +175,36 @@ void vp9_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*vp9_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c +void vp9_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_sse2 void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c +void vp9_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_sse2 void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c +void vp9_dc_128_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_sse void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_sse void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_sse2 void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c +void vp9_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_sse2 void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c +void vp9_dc_left_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_sse void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void vp9_dc_left_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_sse void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -211,16 +223,20 @@ void vp9_dc_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a #define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_sse void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_sse2 void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c +void vp9_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_sse2 void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c +void vp9_dc_top_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_sse void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_sse int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); #define vp9_diamond_search_sad vp9_diamond_search_sad_c @@ -293,19 +309,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_mmx -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_sse2 - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_sse2 - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -322,10 +325,18 @@ void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +#define vp9_hadamard_16x16 vp9_hadamard_16x16_sse2 + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_ssse3(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_8x8)(int16_t const *src_diff, int src_stride, int16_t *coeff); + void vp9_idct16x16_10_add_c(const 
tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_10_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_10_add vp9_idct16x16_10_add_sse2 void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -333,8 +344,7 @@ void vp9_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_s void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_256_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_256_add vp9_idct16x16_256_add_sse2 void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -441,22 +451,9 @@ void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, co void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); #define vp9_lpf_vertical_8_dual vp9_lpf_vertical_8_dual_sse2 -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_sse2 - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_sse2 - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_sse2 +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +#define vp9_minmax_8x8 vp9_minmax_8x8_sse2 void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void vp9_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t 
*scan, const int16_t *iscan); @@ -476,228 +473,9 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void vp9_quantize_fp_32x32_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); RTCD_EXTERN void (*vp9_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x16 vp9_sad16x16_sse2 - -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x16_avg vp9_sad16x16_avg_sse2 - -void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - 
-void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x16x4d vp9_sad16x16x4d_sse2 - -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x16x8 vp9_sad16x16x8_c - -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x32 vp9_sad16x32_sse2 - -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x32_avg vp9_sad16x32_avg_sse2 - -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x32x4d vp9_sad16x32x4d_sse2 - -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x8 vp9_sad16x8_sse2 - -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); 
-#define vp9_sad16x8_avg vp9_sad16x8_avg_sse2 - -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x8x4d vp9_sad16x8x4d_sse2 - -void vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x8x8 vp9_sad16x8x8_c - -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); 
-RTCD_EXTERN unsigned int (*vp9_sad32x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x16x4d vp9_sad32x16x4d_sse2 - -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x3 vp9_sad32x32x3_c - -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned 
int *sad_array); -void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad32x32x8 vp9_sad32x32x8_c - -unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x64x4d vp9_sad32x64x4d_sse2 - -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); 
-unsigned int vp9_sad4x4_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x4 vp9_sad4x4_sse - -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x4_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x4_avg vp9_sad4x4_avg_sse - -void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x4x4d vp9_sad4x4x4d_sse - -void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x4x8 vp9_sad4x4x8_c - -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x8_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x8 vp9_sad4x8_sse - -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x8_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x8_avg vp9_sad4x8_avg_sse - -void vp9_sad4x8x4d_c(const uint8_t 
*src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x8x4d vp9_sad4x8x4d_sse - -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x8x8 vp9_sad4x8x8_c - -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad64x32x4d vp9_sad64x32x4d_sse2 - -unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_sse2(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x3 vp9_sad64x64x3_c - -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad64x64x8 vp9_sad64x64x8_c - -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x16_sse2(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x16 vp9_sad8x16_sse2 - -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x16_avg vp9_sad8x16_avg_sse2 - -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x16x4d vp9_sad8x16x4d_sse2 - -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x16x8 vp9_sad8x16x8_c - -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x4 vp9_sad8x4_sse2 - -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x4_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x4_avg vp9_sad8x4_avg_sse2 - -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int 
src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x4x4d vp9_sad8x4x4d_sse2 - -void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x4x8 vp9_sad8x4x8_c - -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x8 vp9_sad8x8_sse2 - -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x8_avg vp9_sad8x8_avg_sse2 - -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x8x4d vp9_sad8x8x4d_sse2 - -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x8x8 vp9_sad8x8x8_c +int16_t vp9_satd_c(const int16_t *coeff, int length); +int16_t vp9_satd_sse2(const 
int16_t *coeff, int length); +#define vp9_satd vp9_satd_sse2 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -873,63 +651,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_sse2 - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_sse2 - -unsigned int vp9_variance32x16_c(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_sse2 - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_sse2 - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_sse2 - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_sse2 - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_sse2 - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); 
-unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_sse2 - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_sse2 @@ -1002,8 +723,6 @@ static void setup_rtcd_internal(void) vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -1012,18 +731,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3; + vp9_hadamard_8x8 = vp9_hadamard_8x8_sse2; + if (flags & HAS_SSSE3) vp9_hadamard_8x8 = vp9_hadamard_8x8_ssse3; vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3; vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; - vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; vp9_quantize_b = 
vp9_quantize_b_sse2; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; @@ -1032,42 +747,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_quantize_fp = vp9_quantize_fp_ssse3; vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_c; if (flags & HAS_SSSE3) vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_ssse3; - vp9_sad16x16x3 = vp9_sad16x16x3_c; - if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x8x3 = vp9_sad16x8x3_c; - if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad32x16 = vp9_sad32x16_sse2; - if (flags & HAS_AVX2) vp9_sad32x16 = vp9_sad32x16_avx2; - vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x16_avg = vp9_sad32x16_avg_avx2; - vp9_sad32x32 = vp9_sad32x32_sse2; - if (flags & HAS_AVX2) vp9_sad32x32 = vp9_sad32x32_avx2; - vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x32_avg = vp9_sad32x32_avg_avx2; - vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2; - vp9_sad32x64 = vp9_sad32x64_sse2; - if (flags & HAS_AVX2) vp9_sad32x64 = vp9_sad32x64_avx2; - vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x64_avg = vp9_sad32x64_avg_avx2; - vp9_sad4x4x3 = vp9_sad4x4x3_c; - if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad64x32 = vp9_sad64x32_sse2; - if (flags & HAS_AVX2) vp9_sad64x32 = vp9_sad64x32_avx2; - vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x32_avg = vp9_sad64x32_avg_avx2; - vp9_sad64x64 = vp9_sad64x64_sse2; - if (flags & HAS_AVX2) vp9_sad64x64 = vp9_sad64x64_avx2; - vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x64_avg = vp9_sad64x64_avg_avx2; - vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - if (flags & HAS_AVX2) vp9_sad64x64x4d = 
vp9_sad64x64x4d_avx2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; - if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x8x3 = vp9_sad8x8x3_c; - if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; @@ -1124,16 +803,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; - vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; } #endif diff --git a/media/libvpx/vp9_rtcd_x86_64-win64-vs12.h b/media/libvpx/vp9_rtcd_x86_64-win64-vs12.h index 82b03b8acfb..8d75771cf86 100644 --- a/media/libvpx/vp9_rtcd_x86_64-win64-vs12.h +++ b/media/libvpx/vp9_rtcd_x86_64-win64-vs12.h @@ -42,6 +42,10 @@ int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +int64_t 
vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +#define vp9_block_error_fp vp9_block_error_fp_sse2 + void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -171,28 +175,36 @@ void vp9_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*vp9_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c +void vp9_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_sse2 void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c +void vp9_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_sse2 void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c +void vp9_dc_128_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_4x4 
vp9_dc_128_predictor_4x4_sse void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_sse void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_sse2 void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c +void vp9_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_sse2 void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c +void vp9_dc_left_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_sse void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void vp9_dc_left_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_sse void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left); @@ -211,16 +223,20 @@ void vp9_dc_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a #define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_sse void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_sse2 void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c +void vp9_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_sse2 void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c +void vp9_dc_top_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_sse void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_sse int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); #define vp9_diamond_search_sad vp9_diamond_search_sad_c @@ -293,19 +309,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t 
*output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_mmx -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_sse2 - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_sse2 - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -322,10 +325,18 @@ void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, int16_t 
*coeff); +#define vp9_hadamard_16x16 vp9_hadamard_16x16_sse2 + +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); +void vp9_hadamard_8x8_ssse3(int16_t const *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*vp9_hadamard_8x8)(int16_t const *src_diff, int src_stride, int16_t *coeff); + void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_10_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_10_add vp9_idct16x16_10_add_sse2 void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -333,8 +344,7 @@ void vp9_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_s void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vp9_idct16x16_256_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_256_add vp9_idct16x16_256_add_sse2 void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vp9_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -441,22 +451,9 @@ void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, co void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, 
const uint8_t *thresh1); #define vp9_lpf_vertical_8_dual vp9_lpf_vertical_8_dual_sse2 -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_sse2 - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_sse2 - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_sse2 +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +#define vp9_minmax_8x8 vp9_minmax_8x8_sse2 void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, 
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void vp9_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -476,228 +473,9 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void vp9_quantize_fp_32x32_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); RTCD_EXTERN void (*vp9_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x16 vp9_sad16x16_sse2 - -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x16_avg vp9_sad16x16_avg_sse2 - -void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x16x4d vp9_sad16x16x4d_sse2 - -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x16x8 vp9_sad16x16x8_c - -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x32 vp9_sad16x32_sse2 - -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x32_avg vp9_sad16x32_avg_sse2 - -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x32x4d vp9_sad16x32x4d_sse2 - -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride); -unsigned int vp9_sad16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad16x8 vp9_sad16x8_sse2 - -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad16x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad16x8_avg vp9_sad16x8_avg_sse2 - -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad16x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad16x8x4d vp9_sad16x8x4d_sse2 - -void vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad16x8x8 vp9_sad16x8x8_c - -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride); - -unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x16_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x16x4d vp9_sad32x16x4d_sse2 - -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t 
*second_pred); - -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad32x32x3 vp9_sad32x32x3_c - -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad32x32x8 vp9_sad32x32x8_c - -unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad32x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad32x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad32x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t 
*second_pred); - -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad32x64x4d vp9_sad32x64x4d_sse2 - -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x4_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x4 vp9_sad4x4_sse - -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x4_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x4_avg vp9_sad4x4_avg_sse - -void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad4x4x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x4x4d vp9_sad4x4x4d_sse - -void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x4x8 vp9_sad4x4x8_c - -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad4x8_sse(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad4x8 vp9_sad4x8_sse - -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad4x8_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad4x8_avg vp9_sad4x8_avg_sse - -void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad4x8x4d vp9_sad4x8x4d_sse - -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad4x8x8 vp9_sad4x8x8_c - -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void 
vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad64x32x4d vp9_sad64x32x4d_sse2 - -unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp9_sad64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); - -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad64x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -RTCD_EXTERN unsigned int (*vp9_sad64x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); - -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -#define vp9_sad64x64x3 vp9_sad64x64x3_c - -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); 
-RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); - -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad64x64x8 vp9_sad64x64x8_c - -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x16 vp9_sad8x16_sse2 - -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x16_avg vp9_sad8x16_avg_sse2 - -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x16x4d vp9_sad8x16x4d_sse2 - -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x16x8 vp9_sad8x16x8_c - -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x4_sse2(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x4 vp9_sad8x4_sse2 - -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x4_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x4_avg vp9_sad8x4_avg_sse2 - -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x4x4d vp9_sad8x4x4d_sse2 - -void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x4x8 vp9_sad8x4x8_c - -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -unsigned int vp9_sad8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); -#define vp9_sad8x8 vp9_sad8x8_sse2 - -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -unsigned int vp9_sad8x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vp9_sad8x8_avg vp9_sad8x8_avg_sse2 - -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -RTCD_EXTERN void (*vp9_sad8x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); - -void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const 
ref_ptr[], int ref_stride, unsigned int *sad_array); -void vp9_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); -#define vp9_sad8x8x4d vp9_sad8x8x4d_sse2 - -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); -#define vp9_sad8x8x8 vp9_sad8x8x8_c +int16_t vp9_satd_c(const int16_t *coeff, int length); +int16_t vp9_satd_sse2(const int16_t *coeff, int length); +#define vp9_satd vp9_satd_sse2 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -873,63 +651,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, 
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_sse2 - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_sse2 - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_sse2 - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_sse2 - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_sse2 - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 
vp9_variance8x16_sse2 - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_sse2 - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_sse2 - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_sse2 @@ -1002,8 +723,6 @@ static void setup_rtcd_internal(void) vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -1012,18 +731,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3; + vp9_hadamard_8x8 = vp9_hadamard_8x8_sse2; + if (flags & HAS_SSSE3) vp9_hadamard_8x8 = 
vp9_hadamard_8x8_ssse3; vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3; vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; - vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; vp9_quantize_b = vp9_quantize_b_sse2; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; @@ -1032,42 +747,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_quantize_fp = vp9_quantize_fp_ssse3; vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_c; if (flags & HAS_SSSE3) vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_ssse3; - vp9_sad16x16x3 = vp9_sad16x16x3_c; - if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x8x3 = vp9_sad16x8x3_c; - if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; - if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad32x16 = vp9_sad32x16_sse2; - if (flags & HAS_AVX2) vp9_sad32x16 = vp9_sad32x16_avx2; - vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x16_avg = vp9_sad32x16_avg_avx2; - vp9_sad32x32 = vp9_sad32x32_sse2; - if (flags & HAS_AVX2) vp9_sad32x32 = vp9_sad32x32_avx2; - vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x32_avg = vp9_sad32x32_avg_avx2; - vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2; - vp9_sad32x64 = vp9_sad32x64_sse2; - if (flags & HAS_AVX2) vp9_sad32x64 = vp9_sad32x64_avx2; - vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad32x64_avg = vp9_sad32x64_avg_avx2; - vp9_sad4x4x3 = vp9_sad4x4x3_c; - if (flags & HAS_SSE3) vp9_sad4x4x3 = 
vp9_sad4x4x3_sse3; - vp9_sad64x32 = vp9_sad64x32_sse2; - if (flags & HAS_AVX2) vp9_sad64x32 = vp9_sad64x32_avx2; - vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x32_avg = vp9_sad64x32_avg_avx2; - vp9_sad64x64 = vp9_sad64x64_sse2; - if (flags & HAS_AVX2) vp9_sad64x64 = vp9_sad64x64_avx2; - vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; - if (flags & HAS_AVX2) vp9_sad64x64_avg = vp9_sad64x64_avg_avx2; - vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - if (flags & HAS_AVX2) vp9_sad64x64x4d = vp9_sad64x64x4d_avx2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; - if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x8x3 = vp9_sad8x8x3_c; - if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; @@ -1124,16 +803,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; - vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; } #endif diff --git a/media/libvpx/vpx/internal/vpx_codec_internal.h b/media/libvpx/vpx/internal/vpx_codec_internal.h index cbfffd0af2a..7380fcc7e24 100644 --- 
a/media/libvpx/vpx/internal/vpx_codec_internal.h +++ b/media/libvpx/vpx/internal/vpx_codec_internal.h @@ -425,10 +425,18 @@ struct vpx_internal_error_info { jmp_buf jmp; }; +#define CLANG_ANALYZER_NORETURN +#if defined(__has_feature) +#if __has_feature(attribute_analyzer_noreturn) +#undef CLANG_ANALYZER_NORETURN +#define CLANG_ANALYZER_NORETURN __attribute__((analyzer_noreturn)) +#endif +#endif + void vpx_internal_error(struct vpx_internal_error_info *info, vpx_codec_err_t error, const char *fmt, - ...); + ...) CLANG_ANALYZER_NORETURN; #ifdef __cplusplus } // extern "C" diff --git a/media/libvpx/vpx/src/svc_encodeframe.c b/media/libvpx/vpx/src/svc_encodeframe.c index e711cf909ba..9844ace54dc 100644 --- a/media/libvpx/vpx/src/svc_encodeframe.c +++ b/media/libvpx/vpx/src/svc_encodeframe.c @@ -302,31 +302,79 @@ void assign_layer_bitrates(const SvcContext *svc_ctx, vpx_codec_enc_cfg_t *const enc_cfg) { int i; const SvcInternal_t *const si = get_const_svc_internal(svc_ctx); + int sl, tl, spatial_layer_target; - if (si->bitrates[0] != 0) { - enc_cfg->rc_target_bitrate = 0; - for (i = 0; i < svc_ctx->spatial_layers; ++i) { - enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i]; - enc_cfg->rc_target_bitrate += si->bitrates[i]; - } - } else { - float total = 0; - float alloc_ratio[VPX_SS_MAX_LAYERS] = {0}; + if (svc_ctx->temporal_layering_mode != 0) { + if (si->bitrates[0] != 0) { + enc_cfg->rc_target_bitrate = 0; + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + enc_cfg->ss_target_bitrate[sl*svc_ctx->temporal_layers] = 0; + for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { + enc_cfg->ss_target_bitrate[sl*svc_ctx->temporal_layers] + += (unsigned int)si->bitrates[sl * svc_ctx->temporal_layers + tl]; + enc_cfg->layer_target_bitrate[sl*svc_ctx->temporal_layers + tl] + = si->bitrates[sl * svc_ctx->temporal_layers + tl]; + } + } + } else { + float total = 0; + float alloc_ratio[VPX_MAX_LAYERS] = {0}; - for (i = 0; i < svc_ctx->spatial_layers; ++i) { - if 
(si->svc_params.scaling_factor_den[i] > 0) { - alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 / - si->svc_params.scaling_factor_den[i]); + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + if (si->svc_params.scaling_factor_den[sl] > 0) { + alloc_ratio[sl] = (float)(si->svc_params.scaling_factor_num[sl] * + 1.0 / si->svc_params.scaling_factor_den[sl]); + total += alloc_ratio[sl]; + } + } - alloc_ratio[i] *= alloc_ratio[i]; - total += alloc_ratio[i]; + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + enc_cfg->ss_target_bitrate[sl] = spatial_layer_target = + (unsigned int)(enc_cfg->rc_target_bitrate * + alloc_ratio[sl] / total); + if (svc_ctx->temporal_layering_mode == 3) { + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = + spatial_layer_target >> 1; + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = + (spatial_layer_target >> 1) + (spatial_layer_target >> 2); + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] = + spatial_layer_target; + } else if (svc_ctx->temporal_layering_mode == 2) { + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = + spatial_layer_target * 2 / 3; + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = + spatial_layer_target; + } else { + // User should explicitly assign bitrates in this case. 
+ assert(0); + } } } + } else { + if (si->bitrates[0] != 0) { + enc_cfg->rc_target_bitrate = 0; + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i]; + enc_cfg->rc_target_bitrate += si->bitrates[i]; + } + } else { + float total = 0; + float alloc_ratio[VPX_MAX_LAYERS] = {0}; - for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { - if (total > 0) { - enc_cfg->ss_target_bitrate[i] = (unsigned int) - (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total); + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + if (si->svc_params.scaling_factor_den[i] > 0) { + alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 / + si->svc_params.scaling_factor_den[i]); + + alloc_ratio[i] *= alloc_ratio[i]; + total += alloc_ratio[i]; + } + } + for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { + if (total > 0) { + enc_cfg->layer_target_bitrate[i] = (unsigned int) + (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total); + } } } } @@ -365,6 +413,14 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, return VPX_CODEC_INVALID_PARAM; } + // Note: temporal_layering_mode only applies to one-pass CBR + // si->svc_params.temporal_layering_mode = svc_ctx->temporal_layering_mode; + if (svc_ctx->temporal_layering_mode == 3) { + svc_ctx->temporal_layers = 3; + } else if (svc_ctx->temporal_layering_mode == 2) { + svc_ctx->temporal_layers = 2; + } + for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { si->svc_params.max_quantizers[i] = MAX_QUANTIZER; si->svc_params.min_quantizers[i] = 0; @@ -387,6 +443,14 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, if (svc_ctx->temporal_layers > VPX_TS_MAX_LAYERS) svc_ctx->temporal_layers = VPX_TS_MAX_LAYERS; + if (svc_ctx->temporal_layers * svc_ctx->spatial_layers > VPX_MAX_LAYERS) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "spatial layers * temporal layers exceeds the maximum number of " + "allowed layers of %d\n", + svc_ctx->spatial_layers * 
svc_ctx->temporal_layers, + (int) VPX_MAX_LAYERS); + return VPX_CODEC_INVALID_PARAM; + } assign_layer_bitrates(svc_ctx, enc_cfg); #if CONFIG_SPATIAL_SVC @@ -403,10 +467,24 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, } } - // modify encoder configuration + if (svc_ctx->threads) + enc_cfg->g_threads = svc_ctx->threads; + + // Modify encoder configuration enc_cfg->ss_number_layers = svc_ctx->spatial_layers; enc_cfg->ts_number_layers = svc_ctx->temporal_layers; + if (enc_cfg->rc_end_usage == VPX_CBR) { + enc_cfg->rc_resize_allowed = 0; + enc_cfg->rc_min_quantizer = 2; + enc_cfg->rc_max_quantizer = 63; + enc_cfg->rc_undershoot_pct = 50; + enc_cfg->rc_overshoot_pct = 50; + enc_cfg->rc_buf_initial_sz = 20; + enc_cfg->rc_buf_optimal_sz = 600; + enc_cfg->rc_buf_sz = 1000; + } + if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0) enc_cfg->g_error_resilient = 1; @@ -451,6 +529,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, iter = NULL; while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) { switch (cx_pkt->kind) { +#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) #if CONFIG_SPATIAL_SVC case VPX_CODEC_SPATIAL_SVC_LAYER_PSNR: { int i; @@ -488,6 +567,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, si->bytes_sum[i] += cx_pkt->data.layer_sizes[i]; break; } +#endif #endif default: { break; @@ -554,7 +634,7 @@ const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { mse[1], mse[2], mse[3]); bytes_total += si->bytes_sum[i]; - // clear sums for next time + // Clear sums for next time. 
si->bytes_sum[i] = 0; for (j = 0; j < COMPONENTS; ++j) { si->psnr_sum[i][j] = 0; diff --git a/media/libvpx/vpx/svc_context.h b/media/libvpx/vpx/svc_context.h index cf791bdeb56..a09651cc991 100644 --- a/media/libvpx/vpx/svc_context.h +++ b/media/libvpx/vpx/svc_context.h @@ -33,10 +33,13 @@ typedef struct { // public interface to svc_command options int spatial_layers; // number of spatial layers int temporal_layers; // number of temporal layers + int temporal_layering_mode; SVC_LOG_LEVEL log_level; // amount of information to display int log_print; // when set, printf log messages instead of returning the // message with svc_get_message - + int output_rc_stat; // for outputting rc stats + int speed; // speed setting for codec + int threads; // private storage for vpx_svc_encode void *internal; } SvcContext; diff --git a/media/libvpx/vpx/vp8cx.h b/media/libvpx/vpx/vp8cx.h index 60b588f0e0c..19bc4bdcce1 100644 --- a/media/libvpx/vpx/vp8cx.h +++ b/media/libvpx/vpx/vp8cx.h @@ -326,6 +326,8 @@ enum vp8e_enc_control_id { VP8E_SET_TEMPORAL_LAYER_ID, /*!\brief Codec control function to set encoder screen content mode. + * + * 0: off, 1: On, 2: On with more aggressive rate control. * * Supported in codecs: VP8 */ @@ -448,7 +450,6 @@ enum vp8e_enc_control_id { */ VP9E_SET_SVC, -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) /*!\brief Codec control function to set parameters for SVC. * \note Parameters contain min_q, max_q, scaling factor for each of the * SVC layers. @@ -456,7 +457,6 @@ enum vp8e_enc_control_id { * Supported in codecs: VP9 */ VP9E_SET_SVC_PARAMETERS, -#endif /*!\brief Codec control function to set svc layer for spatial and temporal. * \note Valid ranges: 0..#vpx_codec_enc_cfg::ss_number_layers for spatial @@ -476,7 +476,6 @@ enum vp8e_enc_control_id { */ VP9E_SET_TUNE_CONTENT, -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) /*!\brief Codec control function to get svc layer ID. 
* \note The layer ID returned is for the data packet from the registered * callback function. @@ -492,7 +491,6 @@ enum vp8e_enc_control_id { * Supported in codecs: VP9 */ VP9E_REGISTER_CX_CALLBACK, -#endif /*!\brief Codec control function to set color space info. * \note Valid ranges: 0..7, default is "UNKNOWN". @@ -508,6 +506,23 @@ enum vp8e_enc_control_id { * Supported in codecs: VP9 */ VP9E_SET_COLOR_SPACE, + + /*!\brief Codec control function to set temporal layering mode. + * \note Valid ranges: 0..3, default is "0" (VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING). + * 0 = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING + * 1 = VP9E_TEMPORAL_LAYERING_MODE_BYPASS + * 2 = VP9E_TEMPORAL_LAYERING_MODE_0101 + * 3 = VP9E_TEMPORAL_LAYERING_MODE_0212 + * + * Supported in codecs: VP9 + */ + VP9E_SET_TEMPORAL_LAYERING_MODE, + + /*!\brief Codec control function to get an Active map back from the encoder. + * + * Supported in codecs: VP9 + */ + VP9E_GET_ACTIVEMAP, }; /*!\brief vpx 1-D scaling mode @@ -521,6 +536,32 @@ typedef enum vpx_scaling_mode_1d { VP8E_ONETWO = 3 } VPX_SCALING_MODE; +/*!\brief Temporal layering mode enum for VP9 SVC. + * + * This set of macros define the different temporal layering modes. + * Supported codecs: VP9 (in SVC mode) + * + */ +typedef enum vp9e_temporal_layering_mode { + /*!\brief No temporal layering. + * Used when only spatial layering is used. + */ + VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING = 0, + + /*!\brief Bypass mode. + * Used when application needs to control temporal layering. + * This will only work when the number of spatial layers equals 1. + */ + VP9E_TEMPORAL_LAYERING_MODE_BYPASS = 1, + + /*!\brief 0-1-0-1... temporal layering scheme with two temporal layers. + */ + VP9E_TEMPORAL_LAYERING_MODE_0101 = 2, + + /*!\brief 0-2-1-2... temporal layering scheme with three temporal layers. 
+ */ + VP9E_TEMPORAL_LAYERING_MODE_0212 = 3 +} VP9E_TEMPORAL_LAYERING_MODE; /*!\brief vpx region of interest map * @@ -596,7 +637,6 @@ typedef enum { VP8_TUNE_SSIM } vp8e_tuning; -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) /*!\brief vp9 svc layer parameters * * This defines the spatial and temporal layer id numbers for svc encoding. @@ -608,18 +648,6 @@ typedef struct vpx_svc_layer_id { int spatial_layer_id; /**< Spatial layer id number. */ int temporal_layer_id; /**< Temporal layer id number. */ } vpx_svc_layer_id_t; -#else -/*!\brief vp9 svc layer parameters - * - * This defines the temporal layer id numbers for svc encoding. - * This is used with the #VP9E_SET_SVC_LAYER_ID control to set the - * temporal layer id for the current frame. - * - */ -typedef struct vpx_svc_layer_id { - int temporal_layer_id; /**< Temporal layer id number. */ -} vpx_svc_layer_id_t; -#endif /*!\brief VP8 encoder control function parameter type * @@ -643,10 +671,8 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *) VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *) VPX_CTRL_USE_TYPE(VP9E_SET_SVC, int) -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS, void *) VPX_CTRL_USE_TYPE(VP9E_REGISTER_CX_CALLBACK, void *) -#endif VPX_CTRL_USE_TYPE(VP9E_SET_SVC_LAYER_ID, vpx_svc_layer_id_t *) VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED, int) @@ -667,9 +693,7 @@ VPX_CTRL_USE_TYPE(VP9E_SET_TILE_ROWS, int) VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *) -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *) -#endif VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int) VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTER_BITRATE_PCT, unsigned int) @@ -691,6 +715,8 @@ VPX_CTRL_USE_TYPE(VP9E_SET_NOISE_SENSITIVITY, unsigned int) VPX_CTRL_USE_TYPE(VP9E_SET_TUNE_CONTENT, int) /* vp9e_tune_content */ 
VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_SPACE, int) + +VPX_CTRL_USE_TYPE(VP9E_GET_ACTIVEMAP, vpx_active_map_t *) /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus } // extern "C" diff --git a/media/libvpx/vpx/vp8dx.h b/media/libvpx/vpx/vp8dx.h index 83898bf8496..bc9cb1a62fc 100644 --- a/media/libvpx/vpx/vp8dx.h +++ b/media/libvpx/vpx/vp8dx.h @@ -106,6 +106,13 @@ enum vp8_dec_control_id { */ VP9_INVERT_TILE_DECODE_ORDER, + /** control function to set the skip loop filter flag. Valid values are + * integers. The decoder will skip the loop filter when its value is set to + * nonzero. If the loop filter is skipped the decoder may accumulate decode + * artifacts. The default value is 0. + */ + VP9_SET_SKIP_LOOP_FILTER, + VP8_DECODER_CTRL_ID_MAX }; diff --git a/media/libvpx/vpx/vpx_encoder.h b/media/libvpx/vpx/vpx_encoder.h index bf75584d589..2b17f98a231 100644 --- a/media/libvpx/vpx/vpx_encoder.h +++ b/media/libvpx/vpx/vpx_encoder.h @@ -42,8 +42,11 @@ extern "C" { /*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */ #define MAX_PERIODICITY VPX_TS_MAX_PERIODICITY - /*!\deprecated Use #VPX_TS_MAX_LAYERS instead. */ -#define MAX_LAYERS VPX_TS_MAX_LAYERS +/*! Temporal+Spatial Scalability: Maximum number of coding layers */ +#define VPX_MAX_LAYERS 12 // 3 temporal + 4 spatial layers are allowed. + +/*!\deprecated Use #VPX_MAX_LAYERS instead. */ +#define MAX_LAYERS VPX_MAX_LAYERS // 3 temporal + 4 spatial layers allowed. /*! Spatial Scalability: Maximum number of coding layers */ #define VPX_SS_MAX_LAYERS 5 @@ -59,7 +62,7 @@ extern "C" { * types, removing or reassigning enums, adding/removing/rearranging * fields to structures */ -#define VPX_ENCODER_ABI_VERSION (4 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ +#define VPX_ENCODER_ABI_VERSION (5 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ /*! 
\brief Encoder capabilities bitfield @@ -163,7 +166,7 @@ extern "C" { VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */ // Spatial SVC is still experimental and may be removed before the next ABI // bump. -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) +#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) VPX_CODEC_SPATIAL_SVC_LAYER_SIZES, /**< Sizes for each layer in this frame*/ VPX_CODEC_SPATIAL_SVC_LAYER_PSNR, /**< PSNR for each layer in this frame*/ #endif @@ -205,7 +208,7 @@ extern "C" { vpx_fixed_buf_t raw; /**< data for arbitrary packets */ // Spatial SVC is still experimental and may be removed before the next // ABI bump. -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) +#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) size_t layer_sizes[VPX_SS_MAX_LAYERS]; struct vpx_psnr_pkt layer_psnr[VPX_SS_MAX_LAYERS]; #endif @@ -729,6 +732,22 @@ extern "C" { * ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1). */ unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY]; + + /*!\brief Target bitrate for each spatial/temporal layer. + * + * These values specify the target coding bitrate to be used for each + * spatial/temporal layer. + * + */ + unsigned int layer_target_bitrate[VPX_MAX_LAYERS]; + + /*!\brief Temporal layering mode indicating which temporal layering scheme to use. + * + * The value (refer to VP9E_TEMPORAL_LAYERING_MODE) specifies the + * temporal layering mode to use. 
+ * + */ + int temporal_layering_mode; } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */ /*!\brief vp9 svc extra configure parameters @@ -737,10 +756,11 @@ extern "C" { * */ typedef struct vpx_svc_parameters { - int max_quantizers[VPX_SS_MAX_LAYERS]; /**< Max Q for each layer */ - int min_quantizers[VPX_SS_MAX_LAYERS]; /**< Min Q for each layer */ - int scaling_factor_num[VPX_SS_MAX_LAYERS]; /**< Scaling factor-numerator*/ - int scaling_factor_den[VPX_SS_MAX_LAYERS]; /**< Scaling factor-denominator*/ + int max_quantizers[VPX_MAX_LAYERS]; /**< Max Q for each layer */ + int min_quantizers[VPX_MAX_LAYERS]; /**< Min Q for each layer */ + int scaling_factor_num[VPX_MAX_LAYERS]; /**< Scaling factor-numerator */ + int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */ + int temporal_layering_mode; /**< Temporal layering mode */ } vpx_svc_extra_cfg_t; diff --git a/media/libvpx/vpx_config_armv7-android-gcc.asm b/media/libvpx/vpx_config_armv7-android-gcc.asm index 7571b302357..6a21c78ecdd 100644 --- a/media/libvpx/vpx_config_armv7-android-gcc.asm +++ b/media/libvpx/vpx_config_armv7-android-gcc.asm @@ -5,14 +5,13 @@ .equ ARCH_MIPS , 0 .equ ARCH_X86 , 0 .equ ARCH_X86_64 , 0 -.equ ARCH_PPC32 , 0 -.equ ARCH_PPC64 , 0 .equ HAVE_EDSP , 0 .equ HAVE_MEDIA , 1 .equ HAVE_NEON , 1 .equ HAVE_NEON_ASM , 1 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 +.equ HAVE_MSA , 0 .equ HAVE_MIPS64 , 0 .equ HAVE_MMX , 0 .equ HAVE_SSE , 0 @@ -22,10 +21,8 @@ .equ HAVE_SSE4_1 , 0 .equ HAVE_AVX , 0 .equ HAVE_AVX2 , 0 -.equ HAVE_ALTIVEC , 0 .equ HAVE_VPX_PORTS , 1 .equ HAVE_STDINT_H , 1 -.equ HAVE_ALT_TREE_LAYOUT , 0 .equ HAVE_PTHREAD_H , 1 .equ HAVE_SYS_MMAN_H , 1 .equ HAVE_UNISTD_H , 1 @@ -46,10 +43,6 @@ .equ CONFIG_BIG_ENDIAN , 0 .equ CONFIG_CODEC_SRCS , 0 .equ CONFIG_DEBUG_LIBS , 0 -.equ CONFIG_FAST_UNALIGNED , 1 -.equ CONFIG_MEM_MANAGER , 0 -.equ CONFIG_MEM_TRACKER , 0 -.equ CONFIG_MEM_CHECKS , 0 .equ CONFIG_DEQUANT_TOKENS , 0 .equ CONFIG_DC_RECON , 0 .equ 
CONFIG_RUNTIME_CPU_DETECT , 1 diff --git a/media/libvpx/vpx_config_armv7-android-gcc.h b/media/libvpx/vpx_config_armv7-android-gcc.h index c9611db886f..468099c3a90 100644 --- a/media/libvpx/vpx_config_armv7-android-gcc.h +++ b/media/libvpx/vpx_config_armv7-android-gcc.h @@ -14,14 +14,13 @@ #define ARCH_MIPS 0 #define ARCH_X86 0 #define ARCH_X86_64 0 -#define ARCH_PPC32 0 -#define ARCH_PPC64 0 #define HAVE_EDSP 0 #define HAVE_MEDIA 1 #define HAVE_NEON 1 #define HAVE_NEON_ASM 1 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MSA 0 #define HAVE_MIPS64 0 #define HAVE_MMX 0 #define HAVE_SSE 0 @@ -31,10 +30,8 @@ #define HAVE_SSE4_1 0 #define HAVE_AVX 0 #define HAVE_AVX2 0 -#define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 1 @@ -55,10 +52,6 @@ #define CONFIG_BIG_ENDIAN 0 #define CONFIG_CODEC_SRCS 0 #define CONFIG_DEBUG_LIBS 0 -#define CONFIG_FAST_UNALIGNED 1 -#define CONFIG_MEM_MANAGER 0 -#define CONFIG_MEM_TRACKER 0 -#define CONFIG_MEM_CHECKS 0 #define CONFIG_DEQUANT_TOKENS 0 #define CONFIG_DC_RECON 0 #define CONFIG_RUNTIME_CPU_DETECT 1 diff --git a/media/libvpx/vpx_config_generic-gnu.asm b/media/libvpx/vpx_config_generic-gnu.asm index ae0c81ee06f..ac108f9eb0f 100644 --- a/media/libvpx/vpx_config_generic-gnu.asm +++ b/media/libvpx/vpx_config_generic-gnu.asm @@ -5,14 +5,13 @@ .equ ARCH_MIPS , 0 .equ ARCH_X86 , 0 .equ ARCH_X86_64 , 0 -.equ ARCH_PPC32 , 0 -.equ ARCH_PPC64 , 0 .equ HAVE_EDSP , 0 .equ HAVE_MEDIA , 0 .equ HAVE_NEON , 0 .equ HAVE_NEON_ASM , 0 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 +.equ HAVE_MSA , 0 .equ HAVE_MIPS64 , 0 .equ HAVE_MMX , 0 .equ HAVE_SSE , 0 @@ -22,10 +21,8 @@ .equ HAVE_SSE4_1 , 0 .equ HAVE_AVX , 0 .equ HAVE_AVX2 , 0 -.equ HAVE_ALTIVEC , 0 .equ HAVE_VPX_PORTS , 1 .equ HAVE_STDINT_H , 1 -.equ HAVE_ALT_TREE_LAYOUT , 0 .equ HAVE_PTHREAD_H , 1 .equ HAVE_SYS_MMAN_H , 1 .equ HAVE_UNISTD_H , 1 @@ -46,10 +43,6 @@ .equ 
CONFIG_BIG_ENDIAN , 0 .equ CONFIG_CODEC_SRCS , 0 .equ CONFIG_DEBUG_LIBS , 0 -.equ CONFIG_FAST_UNALIGNED , 1 -.equ CONFIG_MEM_MANAGER , 0 -.equ CONFIG_MEM_TRACKER , 0 -.equ CONFIG_MEM_CHECKS , 0 .equ CONFIG_DEQUANT_TOKENS , 0 .equ CONFIG_DC_RECON , 0 .equ CONFIG_RUNTIME_CPU_DETECT , 0 diff --git a/media/libvpx/vpx_config_generic-gnu.h b/media/libvpx/vpx_config_generic-gnu.h index d114b0c7e4a..db883cf060f 100644 --- a/media/libvpx/vpx_config_generic-gnu.h +++ b/media/libvpx/vpx_config_generic-gnu.h @@ -14,14 +14,13 @@ #define ARCH_MIPS 0 #define ARCH_X86 0 #define ARCH_X86_64 0 -#define ARCH_PPC32 0 -#define ARCH_PPC64 0 #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MSA 0 #define HAVE_MIPS64 0 #define HAVE_MMX 0 #define HAVE_SSE 0 @@ -31,10 +30,8 @@ #define HAVE_SSE4_1 0 #define HAVE_AVX 0 #define HAVE_AVX2 0 -#define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 1 @@ -55,10 +52,6 @@ #define CONFIG_BIG_ENDIAN 0 #define CONFIG_CODEC_SRCS 0 #define CONFIG_DEBUG_LIBS 0 -#define CONFIG_FAST_UNALIGNED 1 -#define CONFIG_MEM_MANAGER 0 -#define CONFIG_MEM_TRACKER 0 -#define CONFIG_MEM_CHECKS 0 #define CONFIG_DEQUANT_TOKENS 0 #define CONFIG_DC_RECON 0 #define CONFIG_RUNTIME_CPU_DETECT 0 diff --git a/media/libvpx/vpx_config_x86-darwin9-gcc.asm b/media/libvpx/vpx_config_x86-darwin9-gcc.asm index 006f4e85049..9b6b8509b0f 100644 --- a/media/libvpx/vpx_config_x86-darwin9-gcc.asm +++ b/media/libvpx/vpx_config_x86-darwin9-gcc.asm @@ -2,14 +2,13 @@ ARCH_ARM equ 0 ARCH_MIPS equ 0 ARCH_X86 equ 1 ARCH_X86_64 equ 0 -ARCH_PPC32 equ 0 -ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MSA equ 0 HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 @@ -19,10 +18,8 @@ HAVE_SSSE3 equ 1 HAVE_SSE4_1 equ 1 
HAVE_AVX equ 1 HAVE_AVX2 equ 1 -HAVE_ALTIVEC equ 0 HAVE_VPX_PORTS equ 1 HAVE_STDINT_H equ 1 -HAVE_ALT_TREE_LAYOUT equ 0 HAVE_PTHREAD_H equ 1 HAVE_SYS_MMAN_H equ 1 HAVE_UNISTD_H equ 1 @@ -43,10 +40,6 @@ CONFIG_PIC equ 1 CONFIG_BIG_ENDIAN equ 0 CONFIG_CODEC_SRCS equ 0 CONFIG_DEBUG_LIBS equ 0 -CONFIG_FAST_UNALIGNED equ 1 -CONFIG_MEM_MANAGER equ 0 -CONFIG_MEM_TRACKER equ 0 -CONFIG_MEM_CHECKS equ 0 CONFIG_DEQUANT_TOKENS equ 0 CONFIG_DC_RECON equ 0 CONFIG_RUNTIME_CPU_DETECT equ 1 diff --git a/media/libvpx/vpx_config_x86-darwin9-gcc.h b/media/libvpx/vpx_config_x86-darwin9-gcc.h index c95c507a683..40502b410da 100644 --- a/media/libvpx/vpx_config_x86-darwin9-gcc.h +++ b/media/libvpx/vpx_config_x86-darwin9-gcc.h @@ -14,14 +14,13 @@ #define ARCH_MIPS 0 #define ARCH_X86 1 #define ARCH_X86_64 0 -#define ARCH_PPC32 0 -#define ARCH_PPC64 0 #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MSA 0 #define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 @@ -31,10 +30,8 @@ #define HAVE_SSE4_1 1 #define HAVE_AVX 1 #define HAVE_AVX2 1 -#define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 1 @@ -55,10 +52,6 @@ #define CONFIG_BIG_ENDIAN 0 #define CONFIG_CODEC_SRCS 0 #define CONFIG_DEBUG_LIBS 0 -#define CONFIG_FAST_UNALIGNED 1 -#define CONFIG_MEM_MANAGER 0 -#define CONFIG_MEM_TRACKER 0 -#define CONFIG_MEM_CHECKS 0 #define CONFIG_DEQUANT_TOKENS 0 #define CONFIG_DC_RECON 0 #define CONFIG_RUNTIME_CPU_DETECT 1 diff --git a/media/libvpx/vpx_config_x86-linux-gcc.asm b/media/libvpx/vpx_config_x86-linux-gcc.asm index b67dc3bc318..5400f57d5a5 100644 --- a/media/libvpx/vpx_config_x86-linux-gcc.asm +++ b/media/libvpx/vpx_config_x86-linux-gcc.asm @@ -2,14 +2,13 @@ ARCH_ARM equ 0 ARCH_MIPS equ 0 ARCH_X86 equ 1 ARCH_X86_64 equ 0 -ARCH_PPC32 equ 0 -ARCH_PPC64 equ 0 HAVE_EDSP equ 0 
HAVE_MEDIA equ 0 HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MSA equ 0 HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 @@ -19,10 +18,8 @@ HAVE_SSSE3 equ 1 HAVE_SSE4_1 equ 1 HAVE_AVX equ 1 HAVE_AVX2 equ 0 -HAVE_ALTIVEC equ 0 HAVE_VPX_PORTS equ 1 HAVE_STDINT_H equ 1 -HAVE_ALT_TREE_LAYOUT equ 0 HAVE_PTHREAD_H equ 1 HAVE_SYS_MMAN_H equ 1 HAVE_UNISTD_H equ 1 @@ -43,10 +40,6 @@ CONFIG_PIC equ 1 CONFIG_BIG_ENDIAN equ 0 CONFIG_CODEC_SRCS equ 0 CONFIG_DEBUG_LIBS equ 0 -CONFIG_FAST_UNALIGNED equ 1 -CONFIG_MEM_MANAGER equ 0 -CONFIG_MEM_TRACKER equ 0 -CONFIG_MEM_CHECKS equ 0 CONFIG_DEQUANT_TOKENS equ 0 CONFIG_DC_RECON equ 0 CONFIG_RUNTIME_CPU_DETECT equ 1 diff --git a/media/libvpx/vpx_config_x86-linux-gcc.h b/media/libvpx/vpx_config_x86-linux-gcc.h index ec008c80dfd..e7b472e84fe 100644 --- a/media/libvpx/vpx_config_x86-linux-gcc.h +++ b/media/libvpx/vpx_config_x86-linux-gcc.h @@ -14,14 +14,13 @@ #define ARCH_MIPS 0 #define ARCH_X86 1 #define ARCH_X86_64 0 -#define ARCH_PPC32 0 -#define ARCH_PPC64 0 #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MSA 0 #define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 @@ -31,10 +30,8 @@ #define HAVE_SSE4_1 1 #define HAVE_AVX 1 #define HAVE_AVX2 0 -#define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 1 @@ -55,10 +52,6 @@ #define CONFIG_BIG_ENDIAN 0 #define CONFIG_CODEC_SRCS 0 #define CONFIG_DEBUG_LIBS 0 -#define CONFIG_FAST_UNALIGNED 1 -#define CONFIG_MEM_MANAGER 0 -#define CONFIG_MEM_TRACKER 0 -#define CONFIG_MEM_CHECKS 0 #define CONFIG_DEQUANT_TOKENS 0 #define CONFIG_DC_RECON 0 #define CONFIG_RUNTIME_CPU_DETECT 1 diff --git a/media/libvpx/vpx_config_x86-win32-gcc.asm b/media/libvpx/vpx_config_x86-win32-gcc.asm index c39f43a95e8..29c89a5186b 100644 --- 
a/media/libvpx/vpx_config_x86-win32-gcc.asm +++ b/media/libvpx/vpx_config_x86-win32-gcc.asm @@ -2,14 +2,13 @@ ARCH_ARM equ 0 ARCH_MIPS equ 0 ARCH_X86 equ 1 ARCH_X86_64 equ 0 -ARCH_PPC32 equ 0 -ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MSA equ 0 HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 @@ -19,10 +18,8 @@ HAVE_SSSE3 equ 1 HAVE_SSE4_1 equ 1 HAVE_AVX equ 1 HAVE_AVX2 equ 1 -HAVE_ALTIVEC equ 0 HAVE_VPX_PORTS equ 1 HAVE_STDINT_H equ 1 -HAVE_ALT_TREE_LAYOUT equ 0 HAVE_PTHREAD_H equ 1 HAVE_SYS_MMAN_H equ 1 HAVE_UNISTD_H equ 1 @@ -43,10 +40,6 @@ CONFIG_PIC equ 0 CONFIG_BIG_ENDIAN equ 0 CONFIG_CODEC_SRCS equ 0 CONFIG_DEBUG_LIBS equ 0 -CONFIG_FAST_UNALIGNED equ 1 -CONFIG_MEM_MANAGER equ 0 -CONFIG_MEM_TRACKER equ 0 -CONFIG_MEM_CHECKS equ 0 CONFIG_DEQUANT_TOKENS equ 0 CONFIG_DC_RECON equ 0 CONFIG_RUNTIME_CPU_DETECT equ 1 diff --git a/media/libvpx/vpx_config_x86-win32-gcc.h b/media/libvpx/vpx_config_x86-win32-gcc.h index 12b5c84f858..e60f84d8c17 100644 --- a/media/libvpx/vpx_config_x86-win32-gcc.h +++ b/media/libvpx/vpx_config_x86-win32-gcc.h @@ -14,14 +14,13 @@ #define ARCH_MIPS 0 #define ARCH_X86 1 #define ARCH_X86_64 0 -#define ARCH_PPC32 0 -#define ARCH_PPC64 0 #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MSA 0 #define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 @@ -31,10 +30,8 @@ #define HAVE_SSE4_1 1 #define HAVE_AVX 1 #define HAVE_AVX2 1 -#define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #undef HAVE_PTHREAD_H #define HAVE_PTHREAD_H 0 #define HAVE_SYS_MMAN_H 1 @@ -56,10 +53,6 @@ #define CONFIG_BIG_ENDIAN 0 #define CONFIG_CODEC_SRCS 0 #define CONFIG_DEBUG_LIBS 0 -#define CONFIG_FAST_UNALIGNED 1 -#define CONFIG_MEM_MANAGER 0 -#define CONFIG_MEM_TRACKER 0 -#define CONFIG_MEM_CHECKS 0 #define CONFIG_DEQUANT_TOKENS 0 #define 
CONFIG_DC_RECON 0 #define CONFIG_RUNTIME_CPU_DETECT 1 diff --git a/media/libvpx/vpx_config_x86-win32-vs12.asm b/media/libvpx/vpx_config_x86-win32-vs12.asm index abaa435a427..da0e226da7d 100644 --- a/media/libvpx/vpx_config_x86-win32-vs12.asm +++ b/media/libvpx/vpx_config_x86-win32-vs12.asm @@ -2,14 +2,13 @@ ARCH_ARM equ 0 ARCH_MIPS equ 0 ARCH_X86 equ 1 ARCH_X86_64 equ 0 -ARCH_PPC32 equ 0 -ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MSA equ 0 HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 @@ -19,10 +18,8 @@ HAVE_SSSE3 equ 1 HAVE_SSE4_1 equ 1 HAVE_AVX equ 1 HAVE_AVX2 equ 1 -HAVE_ALTIVEC equ 0 HAVE_VPX_PORTS equ 1 HAVE_STDINT_H equ 0 -HAVE_ALT_TREE_LAYOUT equ 0 HAVE_PTHREAD_H equ 0 HAVE_SYS_MMAN_H equ 0 HAVE_UNISTD_H equ 0 @@ -43,10 +40,6 @@ CONFIG_PIC equ 0 CONFIG_BIG_ENDIAN equ 0 CONFIG_CODEC_SRCS equ 0 CONFIG_DEBUG_LIBS equ 0 -CONFIG_FAST_UNALIGNED equ 1 -CONFIG_MEM_MANAGER equ 0 -CONFIG_MEM_TRACKER equ 0 -CONFIG_MEM_CHECKS equ 0 CONFIG_DEQUANT_TOKENS equ 0 CONFIG_DC_RECON equ 0 CONFIG_RUNTIME_CPU_DETECT equ 1 diff --git a/media/libvpx/vpx_config_x86-win32-vs12.h b/media/libvpx/vpx_config_x86-win32-vs12.h index 1f4dbc003e9..a91bb8396ea 100644 --- a/media/libvpx/vpx_config_x86-win32-vs12.h +++ b/media/libvpx/vpx_config_x86-win32-vs12.h @@ -14,14 +14,13 @@ #define ARCH_MIPS 0 #define ARCH_X86 1 #define ARCH_X86_64 0 -#define ARCH_PPC32 0 -#define ARCH_PPC64 0 #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MSA 0 #define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 @@ -31,10 +30,8 @@ #define HAVE_SSE4_1 1 #define HAVE_AVX 1 #define HAVE_AVX2 1 -#define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 0 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 0 #define HAVE_SYS_MMAN_H 0 #define HAVE_UNISTD_H 0 @@ -55,10 +52,6 @@ #define CONFIG_BIG_ENDIAN 0 #define 
CONFIG_CODEC_SRCS 0 #define CONFIG_DEBUG_LIBS 0 -#define CONFIG_FAST_UNALIGNED 1 -#define CONFIG_MEM_MANAGER 0 -#define CONFIG_MEM_TRACKER 0 -#define CONFIG_MEM_CHECKS 0 #define CONFIG_DEQUANT_TOKENS 0 #define CONFIG_DC_RECON 0 #define CONFIG_RUNTIME_CPU_DETECT 1 diff --git a/media/libvpx/vpx_config_x86_64-darwin9-gcc.asm b/media/libvpx/vpx_config_x86_64-darwin9-gcc.asm index 7da1492a105..3dbb38fc2e6 100644 --- a/media/libvpx/vpx_config_x86_64-darwin9-gcc.asm +++ b/media/libvpx/vpx_config_x86_64-darwin9-gcc.asm @@ -2,14 +2,13 @@ ARCH_ARM equ 0 ARCH_MIPS equ 0 ARCH_X86 equ 0 ARCH_X86_64 equ 1 -ARCH_PPC32 equ 0 -ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MSA equ 0 HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 @@ -19,10 +18,8 @@ HAVE_SSSE3 equ 1 HAVE_SSE4_1 equ 1 HAVE_AVX equ 1 HAVE_AVX2 equ 1 -HAVE_ALTIVEC equ 0 HAVE_VPX_PORTS equ 1 HAVE_STDINT_H equ 1 -HAVE_ALT_TREE_LAYOUT equ 0 HAVE_PTHREAD_H equ 1 HAVE_SYS_MMAN_H equ 1 HAVE_UNISTD_H equ 1 @@ -43,10 +40,6 @@ CONFIG_PIC equ 1 CONFIG_BIG_ENDIAN equ 0 CONFIG_CODEC_SRCS equ 0 CONFIG_DEBUG_LIBS equ 0 -CONFIG_FAST_UNALIGNED equ 1 -CONFIG_MEM_MANAGER equ 0 -CONFIG_MEM_TRACKER equ 0 -CONFIG_MEM_CHECKS equ 0 CONFIG_DEQUANT_TOKENS equ 0 CONFIG_DC_RECON equ 0 CONFIG_RUNTIME_CPU_DETECT equ 1 diff --git a/media/libvpx/vpx_config_x86_64-darwin9-gcc.h b/media/libvpx/vpx_config_x86_64-darwin9-gcc.h index bd1791e14d8..0d0477ceeca 100644 --- a/media/libvpx/vpx_config_x86_64-darwin9-gcc.h +++ b/media/libvpx/vpx_config_x86_64-darwin9-gcc.h @@ -14,14 +14,13 @@ #define ARCH_MIPS 0 #define ARCH_X86 0 #define ARCH_X86_64 1 -#define ARCH_PPC32 0 -#define ARCH_PPC64 0 #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MSA 0 #define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 @@ -31,10 +30,8 @@ #define HAVE_SSE4_1 1 #define HAVE_AVX 1 #define HAVE_AVX2 1 
-#define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 1 @@ -55,10 +52,6 @@ #define CONFIG_BIG_ENDIAN 0 #define CONFIG_CODEC_SRCS 0 #define CONFIG_DEBUG_LIBS 0 -#define CONFIG_FAST_UNALIGNED 1 -#define CONFIG_MEM_MANAGER 0 -#define CONFIG_MEM_TRACKER 0 -#define CONFIG_MEM_CHECKS 0 #define CONFIG_DEQUANT_TOKENS 0 #define CONFIG_DC_RECON 0 #define CONFIG_RUNTIME_CPU_DETECT 1 diff --git a/media/libvpx/vpx_config_x86_64-linux-gcc.asm b/media/libvpx/vpx_config_x86_64-linux-gcc.asm index 8ab2613e898..6da93489bfb 100644 --- a/media/libvpx/vpx_config_x86_64-linux-gcc.asm +++ b/media/libvpx/vpx_config_x86_64-linux-gcc.asm @@ -2,14 +2,13 @@ ARCH_ARM equ 0 ARCH_MIPS equ 0 ARCH_X86 equ 0 ARCH_X86_64 equ 1 -ARCH_PPC32 equ 0 -ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MSA equ 0 HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 @@ -19,10 +18,8 @@ HAVE_SSSE3 equ 1 HAVE_SSE4_1 equ 1 HAVE_AVX equ 1 HAVE_AVX2 equ 0 -HAVE_ALTIVEC equ 0 HAVE_VPX_PORTS equ 1 HAVE_STDINT_H equ 1 -HAVE_ALT_TREE_LAYOUT equ 0 HAVE_PTHREAD_H equ 1 HAVE_SYS_MMAN_H equ 1 HAVE_UNISTD_H equ 1 @@ -43,10 +40,6 @@ CONFIG_PIC equ 1 CONFIG_BIG_ENDIAN equ 0 CONFIG_CODEC_SRCS equ 0 CONFIG_DEBUG_LIBS equ 0 -CONFIG_FAST_UNALIGNED equ 1 -CONFIG_MEM_MANAGER equ 0 -CONFIG_MEM_TRACKER equ 0 -CONFIG_MEM_CHECKS equ 0 CONFIG_DEQUANT_TOKENS equ 0 CONFIG_DC_RECON equ 0 CONFIG_RUNTIME_CPU_DETECT equ 1 diff --git a/media/libvpx/vpx_config_x86_64-linux-gcc.h b/media/libvpx/vpx_config_x86_64-linux-gcc.h index ce6f64d7182..21228cd1dd0 100644 --- a/media/libvpx/vpx_config_x86_64-linux-gcc.h +++ b/media/libvpx/vpx_config_x86_64-linux-gcc.h @@ -14,14 +14,13 @@ #define ARCH_MIPS 0 #define ARCH_X86 0 #define ARCH_X86_64 1 -#define ARCH_PPC32 0 -#define ARCH_PPC64 0 #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 #define 
HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MSA 0 #define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 @@ -31,10 +30,8 @@ #define HAVE_SSE4_1 1 #define HAVE_AVX 1 #define HAVE_AVX2 0 -#define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 1 @@ -55,10 +52,6 @@ #define CONFIG_BIG_ENDIAN 0 #define CONFIG_CODEC_SRCS 0 #define CONFIG_DEBUG_LIBS 0 -#define CONFIG_FAST_UNALIGNED 1 -#define CONFIG_MEM_MANAGER 0 -#define CONFIG_MEM_TRACKER 0 -#define CONFIG_MEM_CHECKS 0 #define CONFIG_DEQUANT_TOKENS 0 #define CONFIG_DC_RECON 0 #define CONFIG_RUNTIME_CPU_DETECT 1 diff --git a/media/libvpx/vpx_config_x86_64-win64-gcc.asm b/media/libvpx/vpx_config_x86_64-win64-gcc.asm index bd56e9f6e06..20ba15baee0 100644 --- a/media/libvpx/vpx_config_x86_64-win64-gcc.asm +++ b/media/libvpx/vpx_config_x86_64-win64-gcc.asm @@ -2,14 +2,13 @@ ARCH_ARM equ 0 ARCH_MIPS equ 0 ARCH_X86 equ 0 ARCH_X86_64 equ 1 -ARCH_PPC32 equ 0 -ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MSA equ 0 HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 @@ -19,10 +18,8 @@ HAVE_SSSE3 equ 1 HAVE_SSE4_1 equ 1 HAVE_AVX equ 1 HAVE_AVX2 equ 1 -HAVE_ALTIVEC equ 0 HAVE_VPX_PORTS equ 1 HAVE_STDINT_H equ 1 -HAVE_ALT_TREE_LAYOUT equ 0 HAVE_PTHREAD_H equ 1 HAVE_SYS_MMAN_H equ 1 HAVE_UNISTD_H equ 1 @@ -43,10 +40,6 @@ CONFIG_PIC equ 0 CONFIG_BIG_ENDIAN equ 0 CONFIG_CODEC_SRCS equ 0 CONFIG_DEBUG_LIBS equ 0 -CONFIG_FAST_UNALIGNED equ 1 -CONFIG_MEM_MANAGER equ 0 -CONFIG_MEM_TRACKER equ 0 -CONFIG_MEM_CHECKS equ 0 CONFIG_DEQUANT_TOKENS equ 0 CONFIG_DC_RECON equ 0 CONFIG_RUNTIME_CPU_DETECT equ 1 diff --git a/media/libvpx/vpx_config_x86_64-win64-gcc.h b/media/libvpx/vpx_config_x86_64-win64-gcc.h index 4d75a4b7a60..b056a0ea385 100644 --- a/media/libvpx/vpx_config_x86_64-win64-gcc.h +++ 
b/media/libvpx/vpx_config_x86_64-win64-gcc.h @@ -14,14 +14,13 @@ #define ARCH_MIPS 0 #define ARCH_X86 0 #define ARCH_X86_64 1 -#define ARCH_PPC32 0 -#define ARCH_PPC64 0 #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MSA 0 #define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 @@ -31,10 +30,8 @@ #define HAVE_SSE4_1 1 #define HAVE_AVX 1 #define HAVE_AVX2 1 -#define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #undef HAVE_PTHREAD_H #define HAVE_PTHREAD_H 0 #define HAVE_SYS_MMAN_H 1 @@ -56,10 +53,6 @@ #define CONFIG_BIG_ENDIAN 0 #define CONFIG_CODEC_SRCS 0 #define CONFIG_DEBUG_LIBS 0 -#define CONFIG_FAST_UNALIGNED 1 -#define CONFIG_MEM_MANAGER 0 -#define CONFIG_MEM_TRACKER 0 -#define CONFIG_MEM_CHECKS 0 #define CONFIG_DEQUANT_TOKENS 0 #define CONFIG_DC_RECON 0 #define CONFIG_RUNTIME_CPU_DETECT 1 diff --git a/media/libvpx/vpx_config_x86_64-win64-vs12.asm b/media/libvpx/vpx_config_x86_64-win64-vs12.asm index f4a33528449..39c17a07c3e 100644 --- a/media/libvpx/vpx_config_x86_64-win64-vs12.asm +++ b/media/libvpx/vpx_config_x86_64-win64-vs12.asm @@ -2,14 +2,13 @@ ARCH_ARM equ 0 ARCH_MIPS equ 0 ARCH_X86 equ 0 ARCH_X86_64 equ 1 -ARCH_PPC32 equ 0 -ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MSA equ 0 HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 @@ -19,10 +18,8 @@ HAVE_SSSE3 equ 1 HAVE_SSE4_1 equ 1 HAVE_AVX equ 1 HAVE_AVX2 equ 1 -HAVE_ALTIVEC equ 0 HAVE_VPX_PORTS equ 1 HAVE_STDINT_H equ 0 -HAVE_ALT_TREE_LAYOUT equ 0 HAVE_PTHREAD_H equ 0 HAVE_SYS_MMAN_H equ 0 HAVE_UNISTD_H equ 0 @@ -43,10 +40,6 @@ CONFIG_PIC equ 0 CONFIG_BIG_ENDIAN equ 0 CONFIG_CODEC_SRCS equ 0 CONFIG_DEBUG_LIBS equ 0 -CONFIG_FAST_UNALIGNED equ 1 -CONFIG_MEM_MANAGER equ 0 -CONFIG_MEM_TRACKER equ 0 -CONFIG_MEM_CHECKS equ 0 CONFIG_DEQUANT_TOKENS equ 0 CONFIG_DC_RECON equ 0 
CONFIG_RUNTIME_CPU_DETECT equ 1 diff --git a/media/libvpx/vpx_config_x86_64-win64-vs12.h b/media/libvpx/vpx_config_x86_64-win64-vs12.h index 746a1a402d5..4ad69bdb844 100644 --- a/media/libvpx/vpx_config_x86_64-win64-vs12.h +++ b/media/libvpx/vpx_config_x86_64-win64-vs12.h @@ -14,14 +14,13 @@ #define ARCH_MIPS 0 #define ARCH_X86 0 #define ARCH_X86_64 1 -#define ARCH_PPC32 0 -#define ARCH_PPC64 0 #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MSA 0 #define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 @@ -31,10 +30,8 @@ #define HAVE_SSE4_1 1 #define HAVE_AVX 1 #define HAVE_AVX2 1 -#define HAVE_ALTIVEC 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 0 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 0 #define HAVE_SYS_MMAN_H 0 #define HAVE_UNISTD_H 0 @@ -55,10 +52,6 @@ #define CONFIG_BIG_ENDIAN 0 #define CONFIG_CODEC_SRCS 0 #define CONFIG_DEBUG_LIBS 0 -#define CONFIG_FAST_UNALIGNED 1 -#define CONFIG_MEM_MANAGER 0 -#define CONFIG_MEM_TRACKER 0 -#define CONFIG_MEM_CHECKS 0 #define CONFIG_DEQUANT_TOKENS 0 #define CONFIG_DC_RECON 0 #define CONFIG_RUNTIME_CPU_DETECT 1 diff --git a/media/libvpx/vp9/encoder/arm/neon/vp9_sad4d_neon.c b/media/libvpx/vpx_dsp/arm/sad4d_neon.c similarity index 96% rename from media/libvpx/vp9/encoder/arm/neon/vp9_sad4d_neon.c rename to media/libvpx/vpx_dsp/arm/sad4d_neon.c index cec1689f1f9..c7704dc1be6 100644 --- a/media/libvpx/vp9/encoder/arm/neon/vp9_sad4d_neon.c +++ b/media/libvpx/vpx_dsp/arm/sad4d_neon.c @@ -9,9 +9,9 @@ */ #include -#include "./vp9_rtcd.h" -#include "./vpx_config.h" +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo, @@ -80,9 +80,9 @@ static void sad_neon_32(const uint8x16_t vec_src_00, vget_high_u8(vec_ref_16)); } -void vp9_sad64x64x4d_neon(const uint8_t *src, int src_stride, +void vpx_sad64x64x4d_neon(const 
uint8_t *src, int src_stride, const uint8_t* const ref[4], int ref_stride, - unsigned int *res) { + uint32_t *res) { int i; uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0); uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0); @@ -126,9 +126,9 @@ void vp9_sad64x64x4d_neon(const uint8_t *src, int src_stride, res[3] = horizontal_long_add_16x8(vec_sum_ref3_lo, vec_sum_ref3_hi); } -void vp9_sad32x32x4d_neon(const uint8_t *src, int src_stride, +void vpx_sad32x32x4d_neon(const uint8_t *src, int src_stride, const uint8_t* const ref[4], int ref_stride, - unsigned int *res) { + uint32_t *res) { int i; uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0); uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0); @@ -170,9 +170,9 @@ void vp9_sad32x32x4d_neon(const uint8_t *src, int src_stride, res[3] = horizontal_long_add_16x8(vec_sum_ref3_lo, vec_sum_ref3_hi); } -void vp9_sad16x16x4d_neon(const uint8_t *src, int src_stride, +void vpx_sad16x16x4d_neon(const uint8_t *src, int src_stride, const uint8_t* const ref[4], int ref_stride, - unsigned int *res) { + uint32_t *res) { int i; uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0); uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0); diff --git a/media/libvpx/vp8/common/arm/armv6/vp8_sad16x16_armv6.asm b/media/libvpx/vpx_dsp/arm/sad_media.asm similarity index 97% rename from media/libvpx/vp8/common/arm/armv6/vp8_sad16x16_armv6.asm rename to media/libvpx/vpx_dsp/arm/sad_media.asm index 1b4f5cf3b0f..aed1d3a22ed 100644 --- a/media/libvpx/vp8/common/arm/armv6/vp8_sad16x16_armv6.asm +++ b/media/libvpx/vpx_dsp/arm/sad_media.asm @@ -9,7 +9,7 @@ ; - EXPORT |vp8_sad16x16_armv6| + EXPORT |vpx_sad16x16_media| ARM REQUIRE8 @@ -21,8 +21,7 @@ ; r1 int src_stride ; r2 const unsigned char *ref_ptr ; r3 int ref_stride -; stack max_sad (not used) -|vp8_sad16x16_armv6| PROC +|vpx_sad16x16_media| PROC stmfd sp!, {r4-r12, lr} pld [r0, r1, lsl #0] diff --git a/media/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c b/media/libvpx/vpx_dsp/arm/sad_neon.c similarity index 65% rename from 
media/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c rename to media/libvpx/vpx_dsp/arm/sad_neon.c index c4cd856804d..173f08ac3c3 100644 --- a/media/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c +++ b/media/libvpx/vpx_dsp/arm/sad_neon.c @@ -9,11 +9,113 @@ */ #include -#include "./vp9_rtcd.h" + #include "./vpx_config.h" #include "vpx/vpx_integer.h" +unsigned int vpx_sad8x16_neon( + unsigned char *src_ptr, + int src_stride, + unsigned char *ref_ptr, + int ref_stride) { + uint8x8_t d0, d8; + uint16x8_t q12; + uint32x4_t q1; + uint64x2_t q3; + uint32x2_t d5; + int i; + + d0 = vld1_u8(src_ptr); + src_ptr += src_stride; + d8 = vld1_u8(ref_ptr); + ref_ptr += ref_stride; + q12 = vabdl_u8(d0, d8); + + for (i = 0; i < 15; i++) { + d0 = vld1_u8(src_ptr); + src_ptr += src_stride; + d8 = vld1_u8(ref_ptr); + ref_ptr += ref_stride; + q12 = vabal_u8(q12, d0, d8); + } + + q1 = vpaddlq_u16(q12); + q3 = vpaddlq_u32(q1); + d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), + vreinterpret_u32_u64(vget_high_u64(q3))); + + return vget_lane_u32(d5, 0); +} + +unsigned int vpx_sad4x4_neon( + unsigned char *src_ptr, + int src_stride, + unsigned char *ref_ptr, + int ref_stride) { + uint8x8_t d0, d8; + uint16x8_t q12; + uint32x2_t d1; + uint64x1_t d3; + int i; + + d0 = vld1_u8(src_ptr); + src_ptr += src_stride; + d8 = vld1_u8(ref_ptr); + ref_ptr += ref_stride; + q12 = vabdl_u8(d0, d8); + + for (i = 0; i < 3; i++) { + d0 = vld1_u8(src_ptr); + src_ptr += src_stride; + d8 = vld1_u8(ref_ptr); + ref_ptr += ref_stride; + q12 = vabal_u8(q12, d0, d8); + } + + d1 = vpaddl_u16(vget_low_u16(q12)); + d3 = vpaddl_u32(d1); + + return vget_lane_u32(vreinterpret_u32_u64(d3), 0); +} + +unsigned int vpx_sad16x8_neon( + unsigned char *src_ptr, + int src_stride, + unsigned char *ref_ptr, + int ref_stride) { + uint8x16_t q0, q4; + uint16x8_t q12, q13; + uint32x4_t q1; + uint64x2_t q3; + uint32x2_t d5; + int i; + + q0 = vld1q_u8(src_ptr); + src_ptr += src_stride; + q4 = vld1q_u8(ref_ptr); + ref_ptr += ref_stride; + q12 
= vabdl_u8(vget_low_u8(q0), vget_low_u8(q4)); + q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4)); + + for (i = 0; i < 7; i++) { + q0 = vld1q_u8(src_ptr); + src_ptr += src_stride; + q4 = vld1q_u8(ref_ptr); + ref_ptr += ref_stride; + q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4)); + q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4)); + } + + q12 = vaddq_u16(q12, q13); + q1 = vpaddlq_u16(q12); + q3 = vpaddlq_u32(q1); + d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), + vreinterpret_u32_u64(vget_high_u64(q3))); + + return vget_lane_u32(d5, 0); +} + static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo, const uint16x8_t vec_hi) { const uint32x4_t vec_l_lo = vaddl_u16(vget_low_u16(vec_lo), @@ -34,7 +136,7 @@ static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_16x8) { return vget_lane_u32(c, 0); } -unsigned int vp9_sad64x64_neon(const uint8_t *src, int src_stride, +unsigned int vpx_sad64x64_neon(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride) { int i; uint16x8_t vec_accum_lo = vdupq_n_u16(0); @@ -70,7 +172,7 @@ unsigned int vp9_sad64x64_neon(const uint8_t *src, int src_stride, return horizontal_long_add_16x8(vec_accum_lo, vec_accum_hi); } -unsigned int vp9_sad32x32_neon(const uint8_t *src, int src_stride, +unsigned int vpx_sad32x32_neon(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride) { int i; uint16x8_t vec_accum_lo = vdupq_n_u16(0); @@ -95,7 +197,7 @@ unsigned int vp9_sad32x32_neon(const uint8_t *src, int src_stride, return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi)); } -unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride, +unsigned int vpx_sad16x16_neon(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride) { int i; uint16x8_t vec_accum_lo = vdupq_n_u16(0); @@ -114,7 +216,7 @@ unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride, return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi)); } -unsigned 
int vp9_sad8x8_neon(const uint8_t *src, int src_stride, +unsigned int vpx_sad8x8_neon(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride) { int i; uint16x8_t vec_accum = vdupq_n_u16(0); diff --git a/media/libvpx/vpx_dsp/arm/variance_media.asm b/media/libvpx/vpx_dsp/arm/variance_media.asm new file mode 100644 index 00000000000..f7f9e14b0a7 --- /dev/null +++ b/media/libvpx/vpx_dsp/arm/variance_media.asm @@ -0,0 +1,358 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vpx_variance16x16_media| + EXPORT |vpx_variance8x8_media| + EXPORT |vpx_mse16x16_media| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +|vpx_variance16x16_media| PROC + + stmfd sp!, {r4-r12, lr} + + pld [r0, r1, lsl #0] + pld [r2, r3, lsl #0] + + mov r8, #0 ; initialize sum = 0 + mov r11, #0 ; initialize sse = 0 + mov r12, #16 ; set loop counter to 16 (=block height) + +loop16x16 + ; 1st 4 pixels + ldr r4, [r0, #0] ; load 4 src pixels + ldr r5, [r2, #0] ; load 4 ref pixels + + mov lr, #0 ; constant zero + + usub8 r6, r4, r5 ; calculate difference + pld [r0, r1, lsl #1] + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + pld [r2, r3, lsl #1] + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + ; 
calculate total sum + adds r8, r8, r4 ; add positive differences to sum + subs r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r4, [r0, #4] ; load 4 src pixels + ldr r5, [r2, #4] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 3rd 4 pixels + ldr r4, [r0, #8] ; load 4 src pixels + ldr r5, [r2, #8] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + 
uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 4th 4 pixels + ldr r4, [r0, #12] ; load 4 src pixels + ldr r5, [r2, #12] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + + subs r12, r12, #1 + + bne loop16x16 + + ; return stuff + ldr r6, [sp, #40] ; get address of sse + mul r0, r8, r8 ; sum * sum + str r11, [r6] ; store sse + sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8)) + + ldmfd sp!, {r4-r12, pc} + + ENDP + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +|vpx_variance8x8_media| PROC + + push {r4-r10, lr} + + pld [r0, r1, lsl #0] + pld [r2, r3, lsl #0] + + mov r12, #8 ; set loop counter to 8 (=block height) + mov r4, #0 ; initialize sum = 0 + mov r5, #0 ; initialize sse = 0 + +loop8x8 + ; 1st 4 pixels + ldr r6, [r0, #0x0] ; load 4 src pixels + ldr r7, [r2, #0x0] ; load 4 ref pixels + + mov 
lr, #0 ; constant zero + + usub8 r8, r6, r7 ; calculate difference + pld [r0, r1, lsl #1] + sel r10, r8, lr ; select bytes with positive difference + usub8 r9, r7, r6 ; calculate difference with reversed operands + pld [r2, r3, lsl #1] + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r6, r10, lr ; calculate sum of positive differences + usad8 r7, r8, lr ; calculate sum of negative differences + orr r8, r8, r10 ; differences of all 4 pixels + ; calculate total sum + add r4, r4, r6 ; add positive differences to sum + sub r4, r4, r7 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r7, r8 ; byte (two pixels) to halfwords + uxtb16 r10, r8, ror #8 ; another two pixels to halfwords + smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r6, [r0, #0x4] ; load 4 src pixels + ldr r7, [r2, #0x4] ; load 4 ref pixels + smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r6, r7 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r10, r8, lr ; select bytes with positive difference + usub8 r9, r7, r6 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r6, r10, lr ; calculate sum of positive differences + usad8 r7, r8, lr ; calculate sum of negative differences + orr r8, r8, r10 ; differences of all 4 pixels + + ; calculate total sum + add r4, r4, r6 ; add positive differences to sum + sub r4, r4, r7 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r7, r8 ; byte (two pixels) to halfwords + uxtb16 r10, r8, ror #8 ; another two pixels to halfwords + smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) + subs r12, r12, #1 ; next row + smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) + + bne loop8x8 + + ; return stuff + ldr r8, [sp, #32] ; get 
address of sse + mul r1, r4, r4 ; sum * sum + str r5, [r8] ; store sse + sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6)) + + pop {r4-r10, pc} + + ENDP + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +; +;note: Based on vpx_variance16x16_media. In this function, sum is never used. +; So, we can remove this part of calculation. + +|vpx_mse16x16_media| PROC + + push {r4-r9, lr} + + pld [r0, r1, lsl #0] + pld [r2, r3, lsl #0] + + mov r12, #16 ; set loop counter to 16 (=block height) + mov r4, #0 ; initialize sse = 0 + +loopmse + ; 1st 4 pixels + ldr r5, [r0, #0x0] ; load 4 src pixels + ldr r6, [r2, #0x0] ; load 4 ref pixels + + mov lr, #0 ; constant zero + + usub8 r8, r5, r6 ; calculate difference + pld [r0, r1, lsl #1] + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + pld [r2, r3, lsl #1] + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + + ldr r5, [r0, #0x4] ; load 4 src pixels + + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r6, [r2, #0x4] ; load 4 ref pixels + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r5, r6 ; calculate difference + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 
pixels + ldr r5, [r0, #0x8] ; load 4 src pixels + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + + ; 3rd 4 pixels + ldr r6, [r2, #0x8] ; load 4 ref pixels + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r5, r6 ; calculate difference + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + + ldr r5, [r0, #0xc] ; load 4 src pixels + + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + + ; 4th 4 pixels + ldr r6, [r2, #0xc] ; load 4 ref pixels + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r5, r6 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + + subs r12, r12, #1 ; next row + + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + bne loopmse + + ; return stuff + 
ldr r1, [sp, #28] ; get address of sse + mov r0, r4 ; return sse + str r4, [r1] ; store sse + + pop {r4-r9, pc} + + ENDP + + END diff --git a/media/libvpx/vpx_dsp/arm/variance_neon.c b/media/libvpx/vpx_dsp/arm/variance_neon.c new file mode 100644 index 00000000000..ede6e7bbb03 --- /dev/null +++ b/media/libvpx/vpx_dsp/arm/variance_neon.c @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_dsp_rtcd.h" +#include "./vpx_config.h" + +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) { + const int32x4_t a = vpaddlq_s16(v_16x8); + const int64x2_t b = vpaddlq_s32(a); + const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), + vreinterpret_s32_s64(vget_high_s64(b))); + return vget_lane_s32(c, 0); +} + +static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) { + const int64x2_t b = vpaddlq_s32(v_32x4); + const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), + vreinterpret_s32_s64(vget_high_s64(b))); + return vget_lane_s32(c, 0); +} + +// w * h must be less than 2048 or local variable v_sum may overflow. 
+static void variance_neon_w8(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, uint32_t *sse, int *sum) { + int i, j; + int16x8_t v_sum = vdupq_n_s16(0); + int32x4_t v_sse_lo = vdupq_n_s32(0); + int32x4_t v_sse_hi = vdupq_n_s32(0); + + for (i = 0; i < h; ++i) { + for (j = 0; j < w; j += 8) { + const uint8x8_t v_a = vld1_u8(&a[j]); + const uint8x8_t v_b = vld1_u8(&b[j]); + const uint16x8_t v_diff = vsubl_u8(v_a, v_b); + const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff); + v_sum = vaddq_s16(v_sum, sv_diff); + v_sse_lo = vmlal_s16(v_sse_lo, + vget_low_s16(sv_diff), + vget_low_s16(sv_diff)); + v_sse_hi = vmlal_s16(v_sse_hi, + vget_high_s16(sv_diff), + vget_high_s16(sv_diff)); + } + a += a_stride; + b += b_stride; + } + + *sum = horizontal_add_s16x8(v_sum); + *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi)); +} + +void vpx_get8x8var_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse, int *sum) { + variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, sum); +} + +void vpx_get16x16var_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse, int *sum) { + variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, sum); +} + +unsigned int vpx_variance8x8_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum; + variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum); + return *sse - (((int64_t)sum * sum) >> 6); // >> 6 = / 8 * 8 +} + +unsigned int vpx_variance16x16_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum; + variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, &sum); + return *sse - (((int64_t)sum * sum) >> 8); // >> 8 = / 16 * 16 +} + +unsigned int vpx_variance32x32_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum; + variance_neon_w8(a, a_stride, b, b_stride, 32, 32, 
sse, &sum); + return *sse - (((int64_t)sum * sum) >> 10); // >> 10 = / 32 * 32 +} + +unsigned int vpx_variance32x64_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum1, sum2; + uint32_t sse1, sse2; + variance_neon_w8(a, a_stride, b, b_stride, 32, 32, &sse1, &sum1); + variance_neon_w8(a + (32 * a_stride), a_stride, + b + (32 * b_stride), b_stride, 32, 32, + &sse2, &sum2); + *sse = sse1 + sse2; + sum1 += sum2; + return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64 +} + +unsigned int vpx_variance64x32_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum1, sum2; + uint32_t sse1, sse2; + variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1); + variance_neon_w8(a + (16 * a_stride), a_stride, + b + (16 * b_stride), b_stride, 64, 16, + &sse2, &sum2); + *sse = sse1 + sse2; + sum1 += sum2; + return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64 +} + +unsigned int vpx_variance64x64_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum1, sum2; + uint32_t sse1, sse2; + + variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1); + variance_neon_w8(a + (16 * a_stride), a_stride, + b + (16 * b_stride), b_stride, 64, 16, + &sse2, &sum2); + sse1 += sse2; + sum1 += sum2; + + variance_neon_w8(a + (16 * 2 * a_stride), a_stride, + b + (16 * 2 * b_stride), b_stride, + 64, 16, &sse2, &sum2); + sse1 += sse2; + sum1 += sum2; + + variance_neon_w8(a + (16 * 3 * a_stride), a_stride, + b + (16 * 3 * b_stride), b_stride, + 64, 16, &sse2, &sum2); + *sse = sse1 + sse2; + sum1 += sum2; + return *sse - (((int64_t)sum1 * sum1) >> 12); // >> 12 = / 64 * 64 +} + +unsigned int vpx_variance16x8_neon( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) { + int i; + int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, 
d29s16; + uint32x2_t d0u32, d10u32; + int64x1_t d0s64, d1s64; + uint8x16_t q0u8, q1u8, q2u8, q3u8; + uint16x8_t q11u16, q12u16, q13u16, q14u16; + int32x4_t q8s32, q9s32, q10s32; + int64x2_t q0s64, q1s64, q5s64; + + q8s32 = vdupq_n_s32(0); + q9s32 = vdupq_n_s32(0); + q10s32 = vdupq_n_s32(0); + + for (i = 0; i < 4; i++) { + q0u8 = vld1q_u8(src_ptr); + src_ptr += source_stride; + q1u8 = vld1q_u8(src_ptr); + src_ptr += source_stride; + __builtin_prefetch(src_ptr); + + q2u8 = vld1q_u8(ref_ptr); + ref_ptr += recon_stride; + q3u8 = vld1q_u8(ref_ptr); + ref_ptr += recon_stride; + __builtin_prefetch(ref_ptr); + + q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); + q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); + q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); + q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); + + d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); + d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); + q9s32 = vmlal_s16(q9s32, d22s16, d22s16); + q10s32 = vmlal_s16(q10s32, d23s16, d23s16); + + d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); + d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); + q9s32 = vmlal_s16(q9s32, d24s16, d24s16); + q10s32 = vmlal_s16(q10s32, d25s16, d25s16); + + d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); + d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); + q9s32 = vmlal_s16(q9s32, d26s16, d26s16); + q10s32 = vmlal_s16(q10s32, d27s16, d27s16); + + d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); + d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); + q9s32 = vmlal_s16(q9s32, d28s16, d28s16); + q10s32 = vmlal_s16(q10s32, d29s16, d29s16); + } + + q10s32 = vaddq_s32(q10s32, q9s32); + q0s64 = vpaddlq_s32(q8s32); + q1s64 = 
vpaddlq_s32(q10s32); + + d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); + d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); + + q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), + vreinterpret_s32_s64(d0s64)); + vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); + + d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7); + d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); + + return vget_lane_u32(d0u32, 0); +} + +unsigned int vpx_variance8x16_neon( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) { + int i; + uint8x8_t d0u8, d2u8, d4u8, d6u8; + int16x4_t d22s16, d23s16, d24s16, d25s16; + uint32x2_t d0u32, d10u32; + int64x1_t d0s64, d1s64; + uint16x8_t q11u16, q12u16; + int32x4_t q8s32, q9s32, q10s32; + int64x2_t q0s64, q1s64, q5s64; + + q8s32 = vdupq_n_s32(0); + q9s32 = vdupq_n_s32(0); + q10s32 = vdupq_n_s32(0); + + for (i = 0; i < 8; i++) { + d0u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + d2u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + __builtin_prefetch(src_ptr); + + d4u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + d6u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + __builtin_prefetch(ref_ptr); + + q11u16 = vsubl_u8(d0u8, d4u8); + q12u16 = vsubl_u8(d2u8, d6u8); + + d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); + d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); + q9s32 = vmlal_s16(q9s32, d22s16, d22s16); + q10s32 = vmlal_s16(q10s32, d23s16, d23s16); + + d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); + d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); + q9s32 = vmlal_s16(q9s32, d24s16, d24s16); + q10s32 = vmlal_s16(q10s32, d25s16, d25s16); + } + + q10s32 = vaddq_s32(q10s32, q9s32); + q0s64 = vpaddlq_s32(q8s32); + q1s64 = vpaddlq_s32(q10s32); + + d0s64 = 
vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); + d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); + + q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), + vreinterpret_s32_s64(d0s64)); + vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); + + d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7); + d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); + + return vget_lane_u32(d0u32, 0); +} + +unsigned int vpx_mse16x16_neon( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) { + int i; + int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; + int64x1_t d0s64; + uint8x16_t q0u8, q1u8, q2u8, q3u8; + int32x4_t q7s32, q8s32, q9s32, q10s32; + uint16x8_t q11u16, q12u16, q13u16, q14u16; + int64x2_t q1s64; + + q7s32 = vdupq_n_s32(0); + q8s32 = vdupq_n_s32(0); + q9s32 = vdupq_n_s32(0); + q10s32 = vdupq_n_s32(0); + + for (i = 0; i < 8; i++) { // mse16x16_neon_loop + q0u8 = vld1q_u8(src_ptr); + src_ptr += source_stride; + q1u8 = vld1q_u8(src_ptr); + src_ptr += source_stride; + q2u8 = vld1q_u8(ref_ptr); + ref_ptr += recon_stride; + q3u8 = vld1q_u8(ref_ptr); + ref_ptr += recon_stride; + + q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); + q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); + q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); + q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); + + d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); + d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); + q7s32 = vmlal_s16(q7s32, d22s16, d22s16); + q8s32 = vmlal_s16(q8s32, d23s16, d23s16); + + d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); + d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); + q9s32 = vmlal_s16(q9s32, d24s16, d24s16); + q10s32 = vmlal_s16(q10s32, d25s16, d25s16); + + d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); + d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); + q7s32 = 
vmlal_s16(q7s32, d26s16, d26s16); + q8s32 = vmlal_s16(q8s32, d27s16, d27s16); + + d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); + d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); + q9s32 = vmlal_s16(q9s32, d28s16, d28s16); + q10s32 = vmlal_s16(q10s32, d29s16, d29s16); + } + + q7s32 = vaddq_s32(q7s32, q8s32); + q9s32 = vaddq_s32(q9s32, q10s32); + q10s32 = vaddq_s32(q7s32, q9s32); + + q1s64 = vpaddlq_s32(q10s32); + d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); + + vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0); + return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); +} + +unsigned int vpx_get4x4sse_cs_neon( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride) { + int16x4_t d22s16, d24s16, d26s16, d28s16; + int64x1_t d0s64; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; + int32x4_t q7s32, q8s32, q9s32, q10s32; + uint16x8_t q11u16, q12u16, q13u16, q14u16; + int64x2_t q1s64; + + d0u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + d4u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + d1u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + d5u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + d2u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + d6u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + d3u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + d7u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + + q11u16 = vsubl_u8(d0u8, d4u8); + q12u16 = vsubl_u8(d1u8, d5u8); + q13u16 = vsubl_u8(d2u8, d6u8); + q14u16 = vsubl_u8(d3u8, d7u8); + + d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16)); + d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16)); + d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16)); + d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16)); + + q7s32 = vmull_s16(d22s16, d22s16); + q8s32 = vmull_s16(d24s16, d24s16); + q9s32 = vmull_s16(d26s16, d26s16); + q10s32 = vmull_s16(d28s16, d28s16); + + q7s32 = vaddq_s32(q7s32, q8s32); + q9s32 = 
vaddq_s32(q9s32, q10s32); + q9s32 = vaddq_s32(q7s32, q9s32); + + q1s64 = vpaddlq_s32(q9s32); + d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); + + return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); +} diff --git a/media/libvpx/vp9/encoder/vp9_sad.c b/media/libvpx/vpx_dsp/sad.c similarity index 59% rename from media/libvpx/vp9/encoder/vp9_sad.c rename to media/libvpx/vpx_dsp/sad.c index 73134f2f2c0..c0c3ff99645 100644 --- a/media/libvpx/vp9/encoder/vp9_sad.c +++ b/media/libvpx/vpx_dsp/sad.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -10,15 +10,13 @@ #include -#include "./vp9_rtcd.h" #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" -#if CONFIG_VP9_HIGHBITDEPTH -#include "vp9/common/vp9_common.h" -#endif -#include "vp9/encoder/vp9_variance.h" +#include "vpx_ports/mem.h" +/* Sum the difference between every corresponding element of the buffers. */ static INLINE unsigned int sad(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height) { @@ -35,35 +33,79 @@ static INLINE unsigned int sad(const uint8_t *a, int a_stride, return sad; } +// TODO(johannkoenig): this moved to vpx_dsp, should be able to clean this up. +/* Remove dependency on vp9 variance function by duplicating vp9_comp_avg_pred. + * The function averages every corresponding element of the buffers and stores + * the value in a third buffer, comp_pred. + * pred and comp_pred are assumed to have stride = width + * In the usage below comp_pred is a local array. 
+ */ +static INLINE void avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + const int tmp = pred[j] + ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE void highbd_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, + int width, int height, const uint8_t *ref8, + int ref_stride) { + int i, j; + uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + const int tmp = pred[j] + ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + #define sadMxN(m, n) \ -unsigned int vp9_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ +unsigned int vpx_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride) { \ return sad(src, src_stride, ref, ref_stride, m, n); \ } \ -unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \ +unsigned int vpx_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride, \ const uint8_t *second_pred) { \ uint8_t comp_pred[m * n]; \ - vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ + avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ return sad(src, src_stride, comp_pred, m, m, n); \ } +// depending on call sites, pass **ref_array to avoid & in subsequent call and +// de-dup with 4D below. 
#define sadMxNxK(m, n, k) \ -void vp9_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sads) { \ +void vpx_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ + const uint8_t *ref_array, int ref_stride, \ + uint32_t *sad_array) { \ int i; \ for (i = 0; i < k; ++i) \ - sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride); \ + sad_array[i] = vpx_sad##m##x##n##_c(src, src_stride, &ref_array[i], ref_stride); \ } +// This appears to be equivalent to the above when k == 4 and refs is const #define sadMxNx4D(m, n) \ -void vp9_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ - const uint8_t *const refs[], int ref_stride, \ - unsigned int *sads) { \ +void vpx_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ + const uint8_t *const ref_array[], int ref_stride, \ + uint32_t *sad_array) { \ int i; \ for (i = 0; i < 4; ++i) \ - sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride); \ + sad_array[i] = vpx_sad##m##x##n##_c(src, src_stride, ref_array[i], ref_stride); \ } // 64x64 @@ -169,40 +211,40 @@ static INLINE unsigned int highbd_sadb(const uint8_t *a8, int a_stride, } #define highbd_sadMxN(m, n) \ -unsigned int vp9_highbd_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ +unsigned int vpx_highbd_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride) { \ return highbd_sad(src, src_stride, ref, ref_stride, m, n); \ } \ -unsigned int vp9_highbd_sad##m##x##n##_avg_c(const uint8_t *src, \ +unsigned int vpx_highbd_sad##m##x##n##_avg_c(const uint8_t *src, \ int src_stride, \ const uint8_t *ref, \ int ref_stride, \ const uint8_t *second_pred) { \ uint16_t comp_pred[m * n]; \ - vp9_highbd_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ + highbd_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ return highbd_sadb(src, src_stride, comp_pred, m, m, n); \ } #define highbd_sadMxNxK(m, n, k) \ -void 
vp9_highbd_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sads) { \ +void vpx_highbd_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ + const uint8_t *ref_array, int ref_stride, \ + uint32_t *sad_array) { \ int i; \ for (i = 0; i < k; ++i) { \ - sads[i] = vp9_highbd_sad##m##x##n##_c(src, src_stride, &ref[i], \ - ref_stride); \ + sad_array[i] = vpx_highbd_sad##m##x##n##_c(src, src_stride, &ref_array[i], \ + ref_stride); \ } \ } #define highbd_sadMxNx4D(m, n) \ -void vp9_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ - const uint8_t *const refs[], \ - int ref_stride, unsigned int *sads) { \ +void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ + const uint8_t *const ref_array[], \ + int ref_stride, uint32_t *sad_array) { \ int i; \ for (i = 0; i < 4; ++i) { \ - sads[i] = vp9_highbd_sad##m##x##n##_c(src, src_stride, refs[i], \ - ref_stride); \ - } \ + sad_array[i] = vpx_highbd_sad##m##x##n##_c(src, src_stride, ref_array[i], \ + ref_stride); \ + } \ } // 64x64 diff --git a/media/libvpx/vpx_dsp/variance.c b/media/libvpx/vpx_dsp/variance.c new file mode 100644 index 00000000000..084dd7b7ead --- /dev/null +++ b/media/libvpx/vpx_dsp/variance.c @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#include "vpx_ports/mem.h" +#include "vpx/vpx_integer.h" + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride) { + int distortion = 0; + int r, c; + + for (r = 0; r < 4; r++) { + for (c = 0; c < 4; c++) { + int diff = a[c] - b[c]; + distortion += diff * diff; + } + + a += a_stride; + b += b_stride; + } + + return distortion; +} + +unsigned int vpx_get_mb_ss_c(const int16_t *a) { + unsigned int i, sum = 0; + + for (i = 0; i < 256; ++i) { + sum += a[i] * a[i]; + } + + return sum; +} + +static void variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + int i, j; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + + a += a_stride; + b += b_stride; + } +} + +#define VAR(W, H) \ +unsigned int vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + unsigned int *sse) { \ + int sum; \ + variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} + +/* Identical to the variance call except it takes an additional parameter, sum, + * and returns that value using pass-by-reference instead of returning + * sse - sum^2 / w*h + */ +#define GET_VAR(W, H) \ +void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + unsigned int *sse, int *sum) { \ + variance(a, a_stride, b, b_stride, W, H, sse, sum); \ +} + +/* Identical to the variance call except it does not calculate the + * sse - sum^2 / w*h and returns sse in addtion to modifying the passed in + * variable. 
+ */ +#define MSE(W, H) \ +unsigned int vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + unsigned int *sse) { \ + int sum; \ + variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse; \ +} + +VAR(64, 64) +VAR(64, 32) +VAR(32, 64) +VAR(32, 32) +VAR(32, 16) +VAR(16, 32) +VAR(16, 16) +VAR(16, 8) +VAR(8, 16) +VAR(8, 8) +VAR(8, 4) +VAR(4, 8) +VAR(4, 4) + +GET_VAR(16, 16) +GET_VAR(8, 8) + +MSE(16, 16) +MSE(16, 8) +MSE(8, 16) +MSE(8, 8) + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + const int tmp = pred[j] + ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_variance64(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, uint64_t *sse, uint64_t *sum) { + int i, j; + + uint16_t *a = CONVERT_TO_SHORTPTR(a8); + uint16_t *b = CONVERT_TO_SHORTPTR(b8); + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + a += a_stride; + b += b_stride; + } +} + +static void highbd_8_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + *sse = (unsigned int)sse_long; + *sum = (int)sum_long; +} + +static void highbd_10_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4); + *sum 
= (int)ROUND_POWER_OF_TWO(sum_long, 2); +} + +static void highbd_12_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8); + *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); +} + +#define HIGHBD_VAR(W, H) \ +unsigned int vpx_highbd_8_variance##W##x##H##_c(const uint8_t *a, \ + int a_stride, \ + const uint8_t *b, \ + int b_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} \ +\ +unsigned int vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, \ + int a_stride, \ + const uint8_t *b, \ + int b_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} \ +\ +unsigned int vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, \ + int a_stride, \ + const uint8_t *b, \ + int b_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} + +#define HIGHBD_GET_VAR(S) \ +void vpx_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ + const uint8_t *ref, int ref_stride, \ + unsigned int *sse, int *sum) { \ + highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ +} \ +\ +void vpx_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ + const uint8_t *ref, int ref_stride, \ + unsigned int *sse, int *sum) { \ + highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ +} \ +\ +void vpx_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ + const uint8_t *ref, int ref_stride, \ + unsigned int *sse, int *sum) { \ + 
highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ +} + +#define HIGHBD_MSE(W, H) \ +unsigned int vpx_highbd_8_mse##W##x##H##_c(const uint8_t *src, \ + int src_stride, \ + const uint8_t *ref, \ + int ref_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ + return *sse; \ +} \ +\ +unsigned int vpx_highbd_10_mse##W##x##H##_c(const uint8_t *src, \ + int src_stride, \ + const uint8_t *ref, \ + int ref_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ + return *sse; \ +} \ +\ +unsigned int vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, \ + int src_stride, \ + const uint8_t *ref, \ + int ref_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ + return *sse; \ +} + +HIGHBD_GET_VAR(8) +HIGHBD_GET_VAR(16) + +HIGHBD_MSE(16, 16) +HIGHBD_MSE(16, 8) +HIGHBD_MSE(8, 16) +HIGHBD_MSE(8, 8) + +HIGHBD_VAR(64, 64) +HIGHBD_VAR(64, 32) +HIGHBD_VAR(32, 64) +HIGHBD_VAR(32, 32) +HIGHBD_VAR(32, 16) +HIGHBD_VAR(16, 32) +HIGHBD_VAR(16, 16) +HIGHBD_VAR(16, 8) +HIGHBD_VAR(8, 16) +HIGHBD_VAR(8, 8) +HIGHBD_VAR(8, 4) +HIGHBD_VAR(4, 8) +HIGHBD_VAR(4, 4) + +void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, + int width, int height, const uint8_t *ref8, + int ref_stride) { + int i, j; + uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + const int tmp = pred[j] + ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/vpx_dsp/vpx_dsp_rtcd.c b/media/libvpx/vpx_dsp/vpx_dsp_rtcd.c new file mode 100644 index 00000000000..5fe27b614bd --- /dev/null +++ b/media/libvpx/vpx_dsp/vpx_dsp_rtcd.c @@ -0,0 +1,17 @@ +/* + * 
Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#define RTCD_C +#include "./vpx_dsp_rtcd.h" +#include "vpx_ports/vpx_once.h" + +void vpx_dsp_rtcd() { + once(setup_rtcd_internal); +} diff --git a/media/libvpx/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm b/media/libvpx/vpx_dsp/x86/highbd_sad4d_sse2.asm similarity index 98% rename from media/libvpx/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm rename to media/libvpx/vpx_dsp/x86/highbd_sad4d_sse2.asm index f79a59f02e1..95cc4372ec3 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm +++ b/media/libvpx/vpx_dsp/x86/highbd_sad4d_sse2.asm @@ -8,6 +8,8 @@ ; be found in the AUTHORS file in the root of the source tree. 
; +%define program_name vpx + %include "third_party/x86inc/x86inc.asm" SECTION .text @@ -209,9 +211,9 @@ SECTION .text HIGH_PROCESS_32x2x4 0, %4, %5, (%4 + 32), (%5 + 32), %6 %endmacro -; void vp9_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride, +; void vpx_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride, ; uint8_t *ref[4], int ref_stride, -; unsigned int res[4]); +; uint32_t res[4]); ; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8 %macro HIGH_SADNXN4D 2 %if UNIX64 diff --git a/media/libvpx/vp9/encoder/x86/vp9_highbd_sad_sse2.asm b/media/libvpx/vpx_dsp/x86/highbd_sad_sse2.asm similarity index 97% rename from media/libvpx/vp9/encoder/x86/vp9_highbd_sad_sse2.asm rename to media/libvpx/vpx_dsp/x86/highbd_sad_sse2.asm index c895ac0ee59..4d422dde3af 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_highbd_sad_sse2.asm +++ b/media/libvpx/vpx_dsp/x86/highbd_sad_sse2.asm @@ -8,6 +8,8 @@ ; be found in the AUTHORS file in the root of the source tree. ; +%define program_name vpx + %include "third_party/x86inc/x86inc.asm" SECTION .text @@ -50,7 +52,7 @@ cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, 7, src, src_stride, \ %endif %endmacro -; unsigned int vp9_highbd_sad64x{16,32,64}_sse2(uint8_t *src, int src_stride, +; unsigned int vpx_highbd_sad64x{16,32,64}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro HIGH_SAD64XN 1-2 0 HIGH_SAD_FN 64, %1, 5, %2 @@ -157,7 +159,7 @@ HIGH_SAD64XN 64, 1 ; highbd_sad64x64_avg_sse2 HIGH_SAD64XN 32, 1 ; highbd_sad64x32_avg_sse2 -; unsigned int vp9_highbd_sad32x{16,32,64}_sse2(uint8_t *src, int src_stride, +; unsigned int vpx_highbd_sad32x{16,32,64}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro HIGH_SAD32XN 1-2 0 HIGH_SAD_FN 32, %1, 5, %2 @@ -225,7 +227,7 @@ HIGH_SAD32XN 64, 1 ; highbd_sad32x64_avg_sse2 HIGH_SAD32XN 32, 1 ; highbd_sad32x32_avg_sse2 HIGH_SAD32XN 16, 1 ; highbd_sad32x16_avg_sse2 -; unsigned int vp9_highbd_sad16x{8,16,32}_sse2(uint8_t *src, int src_stride, +; unsigned 
int vpx_highbd_sad16x{8,16,32}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro HIGH_SAD16XN 1-2 0 HIGH_SAD_FN 16, %1, 5, %2 @@ -294,7 +296,7 @@ HIGH_SAD16XN 16, 1 ; highbd_sad16x16_avg_sse2 HIGH_SAD16XN 8, 1 ; highbd_sad16x8_avg_sse2 -; unsigned int vp9_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride, +; unsigned int vpx_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro HIGH_SAD8XN 1-2 0 HIGH_SAD_FN 8, %1, 7, %2 diff --git a/media/libvpx/vp9/encoder/x86/vp9_highbd_variance_impl_sse2.asm b/media/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm similarity index 97% rename from media/libvpx/vp9/encoder/x86/vp9_highbd_variance_impl_sse2.asm rename to media/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm index 821dd0660bc..923418a9921 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_highbd_variance_impl_sse2.asm +++ b/media/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm @@ -11,7 +11,7 @@ %include "vpx_ports/x86_abi_support.asm" -;unsigned int vp9_highbd_calc16x16var_sse2 +;unsigned int vpx_highbd_calc16x16var_sse2 ;( ; unsigned char * src_ptr, ; int source_stride, @@ -20,8 +20,8 @@ ; unsigned int * SSE, ; int * Sum ;) -global sym(vp9_highbd_calc16x16var_sse2) PRIVATE -sym(vp9_highbd_calc16x16var_sse2): +global sym(vpx_highbd_calc16x16var_sse2) PRIVATE +sym(vpx_highbd_calc16x16var_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 @@ -164,7 +164,7 @@ sym(vp9_highbd_calc16x16var_sse2): ret -;unsigned int vp9_highbd_calc8x8var_sse2 +;unsigned int vpx_highbd_calc8x8var_sse2 ;( ; unsigned char * src_ptr, ; int source_stride, @@ -173,8 +173,8 @@ sym(vp9_highbd_calc16x16var_sse2): ; unsigned int * SSE, ; int * Sum ;) -global sym(vp9_highbd_calc8x8var_sse2) PRIVATE -sym(vp9_highbd_calc8x8var_sse2): +global sym(vpx_highbd_calc8x8var_sse2) PRIVATE +sym(vpx_highbd_calc8x8var_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 diff --git a/media/libvpx/vpx_dsp/x86/highbd_variance_sse2.c 
b/media/libvpx/vpx_dsp/x86/highbd_variance_sse2.c new file mode 100644 index 00000000000..343c0478b9a --- /dev/null +++ b/media/libvpx/vpx_dsp/x86/highbd_variance_sse2.c @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" + +#include "vp9/encoder/vp9_variance.h" +#include "vpx_ports/mem.h" + +typedef uint32_t (*high_variance_fn_t) (const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + uint32_t *sse, int *sum); + +uint32_t vpx_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + uint32_t *sse, int *sum); + +uint32_t vpx_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + uint32_t *sse, int *sum); + +static void highbd_8_variance_sse2(const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + int w, int h, uint32_t *sse, int *sum, + high_variance_fn_t var_fn, int block_size) { + int i, j; + + *sse = 0; + *sum = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += block_size) { + unsigned int sse0; + int sum0; + var_fn(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0); + *sse += sse0; + *sum += sum0; + } + } +} + +static void highbd_10_variance_sse2(const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + int w, int h, uint32_t *sse, int *sum, + high_variance_fn_t var_fn, int block_size) { + int i, j; + uint64_t sse_long = 0; + int64_t sum_long = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += 
block_size) { + unsigned int sse0; + int sum0; + var_fn(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0); + sse_long += sse0; + sum_long += sum0; + } + } + *sum = ROUND_POWER_OF_TWO(sum_long, 2); + *sse = ROUND_POWER_OF_TWO(sse_long, 4); +} + +static void highbd_12_variance_sse2(const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + int w, int h, uint32_t *sse, int *sum, + high_variance_fn_t var_fn, int block_size) { + int i, j; + uint64_t sse_long = 0; + int64_t sum_long = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += block_size) { + unsigned int sse0; + int sum0; + var_fn(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0); + sse_long += sse0; + sum_long += sum0; + } + } + *sum = ROUND_POWER_OF_TWO(sum_long, 4); + *sse = ROUND_POWER_OF_TWO(sse_long, 8); +} + + +#define HIGH_GET_VAR(S) \ +void vpx_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, \ + uint32_t *sse, int *sum) { \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ + sse, sum); \ +} \ +\ +void vpx_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, \ + uint32_t *sse, int *sum) { \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ + sse, sum); \ + *sum = ROUND_POWER_OF_TWO(*sum, 2); \ + *sse = ROUND_POWER_OF_TWO(*sse, 4); \ +} \ +\ +void vpx_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, \ + uint32_t *sse, int *sum) { \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, 
ref, ref_stride, \ + sse, sum); \ + *sum = ROUND_POWER_OF_TWO(*sum, 4); \ + *sse = ROUND_POWER_OF_TWO(*sse, 8); \ +} + +HIGH_GET_VAR(16); +HIGH_GET_VAR(8); + +#undef HIGH_GET_VAR + +#define VAR_FN(w, h, block_size, shift) \ +uint32_t vpx_highbd_8_variance##w##x##h##_sse2( \ + const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ + int sum; \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + highbd_8_variance_sse2(src, src_stride, ref, ref_stride, w, h, sse, &sum, \ + vpx_highbd_calc##block_size##x##block_size##var_sse2, \ + block_size); \ + return *sse - (((int64_t)sum * sum) >> shift); \ +} \ +\ +uint32_t vpx_highbd_10_variance##w##x##h##_sse2( \ + const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ + int sum; \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + highbd_10_variance_sse2( \ + src, src_stride, ref, ref_stride, w, h, sse, &sum, \ + vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ + return *sse - (((int64_t)sum * sum) >> shift); \ +} \ +\ +uint32_t vpx_highbd_12_variance##w##x##h##_sse2( \ + const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ + int sum; \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + highbd_12_variance_sse2( \ + src, src_stride, ref, ref_stride, w, h, sse, &sum, \ + vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ + return *sse - (((int64_t)sum * sum) >> shift); \ +} + +VAR_FN(64, 64, 16, 12); +VAR_FN(64, 32, 16, 11); +VAR_FN(32, 64, 16, 11); +VAR_FN(32, 32, 16, 10); +VAR_FN(32, 16, 16, 9); +VAR_FN(16, 32, 16, 9); +VAR_FN(16, 16, 16, 8); +VAR_FN(16, 8, 8, 7); +VAR_FN(8, 16, 8, 7); +VAR_FN(8, 8, 8, 6); + +#undef VAR_FN + +unsigned int vpx_highbd_8_mse16x16_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int 
ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_8_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, + sse, &sum, vpx_highbd_calc16x16var_sse2, 16); + return *sse; +} + +unsigned int vpx_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, + sse, &sum, vpx_highbd_calc16x16var_sse2, 16); + return *sse; +} + +unsigned int vpx_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, + sse, &sum, vpx_highbd_calc16x16var_sse2, 16); + return *sse; +} + +unsigned int vpx_highbd_8_mse8x8_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_8_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, + sse, &sum, vpx_highbd_calc8x8var_sse2, 8); + return *sse; +} + +unsigned int vpx_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, + sse, &sum, vpx_highbd_calc8x8var_sse2, 8); + return *sse; +} + +unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = 
CONVERT_TO_SHORTPTR(ref8); + highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, + sse, &sum, vpx_highbd_calc8x8var_sse2, 8); + return *sse; +} diff --git a/media/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/media/libvpx/vpx_dsp/x86/sad4d_avx2.c similarity index 79% rename from media/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c rename to media/libvpx/vpx_dsp/x86/sad4d_avx2.c index 1feed62566b..793658f9ea9 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c +++ b/media/libvpx/vpx_dsp/x86/sad4d_avx2.c @@ -8,18 +8,19 @@ * be found in the AUTHORS file in the root of the source tree. */ #include // AVX2 +#include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" -void vp9_sad32x32x4d_avx2(uint8_t *src, +void vpx_sad32x32x4d_avx2(const uint8_t *src, int src_stride, - uint8_t *ref[4], + const uint8_t *const ref[4], int ref_stride, - unsigned int res[4]) { + uint32_t res[4]) { __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; __m256i sum_mlow, sum_mhigh; int i; - uint8_t *ref0, *ref1, *ref2, *ref3; + const uint8_t *ref0, *ref1, *ref2, *ref3; ref0 = ref[0]; ref1 = ref[1]; @@ -31,11 +32,11 @@ void vp9_sad32x32x4d_avx2(uint8_t *src, sum_ref3 = _mm256_set1_epi16(0); for (i = 0; i < 32 ; i++) { // load src and all refs - src_reg = _mm256_loadu_si256((__m256i *)(src)); - ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); - ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); - ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); - ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + src_reg = _mm256_loadu_si256((const __m256i *)src); + ref0_reg = _mm256_loadu_si256((const __m256i *)ref0); + ref1_reg = _mm256_loadu_si256((const __m256i *)ref1); + ref2_reg = _mm256_loadu_si256((const __m256i *)ref2); + ref3_reg = _mm256_loadu_si256((const __m256i *)ref3); // sum of the absolute differences between every ref-i to src ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); ref1_reg = _mm256_sad_epu8(ref1_reg, 
src_reg); @@ -80,18 +81,18 @@ void vp9_sad32x32x4d_avx2(uint8_t *src, } } -void vp9_sad64x64x4d_avx2(uint8_t *src, +void vpx_sad64x64x4d_avx2(const uint8_t *src, int src_stride, - uint8_t *ref[4], + const uint8_t *const ref[4], int ref_stride, - unsigned int res[4]) { + uint32_t res[4]) { __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg; __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg; __m256i ref3_reg, ref3next_reg; __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; __m256i sum_mlow, sum_mhigh; int i; - uint8_t *ref0, *ref1, *ref2, *ref3; + const uint8_t *ref0, *ref1, *ref2, *ref3; ref0 = ref[0]; ref1 = ref[1]; @@ -103,16 +104,16 @@ void vp9_sad64x64x4d_avx2(uint8_t *src, sum_ref3 = _mm256_set1_epi16(0); for (i = 0; i < 64 ; i++) { // load 64 bytes from src and all refs - src_reg = _mm256_loadu_si256((__m256i *)(src)); - srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32)); - ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); - ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32)); - ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); - ref1next_reg = _mm256_loadu_si256((__m256i *) (ref1 + 32)); - ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); - ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32)); - ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); - ref3next_reg = _mm256_loadu_si256((__m256i *) (ref3 + 32)); + src_reg = _mm256_loadu_si256((const __m256i *)src); + srcnext_reg = _mm256_loadu_si256((const __m256i *)(src + 32)); + ref0_reg = _mm256_loadu_si256((const __m256i *)ref0); + ref0next_reg = _mm256_loadu_si256((const __m256i *)(ref0 + 32)); + ref1_reg = _mm256_loadu_si256((const __m256i *)ref1); + ref1next_reg = _mm256_loadu_si256((const __m256i *)(ref1 + 32)); + ref2_reg = _mm256_loadu_si256((const __m256i *)ref2); + ref2next_reg = _mm256_loadu_si256((const __m256i *)(ref2 + 32)); + ref3_reg = _mm256_loadu_si256((const __m256i *)ref3); + ref3next_reg = _mm256_loadu_si256((const __m256i *)(ref3 + 32)); // sum of the absolute 
differences between every ref-i to src ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); diff --git a/media/libvpx/vp9/encoder/x86/vp9_sad4d_sse2.asm b/media/libvpx/vpx_dsp/x86/sad4d_sse2.asm similarity index 98% rename from media/libvpx/vp9/encoder/x86/vp9_sad4d_sse2.asm rename to media/libvpx/vpx_dsp/x86/sad4d_sse2.asm index b4936281f62..0f7fb93d47c 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_sad4d_sse2.asm +++ b/media/libvpx/vpx_dsp/x86/sad4d_sse2.asm @@ -8,6 +8,8 @@ ; be found in the AUTHORS file in the root of the source tree. ; +%define program_name vpx + %include "third_party/x86inc/x86inc.asm" SECTION .text @@ -167,9 +169,9 @@ SECTION .text PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6 %endmacro -; void vp9_sadNxNx4d_sse2(uint8_t *src, int src_stride, +; void vpx_sadNxNx4d_sse2(uint8_t *src, int src_stride, ; uint8_t *ref[4], int ref_stride, -; unsigned int res[4]); +; uint32_t res[4]); ; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8 %macro SADNXN4D 2 %if UNIX64 diff --git a/media/libvpx/vp9/encoder/x86/vp9_sad_intrin_avx2.c b/media/libvpx/vpx_dsp/x86/sad_avx2.c similarity index 95% rename from media/libvpx/vp9/encoder/x86/vp9_sad_intrin_avx2.c rename to media/libvpx/vpx_dsp/x86/sad_avx2.c index 113193070e1..ce9ad8f780c 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_sad_intrin_avx2.c +++ b/media/libvpx/vpx_dsp/x86/sad_avx2.c @@ -8,10 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ #include +#include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #define FSAD64_H(h) \ -unsigned int vp9_sad64x##h##_avx2(const uint8_t *src_ptr, \ +unsigned int vpx_sad64x##h##_avx2(const uint8_t *src_ptr, \ int src_stride, \ const uint8_t *ref_ptr, \ int ref_stride) { \ @@ -40,7 +41,7 @@ unsigned int vp9_sad64x##h##_avx2(const uint8_t *src_ptr, \ } #define FSAD32_H(h) \ -unsigned int vp9_sad32x##h##_avx2(const uint8_t *src_ptr, \ +unsigned int vpx_sad32x##h##_avx2(const uint8_t *src_ptr, \ int src_stride, \ const uint8_t *ref_ptr, \ int ref_stride) { \ @@ -89,7 +90,7 @@ FSAD32; #undef FSAD32_H #define FSADAVG64_H(h) \ -unsigned int vp9_sad64x##h##_avg_avx2(const uint8_t *src_ptr, \ +unsigned int vpx_sad64x##h##_avg_avx2(const uint8_t *src_ptr, \ int src_stride, \ const uint8_t *ref_ptr, \ int ref_stride, \ @@ -124,7 +125,7 @@ unsigned int vp9_sad64x##h##_avg_avx2(const uint8_t *src_ptr, \ } #define FSADAVG32_H(h) \ -unsigned int vp9_sad32x##h##_avg_avx2(const uint8_t *src_ptr, \ +unsigned int vpx_sad32x##h##_avg_avx2(const uint8_t *src_ptr, \ int src_stride, \ const uint8_t *ref_ptr, \ int ref_stride, \ diff --git a/media/libvpx/vp8/common/x86/sad_mmx.asm b/media/libvpx/vpx_dsp/x86/sad_mmx.asm similarity index 95% rename from media/libvpx/vp8/common/x86/sad_mmx.asm rename to media/libvpx/vpx_dsp/x86/sad_mmx.asm index 592112fa91d..9968992bd13 100644 --- a/media/libvpx/vp8/common/x86/sad_mmx.asm +++ b/media/libvpx/vpx_dsp/x86/sad_mmx.asm @@ -11,18 +11,18 @@ %include "vpx_ports/x86_abi_support.asm" -global sym(vp8_sad16x16_mmx) PRIVATE -global sym(vp8_sad8x16_mmx) PRIVATE -global sym(vp8_sad8x8_mmx) PRIVATE -global sym(vp8_sad4x4_mmx) PRIVATE -global sym(vp8_sad16x8_mmx) PRIVATE +global sym(vpx_sad16x16_mmx) PRIVATE +global sym(vpx_sad8x16_mmx) PRIVATE +global sym(vpx_sad8x8_mmx) PRIVATE +global sym(vpx_sad4x4_mmx) PRIVATE +global sym(vpx_sad16x8_mmx) PRIVATE -;unsigned int vp8_sad16x16_mmx( +;unsigned int vpx_sad16x16_mmx( ; unsigned char *src_ptr, ; int 
src_stride, ; unsigned char *ref_ptr, ; int ref_stride) -sym(vp8_sad16x16_mmx): +sym(vpx_sad16x16_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 @@ -109,12 +109,12 @@ sym(vp8_sad16x16_mmx): ret -;unsigned int vp8_sad8x16_mmx( +;unsigned int vpx_sad8x16_mmx( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride) -sym(vp8_sad8x16_mmx): +sym(vpx_sad8x16_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 @@ -181,12 +181,12 @@ sym(vp8_sad8x16_mmx): ret -;unsigned int vp8_sad8x8_mmx( +;unsigned int vpx_sad8x8_mmx( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride) -sym(vp8_sad8x8_mmx): +sym(vpx_sad8x8_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 @@ -251,12 +251,12 @@ sym(vp8_sad8x8_mmx): ret -;unsigned int vp8_sad4x4_mmx( +;unsigned int vpx_sad4x4_mmx( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride) -sym(vp8_sad4x4_mmx): +sym(vpx_sad4x4_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 @@ -340,12 +340,12 @@ sym(vp8_sad4x4_mmx): ret -;unsigned int vp8_sad16x8_mmx( +;unsigned int vpx_sad16x8_mmx( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride) -sym(vp8_sad16x8_mmx): +sym(vpx_sad16x8_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 diff --git a/media/libvpx/vp9/encoder/x86/vp9_sad_sse2.asm b/media/libvpx/vpx_dsp/x86/sad_sse2.asm similarity index 95% rename from media/libvpx/vp9/encoder/x86/vp9_sad_sse2.asm rename to media/libvpx/vpx_dsp/x86/sad_sse2.asm index c4c5c54f0e4..c6a829dc21e 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_sad_sse2.asm +++ b/media/libvpx/vpx_dsp/x86/sad_sse2.asm @@ -8,6 +8,8 @@ ; be found in the AUTHORS file in the root of the source tree. 
; +%define program_name vpx + %include "third_party/x86inc/x86inc.asm" SECTION .text @@ -44,7 +46,7 @@ cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 5, src, src_stride, \ %endif ; %3 == 7 %endmacro -; unsigned int vp9_sad64x64_sse2(uint8_t *src, int src_stride, +; unsigned int vpx_sad64x64_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro SAD64XN 1-2 0 SAD_FN 64, %1, 5, %2 @@ -87,7 +89,7 @@ SAD64XN 32 ; sad64x32_sse2 SAD64XN 64, 1 ; sad64x64_avg_sse2 SAD64XN 32, 1 ; sad64x32_avg_sse2 -; unsigned int vp9_sad32x32_sse2(uint8_t *src, int src_stride, +; unsigned int vpx_sad32x32_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro SAD32XN 1-2 0 SAD_FN 32, %1, 5, %2 @@ -132,7 +134,7 @@ SAD32XN 64, 1 ; sad32x64_avg_sse2 SAD32XN 32, 1 ; sad32x32_avg_sse2 SAD32XN 16, 1 ; sad32x16_avg_sse2 -; unsigned int vp9_sad16x{8,16}_sse2(uint8_t *src, int src_stride, +; unsigned int vpx_sad16x{8,16}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro SAD16XN 1-2 0 SAD_FN 16, %1, 7, %2 @@ -178,7 +180,7 @@ SAD16XN 32, 1 ; sad16x32_avg_sse2 SAD16XN 16, 1 ; sad16x16_avg_sse2 SAD16XN 8, 1 ; sad16x8_avg_sse2 -; unsigned int vp9_sad8x{8,16}_sse2(uint8_t *src, int src_stride, +; unsigned int vpx_sad8x{8,16}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro SAD8XN 1-2 0 SAD_FN 8, %1, 7, %2 @@ -222,7 +224,7 @@ SAD8XN 16, 1 ; sad8x16_avg_sse2 SAD8XN 8, 1 ; sad8x8_avg_sse2 SAD8XN 4, 1 ; sad8x4_avg_sse2 -; unsigned int vp9_sad4x{4, 8}_sse(uint8_t *src, int src_stride, +; unsigned int vpx_sad4x{4, 8}_sse(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro SAD4XN 1-2 0 SAD_FN 4, %1, 7, %2 diff --git a/media/libvpx/vp9/encoder/x86/vp9_sad_sse3.asm b/media/libvpx/vpx_dsp/x86/sad_sse3.asm similarity index 94% rename from media/libvpx/vp9/encoder/x86/vp9_sad_sse3.asm rename to media/libvpx/vpx_dsp/x86/sad_sse3.asm index 2b90a5d5478..18279bdb9de 100644 --- 
a/media/libvpx/vp9/encoder/x86/vp9_sad_sse3.asm +++ b/media/libvpx/vpx_dsp/x86/sad_sse3.asm @@ -19,7 +19,6 @@ %define end_ptr rcx %define ret_var rbx %define result_ptr arg(4) - %define max_err arg(4) %define height dword ptr arg(4) push rbp mov rbp, rsp @@ -42,7 +41,6 @@ %define end_ptr r10 %define ret_var r11 %define result_ptr [rsp+xmm_stack_space+8+4*8] - %define max_err [rsp+xmm_stack_space+8+4*8] %define height dword ptr [rsp+xmm_stack_space+8+4*8] %else %define src_ptr rdi @@ -52,7 +50,6 @@ %define end_ptr r9 %define ret_var r10 %define result_ptr r8 - %define max_err r8 %define height r8 %endif %endif @@ -67,7 +64,6 @@ %define end_ptr %define ret_var %define result_ptr - %define max_err %define height %if ABI_IS_32BIT @@ -169,14 +165,14 @@ paddw mm7, mm3 %endmacro -;void int vp9_sad16x16x3_sse3( +;void int vpx_sad16x16x3_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp9_sad16x16x3_sse3) PRIVATE -sym(vp9_sad16x16x3_sse3): +global sym(vpx_sad16x16x3_sse3) PRIVATE +sym(vpx_sad16x16x3_sse3): STACK_FRAME_CREATE_X3 @@ -211,14 +207,14 @@ sym(vp9_sad16x16x3_sse3): STACK_FRAME_DESTROY_X3 -;void int vp9_sad16x8x3_sse3( +;void int vpx_sad16x8x3_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp9_sad16x8x3_sse3) PRIVATE -sym(vp9_sad16x8x3_sse3): +global sym(vpx_sad16x8x3_sse3) PRIVATE +sym(vpx_sad16x8x3_sse3): STACK_FRAME_CREATE_X3 @@ -249,14 +245,14 @@ sym(vp9_sad16x8x3_sse3): STACK_FRAME_DESTROY_X3 -;void int vp9_sad8x16x3_sse3( +;void int vpx_sad8x16x3_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp9_sad8x16x3_sse3) PRIVATE -sym(vp9_sad8x16x3_sse3): +global sym(vpx_sad8x16x3_sse3) PRIVATE +sym(vpx_sad8x16x3_sse3): STACK_FRAME_CREATE_X3 @@ -278,14 +274,14 @@ sym(vp9_sad8x16x3_sse3): STACK_FRAME_DESTROY_X3 -;void int vp9_sad8x8x3_sse3( +;void 
int vpx_sad8x8x3_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp9_sad8x8x3_sse3) PRIVATE -sym(vp9_sad8x8x3_sse3): +global sym(vpx_sad8x8x3_sse3) PRIVATE +sym(vpx_sad8x8x3_sse3): STACK_FRAME_CREATE_X3 @@ -303,14 +299,14 @@ sym(vp9_sad8x8x3_sse3): STACK_FRAME_DESTROY_X3 -;void int vp9_sad4x4x3_sse3( +;void int vpx_sad4x4x3_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp9_sad4x4x3_sse3) PRIVATE -sym(vp9_sad4x4x3_sse3): +global sym(vpx_sad4x4x3_sse3) PRIVATE +sym(vpx_sad4x4x3_sse3): STACK_FRAME_CREATE_X3 diff --git a/media/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm b/media/libvpx/vpx_dsp/x86/sad_sse4.asm similarity index 95% rename from media/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm rename to media/libvpx/vpx_dsp/x86/sad_sse4.asm index faf1768a983..bc674479715 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm +++ b/media/libvpx/vpx_dsp/x86/sad_sse4.asm @@ -165,14 +165,14 @@ movdqa [rdi + 16], xmm2 %endmacro -;void vp9_sad16x16x8_sse4( +;void vpx_sad16x16x8_sse4_1( ; const unsigned char *src_ptr, ; int src_stride, ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array); -global sym(vp9_sad16x16x8_sse4) PRIVATE -sym(vp9_sad16x16x8_sse4): +global sym(vpx_sad16x16x8_sse4_1) PRIVATE +sym(vpx_sad16x16x8_sse4_1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 @@ -205,15 +205,15 @@ sym(vp9_sad16x16x8_sse4): ret -;void vp9_sad16x8x8_sse4( +;void vpx_sad16x8x8_sse4_1( ; const unsigned char *src_ptr, ; int src_stride, ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array ;); -global sym(vp9_sad16x8x8_sse4) PRIVATE -sym(vp9_sad16x8x8_sse4): +global sym(vpx_sad16x8x8_sse4_1) PRIVATE +sym(vpx_sad16x8x8_sse4_1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 @@ -242,15 +242,15 @@ sym(vp9_sad16x8x8_sse4): ret -;void vp9_sad8x8x8_sse4( +;void vpx_sad8x8x8_sse4_1( ; const unsigned char 
*src_ptr, ; int src_stride, ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array ;); -global sym(vp9_sad8x8x8_sse4) PRIVATE -sym(vp9_sad8x8x8_sse4): +global sym(vpx_sad8x8x8_sse4_1) PRIVATE +sym(vpx_sad8x8x8_sse4_1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 @@ -279,15 +279,15 @@ sym(vp9_sad8x8x8_sse4): ret -;void vp9_sad8x16x8_sse4( +;void vpx_sad8x16x8_sse4_1( ; const unsigned char *src_ptr, ; int src_stride, ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array ;); -global sym(vp9_sad8x16x8_sse4) PRIVATE -sym(vp9_sad8x16x8_sse4): +global sym(vpx_sad8x16x8_sse4_1) PRIVATE +sym(vpx_sad8x16x8_sse4_1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 @@ -320,15 +320,15 @@ sym(vp9_sad8x16x8_sse4): ret -;void vp9_sad4x4x8_c( +;void vpx_sad4x4x8_sse4_1( ; const unsigned char *src_ptr, ; int src_stride, ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array ;); -global sym(vp9_sad4x4x8_sse4) PRIVATE -sym(vp9_sad4x4x8_sse4): +global sym(vpx_sad4x4x8_sse4_1) PRIVATE +sym(vpx_sad4x4x8_sse4_1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 diff --git a/media/libvpx/vp8/common/x86/sad_ssse3.asm b/media/libvpx/vpx_dsp/x86/sad_ssse3.asm similarity index 64% rename from media/libvpx/vp8/common/x86/sad_ssse3.asm rename to media/libvpx/vpx_dsp/x86/sad_ssse3.asm index 278fc0640ed..49f204fa04b 100644 --- a/media/libvpx/vp8/common/x86/sad_ssse3.asm +++ b/media/libvpx/vpx_dsp/x86/sad_ssse3.asm @@ -146,14 +146,14 @@ %endmacro -;void int vp8_sad16x16x3_ssse3( +;void int vpx_sad16x16x3_ssse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad16x16x3_ssse3) PRIVATE -sym(vp8_sad16x16x3_ssse3): +global sym(vpx_sad16x16x3_ssse3) PRIVATE +sym(vpx_sad16x16x3_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 @@ -169,31 +169,31 @@ sym(vp8_sad16x16x3_ssse3): mov rdx, 0xf and rdx, rdi - jmp .vp8_sad16x16x3_ssse3_skiptable 
-.vp8_sad16x16x3_ssse3_jumptable: - dd .vp8_sad16x16x3_ssse3_aligned_by_0 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_1 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_2 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_3 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_4 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_5 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_6 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_7 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_8 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_9 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_10 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_11 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_12 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_13 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_14 - .vp8_sad16x16x3_ssse3_do_jump - dd .vp8_sad16x16x3_ssse3_aligned_by_15 - .vp8_sad16x16x3_ssse3_do_jump -.vp8_sad16x16x3_ssse3_skiptable: + jmp .vpx_sad16x16x3_ssse3_skiptable +.vpx_sad16x16x3_ssse3_jumptable: + dd .vpx_sad16x16x3_ssse3_aligned_by_0 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_1 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_2 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_3 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_4 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_5 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_6 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_7 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_8 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_9 - 
.vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_10 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_11 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_12 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_13 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_14 - .vpx_sad16x16x3_ssse3_do_jump + dd .vpx_sad16x16x3_ssse3_aligned_by_15 - .vpx_sad16x16x3_ssse3_do_jump +.vpx_sad16x16x3_ssse3_skiptable: - call .vp8_sad16x16x3_ssse3_do_jump -.vp8_sad16x16x3_ssse3_do_jump: + call .vpx_sad16x16x3_ssse3_do_jump +.vpx_sad16x16x3_ssse3_do_jump: pop rcx ; get the address of do_jump - mov rax, .vp8_sad16x16x3_ssse3_jumptable - .vp8_sad16x16x3_ssse3_do_jump - add rax, rcx ; get the absolute address of vp8_sad16x16x3_ssse3_jumptable + mov rax, .vpx_sad16x16x3_ssse3_jumptable - .vpx_sad16x16x3_ssse3_do_jump + add rax, rcx ; get the absolute address of vpx_sad16x16x3_ssse3_jumptable movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable add rcx, rax @@ -203,23 +203,23 @@ sym(vp8_sad16x16x3_ssse3): jmp rcx - PROCESS_16X16X3_OFFSET 0, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 1, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 2, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 3, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 4, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 5, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 6, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 7, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 8, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 9, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 10, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 11, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 12, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 13, .vp8_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 14, .vp8_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 0, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 1, .vpx_sad16x16x3_ssse3 + 
PROCESS_16X16X3_OFFSET 2, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 3, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 4, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 5, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 6, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 7, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 8, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 9, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 10, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 11, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 12, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 13, .vpx_sad16x16x3_ssse3 + PROCESS_16X16X3_OFFSET 14, .vpx_sad16x16x3_ssse3 -.vp8_sad16x16x3_ssse3_aligned_by_15: +.vpx_sad16x16x3_ssse3_aligned_by_15: PROCESS_16X2X3 1 PROCESS_16X2X3 0 PROCESS_16X2X3 0 @@ -229,7 +229,7 @@ sym(vp8_sad16x16x3_ssse3): PROCESS_16X2X3 0 PROCESS_16X2X3 0 -.vp8_sad16x16x3_ssse3_store_off: +.vpx_sad16x16x3_ssse3_store_off: mov rdi, arg(4) ;Results movq xmm0, xmm5 @@ -259,14 +259,14 @@ sym(vp8_sad16x16x3_ssse3): pop rbp ret -;void int vp8_sad16x8x3_ssse3( +;void int vpx_sad16x8x3_ssse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad16x8x3_ssse3) PRIVATE -sym(vp8_sad16x8x3_ssse3): +global sym(vpx_sad16x8x3_ssse3) PRIVATE +sym(vpx_sad16x8x3_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 @@ -282,31 +282,31 @@ sym(vp8_sad16x8x3_ssse3): mov rdx, 0xf and rdx, rdi - jmp .vp8_sad16x8x3_ssse3_skiptable -.vp8_sad16x8x3_ssse3_jumptable: - dd .vp8_sad16x8x3_ssse3_aligned_by_0 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_1 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_2 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_3 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_4 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_5 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_6 - 
.vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_7 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_8 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_9 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_10 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_11 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_12 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_13 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_14 - .vp8_sad16x8x3_ssse3_do_jump - dd .vp8_sad16x8x3_ssse3_aligned_by_15 - .vp8_sad16x8x3_ssse3_do_jump -.vp8_sad16x8x3_ssse3_skiptable: + jmp .vpx_sad16x8x3_ssse3_skiptable +.vpx_sad16x8x3_ssse3_jumptable: + dd .vpx_sad16x8x3_ssse3_aligned_by_0 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_1 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_2 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_3 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_4 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_5 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_6 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_7 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_8 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_9 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_10 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_11 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_12 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_13 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_14 - .vpx_sad16x8x3_ssse3_do_jump + dd .vpx_sad16x8x3_ssse3_aligned_by_15 - .vpx_sad16x8x3_ssse3_do_jump +.vpx_sad16x8x3_ssse3_skiptable: - call .vp8_sad16x8x3_ssse3_do_jump -.vp8_sad16x8x3_ssse3_do_jump: + 
call .vpx_sad16x8x3_ssse3_do_jump +.vpx_sad16x8x3_ssse3_do_jump: pop rcx ; get the address of do_jump - mov rax, .vp8_sad16x8x3_ssse3_jumptable - .vp8_sad16x8x3_ssse3_do_jump - add rax, rcx ; get the absolute address of vp8_sad16x8x3_ssse3_jumptable + mov rax, .vpx_sad16x8x3_ssse3_jumptable - .vpx_sad16x8x3_ssse3_do_jump + add rax, rcx ; get the absolute address of vpx_sad16x8x3_ssse3_jumptable movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable add rcx, rax @@ -316,30 +316,30 @@ sym(vp8_sad16x8x3_ssse3): jmp rcx - PROCESS_16X8X3_OFFSET 0, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 1, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 2, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 3, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 4, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 5, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 6, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 7, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 8, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 9, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 10, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 11, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 12, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 13, .vp8_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 14, .vp8_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 0, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 1, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 2, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 3, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 4, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 5, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 6, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 7, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 8, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 9, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 10, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 11, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 12, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 13, .vpx_sad16x8x3_ssse3 + PROCESS_16X8X3_OFFSET 14, .vpx_sad16x8x3_ssse3 
-.vp8_sad16x8x3_ssse3_aligned_by_15: +.vpx_sad16x8x3_ssse3_aligned_by_15: PROCESS_16X2X3 1 PROCESS_16X2X3 0 PROCESS_16X2X3 0 PROCESS_16X2X3 0 -.vp8_sad16x8x3_ssse3_store_off: +.vpx_sad16x8x3_ssse3_store_off: mov rdi, arg(4) ;Results movq xmm0, xmm5 diff --git a/media/libvpx/vpx_dsp/x86/variance_avx2.c b/media/libvpx/vpx_dsp/x86/variance_avx2.c new file mode 100644 index 00000000000..82cef4af0af --- /dev/null +++ b/media/libvpx/vpx_dsp/x86/variance_avx2.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_dsp_rtcd.h" + +typedef void (*get_var_avx2)(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum); + +void vpx_get32x32var_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum); + +static void variance_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + int w, int h, unsigned int *sse, int *sum, + get_var_avx2 var_fn, int block_size) { + int i, j; + + *sse = 0; + *sum = 0; + + for (i = 0; i < h; i += 16) { + for (j = 0; j < w; j += block_size) { + unsigned int sse0; + int sum0; + var_fn(&src[src_stride * i + j], src_stride, + &ref[ref_stride * i + j], ref_stride, &sse0, &sum0); + *sse += sse0; + *sum += sum0; + } + } +} + + +unsigned int vpx_variance16x16_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_avx2(src, src_stride, ref, ref_stride, 16, 16, + sse, &sum, vpx_get16x16var_avx2, 16); + return *sse - (((unsigned int)sum * sum) >> 8); +} + +unsigned int 
vpx_mse16x16_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + vpx_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum); + return *sse; +} + +unsigned int vpx_variance32x16_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_avx2(src, src_stride, ref, ref_stride, 32, 16, + sse, &sum, vpx_get32x32var_avx2, 32); + return *sse - (((int64_t)sum * sum) >> 9); +} + +unsigned int vpx_variance32x32_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_avx2(src, src_stride, ref, ref_stride, 32, 32, + sse, &sum, vpx_get32x32var_avx2, 32); + return *sse - (((int64_t)sum * sum) >> 10); +} + +unsigned int vpx_variance64x64_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_avx2(src, src_stride, ref, ref_stride, 64, 64, + sse, &sum, vpx_get32x32var_avx2, 32); + return *sse - (((int64_t)sum * sum) >> 12); +} + +unsigned int vpx_variance64x32_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_avx2(src, src_stride, ref, ref_stride, 64, 32, + sse, &sum, vpx_get32x32var_avx2, 32); + return *sse - (((int64_t)sum * sum) >> 11); +} diff --git a/media/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c b/media/libvpx/vpx_dsp/x86/variance_impl_avx2.c similarity index 98% rename from media/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c rename to media/libvpx/vpx_dsp/x86/variance_impl_avx2.c index f9923280a34..0e40959aa9d 100644 --- a/media/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c +++ b/media/libvpx/vpx_dsp/x86/variance_impl_avx2.c @@ -10,7 +10,9 @@ #include // AVX2 -void vp9_get16x16var_avx2(const unsigned char *src_ptr, +#include "./vpx_dsp_rtcd.h" + +void vpx_get16x16var_avx2(const unsigned char *src_ptr, 
int source_stride, const unsigned char *ref_ptr, int recon_stride, @@ -121,7 +123,7 @@ void vp9_get16x16var_avx2(const unsigned char *src_ptr, } } -void vp9_get32x32var_avx2(const unsigned char *src_ptr, +void vpx_get32x32var_avx2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, diff --git a/media/libvpx/vp8/common/x86/variance_impl_mmx.asm b/media/libvpx/vpx_dsp/x86/variance_impl_mmx.asm similarity index 52% rename from media/libvpx/vp8/common/x86/variance_impl_mmx.asm rename to media/libvpx/vpx_dsp/x86/variance_impl_mmx.asm index 7d5e6810bf0..a8d7d99dbc0 100644 --- a/media/libvpx/vp8/common/x86/variance_impl_mmx.asm +++ b/media/libvpx/vpx_dsp/x86/variance_impl_mmx.asm @@ -11,9 +11,9 @@ %include "vpx_ports/x86_abi_support.asm" -;unsigned int vp8_get_mb_ss_mmx( short *src_ptr ) -global sym(vp8_get_mb_ss_mmx) PRIVATE -sym(vp8_get_mb_ss_mmx): +;unsigned int vpx_get_mb_ss_mmx( short *src_ptr ) +global sym(vpx_get_mb_ss_mmx) PRIVATE +sym(vpx_get_mb_ss_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 @@ -63,7 +63,7 @@ sym(vp8_get_mb_ss_mmx): ret -;unsigned int vp8_get8x8var_mmx +;void vpx_get8x8var_mmx ;( ; unsigned char *src_ptr, ; int source_stride, @@ -72,8 +72,8 @@ sym(vp8_get_mb_ss_mmx): ; unsigned int *SSE, ; int *Sum ;) -global sym(vp8_get8x8var_mmx) PRIVATE -sym(vp8_get8x8var_mmx): +global sym(vpx_get8x8var_mmx) PRIVATE +sym(vpx_get8x8var_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 @@ -310,8 +310,8 @@ sym(vp8_get8x8var_mmx): -;unsigned int -;vp8_get4x4var_mmx +;void +;vpx_get4x4var_mmx ;( ; unsigned char *src_ptr, ; int source_stride, @@ -320,8 +320,8 @@ sym(vp8_get8x8var_mmx): ; unsigned int *SSE, ; int *Sum ;) -global sym(vp8_get4x4var_mmx) PRIVATE -sym(vp8_get4x4var_mmx): +global sym(vpx_get4x4var_mmx) PRIVATE +sym(vpx_get4x4var_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 @@ -422,430 +422,3 @@ sym(vp8_get4x4var_mmx): UNSHADOW_ARGS pop rbp ret - - - -;unsigned int -;vp8_get4x4sse_cs_mmx -;( -; 
unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride -;) -global sym(vp8_get4x4sse_cs_mmx) PRIVATE -sym(vp8_get4x4sse_cs_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - push rbx - ; end prolog - - - pxor mm6, mm6 ; Blank mmx7 - pxor mm7, mm7 ; Blank mmx7 - - mov rax, arg(0) ;[src_ptr] ; Load base addresses - mov rbx, arg(2) ;[ref_ptr] - movsxd rcx, dword ptr arg(1) ;[source_stride] - movsxd rdx, dword ptr arg(3) ;[recon_stride] - ; Row 1 - movd mm0, [rax] ; Copy eight bytes to mm0 - movd mm1, [rbx] ; Copy eight bytes to mm1 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 2 - movd mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 3 - movd mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm1, mm6 - punpcklbw mm0, mm6 ; unpack to higher prrcision - psubsw mm0, mm1 ; A-B (low order) to MM0 - - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 4 - movd mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - pmaddwd mm0, mm0 ; square and accumulate - paddd mm7, mm0 ; accumulate in mm7 - - movq mm0, mm7 ; - psrlq mm7, 32 - - paddd mm0, mm7 
- movq rax, mm0 - - - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -%define mmx_filter_shift 7 - -;void vp8_filter_block2d_bil4x4_var_mmx -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned short *HFilter, -; unsigned short *VFilter, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE -sym(vp8_filter_block2d_bil4x4_var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - - pxor mm6, mm6 ; - pxor mm7, mm7 ; - - mov rax, arg(4) ;HFilter ; - mov rdx, arg(5) ;VFilter ; - - mov rsi, arg(0) ;ref_ptr ; - mov rdi, arg(2) ;src_ptr ; - - mov rcx, 4 ; - pxor mm0, mm0 ; - - movd mm1, [rsi] ; - movd mm3, [rsi+1] ; - - punpcklbw mm1, mm0 ; - pmullw mm1, [rax] ; - - punpcklbw mm3, mm0 ; - pmullw mm3, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - movq mm5, mm1 - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; - add rsi, r8 -%endif - -.filter_block2d_bil4x4_var_mmx_loop: - - movd mm1, [rsi] ; - movd mm3, [rsi+1] ; - - punpcklbw mm1, mm0 ; - pmullw mm1, [rax] ; - - punpcklbw mm3, mm0 ; - pmullw mm3, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - movq mm3, mm5 ; - - movq mm5, mm1 ; - pmullw mm3, [rdx] ; - - pmullw mm1, [rdx+8] ; - paddw mm1, mm3 ; - - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - psraw mm1, mmx_filter_shift ; - - movd mm3, [rdi] ; - punpcklbw mm3, mm0 ; - - psubw mm1, mm3 ; - paddw mm6, mm1 ; - - pmaddwd mm1, mm1 ; - paddd mm7, mm1 ; - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; - add rdi, dword ptr arg(3) ;src_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - movsxd r9, dword ptr arg(3) ;src_pixels_per_line - 
add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .filter_block2d_bil4x4_var_mmx_loop ; - - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rdi, arg(6) ;sum - mov rsi, arg(7) ;sumsquared - - movd dword ptr [rdi], mm2 ; - movd dword ptr [rsi], mm4 ; - - - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - - - -;void vp8_filter_block2d_bil_var_mmx -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; unsigned short *HFilter, -; unsigned short *VFilter, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE -sym(vp8_filter_block2d_bil_var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - pxor mm6, mm6 ; - pxor mm7, mm7 ; - mov rax, arg(5) ;HFilter ; - - mov rdx, arg(6) ;VFilter ; - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - - pxor mm0, mm0 ; - movq mm1, [rsi] ; - - movq mm3, [rsi+1] ; - movq mm2, mm1 ; - - movq mm4, mm3 ; - punpcklbw mm1, mm0 ; - - punpckhbw mm2, mm0 ; - pmullw mm1, [rax] ; - - pmullw mm2, [rax] ; - punpcklbw mm3, mm0 ; - - punpckhbw mm4, mm0 ; - pmullw mm3, [rax+8] ; - - pmullw mm4, [rax+8] ; - paddw mm1, mm3 ; - - paddw mm2, mm4 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - - psraw mm2, mmx_filter_shift ; - movq mm5, mm1 - - packuswb mm5, mm2 ; -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - add rsi, r8 -%endif - -.filter_block2d_bil_var_mmx_loop: - - movq mm1, [rsi] ; - movq mm3, [rsi+1] ; - - movq mm2, 
mm1 ; - movq mm4, mm3 ; - - punpcklbw mm1, mm0 ; - punpckhbw mm2, mm0 ; - - pmullw mm1, [rax] ; - pmullw mm2, [rax] ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - pmullw mm3, [rax+8] ; - pmullw mm4, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm2, mm4 ; - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - psraw mm1, mmx_filter_shift ; - - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - psraw mm2, mmx_filter_shift ; - - movq mm3, mm5 ; - movq mm4, mm5 ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - movq mm5, mm1 ; - packuswb mm5, mm2 ; - - pmullw mm3, [rdx] ; - pmullw mm4, [rdx] ; - - pmullw mm1, [rdx+8] ; - pmullw mm2, [rdx+8] ; - - paddw mm1, mm3 ; - paddw mm2, mm4 ; - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - psraw mm2, mmx_filter_shift ; - - movq mm3, [rdi] ; - movq mm4, mm3 ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - psubw mm1, mm3 ; - psubw mm2, mm4 ; - - paddw mm6, mm1 ; - pmaddwd mm1, mm1 ; - - paddw mm6, mm2 ; - pmaddwd mm2, mm2 ; - - paddd mm7, mm1 ; - paddd mm7, mm2 ; - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; - add rdi, dword ptr arg(3) ;src_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; - movsxd r9, dword ptr arg(3) ;src_pixels_per_line ; - add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .filter_block2d_bil_var_mmx_loop ; - - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rdi, arg(7) ;sum - mov rsi, arg(8) ;sumsquared - - movd dword ptr [rdi], mm2 ; - movd dword ptr [rsi], mm4 ; - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -SECTION_RODATA -;short mmx_bi_rd[4] = { 64, 64, 64, 64}; -align 16 -mmx_bi_rd: - times 4 dw 64 diff --git a/media/libvpx/vpx_dsp/x86/variance_mmx.c 
b/media/libvpx/vpx_dsp/x86/variance_mmx.c new file mode 100644 index 00000000000..99dd741bca5 --- /dev/null +++ b/media/libvpx/vpx_dsp/x86/variance_mmx.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_dsp_rtcd.h" + +extern void vpx_get4x4var_mmx(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse, int *sum); + +unsigned int vpx_variance4x4_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + vpx_get4x4var_mmx(a, a_stride, b, b_stride, &var, &avg); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 4)); +} + +unsigned int vpx_variance8x8_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + vpx_get8x8var_mmx(a, a_stride, b, b_stride, &var, &avg); + *sse = var; + + return (var - (((unsigned int)avg * avg) >> 6)); +} + +unsigned int vpx_mse16x16_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int sse0, sse1, sse2, sse3, var; + int sum0, sum1, sum2, sum3; + + vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); + vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); + vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, + b + 8 * b_stride, b_stride, &sse2, &sum2); + vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride, + b + 8 * b_stride + 8, b_stride, &sse3, &sum3); + + var = sse0 + sse1 + sse2 + sse3; + *sse = var; + return var; +} + +unsigned int vpx_variance16x16_mmx(const unsigned 
char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int sse0, sse1, sse2, sse3, var; + int sum0, sum1, sum2, sum3, avg; + + vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); + vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); + vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, + b + 8 * b_stride, b_stride, &sse2, &sum2); + vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride, + b + 8 * b_stride + 8, b_stride, &sse3, &sum3); + + var = sse0 + sse1 + sse2 + sse3; + avg = sum0 + sum1 + sum2 + sum3; + *sse = var; + return (var - (((unsigned int)avg * avg) >> 8)); +} + +unsigned int vpx_variance16x8_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int sse0, sse1, var; + int sum0, sum1, avg; + + vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); + vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); + + var = sse0 + sse1; + avg = sum0 + sum1; + *sse = var; + return (var - (((unsigned int)avg * avg) >> 7)); +} + +unsigned int vpx_variance8x16_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int sse0, sse1, var; + int sum0, sum1, avg; + + vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); + vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, + b + 8 * b_stride, b_stride, &sse1, &sum1); + + var = sse0 + sse1; + avg = sum0 + sum1; + *sse = var; + + return (var - (((unsigned int)avg * avg) >> 7)); +} diff --git a/media/libvpx/vpx_dsp/x86/variance_sse2.c b/media/libvpx/vpx_dsp/x86/variance_sse2.c new file mode 100644 index 00000000000..6256bc53621 --- /dev/null +++ b/media/libvpx/vpx_dsp/x86/variance_sse2.c @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include // SSE2 + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#include "vpx_ports/mem.h" + +typedef void (*getNxMvar_fn_t) (const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse, int *sum); + +unsigned int vpx_get_mb_ss_sse2(const int16_t *src) { + __m128i vsum = _mm_setzero_si128(); + int i; + + for (i = 0; i < 32; ++i) { + const __m128i v = _mm_loadu_si128((const __m128i *)src); + vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); + src += 8; + } + + vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); + return _mm_cvtsi128_si32(vsum); +} + +#define READ64(p, stride, i) \ + _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \ + _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride))) + +static void get4x4var_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum) { + const __m128i zero = _mm_setzero_si128(); + const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero); + const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero); + const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero); + const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero); + const __m128i diff0 = _mm_sub_epi16(src0, ref0); + const __m128i diff1 = _mm_sub_epi16(src1, ref1); + + // sum + __m128i vsum = _mm_add_epi16(diff0, diff1); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); + *sum = (int16_t)_mm_extract_epi16(vsum, 0); + + // sse + vsum = _mm_add_epi32(_mm_madd_epi16(diff0, diff0), + _mm_madd_epi16(diff1, diff1)); + 
vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); + *sse = _mm_cvtsi128_si32(vsum); +} + +void vpx_get8x8var_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum) { + const __m128i zero = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); + __m128i vsse = _mm_setzero_si128(); + int i; + + for (i = 0; i < 8; i += 2) { + const __m128i src0 = _mm_unpacklo_epi8(_mm_loadl_epi64( + (const __m128i *)(src + i * src_stride)), zero); + const __m128i ref0 = _mm_unpacklo_epi8(_mm_loadl_epi64( + (const __m128i *)(ref + i * ref_stride)), zero); + const __m128i diff0 = _mm_sub_epi16(src0, ref0); + + const __m128i src1 = _mm_unpacklo_epi8(_mm_loadl_epi64( + (const __m128i *)(src + (i + 1) * src_stride)), zero); + const __m128i ref1 = _mm_unpacklo_epi8(_mm_loadl_epi64( + (const __m128i *)(ref + (i + 1) * ref_stride)), zero); + const __m128i diff1 = _mm_sub_epi16(src1, ref1); + + vsum = _mm_add_epi16(vsum, diff0); + vsum = _mm_add_epi16(vsum, diff1); + vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); + vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); + } + + // sum + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); + *sum = (int16_t)_mm_extract_epi16(vsum, 0); + + // sse + vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); + vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); + *sse = _mm_cvtsi128_si32(vsse); +} + +void vpx_get16x16var_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum) { + const __m128i zero = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); + __m128i vsse = _mm_setzero_si128(); + int i; + + for (i = 0; i < 16; ++i) { + const __m128i s = _mm_loadu_si128((const __m128i *)src); + const __m128i r = _mm_loadu_si128((const __m128i *)ref); + + const 
__m128i src0 = _mm_unpacklo_epi8(s, zero); + const __m128i ref0 = _mm_unpacklo_epi8(r, zero); + const __m128i diff0 = _mm_sub_epi16(src0, ref0); + + const __m128i src1 = _mm_unpackhi_epi8(s, zero); + const __m128i ref1 = _mm_unpackhi_epi8(r, zero); + const __m128i diff1 = _mm_sub_epi16(src1, ref1); + + vsum = _mm_add_epi16(vsum, diff0); + vsum = _mm_add_epi16(vsum, diff1); + vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); + vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); + + src += src_stride; + ref += ref_stride; + } + + // sum + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); + *sum = (int16_t)_mm_extract_epi16(vsum, 0) + + (int16_t)_mm_extract_epi16(vsum, 1); + + // sse + vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); + vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); + *sse = _mm_cvtsi128_si32(vsse); +} + + +static void variance_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + int w, int h, unsigned int *sse, int *sum, + getNxMvar_fn_t var_fn, int block_size) { + int i, j; + + *sse = 0; + *sum = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += block_size) { + unsigned int sse0; + int sum0; + var_fn(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0); + *sse += sse0; + *sum += sum0; + } + } +} + +unsigned int vpx_variance4x4_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + return *sse - (((unsigned int)sum * sum) >> 4); +} + +unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 8, 4, + sse, &sum, get4x4var_sse2, 4); + return *sse - (((unsigned int)sum * sum) >> 5); +} + +unsigned int 
vpx_variance4x8_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 4, 8, + sse, &sum, get4x4var_sse2, 4); + return *sse - (((unsigned int)sum * sum) >> 5); +} + +unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + return *sse - (((unsigned int)sum * sum) >> 6); +} + +unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 16, 8, + sse, &sum, vpx_get8x8var_sse2, 8); + return *sse - (((unsigned int)sum * sum) >> 7); +} + +unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 8, 16, + sse, &sum, vpx_get8x8var_sse2, 8); + return *sse - (((unsigned int)sum * sum) >> 7); +} + +unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + return *sse - (((unsigned int)sum * sum) >> 8); +} + +unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 32, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 10); +} + +unsigned int vpx_variance32x16_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 16, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - 
(((int64_t)sum * sum) >> 9); +} + +unsigned int vpx_variance16x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 16, 32, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 9); +} + +unsigned int vpx_variance64x64_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 64, 64, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 12); +} + +unsigned int vpx_variance64x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 64, 32, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 11); +} + +unsigned int vpx_variance32x64_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 64, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 11); +} + +unsigned int vpx_mse8x8_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + vpx_variance8x8_sse2(src, src_stride, ref, ref_stride, sse); + return *sse; +} + +unsigned int vpx_mse8x16_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + vpx_variance8x16_sse2(src, src_stride, ref, ref_stride, sse); + return *sse; +} + +unsigned int vpx_mse16x8_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + vpx_variance16x8_sse2(src, src_stride, ref, ref_stride, sse); + return *sse; +} + +unsigned int vpx_mse16x16_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + 
vpx_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); + return *sse; +} diff --git a/media/libvpx/vpx_dsp_rtcd_armv7-android-gcc.h b/media/libvpx/vpx_dsp_rtcd_armv7-android-gcc.h new file mode 100644 index 00000000000..f926cf29a59 --- /dev/null +++ b/media/libvpx/vpx_dsp_rtcd_armv7-android-gcc.h @@ -0,0 +1,341 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned 
int vpx_get_mb_ss_c(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_c + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_c + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_c + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_c + +unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_media(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x16_avg vpx_sad16x16_avg_c + +void vpx_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad16x16x3 vpx_sad16x16x3_c + +void vpx_sad16x16x4d_c(const 
uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad16x16x8 vpx_sad16x16x8_c + +unsigned int vpx_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x32 vpx_sad16x32_c + +unsigned int vpx_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x32_avg vpx_sad16x32_avg_c + +void vpx_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x32x4d vpx_sad16x32x4d_c + +unsigned int vpx_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x8_avg vpx_sad16x8_avg_c + +void vpx_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x3 vpx_sad16x8x3_c + +void vpx_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x4d vpx_sad16x8x4d_c + +void vpx_sad16x8x8_c(const uint8_t *src_ptr, 
int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x8 vpx_sad16x8x8_c + +unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad32x16 vpx_sad32x16_c + +unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x16_avg vpx_sad32x16_avg_c + +void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x16x4d vpx_sad32x16x4d_c + +unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x32_avg vpx_sad32x32_avg_c + +void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x3 vpx_sad32x32x3_c + +void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x8 vpx_sad32x32x8_c + +unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int 
src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad32x64 vpx_sad32x64_c + +unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x64_avg vpx_sad32x64_avg_c + +void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x64x4d vpx_sad32x64x4d_c + +unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad4x4)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x4_avg vpx_sad4x4_avg_c + +void vpx_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x3 vpx_sad4x4x3_c + +void vpx_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x4d vpx_sad4x4x4d_c + +void vpx_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x8 vpx_sad4x4x8_c + +unsigned int vpx_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad4x8 vpx_sad4x8_c + +unsigned int vpx_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x8_avg vpx_sad4x8_avg_c + +void vpx_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x4d vpx_sad4x8x4d_c + +void 
vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x8 vpx_sad4x8x8_c + +unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad64x32 vpx_sad64x32_c + +unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad64x32_avg vpx_sad64x32_avg_c + +void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad64x32x4d vpx_sad64x32x4d_c + +unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad64x64_avg vpx_sad64x64_avg_c + +void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x3 vpx_sad64x64x3_c + +void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x8 vpx_sad64x64x8_c + +unsigned int 
vpx_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x16_avg vpx_sad8x16_avg_c + +void vpx_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x3 vpx_sad8x16x3_c + +void vpx_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x4d vpx_sad8x16x4d_c + +void vpx_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x8 vpx_sad8x16x8_c + +unsigned int vpx_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x4 vpx_sad8x4_c + +unsigned int vpx_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x4_avg vpx_sad8x4_avg_c + +void vpx_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x4d vpx_sad8x4x4d_c + +void vpx_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x8 vpx_sad8x4x8_c + +unsigned int vpx_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride); + +unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x8_avg vpx_sad8x8_avg_c + +void vpx_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x3 vpx_sad8x8x3_c + +void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x4d vpx_sad8x8x4d_c + +void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x8 vpx_sad8x8x8_c + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_c + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_c + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_c + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_c + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int 
*sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_c + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +void vpx_dsp_rtcd(void); + +#include "vpx_config.h" + +#ifdef RTCD_C +#include "vpx_ports/arm.h" +static void setup_rtcd_internal(void) +{ + int flags = arm_cpu_caps(); + + (void)flags; + + vpx_get16x16var = vpx_get16x16var_c; + if (flags & HAS_NEON) vpx_get16x16var = vpx_get16x16var_neon; + vpx_get4x4sse_cs = vpx_get4x4sse_cs_c; + if (flags & HAS_NEON) vpx_get4x4sse_cs = vpx_get4x4sse_cs_neon; + vpx_get8x8var = vpx_get8x8var_c; + if (flags & HAS_NEON) vpx_get8x8var = vpx_get8x8var_neon; + vpx_mse16x16 = vpx_mse16x16_media; + if (flags & HAS_NEON) vpx_mse16x16 = vpx_mse16x16_neon; + vpx_sad16x16 = vpx_sad16x16_media; + if (flags & HAS_NEON) vpx_sad16x16 = 
vpx_sad16x16_neon; + vpx_sad16x16x4d = vpx_sad16x16x4d_c; + if (flags & HAS_NEON) vpx_sad16x16x4d = vpx_sad16x16x4d_neon; + vpx_sad16x8 = vpx_sad16x8_c; + if (flags & HAS_NEON) vpx_sad16x8 = vpx_sad16x8_neon; + vpx_sad32x32 = vpx_sad32x32_c; + if (flags & HAS_NEON) vpx_sad32x32 = vpx_sad32x32_neon; + vpx_sad32x32x4d = vpx_sad32x32x4d_c; + if (flags & HAS_NEON) vpx_sad32x32x4d = vpx_sad32x32x4d_neon; + vpx_sad4x4 = vpx_sad4x4_c; + if (flags & HAS_NEON) vpx_sad4x4 = vpx_sad4x4_neon; + vpx_sad64x64 = vpx_sad64x64_c; + if (flags & HAS_NEON) vpx_sad64x64 = vpx_sad64x64_neon; + vpx_sad64x64x4d = vpx_sad64x64x4d_c; + if (flags & HAS_NEON) vpx_sad64x64x4d = vpx_sad64x64x4d_neon; + vpx_sad8x16 = vpx_sad8x16_c; + if (flags & HAS_NEON) vpx_sad8x16 = vpx_sad8x16_neon; + vpx_sad8x8 = vpx_sad8x8_c; + if (flags & HAS_NEON) vpx_sad8x8 = vpx_sad8x8_neon; + vpx_variance16x16 = vpx_variance16x16_media; + if (flags & HAS_NEON) vpx_variance16x16 = vpx_variance16x16_neon; + vpx_variance16x8 = vpx_variance16x8_c; + if (flags & HAS_NEON) vpx_variance16x8 = vpx_variance16x8_neon; + vpx_variance32x32 = vpx_variance32x32_c; + if (flags & HAS_NEON) vpx_variance32x32 = vpx_variance32x32_neon; + vpx_variance32x64 = vpx_variance32x64_c; + if (flags & HAS_NEON) vpx_variance32x64 = vpx_variance32x64_neon; + vpx_variance64x32 = vpx_variance64x32_c; + if (flags & HAS_NEON) vpx_variance64x32 = vpx_variance64x32_neon; + vpx_variance64x64 = vpx_variance64x64_c; + if (flags & HAS_NEON) vpx_variance64x64 = vpx_variance64x64_neon; + vpx_variance8x16 = vpx_variance8x16_c; + if (flags & HAS_NEON) vpx_variance8x16 = vpx_variance8x16_neon; + vpx_variance8x8 = vpx_variance8x8_media; + if (flags & HAS_NEON) vpx_variance8x8 = vpx_variance8x8_neon; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libvpx/vpx_dsp_rtcd_generic-gnu.h b/media/libvpx/vpx_dsp_rtcd_generic-gnu.h new file mode 100644 index 00000000000..f086946da23 --- /dev/null +++ 
b/media/libvpx/vpx_dsp_rtcd_generic-gnu.h @@ -0,0 +1,266 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get16x16var vpx_get16x16var_c + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_c + +unsigned int vpx_get_mb_ss_c(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_c + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x16 vpx_mse16x16_c + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_c + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_c + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_c + +unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x16 vpx_sad16x16_c + +unsigned int vpx_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const 
uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x16_avg vpx_sad16x16_avg_c + +void vpx_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad16x16x3 vpx_sad16x16x3_c + +void vpx_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x16x4d vpx_sad16x16x4d_c + +void vpx_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad16x16x8 vpx_sad16x16x8_c + +unsigned int vpx_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x32 vpx_sad16x32_c + +unsigned int vpx_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x32_avg vpx_sad16x32_avg_c + +void vpx_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x32x4d vpx_sad16x32x4d_c + +unsigned int vpx_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x8 vpx_sad16x8_c + +unsigned int vpx_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x8_avg vpx_sad16x8_avg_c + +void vpx_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x3 vpx_sad16x8x3_c + +void vpx_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x4d vpx_sad16x8x4d_c + +void vpx_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x8 vpx_sad16x8x8_c + +unsigned int 
vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad32x16 vpx_sad32x16_c + +unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x16_avg vpx_sad32x16_avg_c + +void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x16x4d vpx_sad32x16x4d_c + +unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad32x32 vpx_sad32x32_c + +unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x32_avg vpx_sad32x32_avg_c + +void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x3 vpx_sad32x32x3_c + +void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x4d vpx_sad32x32x4d_c + +void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x8 vpx_sad32x32x8_c + +unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad32x64 vpx_sad32x64_c + +unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x64_avg vpx_sad32x64_avg_c + +void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x64x4d vpx_sad32x64x4d_c + +unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define 
vpx_sad4x4 vpx_sad4x4_c + +unsigned int vpx_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x4_avg vpx_sad4x4_avg_c + +void vpx_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x3 vpx_sad4x4x3_c + +void vpx_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x4d vpx_sad4x4x4d_c + +void vpx_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x8 vpx_sad4x4x8_c + +unsigned int vpx_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad4x8 vpx_sad4x8_c + +unsigned int vpx_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x8_avg vpx_sad4x8_avg_c + +void vpx_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x4d vpx_sad4x8x4d_c + +void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x8 vpx_sad4x8x8_c + +unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad64x32 vpx_sad64x32_c + +unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad64x32_avg vpx_sad64x32_avg_c + +void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad64x32x4d vpx_sad64x32x4d_c + +unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); 
+#define vpx_sad64x64 vpx_sad64x64_c + +unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad64x64_avg vpx_sad64x64_avg_c + +void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x3 vpx_sad64x64x3_c + +void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x4d vpx_sad64x64x4d_c + +void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x8 vpx_sad64x64x8_c + +unsigned int vpx_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x16 vpx_sad8x16_c + +unsigned int vpx_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x16_avg vpx_sad8x16_avg_c + +void vpx_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x3 vpx_sad8x16x3_c + +void vpx_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x4d vpx_sad8x16x4d_c + +void vpx_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x8 vpx_sad8x16x8_c + +unsigned int vpx_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x4 vpx_sad8x4_c + +unsigned int vpx_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x4_avg vpx_sad8x4_avg_c + +void vpx_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const 
ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x4d vpx_sad8x4x4d_c + +void vpx_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x8 vpx_sad8x4x8_c + +unsigned int vpx_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x8 vpx_sad8x8_c + +unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x8_avg vpx_sad8x8_avg_c + +void vpx_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x3 vpx_sad8x8x3_c + +void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x4d vpx_sad8x8x4d_c + +void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x8 vpx_sad8x8x8_c + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x16 vpx_variance16x16_c + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_c + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_c + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_c + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x32 
vpx_variance32x32_c + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_c + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_c + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_c + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x32 vpx_variance64x32_c + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x64 vpx_variance64x64_c + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_c + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_c + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_c + +void vpx_dsp_rtcd(void); + +#include "vpx_config.h" + +#ifdef RTCD_C +static void setup_rtcd_internal(void) +{ +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libvpx/vpx_dsp_rtcd_x86-darwin9-gcc.h b/media/libvpx/vpx_dsp_rtcd_x86-darwin9-gcc.h new file mode 100644 index 00000000000..32ee77e25ce --- /dev/null +++ b/media/libvpx/vpx_dsp_rtcd_x86-darwin9-gcc.h @@ -0,0 +1,544 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else 
+#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *); + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); 
+unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad16x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, 
uint32_t *sad_array); + +unsigned int vpx_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad16x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN 
unsigned int (*vpx_sad16x8_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int 
src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); 
+ +void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x3 vpx_sad32x32x3_c + +void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x8 vpx_sad32x32x8_c + +unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x64x4d_c(const uint8_t *src_ptr, 
int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad4x4)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x4_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad4x4_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x4d)(const uint8_t *src_ptr, int src_stride, const 
uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x8_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x8_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad4x8_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x8 vpx_sad4x8x8_c + +unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const 
uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int 
ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x3 vpx_sad64x64x3_c + +void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x8 vpx_sad64x64x8_c + +unsigned int vpx_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN 
unsigned int (*vpx_sad8x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x4)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x4_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int 
(*vpx_sad8x4_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x8 vpx_sad8x4x8_c + +unsigned int vpx_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad8x8_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void 
vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + + vpx_get16x16var = vpx_get16x16var_c; + if (flags & HAS_SSE2) vpx_get16x16var = 
vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_get8x8var = vpx_get8x8var_c; + if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx; + if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2; + vpx_get_mb_ss = vpx_get_mb_ss_c; + if (flags & HAS_MMX) vpx_get_mb_ss = vpx_get_mb_ss_mmx; + if (flags & HAS_SSE2) vpx_get_mb_ss = vpx_get_mb_ss_sse2; + vpx_mse16x16 = vpx_mse16x16_c; + if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx; + if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_mse16x8 = vpx_mse16x8_c; + if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; + vpx_mse8x16 = vpx_mse8x16_c; + if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2; + vpx_mse8x8 = vpx_mse8x8_c; + if (flags & HAS_SSE2) vpx_mse8x8 = vpx_mse8x8_sse2; + vpx_sad16x16 = vpx_sad16x16_c; + if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx; + if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2; + vpx_sad16x16_avg = vpx_sad16x16_avg_c; + if (flags & HAS_SSE2) vpx_sad16x16_avg = vpx_sad16x16_avg_sse2; + vpx_sad16x16x3 = vpx_sad16x16x3_c; + if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; + vpx_sad16x16x4d = vpx_sad16x16x4d_c; + if (flags & HAS_SSE2) vpx_sad16x16x4d = vpx_sad16x16x4d_sse2; + vpx_sad16x16x8 = vpx_sad16x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x16x8 = vpx_sad16x16x8_sse4_1; + vpx_sad16x32 = vpx_sad16x32_c; + if (flags & HAS_SSE2) vpx_sad16x32 = vpx_sad16x32_sse2; + vpx_sad16x32_avg = vpx_sad16x32_avg_c; + if (flags & HAS_SSE2) vpx_sad16x32_avg = vpx_sad16x32_avg_sse2; + vpx_sad16x32x4d = vpx_sad16x32x4d_c; + if (flags & HAS_SSE2) vpx_sad16x32x4d = vpx_sad16x32x4d_sse2; + vpx_sad16x8 = vpx_sad16x8_c; + if (flags & HAS_MMX) vpx_sad16x8 = vpx_sad16x8_mmx; + if (flags & HAS_SSE2) vpx_sad16x8 = vpx_sad16x8_sse2; + vpx_sad16x8_avg = vpx_sad16x8_avg_c; + if (flags & HAS_SSE2) vpx_sad16x8_avg = 
vpx_sad16x8_avg_sse2; + vpx_sad16x8x3 = vpx_sad16x8x3_c; + if (flags & HAS_SSE3) vpx_sad16x8x3 = vpx_sad16x8x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; + vpx_sad16x8x4d = vpx_sad16x8x4d_c; + if (flags & HAS_SSE2) vpx_sad16x8x4d = vpx_sad16x8x4d_sse2; + vpx_sad16x8x8 = vpx_sad16x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad32x16 = vpx_sad32x16_c; + if (flags & HAS_SSE2) vpx_sad32x16 = vpx_sad32x16_sse2; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; + vpx_sad32x16_avg = vpx_sad32x16_avg_c; + if (flags & HAS_SSE2) vpx_sad32x16_avg = vpx_sad32x16_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; + vpx_sad32x16x4d = vpx_sad32x16x4d_c; + if (flags & HAS_SSE2) vpx_sad32x16x4d = vpx_sad32x16x4d_sse2; + vpx_sad32x32 = vpx_sad32x32_c; + if (flags & HAS_SSE2) vpx_sad32x32 = vpx_sad32x32_sse2; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; + vpx_sad32x32_avg = vpx_sad32x32_avg_c; + if (flags & HAS_SSE2) vpx_sad32x32_avg = vpx_sad32x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; + vpx_sad32x32x4d = vpx_sad32x32x4d_c; + if (flags & HAS_SSE2) vpx_sad32x32x4d = vpx_sad32x32x4d_sse2; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; + vpx_sad32x64 = vpx_sad32x64_c; + if (flags & HAS_SSE2) vpx_sad32x64 = vpx_sad32x64_sse2; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; + vpx_sad32x64_avg = vpx_sad32x64_avg_c; + if (flags & HAS_SSE2) vpx_sad32x64_avg = vpx_sad32x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; + vpx_sad32x64x4d = vpx_sad32x64x4d_c; + if (flags & HAS_SSE2) vpx_sad32x64x4d = vpx_sad32x64x4d_sse2; + vpx_sad4x4 = vpx_sad4x4_c; + if (flags & HAS_MMX) vpx_sad4x4 = vpx_sad4x4_mmx; + if (flags & HAS_SSE) vpx_sad4x4 = vpx_sad4x4_sse; + vpx_sad4x4_avg = vpx_sad4x4_avg_c; + if (flags & HAS_SSE) vpx_sad4x4_avg = vpx_sad4x4_avg_sse; + vpx_sad4x4x3 = vpx_sad4x4x3_c; + if (flags & 
HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; + vpx_sad4x4x4d = vpx_sad4x4x4d_c; + if (flags & HAS_SSE) vpx_sad4x4x4d = vpx_sad4x4x4d_sse; + vpx_sad4x4x8 = vpx_sad4x4x8_c; + if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad4x8 = vpx_sad4x8_c; + if (flags & HAS_SSE) vpx_sad4x8 = vpx_sad4x8_sse; + vpx_sad4x8_avg = vpx_sad4x8_avg_c; + if (flags & HAS_SSE) vpx_sad4x8_avg = vpx_sad4x8_avg_sse; + vpx_sad4x8x4d = vpx_sad4x8x4d_c; + if (flags & HAS_SSE) vpx_sad4x8x4d = vpx_sad4x8x4d_sse; + vpx_sad64x32 = vpx_sad64x32_c; + if (flags & HAS_SSE2) vpx_sad64x32 = vpx_sad64x32_sse2; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; + vpx_sad64x32_avg = vpx_sad64x32_avg_c; + if (flags & HAS_SSE2) vpx_sad64x32_avg = vpx_sad64x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; + vpx_sad64x32x4d = vpx_sad64x32x4d_c; + if (flags & HAS_SSE2) vpx_sad64x32x4d = vpx_sad64x32x4d_sse2; + vpx_sad64x64 = vpx_sad64x64_c; + if (flags & HAS_SSE2) vpx_sad64x64 = vpx_sad64x64_sse2; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; + vpx_sad64x64_avg = vpx_sad64x64_avg_c; + if (flags & HAS_SSE2) vpx_sad64x64_avg = vpx_sad64x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; + vpx_sad64x64x4d = vpx_sad64x64x4d_c; + if (flags & HAS_SSE2) vpx_sad64x64x4d = vpx_sad64x64x4d_sse2; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; + vpx_sad8x16 = vpx_sad8x16_c; + if (flags & HAS_MMX) vpx_sad8x16 = vpx_sad8x16_mmx; + if (flags & HAS_SSE2) vpx_sad8x16 = vpx_sad8x16_sse2; + vpx_sad8x16_avg = vpx_sad8x16_avg_c; + if (flags & HAS_SSE2) vpx_sad8x16_avg = vpx_sad8x16_avg_sse2; + vpx_sad8x16x3 = vpx_sad8x16x3_c; + if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; + vpx_sad8x16x4d = vpx_sad8x16x4d_c; + if (flags & HAS_SSE2) vpx_sad8x16x4d = vpx_sad8x16x4d_sse2; + vpx_sad8x16x8 = vpx_sad8x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x16x8 = vpx_sad8x16x8_sse4_1; + vpx_sad8x4 = vpx_sad8x4_c; + if (flags & 
HAS_SSE2) vpx_sad8x4 = vpx_sad8x4_sse2; + vpx_sad8x4_avg = vpx_sad8x4_avg_c; + if (flags & HAS_SSE2) vpx_sad8x4_avg = vpx_sad8x4_avg_sse2; + vpx_sad8x4x4d = vpx_sad8x4x4d_c; + if (flags & HAS_SSE2) vpx_sad8x4x4d = vpx_sad8x4x4d_sse2; + vpx_sad8x8 = vpx_sad8x8_c; + if (flags & HAS_MMX) vpx_sad8x8 = vpx_sad8x8_mmx; + if (flags & HAS_SSE2) vpx_sad8x8 = vpx_sad8x8_sse2; + vpx_sad8x8_avg = vpx_sad8x8_avg_c; + if (flags & HAS_SSE2) vpx_sad8x8_avg = vpx_sad8x8_avg_sse2; + vpx_sad8x8x3 = vpx_sad8x8x3_c; + if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; + vpx_sad8x8x4d = vpx_sad8x8x4d_c; + if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2; + vpx_sad8x8x8 = vpx_sad8x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_c; + if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx; + if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance16x32 = vpx_variance16x32_c; + if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; + vpx_variance16x8 = vpx_variance16x8_c; + if (flags & HAS_MMX) vpx_variance16x8 = vpx_variance16x8_mmx; + if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; + vpx_variance32x16 = vpx_variance32x16_c; + if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_c; + if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance32x64 = vpx_variance32x64_c; + if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; + vpx_variance4x4 = vpx_variance4x4_c; + if (flags & HAS_MMX) vpx_variance4x4 = vpx_variance4x4_mmx; + if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2; + vpx_variance4x8 = vpx_variance4x8_c; + if (flags & HAS_SSE2) vpx_variance4x8 = 
vpx_variance4x8_sse2; + vpx_variance64x32 = vpx_variance64x32_c; + if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_c; + if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; + vpx_variance8x16 = vpx_variance8x16_c; + if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx; + if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2; + vpx_variance8x4 = vpx_variance8x4_c; + if (flags & HAS_SSE2) vpx_variance8x4 = vpx_variance8x4_sse2; + vpx_variance8x8 = vpx_variance8x8_c; + if (flags & HAS_MMX) vpx_variance8x8 = vpx_variance8x8_mmx; + if (flags & HAS_SSE2) vpx_variance8x8 = vpx_variance8x8_sse2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h b/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h new file mode 100644 index 00000000000..5cb89257744 --- /dev/null +++ b/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h @@ -0,0 +1,394 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const 
unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *); + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, 
unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x16_avg vpx_sad16x16_avg_c + +void vpx_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x16x4d vpx_sad16x16x4d_c + +void vpx_sad16x16x8_c(const uint8_t 
*src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x32 vpx_sad16x32_c + +unsigned int vpx_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x32_avg vpx_sad16x32_avg_c + +void vpx_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x32x4d vpx_sad16x32x4d_c + +unsigned int vpx_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x8_avg vpx_sad16x8_avg_c + +void vpx_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const 
uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x4d vpx_sad16x8x4d_c + +void vpx_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad32x16 vpx_sad32x16_c + +unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x16_avg vpx_sad32x16_avg_c + +void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x16x4d vpx_sad32x16x4d_c + +unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad32x32 vpx_sad32x32_c + +unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x32_avg vpx_sad32x32_avg_c + +void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x3 vpx_sad32x32x3_c + +void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x4d vpx_sad32x32x4d_c + +void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x8 vpx_sad32x32x8_c + +unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define 
vpx_sad32x64 vpx_sad32x64_c + +unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x64_avg vpx_sad32x64_avg_c + +void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x64x4d vpx_sad32x64x4d_c + +unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad4x4)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x4_avg vpx_sad4x4_avg_c + +void vpx_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x4d vpx_sad4x4x4d_c + +void vpx_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define 
vpx_sad4x8 vpx_sad4x8_c + +unsigned int vpx_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x8_avg vpx_sad4x8_avg_c + +void vpx_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x4d vpx_sad4x8x4d_c + +void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x8 vpx_sad4x8x8_c + +unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad64x32 vpx_sad64x32_c + +unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad64x32_avg vpx_sad64x32_avg_c + +void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad64x32x4d vpx_sad64x32x4d_c + +unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad64x64 vpx_sad64x64_c + +unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad64x64_avg vpx_sad64x64_avg_c + +void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x3 vpx_sad64x64x3_c + +void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x4d vpx_sad64x64x4d_c + +void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x8 vpx_sad64x64x8_c + +unsigned int vpx_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x16_avg vpx_sad8x16_avg_c + +void vpx_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x4d vpx_sad8x16x4d_c + +void vpx_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x4 vpx_sad8x4_c + +unsigned int vpx_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x4_avg vpx_sad8x4_avg_c + +void vpx_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x4d vpx_sad8x4x4d_c + +void vpx_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int 
ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x8 vpx_sad8x4x8_c + +unsigned int vpx_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x8_avg vpx_sad8x8_avg_c + +void vpx_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x4d vpx_sad8x8x4d_c + +void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int 
(*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int 
vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int 
vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + + 
vpx_get16x16var = vpx_get16x16var_c; + if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2; + vpx_get8x8var = vpx_get8x8var_c; + if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx; + if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2; + vpx_get_mb_ss = vpx_get_mb_ss_c; + if (flags & HAS_MMX) vpx_get_mb_ss = vpx_get_mb_ss_mmx; + if (flags & HAS_SSE2) vpx_get_mb_ss = vpx_get_mb_ss_sse2; + vpx_mse16x16 = vpx_mse16x16_c; + if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx; + if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2; + vpx_mse16x8 = vpx_mse16x8_c; + if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; + vpx_mse8x16 = vpx_mse8x16_c; + if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2; + vpx_mse8x8 = vpx_mse8x8_c; + if (flags & HAS_SSE2) vpx_mse8x8 = vpx_mse8x8_sse2; + vpx_sad16x16 = vpx_sad16x16_c; + if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx; + vpx_sad16x16x3 = vpx_sad16x16x3_c; + if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; + vpx_sad16x16x8 = vpx_sad16x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x16x8 = vpx_sad16x16x8_sse4_1; + vpx_sad16x8 = vpx_sad16x8_c; + if (flags & HAS_MMX) vpx_sad16x8 = vpx_sad16x8_mmx; + vpx_sad16x8x3 = vpx_sad16x8x3_c; + if (flags & HAS_SSE3) vpx_sad16x8x3 = vpx_sad16x8x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; + vpx_sad16x8x8 = vpx_sad16x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad4x4 = vpx_sad4x4_c; + if (flags & HAS_MMX) vpx_sad4x4 = vpx_sad4x4_mmx; + vpx_sad4x4x3 = vpx_sad4x4x3_c; + if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; + vpx_sad4x4x8 = vpx_sad4x4x8_c; + if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad8x16 = vpx_sad8x16_c; + if (flags & HAS_MMX) vpx_sad8x16 = vpx_sad8x16_mmx; + vpx_sad8x16x3 = vpx_sad8x16x3_c; + if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; + vpx_sad8x16x8 = vpx_sad8x16x8_c; + if 
(flags & HAS_SSE4_1) vpx_sad8x16x8 = vpx_sad8x16x8_sse4_1; + vpx_sad8x8 = vpx_sad8x8_c; + if (flags & HAS_MMX) vpx_sad8x8 = vpx_sad8x8_mmx; + vpx_sad8x8x3 = vpx_sad8x8x3_c; + if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; + vpx_sad8x8x8 = vpx_sad8x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_c; + if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx; + if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2; + vpx_variance16x32 = vpx_variance16x32_c; + if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; + vpx_variance16x8 = vpx_variance16x8_c; + if (flags & HAS_MMX) vpx_variance16x8 = vpx_variance16x8_mmx; + if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; + vpx_variance32x16 = vpx_variance32x16_c; + if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; + vpx_variance32x32 = vpx_variance32x32_c; + if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2; + vpx_variance32x64 = vpx_variance32x64_c; + if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; + vpx_variance4x4 = vpx_variance4x4_c; + if (flags & HAS_MMX) vpx_variance4x4 = vpx_variance4x4_mmx; + if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2; + vpx_variance4x8 = vpx_variance4x8_c; + if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2; + vpx_variance64x32 = vpx_variance64x32_c; + if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2; + vpx_variance64x64 = vpx_variance64x64_c; + if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2; + vpx_variance8x16 = vpx_variance8x16_c; + if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx; + if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2; + vpx_variance8x4 = vpx_variance8x4_c; + if (flags & HAS_SSE2) vpx_variance8x4 = vpx_variance8x4_sse2; + vpx_variance8x8 = vpx_variance8x8_c; + if (flags & HAS_MMX) vpx_variance8x8 = vpx_variance8x8_mmx; + if (flags & 
HAS_SSE2) vpx_variance8x8 = vpx_variance8x8_sse2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libvpx/vpx_dsp_rtcd_x86-win32-gcc.h b/media/libvpx/vpx_dsp_rtcd_x86-win32-gcc.h new file mode 100644 index 00000000000..32ee77e25ce --- /dev/null +++ b/media/libvpx/vpx_dsp_rtcd_x86-win32-gcc.h @@ -0,0 +1,544 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *); + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad16x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x4d)(const uint8_t *src_ptr, int src_stride, 
const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad16x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, 
int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad16x8_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); 
+unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x3 vpx_sad32x32x3_c + +void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x8 vpx_sad32x32x8_c + +unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_sse2(const uint8_t 
*src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad4x4)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x4_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad4x4_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x3)(const 
uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x8_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x8_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad4x8_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x8x4d)(const uint8_t 
*src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x8 vpx_sad4x8x8_c + +unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_avx2(const 
uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x3 vpx_sad64x64x3_c + +void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x8 vpx_sad64x64x8_c + +unsigned int vpx_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad8x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int 
ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x4)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x4_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad8x4_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x8 vpx_sad8x4x8_c + +unsigned int vpx_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad8x8_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int 
ref_stride, const uint8_t *second_pred); + +void vpx_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + + vpx_get16x16var = vpx_get16x16var_c; + if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_get8x8var = vpx_get8x8var_c; + if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx; + if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2; + vpx_get_mb_ss = vpx_get_mb_ss_c; + if (flags & HAS_MMX) vpx_get_mb_ss = vpx_get_mb_ss_mmx; + if (flags & HAS_SSE2) vpx_get_mb_ss = vpx_get_mb_ss_sse2; + vpx_mse16x16 = vpx_mse16x16_c; + if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx; + if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_mse16x8 = vpx_mse16x8_c; + if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; + vpx_mse8x16 = vpx_mse8x16_c; + if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2; + vpx_mse8x8 = vpx_mse8x8_c; + if (flags & HAS_SSE2) vpx_mse8x8 = vpx_mse8x8_sse2; + vpx_sad16x16 = vpx_sad16x16_c; + if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx; + if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2; + vpx_sad16x16_avg = vpx_sad16x16_avg_c; + if (flags & HAS_SSE2) vpx_sad16x16_avg = vpx_sad16x16_avg_sse2; + vpx_sad16x16x3 = vpx_sad16x16x3_c; + if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; + vpx_sad16x16x4d = vpx_sad16x16x4d_c; + if (flags & HAS_SSE2) vpx_sad16x16x4d = vpx_sad16x16x4d_sse2; + vpx_sad16x16x8 = vpx_sad16x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x16x8 = vpx_sad16x16x8_sse4_1; + vpx_sad16x32 = vpx_sad16x32_c; + if (flags & HAS_SSE2) vpx_sad16x32 = 
vpx_sad16x32_sse2; + vpx_sad16x32_avg = vpx_sad16x32_avg_c; + if (flags & HAS_SSE2) vpx_sad16x32_avg = vpx_sad16x32_avg_sse2; + vpx_sad16x32x4d = vpx_sad16x32x4d_c; + if (flags & HAS_SSE2) vpx_sad16x32x4d = vpx_sad16x32x4d_sse2; + vpx_sad16x8 = vpx_sad16x8_c; + if (flags & HAS_MMX) vpx_sad16x8 = vpx_sad16x8_mmx; + if (flags & HAS_SSE2) vpx_sad16x8 = vpx_sad16x8_sse2; + vpx_sad16x8_avg = vpx_sad16x8_avg_c; + if (flags & HAS_SSE2) vpx_sad16x8_avg = vpx_sad16x8_avg_sse2; + vpx_sad16x8x3 = vpx_sad16x8x3_c; + if (flags & HAS_SSE3) vpx_sad16x8x3 = vpx_sad16x8x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; + vpx_sad16x8x4d = vpx_sad16x8x4d_c; + if (flags & HAS_SSE2) vpx_sad16x8x4d = vpx_sad16x8x4d_sse2; + vpx_sad16x8x8 = vpx_sad16x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad32x16 = vpx_sad32x16_c; + if (flags & HAS_SSE2) vpx_sad32x16 = vpx_sad32x16_sse2; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; + vpx_sad32x16_avg = vpx_sad32x16_avg_c; + if (flags & HAS_SSE2) vpx_sad32x16_avg = vpx_sad32x16_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; + vpx_sad32x16x4d = vpx_sad32x16x4d_c; + if (flags & HAS_SSE2) vpx_sad32x16x4d = vpx_sad32x16x4d_sse2; + vpx_sad32x32 = vpx_sad32x32_c; + if (flags & HAS_SSE2) vpx_sad32x32 = vpx_sad32x32_sse2; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; + vpx_sad32x32_avg = vpx_sad32x32_avg_c; + if (flags & HAS_SSE2) vpx_sad32x32_avg = vpx_sad32x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; + vpx_sad32x32x4d = vpx_sad32x32x4d_c; + if (flags & HAS_SSE2) vpx_sad32x32x4d = vpx_sad32x32x4d_sse2; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; + vpx_sad32x64 = vpx_sad32x64_c; + if (flags & HAS_SSE2) vpx_sad32x64 = vpx_sad32x64_sse2; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; + vpx_sad32x64_avg = vpx_sad32x64_avg_c; + if (flags & HAS_SSE2) vpx_sad32x64_avg = vpx_sad32x64_avg_sse2; + 
if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; + vpx_sad32x64x4d = vpx_sad32x64x4d_c; + if (flags & HAS_SSE2) vpx_sad32x64x4d = vpx_sad32x64x4d_sse2; + vpx_sad4x4 = vpx_sad4x4_c; + if (flags & HAS_MMX) vpx_sad4x4 = vpx_sad4x4_mmx; + if (flags & HAS_SSE) vpx_sad4x4 = vpx_sad4x4_sse; + vpx_sad4x4_avg = vpx_sad4x4_avg_c; + if (flags & HAS_SSE) vpx_sad4x4_avg = vpx_sad4x4_avg_sse; + vpx_sad4x4x3 = vpx_sad4x4x3_c; + if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; + vpx_sad4x4x4d = vpx_sad4x4x4d_c; + if (flags & HAS_SSE) vpx_sad4x4x4d = vpx_sad4x4x4d_sse; + vpx_sad4x4x8 = vpx_sad4x4x8_c; + if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad4x8 = vpx_sad4x8_c; + if (flags & HAS_SSE) vpx_sad4x8 = vpx_sad4x8_sse; + vpx_sad4x8_avg = vpx_sad4x8_avg_c; + if (flags & HAS_SSE) vpx_sad4x8_avg = vpx_sad4x8_avg_sse; + vpx_sad4x8x4d = vpx_sad4x8x4d_c; + if (flags & HAS_SSE) vpx_sad4x8x4d = vpx_sad4x8x4d_sse; + vpx_sad64x32 = vpx_sad64x32_c; + if (flags & HAS_SSE2) vpx_sad64x32 = vpx_sad64x32_sse2; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; + vpx_sad64x32_avg = vpx_sad64x32_avg_c; + if (flags & HAS_SSE2) vpx_sad64x32_avg = vpx_sad64x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; + vpx_sad64x32x4d = vpx_sad64x32x4d_c; + if (flags & HAS_SSE2) vpx_sad64x32x4d = vpx_sad64x32x4d_sse2; + vpx_sad64x64 = vpx_sad64x64_c; + if (flags & HAS_SSE2) vpx_sad64x64 = vpx_sad64x64_sse2; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; + vpx_sad64x64_avg = vpx_sad64x64_avg_c; + if (flags & HAS_SSE2) vpx_sad64x64_avg = vpx_sad64x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; + vpx_sad64x64x4d = vpx_sad64x64x4d_c; + if (flags & HAS_SSE2) vpx_sad64x64x4d = vpx_sad64x64x4d_sse2; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; + vpx_sad8x16 = vpx_sad8x16_c; + if (flags & HAS_MMX) vpx_sad8x16 = vpx_sad8x16_mmx; + if (flags & HAS_SSE2) vpx_sad8x16 = 
vpx_sad8x16_sse2; + vpx_sad8x16_avg = vpx_sad8x16_avg_c; + if (flags & HAS_SSE2) vpx_sad8x16_avg = vpx_sad8x16_avg_sse2; + vpx_sad8x16x3 = vpx_sad8x16x3_c; + if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; + vpx_sad8x16x4d = vpx_sad8x16x4d_c; + if (flags & HAS_SSE2) vpx_sad8x16x4d = vpx_sad8x16x4d_sse2; + vpx_sad8x16x8 = vpx_sad8x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x16x8 = vpx_sad8x16x8_sse4_1; + vpx_sad8x4 = vpx_sad8x4_c; + if (flags & HAS_SSE2) vpx_sad8x4 = vpx_sad8x4_sse2; + vpx_sad8x4_avg = vpx_sad8x4_avg_c; + if (flags & HAS_SSE2) vpx_sad8x4_avg = vpx_sad8x4_avg_sse2; + vpx_sad8x4x4d = vpx_sad8x4x4d_c; + if (flags & HAS_SSE2) vpx_sad8x4x4d = vpx_sad8x4x4d_sse2; + vpx_sad8x8 = vpx_sad8x8_c; + if (flags & HAS_MMX) vpx_sad8x8 = vpx_sad8x8_mmx; + if (flags & HAS_SSE2) vpx_sad8x8 = vpx_sad8x8_sse2; + vpx_sad8x8_avg = vpx_sad8x8_avg_c; + if (flags & HAS_SSE2) vpx_sad8x8_avg = vpx_sad8x8_avg_sse2; + vpx_sad8x8x3 = vpx_sad8x8x3_c; + if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; + vpx_sad8x8x4d = vpx_sad8x8x4d_c; + if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2; + vpx_sad8x8x8 = vpx_sad8x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_c; + if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx; + if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance16x32 = vpx_variance16x32_c; + if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; + vpx_variance16x8 = vpx_variance16x8_c; + if (flags & HAS_MMX) vpx_variance16x8 = vpx_variance16x8_mmx; + if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; + vpx_variance32x16 = vpx_variance32x16_c; + if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_c; + if (flags & HAS_SSE2) vpx_variance32x32 = 
vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance32x64 = vpx_variance32x64_c; + if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; + vpx_variance4x4 = vpx_variance4x4_c; + if (flags & HAS_MMX) vpx_variance4x4 = vpx_variance4x4_mmx; + if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2; + vpx_variance4x8 = vpx_variance4x8_c; + if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2; + vpx_variance64x32 = vpx_variance64x32_c; + if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_c; + if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; + vpx_variance8x16 = vpx_variance8x16_c; + if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx; + if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2; + vpx_variance8x4 = vpx_variance8x4_c; + if (flags & HAS_SSE2) vpx_variance8x4 = vpx_variance8x4_sse2; + vpx_variance8x8 = vpx_variance8x8_c; + if (flags & HAS_MMX) vpx_variance8x8 = vpx_variance8x8_mmx; + if (flags & HAS_SSE2) vpx_variance8x8 = vpx_variance8x8_sse2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libvpx/vpx_dsp_rtcd_x86-win32-vs12.h b/media/libvpx/vpx_dsp_rtcd_x86-win32-vs12.h new file mode 100644 index 00000000000..32ee77e25ce --- /dev/null +++ b/media/libvpx/vpx_dsp_rtcd_x86-win32-vs12.h @@ -0,0 +1,544 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void 
vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *); + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad16x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x32)(const uint8_t *src_ptr, int 
src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad16x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad16x8_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_sse3(const uint8_t *src_ptr, int 
src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x3 vpx_sad32x32x3_c + +void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void 
vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x8 vpx_sad32x32x8_c + +unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x64x4d)(const uint8_t *src_ptr, int src_stride, const 
uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad4x4)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x4_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad4x4_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t 
*sad_array); +RTCD_EXTERN void (*vpx_sad4x4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x8_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x8_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad4x8_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x8 vpx_sad4x8x8_c + +unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int 
vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x3 vpx_sad64x64x3_c + +void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x8 vpx_sad64x64x8_c + +unsigned int vpx_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad8x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void 
vpx_sad8x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x4)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x4_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad8x4_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void 
vpx_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x8 vpx_sad8x4x8_c + +unsigned int vpx_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad8x8_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN 
void (*vpx_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, 
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + + vpx_get16x16var = vpx_get16x16var_c; + if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_get8x8var = vpx_get8x8var_c; + if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx; + if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2; + vpx_get_mb_ss = vpx_get_mb_ss_c; + if 
(flags & HAS_MMX) vpx_get_mb_ss = vpx_get_mb_ss_mmx; + if (flags & HAS_SSE2) vpx_get_mb_ss = vpx_get_mb_ss_sse2; + vpx_mse16x16 = vpx_mse16x16_c; + if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx; + if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_mse16x8 = vpx_mse16x8_c; + if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; + vpx_mse8x16 = vpx_mse8x16_c; + if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2; + vpx_mse8x8 = vpx_mse8x8_c; + if (flags & HAS_SSE2) vpx_mse8x8 = vpx_mse8x8_sse2; + vpx_sad16x16 = vpx_sad16x16_c; + if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx; + if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2; + vpx_sad16x16_avg = vpx_sad16x16_avg_c; + if (flags & HAS_SSE2) vpx_sad16x16_avg = vpx_sad16x16_avg_sse2; + vpx_sad16x16x3 = vpx_sad16x16x3_c; + if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; + vpx_sad16x16x4d = vpx_sad16x16x4d_c; + if (flags & HAS_SSE2) vpx_sad16x16x4d = vpx_sad16x16x4d_sse2; + vpx_sad16x16x8 = vpx_sad16x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x16x8 = vpx_sad16x16x8_sse4_1; + vpx_sad16x32 = vpx_sad16x32_c; + if (flags & HAS_SSE2) vpx_sad16x32 = vpx_sad16x32_sse2; + vpx_sad16x32_avg = vpx_sad16x32_avg_c; + if (flags & HAS_SSE2) vpx_sad16x32_avg = vpx_sad16x32_avg_sse2; + vpx_sad16x32x4d = vpx_sad16x32x4d_c; + if (flags & HAS_SSE2) vpx_sad16x32x4d = vpx_sad16x32x4d_sse2; + vpx_sad16x8 = vpx_sad16x8_c; + if (flags & HAS_MMX) vpx_sad16x8 = vpx_sad16x8_mmx; + if (flags & HAS_SSE2) vpx_sad16x8 = vpx_sad16x8_sse2; + vpx_sad16x8_avg = vpx_sad16x8_avg_c; + if (flags & HAS_SSE2) vpx_sad16x8_avg = vpx_sad16x8_avg_sse2; + vpx_sad16x8x3 = vpx_sad16x8x3_c; + if (flags & HAS_SSE3) vpx_sad16x8x3 = vpx_sad16x8x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; + vpx_sad16x8x4d = vpx_sad16x8x4d_c; + if (flags & HAS_SSE2) vpx_sad16x8x4d = vpx_sad16x8x4d_sse2; 
+ vpx_sad16x8x8 = vpx_sad16x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad32x16 = vpx_sad32x16_c; + if (flags & HAS_SSE2) vpx_sad32x16 = vpx_sad32x16_sse2; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; + vpx_sad32x16_avg = vpx_sad32x16_avg_c; + if (flags & HAS_SSE2) vpx_sad32x16_avg = vpx_sad32x16_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; + vpx_sad32x16x4d = vpx_sad32x16x4d_c; + if (flags & HAS_SSE2) vpx_sad32x16x4d = vpx_sad32x16x4d_sse2; + vpx_sad32x32 = vpx_sad32x32_c; + if (flags & HAS_SSE2) vpx_sad32x32 = vpx_sad32x32_sse2; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; + vpx_sad32x32_avg = vpx_sad32x32_avg_c; + if (flags & HAS_SSE2) vpx_sad32x32_avg = vpx_sad32x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; + vpx_sad32x32x4d = vpx_sad32x32x4d_c; + if (flags & HAS_SSE2) vpx_sad32x32x4d = vpx_sad32x32x4d_sse2; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; + vpx_sad32x64 = vpx_sad32x64_c; + if (flags & HAS_SSE2) vpx_sad32x64 = vpx_sad32x64_sse2; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; + vpx_sad32x64_avg = vpx_sad32x64_avg_c; + if (flags & HAS_SSE2) vpx_sad32x64_avg = vpx_sad32x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; + vpx_sad32x64x4d = vpx_sad32x64x4d_c; + if (flags & HAS_SSE2) vpx_sad32x64x4d = vpx_sad32x64x4d_sse2; + vpx_sad4x4 = vpx_sad4x4_c; + if (flags & HAS_MMX) vpx_sad4x4 = vpx_sad4x4_mmx; + if (flags & HAS_SSE) vpx_sad4x4 = vpx_sad4x4_sse; + vpx_sad4x4_avg = vpx_sad4x4_avg_c; + if (flags & HAS_SSE) vpx_sad4x4_avg = vpx_sad4x4_avg_sse; + vpx_sad4x4x3 = vpx_sad4x4x3_c; + if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; + vpx_sad4x4x4d = vpx_sad4x4x4d_c; + if (flags & HAS_SSE) vpx_sad4x4x4d = vpx_sad4x4x4d_sse; + vpx_sad4x4x8 = vpx_sad4x4x8_c; + if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad4x8 = vpx_sad4x8_c; + if (flags & HAS_SSE) 
vpx_sad4x8 = vpx_sad4x8_sse; + vpx_sad4x8_avg = vpx_sad4x8_avg_c; + if (flags & HAS_SSE) vpx_sad4x8_avg = vpx_sad4x8_avg_sse; + vpx_sad4x8x4d = vpx_sad4x8x4d_c; + if (flags & HAS_SSE) vpx_sad4x8x4d = vpx_sad4x8x4d_sse; + vpx_sad64x32 = vpx_sad64x32_c; + if (flags & HAS_SSE2) vpx_sad64x32 = vpx_sad64x32_sse2; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; + vpx_sad64x32_avg = vpx_sad64x32_avg_c; + if (flags & HAS_SSE2) vpx_sad64x32_avg = vpx_sad64x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; + vpx_sad64x32x4d = vpx_sad64x32x4d_c; + if (flags & HAS_SSE2) vpx_sad64x32x4d = vpx_sad64x32x4d_sse2; + vpx_sad64x64 = vpx_sad64x64_c; + if (flags & HAS_SSE2) vpx_sad64x64 = vpx_sad64x64_sse2; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; + vpx_sad64x64_avg = vpx_sad64x64_avg_c; + if (flags & HAS_SSE2) vpx_sad64x64_avg = vpx_sad64x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; + vpx_sad64x64x4d = vpx_sad64x64x4d_c; + if (flags & HAS_SSE2) vpx_sad64x64x4d = vpx_sad64x64x4d_sse2; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; + vpx_sad8x16 = vpx_sad8x16_c; + if (flags & HAS_MMX) vpx_sad8x16 = vpx_sad8x16_mmx; + if (flags & HAS_SSE2) vpx_sad8x16 = vpx_sad8x16_sse2; + vpx_sad8x16_avg = vpx_sad8x16_avg_c; + if (flags & HAS_SSE2) vpx_sad8x16_avg = vpx_sad8x16_avg_sse2; + vpx_sad8x16x3 = vpx_sad8x16x3_c; + if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; + vpx_sad8x16x4d = vpx_sad8x16x4d_c; + if (flags & HAS_SSE2) vpx_sad8x16x4d = vpx_sad8x16x4d_sse2; + vpx_sad8x16x8 = vpx_sad8x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x16x8 = vpx_sad8x16x8_sse4_1; + vpx_sad8x4 = vpx_sad8x4_c; + if (flags & HAS_SSE2) vpx_sad8x4 = vpx_sad8x4_sse2; + vpx_sad8x4_avg = vpx_sad8x4_avg_c; + if (flags & HAS_SSE2) vpx_sad8x4_avg = vpx_sad8x4_avg_sse2; + vpx_sad8x4x4d = vpx_sad8x4x4d_c; + if (flags & HAS_SSE2) vpx_sad8x4x4d = vpx_sad8x4x4d_sse2; + vpx_sad8x8 = vpx_sad8x8_c; + if (flags & HAS_MMX) 
vpx_sad8x8 = vpx_sad8x8_mmx; + if (flags & HAS_SSE2) vpx_sad8x8 = vpx_sad8x8_sse2; + vpx_sad8x8_avg = vpx_sad8x8_avg_c; + if (flags & HAS_SSE2) vpx_sad8x8_avg = vpx_sad8x8_avg_sse2; + vpx_sad8x8x3 = vpx_sad8x8x3_c; + if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; + vpx_sad8x8x4d = vpx_sad8x8x4d_c; + if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2; + vpx_sad8x8x8 = vpx_sad8x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_c; + if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx; + if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance16x32 = vpx_variance16x32_c; + if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; + vpx_variance16x8 = vpx_variance16x8_c; + if (flags & HAS_MMX) vpx_variance16x8 = vpx_variance16x8_mmx; + if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; + vpx_variance32x16 = vpx_variance32x16_c; + if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_c; + if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance32x64 = vpx_variance32x64_c; + if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; + vpx_variance4x4 = vpx_variance4x4_c; + if (flags & HAS_MMX) vpx_variance4x4 = vpx_variance4x4_mmx; + if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2; + vpx_variance4x8 = vpx_variance4x8_c; + if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2; + vpx_variance64x32 = vpx_variance64x32_c; + if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_c; + if (flags & HAS_SSE2) vpx_variance64x64 = 
vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; + vpx_variance8x16 = vpx_variance8x16_c; + if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx; + if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2; + vpx_variance8x4 = vpx_variance8x4_c; + if (flags & HAS_SSE2) vpx_variance8x4 = vpx_variance8x4_sse2; + vpx_variance8x8 = vpx_variance8x8_c; + if (flags & HAS_MMX) vpx_variance8x8 = vpx_variance8x8_mmx; + if (flags & HAS_SSE2) vpx_variance8x8 = vpx_variance8x8_sse2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libvpx/vpx_dsp_rtcd_x86_64-darwin9-gcc.h b/media/libvpx/vpx_dsp_rtcd_x86_64-darwin9-gcc.h new file mode 100644 index 00000000000..d93c56eb765 --- /dev/null +++ b/media/libvpx/vpx_dsp_rtcd_x86_64-darwin9-gcc.h @@ -0,0 +1,432 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + 
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_sse2 + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_sse2 + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_sse2 + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_sse2 + +unsigned int vpx_mse8x8_c(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_sse2 + +unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x16 vpx_sad16x16_sse2 + +unsigned int vpx_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x16_avg vpx_sad16x16_avg_sse2 + +void vpx_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x16x4d vpx_sad16x16x4d_sse2 + +void vpx_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x8_sse4_1(const uint8_t *src_ptr, 
int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x32 vpx_sad16x32_sse2 + +unsigned int vpx_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x32_avg vpx_sad16x32_avg_sse2 + +void vpx_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x32x4d vpx_sad16x32x4d_sse2 + +unsigned int vpx_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x8 vpx_sad16x8_sse2 + +unsigned int vpx_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x8_avg vpx_sad16x8_avg_sse2 + +void vpx_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void 
vpx_sad16x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x4d vpx_sad16x8x4d_sse2 + +void vpx_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); 
+RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x16x4d vpx_sad32x16x4d_sse2 + +unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x3 vpx_sad32x32x3_c + +void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void 
vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x8 vpx_sad32x32x8_c + +unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x64x4d vpx_sad32x64x4d_sse2 + +unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_mmx(const uint8_t *src_ptr, int src_stride, 
const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad4x4 vpx_sad4x4_sse + +unsigned int vpx_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x4_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x4_avg vpx_sad4x4_avg_sse + +void vpx_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x4d vpx_sad4x4x4d_sse + +void vpx_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x8_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad4x8 vpx_sad4x8_sse + +unsigned int vpx_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); 
+unsigned int vpx_sad4x8_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x8_avg vpx_sad4x8_avg_sse + +void vpx_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x4d vpx_sad4x8x4d_sse + +void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x8 vpx_sad4x8x8_c + +unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define 
vpx_sad64x32x4d vpx_sad64x32x4d_sse2 + +unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x3 vpx_sad64x64x3_c + +void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x8 vpx_sad64x64x8_c + +unsigned int 
vpx_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x16 vpx_sad8x16_sse2 + +unsigned int vpx_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x16_avg vpx_sad8x16_avg_sse2 + +void vpx_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x4d vpx_sad8x16x4d_sse2 + +void vpx_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int 
ref_stride); +#define vpx_sad8x4 vpx_sad8x4_sse2 + +unsigned int vpx_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x4_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x4_avg vpx_sad8x4_avg_sse2 + +void vpx_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x4d vpx_sad8x4x4d_sse2 + +void vpx_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x8 vpx_sad8x4x8_c + +unsigned int vpx_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x8 vpx_sad8x8_sse2 + +unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x8_avg vpx_sad8x8_avg_sse2 + +void vpx_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, 
const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x4d vpx_sad8x8x4d_sse2 + +void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_sse2 + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_sse2 + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_sse2 + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_sse2 + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_sse2 + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_sse2 
+ +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_sse2 + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_sse2 + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + + vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_sad16x16x3 = vpx_sad16x16x3_c; + if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; + vpx_sad16x16x8 = vpx_sad16x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x16x8 = vpx_sad16x16x8_sse4_1; + vpx_sad16x8x3 = vpx_sad16x8x3_c; + if (flags & HAS_SSE3) vpx_sad16x8x3 = vpx_sad16x8x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; + vpx_sad16x8x8 = vpx_sad16x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad32x16 = vpx_sad32x16_sse2; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; + vpx_sad32x16_avg = vpx_sad32x16_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; + vpx_sad32x32 = vpx_sad32x32_sse2; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; + vpx_sad32x32_avg = 
vpx_sad32x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; + vpx_sad32x32x4d = vpx_sad32x32x4d_sse2; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; + vpx_sad32x64 = vpx_sad32x64_sse2; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; + vpx_sad32x64_avg = vpx_sad32x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; + vpx_sad4x4x3 = vpx_sad4x4x3_c; + if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; + vpx_sad4x4x8 = vpx_sad4x4x8_c; + if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad64x32 = vpx_sad64x32_sse2; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; + vpx_sad64x32_avg = vpx_sad64x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; + vpx_sad64x64 = vpx_sad64x64_sse2; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; + vpx_sad64x64_avg = vpx_sad64x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; + vpx_sad64x64x4d = vpx_sad64x64x4d_sse2; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; + vpx_sad8x16x3 = vpx_sad8x16x3_c; + if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; + vpx_sad8x16x8 = vpx_sad8x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x16x8 = vpx_sad8x16x8_sse4_1; + vpx_sad8x8x3 = vpx_sad8x8x3_c; + if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; + vpx_sad8x8x8 = vpx_sad8x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & 
HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h b/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h new file mode 100644 index 00000000000..227fe0d691b --- /dev/null +++ b/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h @@ -0,0 +1,375 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get16x16var vpx_get16x16var_sse2 + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_sse2 + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_sse2 + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x16 vpx_mse16x16_sse2 + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_sse2 + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_sse2 + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_sse2 + +unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x16 vpx_sad16x16_sse2 + +unsigned int vpx_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x16_avg 
vpx_sad16x16_avg_sse2 + +void vpx_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x16x4d vpx_sad16x16x4d_sse2 + +void vpx_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x32 vpx_sad16x32_sse2 + +unsigned int vpx_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x32_avg vpx_sad16x32_avg_sse2 + +void vpx_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); 
+void vpx_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x32x4d vpx_sad16x32x4d_sse2 + +unsigned int vpx_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x8 vpx_sad16x8_sse2 + +unsigned int vpx_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x8_avg vpx_sad16x8_avg_sse2 + +void vpx_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x4d vpx_sad16x8x4d_sse2 + +void vpx_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void 
(*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad32x16 vpx_sad32x16_sse2 + +unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x16_avg vpx_sad32x16_avg_sse2 + +void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x16x4d vpx_sad32x16x4d_sse2 + +unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad32x32 vpx_sad32x32_sse2 + +unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x32_avg vpx_sad32x32_avg_sse2 + +void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x3 vpx_sad32x32x3_c + +void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, 
int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x4d vpx_sad32x32x4d_sse2 + +void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x8 vpx_sad32x32x8_c + +unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad32x64 vpx_sad32x64_sse2 + +unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad32x64_avg vpx_sad32x64_avg_sse2 + +void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x64x4d vpx_sad32x64x4d_sse2 + +unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad4x4 vpx_sad4x4_sse + +unsigned int vpx_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x4_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x4_avg vpx_sad4x4_avg_sse + +void vpx_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x4d vpx_sad4x4x4d_sse + +void vpx_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x8_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad4x8 vpx_sad4x8_sse + +unsigned int vpx_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x8_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x8_avg vpx_sad4x8_avg_sse + +void vpx_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x4d vpx_sad4x8x4d_sse + +void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int 
ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x8 vpx_sad4x8x8_c + +unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad64x32 vpx_sad64x32_sse2 + +unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad64x32_avg vpx_sad64x32_avg_sse2 + +void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad64x32x4d vpx_sad64x32x4d_sse2 + +unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad64x64 vpx_sad64x64_sse2 + +unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad64x64_avg vpx_sad64x64_avg_sse2 + +void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x3 vpx_sad64x64x3_c + +void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const 
ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x4d vpx_sad64x64x4d_sse2 + +void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x8 vpx_sad64x64x8_c + +unsigned int vpx_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x16 vpx_sad8x16_sse2 + +unsigned int vpx_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x16_avg vpx_sad8x16_avg_sse2 + +void vpx_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x4d vpx_sad8x16x4d_sse2 + +void vpx_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x8)(const uint8_t *src_ptr, int 
src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x4 vpx_sad8x4_sse2 + +unsigned int vpx_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x4_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x4_avg vpx_sad8x4_avg_sse2 + +void vpx_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x4d vpx_sad8x4x4d_sse2 + +void vpx_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x8 vpx_sad8x4x8_c + +unsigned int vpx_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x8 vpx_sad8x8_sse2 + +unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x8_avg vpx_sad8x8_avg_sse2 + +void vpx_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x3_sse3(const uint8_t 
*src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x4d vpx_sad8x8x4d_sse2 + +void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x16 vpx_variance16x16_sse2 + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_sse2 + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); 
+unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_sse2 + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_sse2 + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x32 vpx_variance32x32_sse2 + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_sse2 + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_sse2 + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_sse2 + +unsigned int vpx_variance64x32_c(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x32 vpx_variance64x32_sse2 + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x64 vpx_variance64x64_sse2 + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_sse2 + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_sse2 + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_sse2 + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = 
x86_simd_caps(); + + (void)flags; + + vpx_sad16x16x3 = vpx_sad16x16x3_c; + if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; + vpx_sad16x16x8 = vpx_sad16x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x16x8 = vpx_sad16x16x8_sse4_1; + vpx_sad16x8x3 = vpx_sad16x8x3_c; + if (flags & HAS_SSE3) vpx_sad16x8x3 = vpx_sad16x8x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; + vpx_sad16x8x8 = vpx_sad16x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad4x4x3 = vpx_sad4x4x3_c; + if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; + vpx_sad4x4x8 = vpx_sad4x4x8_c; + if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad8x16x3 = vpx_sad8x16x3_c; + if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; + vpx_sad8x16x8 = vpx_sad8x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x16x8 = vpx_sad8x16x8_sse4_1; + vpx_sad8x8x3 = vpx_sad8x8x3_c; + if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; + vpx_sad8x8x8 = vpx_sad8x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libvpx/vpx_dsp_rtcd_x86_64-win64-gcc.h b/media/libvpx/vpx_dsp_rtcd_x86_64-win64-gcc.h new file mode 100644 index 00000000000..d93c56eb765 --- /dev/null +++ b/media/libvpx/vpx_dsp_rtcd_x86_64-win64-gcc.h @@ -0,0 +1,432 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void 
vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_sse2 + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_sse2 + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_sse2 + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_sse2 + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_sse2 + +unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x16 vpx_sad16x16_sse2 + +unsigned int vpx_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x16_avg vpx_sad16x16_avg_sse2 + +void vpx_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void 
(*vpx_sad16x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x16x4d vpx_sad16x16x4d_sse2 + +void vpx_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x32 vpx_sad16x32_sse2 + +unsigned int vpx_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x32_avg vpx_sad16x32_avg_sse2 + +void vpx_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x32x4d vpx_sad16x32x4d_sse2 + +unsigned int vpx_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int 
vpx_sad16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x8 vpx_sad16x8_sse2 + +unsigned int vpx_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x8_avg vpx_sad16x8_avg_sse2 + +void vpx_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x4d vpx_sad16x8x4d_sse2 + +void vpx_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const 
uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x16x4d vpx_sad32x16x4d_sse2 + +unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int 
(*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x3 vpx_sad32x32x3_c + +void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x8 vpx_sad32x32x8_c + +unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int 
src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x64x4d vpx_sad32x64x4d_sse2 + +unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad4x4 vpx_sad4x4_sse + +unsigned int vpx_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x4_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x4_avg vpx_sad4x4_avg_sse + +void vpx_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x4d vpx_sad4x4x4d_sse + +void vpx_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x8_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad4x8 vpx_sad4x8_sse + +unsigned int vpx_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x8_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x8_avg vpx_sad4x8_avg_sse + +void vpx_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x4d vpx_sad4x8x4d_sse + +void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x8 vpx_sad4x8x8_c + +unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned 
int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad64x32x4d vpx_sad64x32x4d_sse2 + +unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x3 vpx_sad64x64x3_c + +void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_sse2(const 
uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x8 vpx_sad64x64x8_c + +unsigned int vpx_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x16 vpx_sad8x16_sse2 + +unsigned int vpx_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x16_avg vpx_sad8x16_avg_sse2 + +void vpx_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x4d vpx_sad8x16x4d_sse2 + +void 
vpx_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x4 vpx_sad8x4_sse2 + +unsigned int vpx_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x4_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x4_avg vpx_sad8x4_avg_sse2 + +void vpx_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x4d vpx_sad8x4x4d_sse2 + +void vpx_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x8 vpx_sad8x4x8_c + +unsigned int vpx_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x8 vpx_sad8x8_sse2 + +unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x8_avg_sse2(const 
uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x8_avg vpx_sad8x8_avg_sse2 + +void vpx_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x4d vpx_sad8x8x4d_sse2 + +void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_sse2 + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_sse2 + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, 
unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_sse2 + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_sse2 + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_sse2 + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned 
int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_sse2 + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_sse2 + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_sse2 + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + + vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_sad16x16x3 = vpx_sad16x16x3_c; + if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; + vpx_sad16x16x8 = vpx_sad16x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x16x8 = 
vpx_sad16x16x8_sse4_1; + vpx_sad16x8x3 = vpx_sad16x8x3_c; + if (flags & HAS_SSE3) vpx_sad16x8x3 = vpx_sad16x8x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; + vpx_sad16x8x8 = vpx_sad16x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad32x16 = vpx_sad32x16_sse2; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; + vpx_sad32x16_avg = vpx_sad32x16_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; + vpx_sad32x32 = vpx_sad32x32_sse2; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; + vpx_sad32x32_avg = vpx_sad32x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; + vpx_sad32x32x4d = vpx_sad32x32x4d_sse2; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; + vpx_sad32x64 = vpx_sad32x64_sse2; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; + vpx_sad32x64_avg = vpx_sad32x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; + vpx_sad4x4x3 = vpx_sad4x4x3_c; + if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; + vpx_sad4x4x8 = vpx_sad4x4x8_c; + if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad64x32 = vpx_sad64x32_sse2; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; + vpx_sad64x32_avg = vpx_sad64x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; + vpx_sad64x64 = vpx_sad64x64_sse2; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; + vpx_sad64x64_avg = vpx_sad64x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; + vpx_sad64x64x4d = vpx_sad64x64x4d_sse2; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; + vpx_sad8x16x3 = vpx_sad8x16x3_c; + if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; + vpx_sad8x16x8 = vpx_sad8x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x16x8 = vpx_sad8x16x8_sse4_1; + vpx_sad8x8x3 = vpx_sad8x8x3_c; + if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; + vpx_sad8x8x8 = 
vpx_sad8x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libvpx/vpx_dsp_rtcd_x86_64-win64-vs12.h b/media/libvpx/vpx_dsp_rtcd_x86_64-win64-vs12.h new file mode 100644 index 00000000000..d93c56eb765 --- /dev/null +++ b/media/libvpx/vpx_dsp_rtcd_x86_64-win64-vs12.h @@ -0,0 +1,432 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, 
int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_sse2 + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_sse2 + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_sse2 + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_sse2 + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_sse2 + +unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x16 vpx_sad16x16_sse2 + +unsigned int vpx_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x16_avg vpx_sad16x16_avg_sse2 + +void vpx_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x16x4d vpx_sad16x16x4d_sse2 + +void vpx_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, 
const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x32 vpx_sad16x32_sse2 + +unsigned int vpx_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x32_avg vpx_sad16x32_avg_sse2 + +void vpx_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x32x4d vpx_sad16x32x4d_sse2 + +unsigned int vpx_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad16x8 vpx_sad16x8_sse2 + +unsigned int vpx_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad16x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad16x8_avg vpx_sad16x8_avg_sse2 + +void vpx_sad16x8x3_c(const uint8_t 
*src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad16x8x4d vpx_sad16x8x4d_sse2 + +void vpx_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, 
int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x16x4d vpx_sad32x16x4d_sse2 + +unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x3 vpx_sad32x32x3_c + +void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * 
const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad32x32x8 vpx_sad32x32x8_c + +unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad32x64x4d vpx_sad32x64x4d_sse2 + +unsigned int vpx_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned 
int vpx_sad4x4_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x4_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad4x4 vpx_sad4x4_sse + +unsigned int vpx_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x4_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x4_avg vpx_sad4x4_avg_sse + +void vpx_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x4x4d vpx_sad4x4x4d_sse + +void vpx_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad4x4x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad4x4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad4x8_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad4x8 vpx_sad4x8_sse + +unsigned int vpx_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const 
uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad4x8_avg_sse(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad4x8_avg vpx_sad4x8_avg_sse + +void vpx_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x4d vpx_sad4x8x4d_sse + +void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad4x8x8 vpx_sad4x8x8_c + +unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const 
ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad64x32x4d vpx_sad64x32x4d_sse2 + +unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad64x64x3 vpx_sad64x64x3_c + +void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t 
*sad_array); +#define vpx_sad64x64x8 vpx_sad64x64x8_c + +unsigned int vpx_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x16 vpx_sad8x16_sse2 + +unsigned int vpx_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x16_avg vpx_sad8x16_avg_sse2 + +void vpx_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void vpx_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x16x4d vpx_sad8x16x4d_sse2 + +void vpx_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int 
vpx_sad8x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x4 vpx_sad8x4_sse2 + +unsigned int vpx_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x4_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x4_avg vpx_sad8x4_avg_sse2 + +void vpx_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x4d vpx_sad8x4x4d_sse2 + +void vpx_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vpx_sad8x4x8 vpx_sad8x4x8_c + +unsigned int vpx_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define vpx_sad8x8 vpx_sad8x8_sse2 + +unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad8x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define vpx_sad8x8_avg vpx_sad8x8_avg_sse2 + +void vpx_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, 
uint32_t *sad_array); + +void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define vpx_sad8x8x4d vpx_sad8x8x4d_sse2 + +void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_sse2 + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_sse2 + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_sse2 + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int 
vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_sse2 + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_sse2 + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_sse2 + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_sse2 + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_sse2 + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + + vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_sad16x16x3 = vpx_sad16x16x3_c; + if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; + vpx_sad16x16x8 = vpx_sad16x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x16x8 = vpx_sad16x16x8_sse4_1; + vpx_sad16x8x3 = vpx_sad16x8x3_c; + if (flags & HAS_SSE3) vpx_sad16x8x3 = vpx_sad16x8x3_sse3; + if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; + vpx_sad16x8x8 = vpx_sad16x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad32x16 = vpx_sad32x16_sse2; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; + vpx_sad32x16_avg = vpx_sad32x16_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; + vpx_sad32x32 = vpx_sad32x32_sse2; 
+ if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; + vpx_sad32x32_avg = vpx_sad32x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; + vpx_sad32x32x4d = vpx_sad32x32x4d_sse2; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; + vpx_sad32x64 = vpx_sad32x64_sse2; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; + vpx_sad32x64_avg = vpx_sad32x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; + vpx_sad4x4x3 = vpx_sad4x4x3_c; + if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; + vpx_sad4x4x8 = vpx_sad4x4x8_c; + if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad64x32 = vpx_sad64x32_sse2; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; + vpx_sad64x32_avg = vpx_sad64x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; + vpx_sad64x64 = vpx_sad64x64_sse2; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; + vpx_sad64x64_avg = vpx_sad64x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; + vpx_sad64x64x4d = vpx_sad64x64x4d_sse2; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; + vpx_sad8x16x3 = vpx_sad8x16x3_c; + if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; + vpx_sad8x16x8 = vpx_sad8x16x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x16x8 = vpx_sad8x16x8_sse4_1; + vpx_sad8x8x3 = vpx_sad8x8x3_c; + if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; + vpx_sad8x8x8 = vpx_sad8x8x8_c; + if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = 
vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libvpx/vpx_mem/include/vpx_mem_intrnl.h b/media/libvpx/vpx_mem/include/vpx_mem_intrnl.h index 225a3babfe3..c4dd78550f3 100644 --- a/media/libvpx/vpx_mem/include/vpx_mem_intrnl.h +++ b/media/libvpx/vpx_mem/include/vpx_mem_intrnl.h @@ -13,35 +13,6 @@ #define VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ #include "./vpx_config.h" -#ifndef CONFIG_MEM_MANAGER -# if defined(VXWORKS) -# define CONFIG_MEM_MANAGER 1 /*include heap manager functionality,*/ -/*default: enabled on vxworks*/ -# else -# define CONFIG_MEM_MANAGER 0 /*include heap manager functionality*/ -# endif -#endif /*CONFIG_MEM_MANAGER*/ - -#ifndef CONFIG_MEM_TRACKER -# define CONFIG_MEM_TRACKER 1 /*include xvpx_* calls in the lib*/ -#endif - -#ifndef CONFIG_MEM_CHECKS -# define CONFIG_MEM_CHECKS 0 /*include some basic safety checks in -vpx_memcpy, _memset, and _memmove*/ -#endif - -#ifndef USE_GLOBAL_FUNCTION_POINTERS -# define USE_GLOBAL_FUNCTION_POINTERS 0 /*use function pointers instead of compiled functions.*/ -#endif - -#if CONFIG_MEM_TRACKER -# include "vpx_mem_tracker.h" -# if VPX_MEM_TRACKER_VERSION_CHIEF != 2 || VPX_MEM_TRACKER_VERSION_MAJOR != 5 -# error "vpx_mem requires memory tracker version 2.5 to track memory usage" -# endif -#endif - #define ADDRESS_STORAGE_SIZE sizeof(size_t) #ifndef DEFAULT_ALIGNMENT @@ -54,41 +25,6 @@ than vpx_memalign*/ # endif #endif -#if CONFIG_MEM_TRACKER -# define TRY_BOUNDS_CHECK 1 /*when set to 1 pads each allocation, -integrity can be checked using -vpx_memory_tracker_check_integrity -or on free by defining*/ -/*TRY_BOUNDS_CHECK_ON_FREE*/ -#else -# define TRY_BOUNDS_CHECK 0 -#endif /*CONFIG_MEM_TRACKER*/ - -#if TRY_BOUNDS_CHECK -# define TRY_BOUNDS_CHECK_ON_FREE 0 /*checks mem integrity on every -free, very expensive*/ -# define BOUNDS_CHECK_VALUE 
0xdeadbeef /*value stored before/after ea. -mem addr for bounds checking*/ -# define BOUNDS_CHECK_PAD_SIZE 32 /*size of the padding before and -after ea allocation to be filled -with BOUNDS_CHECK_VALUE. -this should be a multiple of 4*/ -#else -# define BOUNDS_CHECK_VALUE 0 -# define BOUNDS_CHECK_PAD_SIZE 0 -#endif /*TRY_BOUNDS_CHECK*/ - -#ifndef REMOVE_PRINTFS -# define REMOVE_PRINTFS 0 -#endif - -/* Should probably use a vpx_mem logger function. */ -#if REMOVE_PRINTFS -# define _P(x) -#else -# define _P(x) x -#endif - /*returns an addr aligned to the byte boundary specified by align*/ #define align_addr(addr,align) (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align)) diff --git a/media/libvpx/vpx_mem/include/vpx_mem_tracker.h b/media/libvpx/vpx_mem/include/vpx_mem_tracker.h deleted file mode 100644 index 1335e0017b3..00000000000 --- a/media/libvpx/vpx_mem/include/vpx_mem_tracker.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_MEM_INCLUDE_VPX_MEM_TRACKER_H_ -#define VPX_MEM_INCLUDE_VPX_MEM_TRACKER_H_ - -/* vpx_mem_tracker version info */ -#define vpx_mem_tracker_version "2.5.1.1" - -#define VPX_MEM_TRACKER_VERSION_CHIEF 2 -#define VPX_MEM_TRACKER_VERSION_MAJOR 5 -#define VPX_MEM_TRACKER_VERSION_MINOR 1 -#define VPX_MEM_TRACKER_VERSION_PATCH 1 -/* END - vpx_mem_tracker version info */ - -#include - -struct mem_block { - size_t addr; - unsigned int size, - line; - char *file; - struct mem_block *prev, - * next; - - int padded; // This mem_block has padding for integrity checks. 
- // As of right now, this should only be 0 if - // using vpx_mem_alloc to allocate cache memory. - // 2005-01-11 tjf -}; - -#if defined(__cplusplus) -extern "C" { -#endif - - /* - vpx_memory_tracker_init(int padding_size, int pad_value) - padding_size - the size of the padding before and after each mem addr. - Values > 0 indicate that integrity checks can be performed - by inspecting these areas. - pad_value - the initial value within the padding area before and after - each mem addr. - - Initializes the memory tracker interface. Should be called before any - other calls to the memory tracker. - */ - int vpx_memory_tracker_init(int padding_size, int pad_value); - - /* - vpx_memory_tracker_destroy() - Deinitializes the memory tracker interface - */ - void vpx_memory_tracker_destroy(); - - /* - vpx_memory_tracker_add(size_t addr, unsigned int size, - char * file, unsigned int line) - addr - memory address to be added to list - size - size of addr - file - the file addr was referenced from - line - the line in file addr was referenced from - Adds memory address addr, it's size, file and line it came from - to the memory tracker allocation table - */ - void vpx_memory_tracker_add(size_t addr, unsigned int size, - char *file, unsigned int line, - int padded); - - /* - vpx_memory_tracker_add(size_t addr, unsigned int size, char * file, unsigned int line) - addr - memory address to be added to be removed - padded - if 0, disables bounds checking on this memory block even if bounds - checking is enabled. 
(for example, when allocating cache memory, we still want - to check for memory leaks, but we do not waste cache space for bounds check padding) - Removes the specified address from the memory tracker's allocation - table - Return: - 0: on success - -1: if memory allocation table's mutex could not be locked - -2: if the addr was not found in the list - */ - int vpx_memory_tracker_remove(size_t addr); - - /* - vpx_memory_tracker_find(unsigned int addr) - addr - address to be found in the memory tracker's - allocation table - Return: - If found, pointer to the memory block that matches addr - NULL otherwise - */ - struct mem_block *vpx_memory_tracker_find(size_t addr); - - /* - vpx_memory_tracker_dump() - Dumps the current contents of the memory - tracker allocation table - */ - void vpx_memory_tracker_dump(); - - /* - vpx_memory_tracker_check_integrity() - If a padding_size was provided to vpx_memory_tracker_init() - This function will verify that the region before and after each - memory address contains the specified pad_value. Should the check - fail, the filename and line of the check will be printed out. - */ - void vpx_memory_tracker_check_integrity(char *file, unsigned int line); - - /* - vpx_memory_tracker_set_log_type - type - value representing the logging type to use - option - type specific option. This will be interpreted differently - based on the type. - Sets the logging type for the memory tracker. - Values currently supported: - 0: if option is NULL, log to stderr, otherwise interpret option as a - filename and attempt to open it. 
- 1: Use output_debug_string (WIN32 only), option ignored - Return: - 0: on success - -1: if the logging type could not be set, because the value was invalid - or because a file could not be opened - */ - int vpx_memory_tracker_set_log_type(int type, char *option); - - /* - vpx_memory_tracker_set_log_func - userdata - ptr to be passed to the supplied logfunc, can be NULL - logfunc - the logging function to be used to output data from - vpx_memory_track_dump/check_integrity - Sets a logging function to be used by the memory tracker. - Return: - 0: on success - -1: if the logging type could not be set because logfunc was NULL - */ - int vpx_memory_tracker_set_log_func(void *userdata, - void(*logfunc)(void *userdata, - const char *fmt, va_list args)); - - /* Wrappers to standard library functions. */ - typedef void *(* mem_track_malloc_func)(size_t); - typedef void *(* mem_track_calloc_func)(size_t, size_t); - typedef void *(* mem_track_realloc_func)(void *, size_t); - typedef void (* mem_track_free_func)(void *); - typedef void *(* mem_track_memcpy_func)(void *, const void *, size_t); - typedef void *(* mem_track_memset_func)(void *, int, size_t); - typedef void *(* mem_track_memmove_func)(void *, const void *, size_t); - - /* - vpx_memory_tracker_set_functions - - Sets the function pointers for the standard library functions. - - Return: - 0: on success - -1: if the use global function pointers is not set. 
- */ - int vpx_memory_tracker_set_functions(mem_track_malloc_func g_malloc_l -, mem_track_calloc_func g_calloc_l -, mem_track_realloc_func g_realloc_l -, mem_track_free_func g_free_l -, mem_track_memcpy_func g_memcpy_l -, mem_track_memset_func g_memset_l -, mem_track_memmove_func g_memmove_l); - -#if defined(__cplusplus) -} -#endif - -#endif // VPX_MEM_INCLUDE_VPX_MEM_TRACKER_H_ diff --git a/media/libvpx/vpx_mem/vpx_mem.c b/media/libvpx/vpx_mem/vpx_mem.c index da616425c69..b60d7319cc3 100644 --- a/media/libvpx/vpx_mem/vpx_mem.c +++ b/media/libvpx/vpx_mem/vpx_mem.c @@ -18,113 +18,11 @@ #include "include/vpx_mem_intrnl.h" #include "vpx/vpx_integer.h" -#if CONFIG_MEM_TRACKER -#ifndef VPX_NO_GLOBALS -static unsigned long g_alloc_count = 0; -#else -#include "vpx_global_handling.h" -#define g_alloc_count vpxglobalm(vpxmem,g_alloc_count) -#endif -#endif - -#if CONFIG_MEM_MANAGER -# include "heapmm.h" -# include "hmm_intrnl.h" - -# define SHIFT_HMM_ADDR_ALIGN_UNIT 5 -# define TOTAL_MEMORY_TO_ALLOCATE 20971520 /* 20 * 1024 * 1024 */ - -# define MM_DYNAMIC_MEMORY 1 -# if MM_DYNAMIC_MEMORY -static unsigned char *g_p_mng_memory_raw = NULL; -static unsigned char *g_p_mng_memory = NULL; -# else -static unsigned char g_p_mng_memory[TOTAL_MEMORY_TO_ALLOCATE]; -# endif - -static size_t g_mm_memory_size = TOTAL_MEMORY_TO_ALLOCATE; - -static hmm_descriptor hmm_d; -static int g_mng_memory_allocated = 0; - -static int vpx_mm_create_heap_memory(); -static void *vpx_mm_realloc(void *memblk, size_t size); -#endif /*CONFIG_MEM_MANAGER*/ - -#if USE_GLOBAL_FUNCTION_POINTERS -struct GLOBAL_FUNC_POINTERS { - g_malloc_func g_malloc; - g_calloc_func g_calloc; - g_realloc_func g_realloc; - g_free_func g_free; - g_memcpy_func g_memcpy; - g_memset_func g_memset; - g_memmove_func g_memmove; -} *g_func = NULL; - -# define VPX_MALLOC_L g_func->g_malloc -# define VPX_REALLOC_L g_func->g_realloc -# define VPX_FREE_L g_func->g_free -# define VPX_MEMCPY_L g_func->g_memcpy -# define VPX_MEMSET_L 
g_func->g_memset -# define VPX_MEMMOVE_L g_func->g_memmove -#else -# define VPX_MALLOC_L malloc -# define VPX_REALLOC_L realloc -# define VPX_FREE_L free -# define VPX_MEMCPY_L memcpy -# define VPX_MEMSET_L memset -# define VPX_MEMMOVE_L memmove -#endif /* USE_GLOBAL_FUNCTION_POINTERS */ - -unsigned int vpx_mem_get_version() { - unsigned int ver = ((unsigned int)(unsigned char)VPX_MEM_VERSION_CHIEF << 24 | - (unsigned int)(unsigned char)VPX_MEM_VERSION_MAJOR << 16 | - (unsigned int)(unsigned char)VPX_MEM_VERSION_MINOR << 8 | - (unsigned int)(unsigned char)VPX_MEM_VERSION_PATCH); - return ver; -} - -int vpx_mem_set_heap_size(size_t size) { - int ret = -1; - -#if CONFIG_MEM_MANAGER -#if MM_DYNAMIC_MEMORY - - if (!g_mng_memory_allocated && size) { - g_mm_memory_size = size; - ret = 0; - } else - ret = -3; - -#else - ret = -2; -#endif -#else - (void)size; -#endif - - return ret; -} - void *vpx_memalign(size_t align, size_t size) { void *addr, * x = NULL; -#if CONFIG_MEM_MANAGER - int number_aau; - - if (vpx_mm_create_heap_memory() < 0) { - _P(printf("[vpx][mm] ERROR vpx_memalign() Couldn't create memory for Heap.\n");) - } - - number_aau = ((size + align - 1 + ADDRESS_STORAGE_SIZE) >> - SHIFT_HMM_ADDR_ALIGN_UNIT) + 1; - - addr = hmm_alloc(&hmm_d, number_aau); -#else - addr = VPX_MALLOC_L(size + align - 1 + ADDRESS_STORAGE_SIZE); -#endif /*CONFIG_MEM_MANAGER*/ + addr = malloc(size + align - 1 + ADDRESS_STORAGE_SIZE); if (addr) { x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align); @@ -145,7 +43,7 @@ void *vpx_calloc(size_t num, size_t size) { x = vpx_memalign(DEFAULT_ALIGNMENT, num * size); if (x) - VPX_MEMSET_L(x, 0, num * size); + memset(x, 0, num * size); return x; } @@ -171,11 +69,7 @@ void *vpx_realloc(void *memblk, size_t size) { addr = (void *)(((size_t *)memblk)[-1]); memblk = NULL; -#if CONFIG_MEM_MANAGER - new_addr = vpx_mm_realloc(addr, size + align + ADDRESS_STORAGE_SIZE); -#else - new_addr = VPX_REALLOC_L(addr, size + align + 
ADDRESS_STORAGE_SIZE); -#endif + new_addr = realloc(addr, size + align + ADDRESS_STORAGE_SIZE); if (new_addr) { addr = new_addr; @@ -193,280 +87,12 @@ void *vpx_realloc(void *memblk, size_t size) { void vpx_free(void *memblk) { if (memblk) { void *addr = (void *)(((size_t *)memblk)[-1]); -#if CONFIG_MEM_MANAGER - hmm_free(&hmm_d, addr); -#else - VPX_FREE_L(addr); -#endif + free(addr); } } -#if CONFIG_MEM_TRACKER -void *xvpx_memalign(size_t align, size_t size, char *file, int line) { -#if TRY_BOUNDS_CHECK - unsigned char *x_bounds; -#endif - - void *x; - - if (g_alloc_count == 0) { -#if TRY_BOUNDS_CHECK - int i_rv = vpx_memory_tracker_init(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE); -#else - int i_rv = vpx_memory_tracker_init(0, 0); -#endif - - if (i_rv < 0) { - _P(printf("ERROR xvpx_malloc MEM_TRACK_USAGE error vpx_memory_tracker_init().\n");) - } - } - -#if TRY_BOUNDS_CHECK - { - int i; - unsigned int tempme = BOUNDS_CHECK_VALUE; - - x_bounds = vpx_memalign(align, size + (BOUNDS_CHECK_PAD_SIZE * 2)); - - if (x_bounds) { - /*we're aligning the address twice here but to keep things - consistent we want to have the padding come before the stored - address so no matter what free function gets called we will - attempt to free the correct address*/ - x_bounds = (unsigned char *)(((size_t *)x_bounds)[-1]); - x = align_addr(x_bounds + BOUNDS_CHECK_PAD_SIZE + ADDRESS_STORAGE_SIZE, - (int)align); - /* save the actual malloc address */ - ((size_t *)x)[-1] = (size_t)x_bounds; - - for (i = 0; i < BOUNDS_CHECK_PAD_SIZE; i += sizeof(unsigned int)) { - VPX_MEMCPY_L(x_bounds + i, &tempme, sizeof(unsigned int)); - VPX_MEMCPY_L((unsigned char *)x + size + i, - &tempme, sizeof(unsigned int)); - } - } else - x = NULL; - } -#else - x = vpx_memalign(align, size); -#endif /*TRY_BOUNDS_CHECK*/ - - g_alloc_count++; - - vpx_memory_tracker_add((size_t)x, (unsigned int)size, file, line, 1); - - return x; -} - -void *xvpx_malloc(size_t size, char *file, int line) { - return 
xvpx_memalign(DEFAULT_ALIGNMENT, size, file, line); -} - -void *xvpx_calloc(size_t num, size_t size, char *file, int line) { - void *x = xvpx_memalign(DEFAULT_ALIGNMENT, num * size, file, line); - - if (x) - VPX_MEMSET_L(x, 0, num * size); - - return x; -} - -void *xvpx_realloc(void *memblk, size_t size, char *file, int line) { - struct mem_block *p = NULL; - int orig_size = 0, - orig_line = 0; - char *orig_file = NULL; - -#if TRY_BOUNDS_CHECK - unsigned char *x_bounds = memblk ? - (unsigned char *)(((size_t *)memblk)[-1]) : - NULL; -#endif - - void *x; - - if (g_alloc_count == 0) { -#if TRY_BOUNDS_CHECK - - if (!vpx_memory_tracker_init(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE)) -#else - if (!vpx_memory_tracker_init(0, 0)) -#endif - { - _P(printf("ERROR xvpx_malloc MEM_TRACK_USAGE error vpx_memory_tracker_init().\n");) - } - } - - if ((p = vpx_memory_tracker_find((size_t)memblk))) { - orig_size = p->size; - orig_file = p->file; - orig_line = p->line; - } - -#if TRY_BOUNDS_CHECK_ON_FREE - vpx_memory_tracker_check_integrity(file, line); -#endif - - /* have to do this regardless of success, because - * the memory that does get realloc'd may change - * the bounds values of this block - */ - vpx_memory_tracker_remove((size_t)memblk); - -#if TRY_BOUNDS_CHECK - { - int i; - unsigned int tempme = BOUNDS_CHECK_VALUE; - - x_bounds = vpx_realloc(memblk, size + (BOUNDS_CHECK_PAD_SIZE * 2)); - - if (x_bounds) { - x_bounds = (unsigned char *)(((size_t *)x_bounds)[-1]); - x = align_addr(x_bounds + BOUNDS_CHECK_PAD_SIZE + ADDRESS_STORAGE_SIZE, - (int)DEFAULT_ALIGNMENT); - /* save the actual malloc address */ - ((size_t *)x)[-1] = (size_t)x_bounds; - - for (i = 0; i < BOUNDS_CHECK_PAD_SIZE; i += sizeof(unsigned int)) { - VPX_MEMCPY_L(x_bounds + i, &tempme, sizeof(unsigned int)); - VPX_MEMCPY_L((unsigned char *)x + size + i, - &tempme, sizeof(unsigned int)); - } - } else - x = NULL; - } -#else - x = vpx_realloc(memblk, size); -#endif /*TRY_BOUNDS_CHECK*/ - - if (!memblk) 
++g_alloc_count; - - if (x) - vpx_memory_tracker_add((size_t)x, (unsigned int)size, file, line, 1); - else - vpx_memory_tracker_add((size_t)memblk, orig_size, orig_file, orig_line, 1); - - return x; -} - -void xvpx_free(void *p_address, char *file, int line) { -#if TRY_BOUNDS_CHECK - unsigned char *p_bounds_address = (unsigned char *)p_address; - /*p_bounds_address -= BOUNDS_CHECK_PAD_SIZE;*/ -#endif - -#if !TRY_BOUNDS_CHECK_ON_FREE - (void)file; - (void)line; -#endif - - if (p_address) { -#if TRY_BOUNDS_CHECK_ON_FREE - vpx_memory_tracker_check_integrity(file, line); -#endif - - /* if the addr isn't found in the list, assume it was allocated via - * vpx_ calls not xvpx_, therefore it does not contain any padding - */ - if (vpx_memory_tracker_remove((size_t)p_address) == -2) { - p_bounds_address = p_address; - _P(fprintf(stderr, "[vpx_mem][xvpx_free] addr: %p not found in" - " list; freed from file:%s" - " line:%d\n", p_address, file, line)); - } else - --g_alloc_count; - -#if TRY_BOUNDS_CHECK - vpx_free(p_bounds_address); -#else - vpx_free(p_address); -#endif - - if (!g_alloc_count) - vpx_memory_tracker_destroy(); - } -} - -#endif /*CONFIG_MEM_TRACKER*/ - -#if CONFIG_MEM_CHECKS -#if defined(VXWORKS) -#include /*for task_delay()*/ -/* This function is only used to get a stack trace of the player -object so we can se where we are having a problem. 
*/ -static int get_my_tt(int task) { - tt(task); - - return 0; -} - -static void vx_sleep(int msec) { - int ticks_to_sleep = 0; - - if (msec) { - int msec_per_tick = 1000 / sys_clk_rate_get(); - - if (msec < msec_per_tick) - ticks_to_sleep++; - else - ticks_to_sleep = msec / msec_per_tick; - } - - task_delay(ticks_to_sleep); -} -#endif -#endif - -void *vpx_memcpy(void *dest, const void *source, size_t length) { -#if CONFIG_MEM_CHECKS - - if (((int)dest < 0x4000) || ((int)source < 0x4000)) { - _P(printf("WARNING: vpx_memcpy dest:0x%x source:0x%x len:%d\n", (int)dest, (int)source, length);) - -#if defined(VXWORKS) - sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0); - - vx_sleep(10000); -#endif - } - -#endif - - return VPX_MEMCPY_L(dest, source, length); -} - -void *vpx_memset(void *dest, int val, size_t length) { -#if CONFIG_MEM_CHECKS - - if ((int)dest < 0x4000) { - _P(printf("WARNING: vpx_memset dest:0x%x val:%d len:%d\n", (int)dest, val, length);) - -#if defined(VXWORKS) - sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0); - - vx_sleep(10000); -#endif - } - -#endif - - return VPX_MEMSET_L(dest, val, length); -} - #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH void *vpx_memset16(void *dest, int val, size_t length) { -#if CONFIG_MEM_CHECKS - if ((int)dest < 0x4000) { - _P(printf("WARNING: vpx_memset dest:0x%x val:%d len:%d\n", - (int)dest, val, length);) - -#if defined(VXWORKS) - sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0); - - vx_sleep(10000); -#endif - } -#endif int i; void *orig = dest; uint16_t *dest16 = dest; @@ -475,207 +101,3 @@ void *vpx_memset16(void *dest, int val, size_t length) { return orig; } #endif // CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH - -void *vpx_memmove(void *dest, const void *src, size_t count) { -#if CONFIG_MEM_CHECKS - - if (((int)dest < 0x4000) || ((int)src < 0x4000)) { - _P(printf("WARNING: vpx_memmove dest:0x%x src:0x%x count:%d\n", (int)dest, (int)src, count);) - -#if defined(VXWORKS) - sp(get_my_tt, task_id_self(), 0, 0, 0, 
0, 0, 0, 0, 0); - - vx_sleep(10000); -#endif - } - -#endif - - return VPX_MEMMOVE_L(dest, src, count); -} - -#if CONFIG_MEM_MANAGER - -static int vpx_mm_create_heap_memory() { - int i_rv = 0; - - if (!g_mng_memory_allocated) { -#if MM_DYNAMIC_MEMORY - g_p_mng_memory_raw = - (unsigned char *)malloc(g_mm_memory_size + HMM_ADDR_ALIGN_UNIT); - - if (g_p_mng_memory_raw) { - g_p_mng_memory = (unsigned char *)((((unsigned int)g_p_mng_memory_raw) + - HMM_ADDR_ALIGN_UNIT - 1) & - -(int)HMM_ADDR_ALIGN_UNIT); - - _P(printf("[vpx][mm] total memory size:%d g_p_mng_memory_raw:0x%x g_p_mng_memory:0x%x\n" -, g_mm_memory_size + HMM_ADDR_ALIGN_UNIT -, (unsigned int)g_p_mng_memory_raw -, (unsigned int)g_p_mng_memory);) - } else { - _P(printf("[vpx][mm] Couldn't allocate memory:%d for vpx memory manager.\n" -, g_mm_memory_size);) - - i_rv = -1; - } - - if (g_p_mng_memory) -#endif - { - int chunk_size = 0; - - g_mng_memory_allocated = 1; - - hmm_init(&hmm_d); - - chunk_size = g_mm_memory_size >> SHIFT_HMM_ADDR_ALIGN_UNIT; - - chunk_size -= DUMMY_END_BLOCK_BAUS; - - _P(printf("[vpx][mm] memory size:%d for vpx memory manager. 
g_p_mng_memory:0x%x chunk_size:%d\n" -, g_mm_memory_size -, (unsigned int)g_p_mng_memory -, chunk_size);) - - hmm_new_chunk(&hmm_d, (void *)g_p_mng_memory, chunk_size); - } - -#if MM_DYNAMIC_MEMORY - else { - _P(printf("[vpx][mm] Couldn't allocate memory:%d for vpx memory manager.\n" -, g_mm_memory_size);) - - i_rv = -1; - } - -#endif - } - - return i_rv; -} - -static void *vpx_mm_realloc(void *memblk, size_t size) { - void *p_ret = NULL; - - if (vpx_mm_create_heap_memory() < 0) { - _P(printf("[vpx][mm] ERROR vpx_mm_realloc() Couldn't create memory for Heap.\n");) - } else { - int i_rv = 0; - int old_num_aaus; - int new_num_aaus; - - old_num_aaus = hmm_true_size(memblk); - new_num_aaus = (size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1; - - if (old_num_aaus == new_num_aaus) { - p_ret = memblk; - } else { - i_rv = hmm_resize(&hmm_d, memblk, new_num_aaus); - - if (i_rv == 0) { - p_ret = memblk; - } else { - /* Error. Try to malloc and then copy data. */ - void *p_from_malloc; - - new_num_aaus = (size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1; - p_from_malloc = hmm_alloc(&hmm_d, new_num_aaus); - - if (p_from_malloc) { - vpx_memcpy(p_from_malloc, memblk, size); - hmm_free(&hmm_d, memblk); - - p_ret = p_from_malloc; - } - } - } - } - - return p_ret; -} -#endif /*CONFIG_MEM_MANAGER*/ - -#if USE_GLOBAL_FUNCTION_POINTERS -# if CONFIG_MEM_TRACKER -extern int vpx_memory_tracker_set_functions(g_malloc_func g_malloc_l -, g_calloc_func g_calloc_l -, g_realloc_func g_realloc_l -, g_free_func g_free_l -, g_memcpy_func g_memcpy_l -, g_memset_func g_memset_l -, g_memmove_func g_memmove_l); -# endif -#endif /*USE_GLOBAL_FUNCTION_POINTERS*/ -int vpx_mem_set_functions(g_malloc_func g_malloc_l -, g_calloc_func g_calloc_l -, g_realloc_func g_realloc_l -, g_free_func g_free_l -, g_memcpy_func g_memcpy_l -, g_memset_func g_memset_l -, g_memmove_func g_memmove_l) { -#if USE_GLOBAL_FUNCTION_POINTERS - - /* If use global functions is turned on then the - application must set the global functions before - it 
does anything else or vpx_mem will have - unpredictable results. */ - if (!g_func) { - g_func = (struct GLOBAL_FUNC_POINTERS *) - g_malloc_l(sizeof(struct GLOBAL_FUNC_POINTERS)); - - if (!g_func) { - return -1; - } - } - -#if CONFIG_MEM_TRACKER - { - int rv = 0; - rv = vpx_memory_tracker_set_functions(g_malloc_l -, g_calloc_l -, g_realloc_l -, g_free_l -, g_memcpy_l -, g_memset_l -, g_memmove_l); - - if (rv < 0) { - return rv; - } - } -#endif - - g_func->g_malloc = g_malloc_l; - g_func->g_calloc = g_calloc_l; - g_func->g_realloc = g_realloc_l; - g_func->g_free = g_free_l; - g_func->g_memcpy = g_memcpy_l; - g_func->g_memset = g_memset_l; - g_func->g_memmove = g_memmove_l; - - return 0; -#else - (void)g_malloc_l; - (void)g_calloc_l; - (void)g_realloc_l; - (void)g_free_l; - (void)g_memcpy_l; - (void)g_memset_l; - (void)g_memmove_l; - return -1; -#endif -} - -int vpx_mem_unset_functions() { -#if USE_GLOBAL_FUNCTION_POINTERS - - if (g_func) { - g_free_func temp_free = g_func->g_free; - temp_free(g_func); - g_func = NULL; - } - -#endif - return 0; -} diff --git a/media/libvpx/vpx_mem/vpx_mem.h b/media/libvpx/vpx_mem/vpx_mem.h index e2391f49629..a027714a01b 100644 --- a/media/libvpx/vpx_mem/vpx_mem.h +++ b/media/libvpx/vpx_mem/vpx_mem.h @@ -17,27 +17,6 @@ # include #endif -/* vpx_mem version info */ -#define vpx_mem_version "2.2.1.5" - -#define VPX_MEM_VERSION_CHIEF 2 -#define VPX_MEM_VERSION_MAJOR 2 -#define VPX_MEM_VERSION_MINOR 1 -#define VPX_MEM_VERSION_PATCH 5 -/* end - vpx_mem version info */ - -#ifndef VPX_TRACK_MEM_USAGE -# define VPX_TRACK_MEM_USAGE 0 /* enable memory tracking/integrity checks */ -#endif -#ifndef VPX_CHECK_MEM_FUNCTIONS -# define VPX_CHECK_MEM_FUNCTIONS 0 /* enable basic safety checks in _memcpy, -_memset, and _memmove */ -#endif -#ifndef REPLACE_BUILTIN_FUNCTIONS -# define REPLACE_BUILTIN_FUNCTIONS 0 /* replace builtin functions with their -vpx_ equivalents */ -#endif - #include #include @@ -45,125 +24,17 @@ vpx_ equivalents */ extern "C" { 
#endif - /* - vpx_mem_get_version() - provided for runtime version checking. Returns an unsigned int of the form - CHIEF | MAJOR | MINOR | PATCH, where the chief version number is the high - order byte. - */ - unsigned int vpx_mem_get_version(void); - - /* - vpx_mem_set_heap_size(size_t size) - size - size in bytes for the memory manager to allocate for its heap - Sets the memory manager's initial heap size - Return: - 0: on success - -1: if memory manager calls have not been included in the vpx_mem lib - -2: if the memory manager has been compiled to use static memory - -3: if the memory manager has already allocated its heap - */ - int vpx_mem_set_heap_size(size_t size); - void *vpx_memalign(size_t align, size_t size); void *vpx_malloc(size_t size); void *vpx_calloc(size_t num, size_t size); void *vpx_realloc(void *memblk, size_t size); void vpx_free(void *memblk); - void *vpx_memcpy(void *dest, const void *src, size_t length); - void *vpx_memset(void *dest, int val, size_t length); #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH void *vpx_memset16(void *dest, int val, size_t length); #endif - void *vpx_memmove(void *dest, const void *src, size_t count); - /* special memory functions */ - void *vpx_mem_alloc(int id, size_t size, size_t align); - void vpx_mem_free(int id, void *mem, size_t size); - - /* Wrappers to standard library functions. 
*/ - typedef void *(* g_malloc_func)(size_t); - typedef void *(* g_calloc_func)(size_t, size_t); - typedef void *(* g_realloc_func)(void *, size_t); - typedef void (* g_free_func)(void *); - typedef void *(* g_memcpy_func)(void *, const void *, size_t); - typedef void *(* g_memset_func)(void *, int, size_t); - typedef void *(* g_memmove_func)(void *, const void *, size_t); - - int vpx_mem_set_functions(g_malloc_func g_malloc_l -, g_calloc_func g_calloc_l -, g_realloc_func g_realloc_l -, g_free_func g_free_l -, g_memcpy_func g_memcpy_l -, g_memset_func g_memset_l -, g_memmove_func g_memmove_l); - int vpx_mem_unset_functions(void); - - - /* some defines for backward compatibility */ -#define DMEM_GENERAL 0 - -// (*)< - -#if REPLACE_BUILTIN_FUNCTIONS -# ifndef __VPX_MEM_C__ -# define memalign vpx_memalign -# define malloc vpx_malloc -# define calloc vpx_calloc -# define realloc vpx_realloc -# define free vpx_free -# define memcpy vpx_memcpy -# define memmove vpx_memmove -# define memset vpx_memset -# endif -#endif - -#if CONFIG_MEM_TRACKER -#include - /*from vpx_mem/vpx_mem_tracker.c*/ - extern void vpx_memory_tracker_dump(); - extern void vpx_memory_tracker_check_integrity(char *file, unsigned int line); - extern int vpx_memory_tracker_set_log_type(int type, char *option); - extern int vpx_memory_tracker_set_log_func(void *userdata, - void(*logfunc)(void *userdata, - const char *fmt, va_list args)); -# ifndef __VPX_MEM_C__ -# define vpx_memalign(align, size) xvpx_memalign((align), (size), __FILE__, __LINE__) -# define vpx_malloc(size) xvpx_malloc((size), __FILE__, __LINE__) -# define vpx_calloc(num, size) xvpx_calloc(num, size, __FILE__, __LINE__) -# define vpx_realloc(addr, size) xvpx_realloc(addr, size, __FILE__, __LINE__) -# define vpx_free(addr) xvpx_free(addr, __FILE__, __LINE__) -# define vpx_memory_tracker_check_integrity() vpx_memory_tracker_check_integrity(__FILE__, __LINE__) -# define vpx_mem_alloc(id,size,align) xvpx_mem_alloc(id, size, align, __FILE__, 
__LINE__) -# define vpx_mem_free(id,mem,size) xvpx_mem_free(id, mem, size, __FILE__, __LINE__) -# endif - - void *xvpx_memalign(size_t align, size_t size, char *file, int line); - void *xvpx_malloc(size_t size, char *file, int line); - void *xvpx_calloc(size_t num, size_t size, char *file, int line); - void *xvpx_realloc(void *memblk, size_t size, char *file, int line); - void xvpx_free(void *memblk, char *file, int line); - void *xvpx_mem_alloc(int id, size_t size, size_t align, char *file, int line); - void xvpx_mem_free(int id, void *mem, size_t size, char *file, int line); - -#else -# ifndef __VPX_MEM_C__ -# define vpx_memory_tracker_dump() -# define vpx_memory_tracker_check_integrity() -# define vpx_memory_tracker_set_log_type(t,o) 0 -# define vpx_memory_tracker_set_log_func(u,f) 0 -# endif -#endif - -#if !VPX_CHECK_MEM_FUNCTIONS -# ifndef __VPX_MEM_C__ -# include -# define vpx_memcpy memcpy -# define vpx_memset memset -# define vpx_memmove memmove -# endif -#endif +#include #ifdef VPX_MEM_PLTFRM # include VPX_MEM_PLTFRM diff --git a/media/libvpx/vpx_ports/mem.h b/media/libvpx/vpx_ports/mem.h index 1cb8c8cd9af..7502f906325 100644 --- a/media/libvpx/vpx_ports/mem.h +++ b/media/libvpx/vpx_ports/mem.h @@ -24,17 +24,6 @@ #define DECLARE_ALIGNED(n,typ,val) typ val #endif - -/* Declare an aligned array on the stack, for situations where the stack - * pointer may not have the alignment we expect. Creates an array with a - * modified name, then defines val to be a pointer, and aligns that pointer - * within the array. - */ -#define DECLARE_ALIGNED_ARRAY(a,typ,val,n)\ - typ val##_[(n)+(a)/sizeof(typ)+1];\ - typ *val = (typ*)((((intptr_t)val##_)+(a)-1)&((intptr_t)-(a))) - - /* Indicates that the usage of the specified variable has been audited to assure * that it's safe to use uninitialized. Silences 'may be used uninitialized' * warnings on gcc. 
@@ -49,4 +38,16 @@ #define __builtin_prefetch(x) #endif +/* Shift down with rounding */ +#define ROUND_POWER_OF_TWO(value, n) \ + (((value) + (1 << ((n) - 1))) >> (n)) + +#define ALIGN_POWER_OF_TWO(value, n) \ + (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) + +#if CONFIG_VP9_HIGHBITDEPTH +#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1)) +#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1)) +#endif // CONFIG_VP9_HIGHBITDEPTH + #endif // VPX_PORTS_MEM_H_ diff --git a/media/libvpx/vpx_ports/msvc.h b/media/libvpx/vpx_ports/msvc.h new file mode 100644 index 00000000000..43a36e76184 --- /dev/null +++ b/media/libvpx/vpx_ports/msvc.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_MSVC_H_ +#define VPX_PORTS_MSVC_H_ +#ifdef _MSC_VER + +#include "./vpx_config.h" + +# if _MSC_VER < 1900 // VS2015 provides snprintf +# define snprintf _snprintf +# endif // _MSC_VER < 1900 + +#endif // _MSC_VER +#endif // VPX_PORTS_MSVC_H_ diff --git a/media/libvpx/vpx_ports/vpx_once.h b/media/libvpx/vpx_ports/vpx_once.h index bd9eebd643b..f1df3943457 100644 --- a/media/libvpx/vpx_ports/vpx_once.h +++ b/media/libvpx/vpx_ports/vpx_once.h @@ -110,7 +110,7 @@ static void once(void (*func)(void)) #else -/* No-op version that performs no synchronization. vp8_rtcd() is idempotent, +/* No-op version that performs no synchronization. *_rtcd() is idempotent, * so as long as your platform provides atomic loads/stores of pointers * no synchronization is strictly necessary. 
*/ diff --git a/media/libvpx/vpx_ports/x86.h b/media/libvpx/vpx_ports/x86.h index ae349fb84c6..7d93710c4b0 100644 --- a/media/libvpx/vpx_ports/x86.h +++ b/media/libvpx/vpx_ports/x86.h @@ -152,7 +152,7 @@ static INLINE int x86_simd_caps(void) { unsigned int flags = 0; unsigned int mask = ~0; - unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; + unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; char *env; (void)reg_ebx; @@ -168,9 +168,9 @@ x86_simd_caps(void) { mask = strtol(env, NULL, 0); /* Ensure that the CPUID instruction supports extended features */ - cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); + cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); - if (reg_eax < 1) + if (max_cpuid_val < 1) return 0; /* Get the standard feature flags */ @@ -193,10 +193,12 @@ x86_simd_caps(void) { if ((xgetbv() & 0x6) == 0x6) { flags |= HAS_AVX; - /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ - cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); + if (max_cpuid_val >= 7) { + /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ + cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); - if (reg_ebx & BIT(5)) flags |= HAS_AVX2; + if (reg_ebx & BIT(5)) flags |= HAS_AVX2; + } } } diff --git a/media/libvpx/vpx_scale/generic/gen_scalers.c b/media/libvpx/vpx_scale/generic/gen_scalers.c index 5f355c5a6bb..dab324edfcc 100644 --- a/media/libvpx/vpx_scale/generic/gen_scalers.c +++ b/media/libvpx/vpx_scale/generic/gen_scalers.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - +#include "./vpx_scale_rtcd.h" #include "vpx_scale/vpx_scale.h" #include "vpx_mem/vpx_mem.h" /**************************************************************************** @@ -215,7 +215,7 @@ void vp8_vertical_band_2_1_scale_c(unsigned char *source, unsigned int dest_width) { (void) dest_pitch; (void) src_pitch; - vpx_memcpy(dest, source, dest_width); + memcpy(dest, source, dest_width); } void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, diff --git a/media/libvpx/vpx_scale/generic/vpx_scale.c b/media/libvpx/vpx_scale/generic/vpx_scale.c index 8044d2ad776..15e4ba87e72 100644 --- a/media/libvpx/vpx_scale/generic/vpx_scale.c +++ b/media/libvpx/vpx_scale/generic/vpx_scale.c @@ -22,6 +22,7 @@ ****************************************************************************/ #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_scale/vpx_scale.h" #include "vpx_scale/yv12config.h" typedef struct { @@ -379,7 +380,7 @@ void Scale2D vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); if (interpolation) - vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); + memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); /* Next band... 
*/ source += (unsigned long) source_band_height * source_pitch; @@ -432,7 +433,7 @@ void Scale2D temp_area + i * dest_pitch, 1, hratio, dest_width); } else { /* Duplicate the last row */ /* copy temp_area row 0 over from last row in the past */ - vpx_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); + memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); } } @@ -443,7 +444,7 @@ void Scale2D } /* copy temp_area row 0 over from last row in the past */ - vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); + memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); /* move to the next band */ source += source_band_height * source_pitch; @@ -498,11 +499,11 @@ void vpx_scale_frame if (dw < (int)dst->y_width) for (i = 0; i < dh; i++) - vpx_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1); + memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1); if (dh < (int)dst->y_height) for (i = dh - 1; i < (int)dst->y_height; i++) - vpx_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); + memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, @@ -510,11 +511,11 @@ void vpx_scale_frame if (dw / 2 < (int)dst->uv_width) for (i = 0; i < dst->uv_height; i++) - vpx_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); + memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); if (dh / 2 < (int)dst->uv_height) for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) 
- vpx_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); + memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, @@ -522,9 +523,9 @@ void vpx_scale_frame if (dw / 2 < (int)dst->uv_width) for (i = 0; i < dst->uv_height; i++) - vpx_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); + memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); if (dh / 2 < (int) dst->uv_height) for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) - vpx_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); + memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); } diff --git a/media/libvpx/vpx_scale/generic/yv12config.c b/media/libvpx/vpx_scale/generic/yv12config.c index ff49ffb951d..7582792d939 100644 --- a/media/libvpx/vpx_scale/generic/yv12config.c +++ b/media/libvpx/vpx_scale/generic/yv12config.c @@ -12,9 +12,7 @@ #include "vpx_scale/yv12config.h" #include "vpx_mem/vpx_mem.h" -#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH -#include "vp9/common/vp9_common.h" -#endif +#include "vpx_ports/mem.h" /**************************************************************************** * Exports @@ -38,7 +36,7 @@ vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { /* buffer_alloc isn't accessed by most functions. Rather y_buffer, u_buffer and v_buffer point to buffer_alloc and are used. 
Clear out all of this so that a freed pointer isn't inadvertently used */ - vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); + memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); } else { return -1; } @@ -128,7 +126,7 @@ int vp9_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) { /* buffer_alloc isn't accessed by most functions. Rather y_buffer, u_buffer and v_buffer point to buffer_alloc and are used. Clear out all of this so that a freed pointer isn't inadvertently used */ - vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); + memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); } else { return -1; } @@ -222,7 +220,7 @@ int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, // This memset is needed for fixing valgrind error from C loop filter // due to access uninitialized memory in frame border. It could be // removed if border is totally removed. - vpx_memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz); + memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz); } /* Only support allocating buffers that have a border that's a multiple diff --git a/media/libvpx/vpx_scale/generic/yv12extend.c b/media/libvpx/vpx_scale/generic/yv12extend.c index 953527faf84..086e2f398fb 100644 --- a/media/libvpx/vpx_scale/generic/yv12extend.c +++ b/media/libvpx/vpx_scale/generic/yv12extend.c @@ -10,8 +10,10 @@ #include #include "./vpx_config.h" +#include "./vpx_scale_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vpx_scale/yv12config.h" #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH #include "vp9/common/vp9_common.h" @@ -31,8 +33,8 @@ static void extend_plane(uint8_t *const src, int src_stride, uint8_t *dst_ptr2 = src + width; for (i = 0; i < height; ++i) { - vpx_memset(dst_ptr1, src_ptr1[0], extend_left); - vpx_memset(dst_ptr2, src_ptr2[0], extend_right); + memset(dst_ptr1, src_ptr1[0], extend_left); + memset(dst_ptr2, src_ptr2[0], extend_right); src_ptr1 += src_stride; src_ptr2 += src_stride; dst_ptr1 += src_stride; @@ -48,12 +50,12 @@ static void 
extend_plane(uint8_t *const src, int src_stride, dst_ptr2 = src + src_stride * height - extend_left; for (i = 0; i < extend_top; ++i) { - vpx_memcpy(dst_ptr1, src_ptr1, linesize); + memcpy(dst_ptr1, src_ptr1, linesize); dst_ptr1 += src_stride; } for (i = 0; i < extend_bottom; ++i) { - vpx_memcpy(dst_ptr2, src_ptr2, linesize); + memcpy(dst_ptr2, src_ptr2, linesize); dst_ptr2 += src_stride; } } @@ -91,12 +93,12 @@ static void extend_plane_high(uint8_t *const src8, int src_stride, dst_ptr2 = src + src_stride * height - extend_left; for (i = 0; i < extend_top; ++i) { - vpx_memcpy(dst_ptr1, src_ptr1, linesize * sizeof(uint16_t)); + memcpy(dst_ptr1, src_ptr1, linesize * sizeof(uint16_t)); dst_ptr1 += src_stride; } for (i = 0; i < extend_bottom; ++i) { - vpx_memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t)); + memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t)); dst_ptr2 += src_stride; } } @@ -212,7 +214,7 @@ void vp9_extend_frame_inner_borders_c(YV12_BUFFER_CONFIG *ybf) { void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) { uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *src = CONVERT_TO_SHORTPTR(src8); - vpx_memcpy(dst, src, num * sizeof(uint16_t)); + memcpy(dst, src, num * sizeof(uint16_t)); } #endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9 @@ -269,7 +271,7 @@ void vp8_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_ybc, #endif for (row = 0; row < src_ybc->y_height; ++row) { - vpx_memcpy(dst, src, src_ybc->y_width); + memcpy(dst, src, src_ybc->y_width); src += src_ybc->y_stride; dst += dst_ybc->y_stride; } @@ -278,7 +280,7 @@ void vp8_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_ybc, dst = dst_ybc->u_buffer; for (row = 0; row < src_ybc->uv_height; ++row) { - vpx_memcpy(dst, src, src_ybc->uv_width); + memcpy(dst, src, src_ybc->uv_width); src += src_ybc->uv_stride; dst += dst_ybc->uv_stride; } @@ -287,7 +289,7 @@ void vp8_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_ybc, dst = dst_ybc->v_buffer; for (row = 0; row < 
src_ybc->uv_height; ++row) { - vpx_memcpy(dst, src, src_ybc->uv_width); + memcpy(dst, src, src_ybc->uv_width); src += src_ybc->uv_stride; dst += dst_ybc->uv_stride; } @@ -306,7 +308,7 @@ void vpx_yv12_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc, const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); for (row = 0; row < src_ybc->y_height; ++row) { - vpx_memcpy(dst16, src16, src_ybc->y_width * sizeof(uint16_t)); + memcpy(dst16, src16, src_ybc->y_width * sizeof(uint16_t)); src16 += src_ybc->y_stride; dst16 += dst_ybc->y_stride; } @@ -315,7 +317,7 @@ void vpx_yv12_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc, #endif for (row = 0; row < src_ybc->y_height; ++row) { - vpx_memcpy(dst, src, src_ybc->y_width); + memcpy(dst, src, src_ybc->y_width); src += src_ybc->y_stride; dst += dst_ybc->y_stride; } diff --git a/media/libvpx/vpx_scale/vpx_scale_rtcd.c b/media/libvpx/vpx_scale/vpx_scale_rtcd.c index 656a22f5240..bea603fd104 100644 --- a/media/libvpx/vpx_scale/vpx_scale_rtcd.c +++ b/media/libvpx/vpx_scale/vpx_scale_rtcd.c @@ -7,9 +7,9 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "vpx_config.h" +#include "./vpx_config.h" #define RTCD_C -#include "vpx_scale_rtcd.h" +#include "./vpx_scale_rtcd.h" #include "vpx_ports/vpx_once.h" void vpx_scale_rtcd() diff --git a/media/libvpx/vpx_version.h b/media/libvpx/vpx_version.h index bce03815bfd..eefc59eb0db 100644 --- a/media/libvpx/vpx_version.h +++ b/media/libvpx/vpx_version.h @@ -1,7 +1,7 @@ #define VERSION_MAJOR 1 #define VERSION_MINOR 4 #define VERSION_PATCH 0 -#define VERSION_EXTRA "" +#define VERSION_EXTRA "488-ge67d45d" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.4.0" -#define VERSION_STRING " v1.4.0" +#define VERSION_STRING_NOSP "v1.4.0-488-ge67d45d" +#define VERSION_STRING " v1.4.0-488-ge67d45d" From db4f83350f89bdbf663968ae1fa084dc5dafe16b Mon Sep 17 00:00:00 2001 From: "L. David Baron" Date: Tue, 30 Jun 2015 22:34:16 -0700 Subject: [PATCH 47/61] Bug 1176969 followup - Mark test as random for now until I have a chance to rewrite it. 
--- layout/reftests/transform-3d/reftest.list | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/layout/reftests/transform-3d/reftest.list b/layout/reftests/transform-3d/reftest.list index feead0fa9a4..d6116bdadcb 100644 --- a/layout/reftests/transform-3d/reftest.list +++ b/layout/reftests/transform-3d/reftest.list @@ -63,5 +63,5 @@ fuzzy(3,99) fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccele fuzzy(3,99) == animate-cube-degrees.html animate-cube-degrees-ref.html # subpixel AA == animate-cube-degrees-zoom.html animate-cube-degrees-zoom-ref.html != animate-cube-degrees-ref.html animate-cube-degrees-zoom-ref.html -fuzzy-if(cocoaWidget,128,3) skip-if(Android) == animate-preserve3d-parent.html animate-preserve3d-ref.html # intermittently fuzzy on Mac -fuzzy-if(cocoaWidget,128,3) == animate-preserve3d-child.html animate-preserve3d-ref.html # intermittently fuzzy on Mac +fuzzy-if(cocoaWidget,128,4) random == animate-preserve3d-parent.html animate-preserve3d-ref.html # intermittently fuzzy on Mac; needs to be rewritten for timing issues +fuzzy-if(cocoaWidget,128,4) random == animate-preserve3d-child.html animate-preserve3d-ref.html # intermittently fuzzy on Mac; needs to be rewritten for timing issues From 381fbf547508dac0947c45815905c9863cea3215 Mon Sep 17 00:00:00 2001 From: Franziskus Kiefer Date: Fri, 5 Jun 2015 15:25:24 -0700 Subject: [PATCH 48/61] Bug 1166910 - referrer attribute for img tag. 
r=hsivonen --- dom/base/nsImageLoadingContent.cpp | 25 +++++++++++++++++-- dom/base/nsImageLoadingContent.h | 3 +++ dom/html/HTMLImageElement.cpp | 1 + dom/html/HTMLImageElement.h | 10 ++++++++ dom/html/nsGenericHTMLElement.cpp | 19 ++++++++++++++ dom/html/nsGenericHTMLElement.h | 15 +++++++++++ .../html/nsIDOMHTMLImageElement.idl | 3 ++- dom/webidl/HTMLImageElement.webidl | 2 ++ image/imgLoader.cpp | 3 +++ modules/libpref/init/all.js | 3 +++ netwerk/base/ReferrerPolicy.h | 5 +++- parser/html/nsHtml5SpeculativeLoad.cpp | 4 +-- parser/html/nsHtml5SpeculativeLoad.h | 7 ++++-- parser/html/nsHtml5TreeBuilderCppSupplement.h | 5 ++++ parser/html/nsHtml5TreeOpExecutor.cpp | 16 ++++++++++-- parser/html/nsHtml5TreeOpExecutor.h | 3 ++- 16 files changed, 113 insertions(+), 11 deletions(-) diff --git a/dom/base/nsImageLoadingContent.cpp b/dom/base/nsImageLoadingContent.cpp index 18205ab5d31..bef2ed691be 100644 --- a/dom/base/nsImageLoadingContent.cpp +++ b/dom/base/nsImageLoadingContent.cpp @@ -46,6 +46,7 @@ #include "mozilla/EventStates.h" #include "mozilla/dom/Element.h" #include "mozilla/dom/ScriptSettings.h" +#include "mozilla/Preferences.h" #ifdef LoadImage // Undefine LoadImage to prevent naming conflict with Windows. @@ -930,15 +931,27 @@ nsImageLoadingContent::LoadImage(nsIURI* aNewURI, loadFlags |= imgILoader::LOAD_CORS_USE_CREDENTIALS; } + // get document wide referrer policy + mozilla::net::ReferrerPolicy referrerPolicy = aDocument->GetReferrerPolicy(); + bool referrerAttributeEnabled = Preferences::GetBool("network.http.enablePerElementReferrer", false); + // if referrer attributes are enabled in preferences, load img referrer attribute + nsresult rv; + if (referrerAttributeEnabled) { + mozilla::net::ReferrerPolicy imgReferrerPolicy = GetImageReferrerPolicy(); + // if the image does not provide a referrer attribute, ignore this + if (imgReferrerPolicy != mozilla::net::RP_Unset) { + referrerPolicy = imgReferrerPolicy; + } + } + // Not blocked. Do the load. 
nsRefPtr& req = PrepareNextRequest(aImageLoadType); nsCOMPtr content = do_QueryInterface(static_cast(this)); - nsresult rv; rv = nsContentUtils::LoadImage(aNewURI, aDocument, aDocument->NodePrincipal(), aDocument->GetDocumentURI(), - aDocument->GetReferrerPolicy(), + referrerPolicy, this, loadFlags, content->LocalName(), getter_AddRefs(req), @@ -1566,3 +1579,11 @@ nsImageLoadingContent::ImageObserver::~ImageObserver() MOZ_COUNT_DTOR(ImageObserver); NS_CONTENT_DELETE_LIST_MEMBER(ImageObserver, this, mNext); } + +// Only HTMLInputElement.h overrides this for tags +// all other subclasses use this one, i.e. ignore referrer attributes +mozilla::net::ReferrerPolicy +nsImageLoadingContent::GetImageReferrerPolicy() +{ + return mozilla::net::RP_Unset; +}; diff --git a/dom/base/nsImageLoadingContent.h b/dom/base/nsImageLoadingContent.h index 8d23d379557..78f48404ef8 100644 --- a/dom/base/nsImageLoadingContent.h +++ b/dom/base/nsImageLoadingContent.h @@ -24,6 +24,7 @@ #include "nsAutoPtr.h" #include "nsIContentPolicy.h" #include "mozilla/dom/BindingDeclarations.h" +#include "mozilla/net/ReferrerPolicy.h" class nsIURI; class nsIDocument; @@ -198,6 +199,8 @@ protected: */ virtual mozilla::CORSMode GetCORSMode(); + virtual mozilla::net::ReferrerPolicy GetImageReferrerPolicy(); + // Subclasses are *required* to call BindToTree/UnbindFromTree. 
void BindToTree(nsIDocument* aDocument, nsIContent* aParent, nsIContent* aBindingParent, bool aCompileEventHandlers); diff --git a/dom/html/HTMLImageElement.cpp b/dom/html/HTMLImageElement.cpp index aac2e44afbc..a345817f09a 100644 --- a/dom/html/HTMLImageElement.cpp +++ b/dom/html/HTMLImageElement.cpp @@ -145,6 +145,7 @@ NS_IMPL_BOOL_ATTR(HTMLImageElement, IsMap, ismap) NS_IMPL_URI_ATTR(HTMLImageElement, LongDesc, longdesc) NS_IMPL_STRING_ATTR(HTMLImageElement, Sizes, sizes) NS_IMPL_STRING_ATTR(HTMLImageElement, Lowsrc, lowsrc) +NS_IMPL_STRING_ATTR(HTMLImageElement, Referrer, referrer) NS_IMPL_URI_ATTR(HTMLImageElement, Src, src) NS_IMPL_STRING_ATTR(HTMLImageElement, Srcset, srcset) NS_IMPL_STRING_ATTR(HTMLImageElement, UseMap, usemap) diff --git a/dom/html/HTMLImageElement.h b/dom/html/HTMLImageElement.h index 1841ec8f16a..3036835e3ec 100644 --- a/dom/html/HTMLImageElement.h +++ b/dom/html/HTMLImageElement.h @@ -189,6 +189,16 @@ public: { SetHTMLAttr(nsGkAtoms::border, aBorder, aError); } + void SetReferrer(const nsAString& aReferrer, ErrorResult& aError) + { + SetHTMLAttr(nsGkAtoms::referrer, aReferrer, aError); + } + + mozilla::net::ReferrerPolicy + GetImageReferrerPolicy() + { + return GetReferrerPolicy(); + } int32_t X(); int32_t Y(); diff --git a/dom/html/nsGenericHTMLElement.cpp b/dom/html/nsGenericHTMLElement.cpp index 3a60eb3d42a..a4d961f7524 100644 --- a/dom/html/nsGenericHTMLElement.cpp +++ b/dom/html/nsGenericHTMLElement.cpp @@ -106,6 +106,8 @@ #include "mozilla/dom/HTMLBodyElement.h" #include "imgIContainer.h" +#include "mozilla/net/ReferrerPolicy.h" + using namespace mozilla; using namespace mozilla::dom; @@ -995,6 +997,10 @@ nsGenericHTMLElement::ParseAttribute(int32_t aNamespaceID, return aResult.ParseIntValue(aValue); } + if (aAttribute == nsGkAtoms::referrer) { + return ParseReferrerAttribute(aValue, aResult); + } + if (aAttribute == nsGkAtoms::name) { // Store name as an atom. 
name="" means that the element has no name, // not that it has an emptystring as the name. @@ -1262,6 +1268,19 @@ nsGenericHTMLElement::ParseImageAttribute(nsIAtom* aAttribute, return false; } +bool +nsGenericHTMLElement::ParseReferrerAttribute(const nsAString& aString, + nsAttrValue& aResult) +{ + static const nsAttrValue::EnumTable kReferrerTable[] = { + { "no-referrer", net::RP_No_Referrer }, + { "origin", net::RP_Origin }, + { "unsafe-url", net::RP_Unsafe_URL }, + { 0 } + }; + return aResult.ParseEnumValue(aString, kReferrerTable, false); +} + bool nsGenericHTMLElement::ParseFrameborderValue(const nsAString& aString, nsAttrValue& aResult) diff --git a/dom/html/nsGenericHTMLElement.h b/dom/html/nsGenericHTMLElement.h index 82cc1cf25bb..30bd71bdd7d 100644 --- a/dom/html/nsGenericHTMLElement.h +++ b/dom/html/nsGenericHTMLElement.h @@ -233,6 +233,17 @@ public: mScrollgrab = aValue; } + mozilla::net::ReferrerPolicy + GetReferrerPolicy() + { + nsAutoString aPolicyString; + GetEnumAttr(nsGkAtoms::referrer, nullptr, aPolicyString); + if (aPolicyString.IsEmpty()) { + return mozilla::net::RP_Unset; + } + return mozilla::net::ReferrerPolicyFromString(aPolicyString); + } + /** * Determine whether an attribute is an event (onclick, etc.) 
* @param aName the attribute @@ -711,6 +722,10 @@ public: static bool ParseImageAttribute(nsIAtom* aAttribute, const nsAString& aString, nsAttrValue& aResult); + + static bool ParseReferrerAttribute(const nsAString& aString, + nsAttrValue& aResult); + /** * Convert a frameborder string to value (yes/no/1/0) * diff --git a/dom/interfaces/html/nsIDOMHTMLImageElement.idl b/dom/interfaces/html/nsIDOMHTMLImageElement.idl index 64574ca794c..6c5c915ffaa 100644 --- a/dom/interfaces/html/nsIDOMHTMLImageElement.idl +++ b/dom/interfaces/html/nsIDOMHTMLImageElement.idl @@ -16,7 +16,7 @@ * http://www.whatwg.org/specs/web-apps/current-work/ */ -[uuid(ec18e71c-4f5c-4cc3-aa36-5273168644dc)] +[uuid(a640d8af-3c0e-4926-8102-5ab52053c280)] interface nsIDOMHTMLImageElement : nsISupports { attribute DOMString alt; @@ -24,6 +24,7 @@ interface nsIDOMHTMLImageElement : nsISupports attribute DOMString srcset; attribute DOMString sizes; attribute DOMString useMap; + attribute DOMString referrer; attribute boolean isMap; attribute unsigned long width; attribute unsigned long height; diff --git a/dom/webidl/HTMLImageElement.webidl b/dom/webidl/HTMLImageElement.webidl index fce3c7cdf48..55f5a851b37 100644 --- a/dom/webidl/HTMLImageElement.webidl +++ b/dom/webidl/HTMLImageElement.webidl @@ -30,6 +30,8 @@ interface HTMLImageElement : HTMLElement { [SetterThrows] attribute DOMString useMap; [SetterThrows] + attribute DOMString referrer; + [SetterThrows] attribute boolean isMap; [SetterThrows] attribute unsigned long width; diff --git a/image/imgLoader.cpp b/image/imgLoader.cpp index b4b444dc82e..4398c010991 100644 --- a/image/imgLoader.cpp +++ b/image/imgLoader.cpp @@ -670,6 +670,9 @@ ValidateSecurityInfo(imgRequest* request, bool forcePrincipalCheck, nsISupports* aCX, ReferrerPolicy referrerPolicy) { // If the entry's Referrer Policy doesn't match, we can't use this request. + // XXX: this will return false if an image has different referrer attributes, + // i.e. 
we currently don't use the cached image but reload the image with + // the new referrer policy if (referrerPolicy != request->GetReferrerPolicy()) { return false; } diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index 4e18b6ba615..a83ab8f4905 100644 --- a/modules/libpref/init/all.js +++ b/modules/libpref/init/all.js @@ -1273,6 +1273,9 @@ pref("network.http.referer.XOriginPolicy", 0); // By default this is enabled for compatibility (see bug 141641) pref("network.http.sendSecureXSiteReferrer", true); +// Controls whether referrer attributes in , , , and + + + + From 66c874be8f871be9cd798fc3d3f7f20f6b4ed885 Mon Sep 17 00:00:00 2001 From: Dan Glastonbury Date: Wed, 17 Jun 2015 23:49:35 -0400 Subject: [PATCH 50/61] Bug 1151736 - Lose context on OOM instead of crashing. r=jgilbert --- dom/canvas/WebGL2ContextTextures.cpp | 3 +- dom/canvas/WebGLContextGL.cpp | 12 ++++--- dom/canvas/WebGLTexture.cpp | 51 +++++++++++++++++++++------- dom/canvas/WebGLTexture.h | 2 +- 4 files changed, 49 insertions(+), 19 deletions(-) diff --git a/dom/canvas/WebGL2ContextTextures.cpp b/dom/canvas/WebGL2ContextTextures.cpp index 532668ffae2..e7ae1cacf8e 100644 --- a/dom/canvas/WebGL2ContextTextures.cpp +++ b/dom/canvas/WebGL2ContextTextures.cpp @@ -402,7 +402,8 @@ WebGL2Context::TexSubImage3D(GLenum rawTarget, GLint level, if (coversWholeImage) { tex->SetImageDataStatus(texImageTarget, level, WebGLImageDataStatus::InitializedImageData); } else { - tex->EnsureNoUninitializedImageData(texImageTarget, level); + if (!tex->EnsureInitializedImageData(texImageTarget, level)) + return; } } diff --git a/dom/canvas/WebGLContextGL.cpp b/dom/canvas/WebGLContextGL.cpp index f7be7c87556..341ac1e0947 100644 --- a/dom/canvas/WebGLContextGL.cpp +++ b/dom/canvas/WebGLContextGL.cpp @@ -459,7 +459,8 @@ WebGLContext::CopyTexSubImage2D_base(TexImageTarget texImageTarget, GLint level, tex->SetImageInfo(texImageTarget, level, width, height, 1, effectiveInternalFormat, 
WebGLImageDataStatus::UninitializedImageData); - tex->EnsureNoUninitializedImageData(texImageTarget, level); + if (!tex->EnsureInitializedImageData(texImageTarget, level)) + return; } // if we are completely outside of the framebuffer, we can exit now with our black texture @@ -607,7 +608,8 @@ WebGLContext::CopyTexSubImage2D(GLenum rawTexImgTarget, if (coversWholeImage) { tex->SetImageDataStatus(texImageTarget, level, WebGLImageDataStatus::InitializedImageData); } else { - tex->EnsureNoUninitializedImageData(texImageTarget, level); + if (!tex->EnsureInitializedImageData(texImageTarget, level)) + return; } } @@ -3062,7 +3064,8 @@ WebGLContext::CompressedTexSubImage2D(GLenum rawTexImgTarget, GLint level, GLint if (coversWholeImage) { tex->SetImageDataStatus(texImageTarget, level, WebGLImageDataStatus::InitializedImageData); } else { - tex->EnsureNoUninitializedImageData(texImageTarget, level); + if (!tex->EnsureInitializedImageData(texImageTarget, level)) + return; } } @@ -3535,7 +3538,8 @@ WebGLContext::TexSubImage2D_base(GLenum rawImageTarget, GLint level, if (coversWholeImage) { tex->SetImageDataStatus(texImageTarget, level, WebGLImageDataStatus::InitializedImageData); } else { - tex->EnsureNoUninitializedImageData(texImageTarget, level); + if (!tex->EnsureInitializedImageData(texImageTarget, level)) + return; } } MakeContextCurrent(); diff --git a/dom/canvas/WebGLTexture.cpp b/dom/canvas/WebGLTexture.cpp index aeaaa89f28c..7a8828eb672 100644 --- a/dom/canvas/WebGLTexture.cpp +++ b/dom/canvas/WebGLTexture.cpp @@ -492,7 +492,7 @@ WebGLTexture::ResolvedFakeBlackStatus() const ImageInfo& imageInfo = ImageInfoAt(imageTarget, level); if (imageInfo.mImageDataStatus == WebGLImageDataStatus::UninitializedImageData) { - EnsureNoUninitializedImageData(imageTarget, level); + EnsureInitializedImageData(imageTarget, level); } } } @@ -613,13 +613,13 @@ ClearWithTempFB(WebGLContext* webgl, GLuint tex, } -void -WebGLTexture::EnsureNoUninitializedImageData(TexImageTarget 
imageTarget, - GLint level) +bool +WebGLTexture::EnsureInitializedImageData(TexImageTarget imageTarget, + GLint level) { const ImageInfo& imageInfo = ImageInfoAt(imageTarget, level); if (!imageInfo.HasUninitializedImageData()) - return; + return true; mContext->MakeContextCurrent(); @@ -631,13 +631,11 @@ WebGLTexture::EnsureNoUninitializedImageData(TexImageTarget imageTarget, if (cleared) { SetImageDataStatus(imageTarget, level, WebGLImageDataStatus::InitializedImageData); - return; + return true; } } // That didn't work. Try uploading zeros then. - gl::ScopedBindTexture autoBindTex(mContext->gl, mGLName, mTarget); - size_t bitspertexel = GetBitsPerTexel(imageInfo.mEffectiveInternalFormat); MOZ_ASSERT((bitspertexel % 8) == 0); // That would only happen for // compressed images, which cannot use @@ -653,11 +651,22 @@ WebGLTexture::EnsureNoUninitializedImageData(TexImageTarget imageTarget, MOZ_ASSERT(checked_byteLength.isValid()); // Should have been checked // earlier. - // Infallible for now. - UniquePtr zeros((uint8_t*)moz_xcalloc(1, - checked_byteLength.value())); + size_t byteCount = checked_byteLength.value(); + + UniquePtr zeros((uint8_t*)calloc(1, byteCount)); + if (zeros == nullptr) { + // Failed to allocate memory. Lose the context. Return OOM error. + mContext->ForceLoseContext(true); + mContext->ErrorOutOfMemory("EnsureInitializedImageData: Failed to alloc %u " + "bytes to clear image target `%s` level `%d`.", + byteCount, mContext->EnumName(imageTarget.get()), + level); + return false; + } gl::GLContext* gl = mContext->gl; + gl::ScopedBindTexture autoBindTex(gl, mGLName, mTarget); + GLenum driverInternalFormat = LOCAL_GL_NONE; GLenum driverFormat = LOCAL_GL_NONE; GLenum driverType = LOCAL_GL_NONE; @@ -691,11 +700,27 @@ WebGLTexture::EnsureNoUninitializedImageData(TexImageTarget imageTarget, // from this here. 
gfxCriticalError() << "GL context GetAndFlushUnderlyingGLErrors " << gfx::hexa(error); printf_stderr("Error: 0x%4x\n", error); - MOZ_CRASH(); // Errors on texture upload have been related to video - // memory exposure in the past. + if (error != LOCAL_GL_OUT_OF_MEMORY) { + // Errors on texture upload have been related to video + // memory exposure in the past, which is a security issue. + // Force loss of context. + mContext->ForceLoseContext(true); + return false; + } + + // Out-of-memory uploading pixels to GL. Lose context and report OOM. + mContext->ForceLoseContext(true); + mContext->ErrorOutOfMemory("EnsureNoUninitializedImageData: Failed to " + "upload texture of width: %u, height: %u, " + "depth: %u to target %s level %d.", + imageInfo.mWidth, imageInfo.mHeight, imageInfo.mDepth, + mContext->EnumName(imageTarget.get()), level); + return false; } SetImageDataStatus(imageTarget, level, WebGLImageDataStatus::InitializedImageData); + + return true; } void diff --git a/dom/canvas/WebGLTexture.h b/dom/canvas/WebGLTexture.h index 5545fdaadb0..3116af0e1b8 100644 --- a/dom/canvas/WebGLTexture.h +++ b/dom/canvas/WebGLTexture.h @@ -212,7 +212,7 @@ public: imageInfo.mImageDataStatus = newStatus; } - void EnsureNoUninitializedImageData(TexImageTarget imageTarget, GLint level); + bool EnsureInitializedImageData(TexImageTarget imageTarget, GLint level); protected: TexMinFilter mMinFilter; From 70895519594bbb01a671302d2afd2f5da30b89ea Mon Sep 17 00:00:00 2001 From: Nihanth Subramanya Date: Tue, 30 Jun 2015 13:45:24 -0700 Subject: [PATCH 51/61] Bug 1178152 - Provide a notification when the newtab URL changes. 
r=florian --- browser/modules/NewTabURL.jsm | 4 ++++ .../modules/test/xpcshell/test_NewTabURL.js | 18 +++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/browser/modules/NewTabURL.jsm b/browser/modules/NewTabURL.jsm index 9e78efaae09..f673bbc4856 100644 --- a/browser/modules/NewTabURL.jsm +++ b/browser/modules/NewTabURL.jsm @@ -10,6 +10,8 @@ let Cu = Components.utils; this.EXPORTED_SYMBOLS = [ "NewTabURL" ]; +Components.utils.import("resource://gre/modules/Services.jsm"); + this.NewTabURL = { _url: "about:newtab", _overridden: false, @@ -25,10 +27,12 @@ this.NewTabURL = { override: function(newURL) { this._url = newURL; this._overridden = true; + Services.obs.notifyObservers(null, "newtab-url-changed", this._url); }, reset: function() { this._url = "about:newtab"; this._overridden = false; + Services.obs.notifyObservers(null, "newtab-url-changed", this._url); } }; diff --git a/browser/modules/test/xpcshell/test_NewTabURL.js b/browser/modules/test/xpcshell/test_NewTabURL.js index e5ccd9ecfc7..aad233729d1 100644 --- a/browser/modules/test/xpcshell/test_NewTabURL.js +++ b/browser/modules/test/xpcshell/test_NewTabURL.js @@ -4,14 +4,30 @@ "use strict"; Components.utils.import("resource:///modules/NewTabURL.jsm"); +Components.utils.import("resource://gre/modules/Services.jsm"); -function run_test() { +add_task(function* () { Assert.equal(NewTabURL.get(), "about:newtab", "Default newtab URL should be about:newtab"); let url = "http://example.com/"; + let notificationPromise = promiseNewtabURLNotification(url); NewTabURL.override(url); + yield notificationPromise; Assert.ok(NewTabURL.overridden, "Newtab URL should be overridden"); Assert.equal(NewTabURL.get(), url, "Newtab URL should be the custom URL"); + + notificationPromise = promiseNewtabURLNotification("about:newtab"); NewTabURL.reset(); + yield notificationPromise; Assert.ok(!NewTabURL.overridden, "Newtab URL should not be overridden"); Assert.equal(NewTabURL.get(), "about:newtab", 
"Newtab URL should be the about:newtab"); +}); + +function promiseNewtabURLNotification(aNewURL) { + return new Promise(resolve => { + Services.obs.addObserver(function observer(aSubject, aTopic, aData) { + Services.obs.removeObserver(observer, aTopic); + Assert.equal(aData, aNewURL, "Data for newtab-url-changed notification should be new URL."); + resolve(); + }, "newtab-url-changed", false); + }); } From e21ed6f2b942aeb9a400024b6c2c1c9c9a49ffc9 Mon Sep 17 00:00:00 2001 From: Margaret Leibovic Date: Mon, 29 Jun 2015 15:03:54 -0700 Subject: [PATCH 52/61] Bug 1156917 - Use higher resolution favicons for search engines. r=mfinkle --- mobile/locales/en-US/searchplugins/amazondotcom.xml | 2 +- mobile/locales/en-US/searchplugins/bing.xml | 2 +- mobile/locales/en-US/searchplugins/duckduckgo.xml | 2 +- mobile/locales/en-US/searchplugins/google.xml | 2 +- mobile/locales/en-US/searchplugins/twitter.xml | 2 +- mobile/locales/en-US/searchplugins/wikipedia.xml | 2 +- mobile/locales/en-US/searchplugins/yahoo.xml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mobile/locales/en-US/searchplugins/amazondotcom.xml b/mobile/locales/en-US/searchplugins/amazondotcom.xml index d52ff8633ff..479abc36d93 100644 --- a/mobile/locales/en-US/searchplugins/amazondotcom.xml +++ b/mobile/locales/en-US/searchplugins/amazondotcom.xml @@ -5,7 +5,7 @@ Amazon.com ISO-8859-1 - + diff --git a/mobile/locales/en-US/searchplugins/bing.xml b/mobile/locales/en-US/searchplugins/bing.xml index e503048b176..f90bf97da21 100644 --- a/mobile/locales/en-US/searchplugins/bing.xml +++ b/mobile/locales/en-US/searchplugins/bing.xml @@ -4,7 +4,7 @@ Bing - + diff --git a/mobile/locales/en-US/searchplugins/duckduckgo.xml b/mobile/locales/en-US/searchplugins/duckduckgo.xml index 97ca332799b..9e96ae21a00 100644 --- a/mobile/locales/en-US/searchplugins/duckduckgo.xml +++ b/mobile/locales/en-US/searchplugins/duckduckgo.xml @@ -5,7 +5,7 @@ DuckDuckGo UTF-8 - + diff --git 
a/mobile/locales/en-US/searchplugins/google.xml b/mobile/locales/en-US/searchplugins/google.xml index 63a14176e54..f563a69b437 100644 --- a/mobile/locales/en-US/searchplugins/google.xml +++ b/mobile/locales/en-US/searchplugins/google.xml @@ -5,7 +5,7 @@ Google UTF-8 - + diff --git a/mobile/locales/en-US/searchplugins/twitter.xml b/mobile/locales/en-US/searchplugins/twitter.xml index da5eb759054..1334c4726f6 100644 --- a/mobile/locales/en-US/searchplugins/twitter.xml +++ b/mobile/locales/en-US/searchplugins/twitter.xml @@ -4,7 +4,7 @@ Twitter -  +  diff --git a/mobile/locales/en-US/searchplugins/wikipedia.xml b/mobile/locales/en-US/searchplugins/wikipedia.xml index bd22b1ea3c6..85753c0227e 100644 --- a/mobile/locales/en-US/searchplugins/wikipedia.xml +++ b/mobile/locales/en-US/searchplugins/wikipedia.xml @@ -5,7 +5,7 @@ Wikipedia UTF-8 - + diff --git a/mobile/locales/en-US/searchplugins/yahoo.xml b/mobile/locales/en-US/searchplugins/yahoo.xml index cceee874803..6fa29bf2018 100644 --- a/mobile/locales/en-US/searchplugins/yahoo.xml +++ b/mobile/locales/en-US/searchplugins/yahoo.xml @@ -5,7 +5,7 @@ Yahoo UTF-8 - + From 6ddcde1ef7b241787f92404e2adbdbe48d48dbac Mon Sep 17 00:00:00 2001 From: Margaret Leibovic Date: Mon, 29 Jun 2015 15:20:58 -0700 Subject: [PATCH 53/61] Bug 1175451 - Change "Added to list" super toast to "Page added to Reading List". r=nalexander --- mobile/android/base/locales/en-US/android_strings.dtd | 4 ++-- mobile/android/base/strings.xml.in | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mobile/android/base/locales/en-US/android_strings.dtd b/mobile/android/base/locales/en-US/android_strings.dtd index d803c466028..0e696e7d64c 100644 --- a/mobile/android/base/locales/en-US/android_strings.dtd +++ b/mobile/android/base/locales/en-US/android_strings.dtd @@ -426,9 +426,9 @@ size. 
--> - - + diff --git a/mobile/android/base/strings.xml.in b/mobile/android/base/strings.xml.in index 36de7c036a2..cbb6eaadbed 100644 --- a/mobile/android/base/strings.xml.in +++ b/mobile/android/base/strings.xml.in @@ -326,7 +326,7 @@ &site_settings_clear; &site_settings_no_settings; - &reading_list_added2; + &reading_list_added3; &reading_list_removed; &reading_list_remove; &reading_list_duplicate; From f34524f1bccd3099d05590c8ceee223b29296305 Mon Sep 17 00:00:00 2001 From: Margaret Leibovic Date: Tue, 30 Jun 2015 13:51:39 -0700 Subject: [PATCH 54/61] backout 8cab4b5abe7f for busting home panel add-ons (bug 1178739) --- mobile/android/modules/HomeProvider.jsm | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mobile/android/modules/HomeProvider.jsm b/mobile/android/modules/HomeProvider.jsm index 7d302274202..77e15aad977 100644 --- a/mobile/android/modules/HomeProvider.jsm +++ b/mobile/android/modules/HomeProvider.jsm @@ -5,8 +5,6 @@ "use strict"; -/*globals gSyncCheckIntervalSecs, gUpdateTimerManager, Sqlite, DB_PATH */ - this.EXPORTED_SYMBOLS = [ "HomeProvider" ]; const { utils: Cu, classes: Cc, interfaces: Ci } = Components; @@ -127,18 +125,18 @@ function syncTimerCallback(timer) { } } -let HomeStorage = function(datasetId) { +this.HomeStorage = function(datasetId) { this.datasetId = datasetId; }; -let ValidationError = function(message) { +this.ValidationError = function(message) { this.name = "ValidationError"; this.message = message; }; ValidationError.prototype = new Error(); ValidationError.prototype.constructor = ValidationError; -let HomeProvider = Object.freeze({ +this.HomeProvider = Object.freeze({ ValidationError: ValidationError, /** From e9b90c5ee7784f87f8d89698c94e69bcee1facf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eddy=20Bru=C3=ABl?= Date: Tue, 30 Jun 2015 22:56:26 +0200 Subject: [PATCH 55/61] Bug 1171967 - Implement WorkerTarget;r=jlong --- .../devtools/debugger/debugger-controller.js | 12 ++++ 
.../devtools/debugger/views/workers-view.js | 12 +++- browser/devtools/framework/target.js | 70 +++++++++++++++++++ toolkit/devtools/client/dbg-client.jsm | 9 ++- 4 files changed, 101 insertions(+), 2 deletions(-) diff --git a/browser/devtools/debugger/debugger-controller.js b/browser/devtools/debugger/debugger-controller.js index 54ae20dc2c2..5010eb68594 100644 --- a/browser/devtools/debugger/debugger-controller.js +++ b/browser/devtools/debugger/debugger-controller.js @@ -455,6 +455,7 @@ function Workers() { this._onWorkerListChanged = this._onWorkerListChanged.bind(this); this._onWorkerFreeze = this._onWorkerFreeze.bind(this); this._onWorkerThaw = this._onWorkerThaw.bind(this); + this._onWorkerSelect = this._onWorkerSelect.bind(this); } Workers.prototype = { @@ -476,6 +477,10 @@ Workers.prototype = { }, _updateWorkerList: function () { + if (!this._tabClient.listWorkers) { + return; + } + this._tabClient.listWorkers((response) => { let workerActors = new Set(); for (let worker of response.workers) { @@ -516,6 +521,13 @@ Workers.prototype = { _onWorkerThaw: function (type, packet) { let workerClient = this._workerClients.get(packet.from); DebuggerView.Workers.addWorker(packet.from, workerClient.url); + }, + + _onWorkerSelect: function (workerActor) { + let workerClient = this._workerClients.get(workerActor); + gDevTools.showToolbox(devtools.TargetFactory.forWorker(workerClient), + "jsdebugger", + devtools.Toolbox.HostType.WINDOW); } }; diff --git a/browser/devtools/debugger/views/workers-view.js b/browser/devtools/debugger/views/workers-view.js index 808cac63f71..3b7b9db24d8 100644 --- a/browser/devtools/debugger/views/workers-view.js +++ b/browser/devtools/debugger/views/workers-view.js @@ -3,7 +3,9 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ "use strict"; -function WorkersView() {} +function WorkersView() { + this._onWorkerSelect = this._onWorkerSelect.bind(this); +} WorkersView.prototype = Heritage.extend(WidgetMethods, { initialize: function () { @@ -17,6 +19,7 @@ WorkersView.prototype = Heritage.extend(WidgetMethods, { showArrows: true, }); this.emptyText = L10N.getStr("noWorkersText"); + this.widget.addEventListener("select", this._onWorkerSelect, false); }, addWorker: function (actor, name) { @@ -30,6 +33,13 @@ WorkersView.prototype = Heritage.extend(WidgetMethods, { removeWorker: function (actor) { this.remove(this.getItemByValue(actor)); + }, + + _onWorkerSelect: function () { + if (this.selectedItem !== null) { + DebuggerController.Workers._onWorkerSelect(this.selectedItem.value); + this.selectedItem = null; + } } }); diff --git a/browser/devtools/framework/target.js b/browser/devtools/framework/target.js index ee9cb1ac0b1..13c9177860d 100644 --- a/browser/devtools/framework/target.js +++ b/browser/devtools/framework/target.js @@ -60,6 +60,15 @@ exports.TargetFactory = { return targetPromise; }, + forWorker: function TF_forWorker(workerClient) { + let target = targets.get(workerClient); + if (target == null) { + target = new WorkerTarget(workerClient); + targets.set(workerClient, target); + } + return target; + }, + /** * Creating a target for a tab that is being closed is a problem because it * allows a leak as a result of coming after the close event which normally @@ -799,3 +808,64 @@ WindowTarget.prototype = { return 'WindowTarget:' + this.window; }, }; + +function WorkerTarget(workerClient) { + EventEmitter.decorate(this); + this._workerClient = workerClient; +} + +/** + * A WorkerTarget represents a worker. Unlike TabTarget, which can represent + * either a local or remote tab, WorkerTarget always represents a remote worker. 
+ * Moreover, unlike TabTarget, which is constructed with a placeholder object + * for remote tabs (from which a TabClient can then be lazily obtained), + * WorkerTarget is constructed with a WorkerClient directly. + * + * The reason for this is that in order to get notifications when a worker + * closes/freezes/thaws, the UI needs to attach to each worker anyway, so by + * the time a WorkerTarget for a given worker is created, a WorkerClient for + * that worker will already be available. Consequently, there is no need to + * obtain a WorkerClient lazily. + * + * WorkerClient is designed to mimic the interface of TabClient as closely as + * possible. This allows us to debug workers as if they were ordinary tabs, + * requiring only minimal changes to the rest of the frontend. + */ +WorkerTarget.prototype = { + get isRemote() { + return true; + }, + + get isTabActor() { + return true; + }, + + get form() { + return { + from: this._workerClient.actor, + type: "attached", + isFrozen: this._workerClient.isFrozen, + url: this._workerClient.url + }; + }, + + get activeTab() { + return this._workerClient; + }, + + get client() { + return this._workerClient.client; + }, + + destroy: function () {}, + + hasActor: function (name) { + return false; + }, + + getTrait: function (name) { + return undefined; + }, + + makeRemote: function () {} +}; diff --git a/toolkit/devtools/client/dbg-client.jsm b/toolkit/devtools/client/dbg-client.jsm index 4c36ae72daa..52693e79b5e 100644 --- a/toolkit/devtools/client/dbg-client.jsm +++ b/toolkit/devtools/client/dbg-client.jsm @@ -511,7 +511,8 @@ DebuggerClient.prototype = { executeSoon(() => aOnResponse({ from: workerClient.actor, type: "attached", - isFrozen: workerClient.isFrozen + isFrozen: workerClient.isFrozen, + url: workerClient.url }, workerClient)); return; } @@ -1379,6 +1380,8 @@ function WorkerClient(aClient, aForm) { this.addListener("close", this._onClose); this.addListener("freeze", this._onFreeze); this.addListener("thaw", 
this._onThaw); + + this.traits = {}; } WorkerClient.prototype = { @@ -1454,6 +1457,10 @@ WorkerClient.prototype = { this._isFrozen = false; }, + reconfigure: function () { + return Promise.resolve(); + }, + events: ["close", "freeze", "thaw"] }; From f1d9e8bc30a363cfa7d123006be057169472e15e Mon Sep 17 00:00:00 2001 From: Brian Grinstead Date: Tue, 30 Jun 2015 14:02:39 -0700 Subject: [PATCH 56/61] Bug 1175327 - Move existing Tracking Protection functionality from shield doorhanger to Control Center;r=ttaubert --- .../content/browser-trackingprotection.js | 96 ++++++++++++ browser/base/content/browser.js | 13 +- .../test/general/browser_trackingUI_1.js | 146 ++++++++---------- .../test/general/browser_trackingUI_2.js | 53 ++++--- browser/base/content/urlbarBindings.xml | 12 -- .../controlcenter/content/panel.inc.xul | 36 +++++ .../locales/en-US/chrome/browser/browser.dtd | 9 ++ browser/themes/linux/jar.mn | 2 + browser/themes/osx/jar.mn | 2 + .../themes/shared/controlcenter/panel.inc.css | 30 +++- .../tracking-protection-disabled.svg | 24 +++ .../controlcenter/tracking-protection.svg | 22 +++ browser/themes/windows/jar.mn | 2 + 13 files changed, 319 insertions(+), 128 deletions(-) create mode 100644 browser/base/content/browser-trackingprotection.js create mode 100644 browser/themes/shared/controlcenter/tracking-protection-disabled.svg create mode 100644 browser/themes/shared/controlcenter/tracking-protection.svg diff --git a/browser/base/content/browser-trackingprotection.js b/browser/base/content/browser-trackingprotection.js new file mode 100644 index 00000000000..bc02c1fbdf9 --- /dev/null +++ b/browser/base/content/browser-trackingprotection.js @@ -0,0 +1,96 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +let TrackingProtection = { + PREF_ENABLED: "privacy.trackingprotection.enabled", + + init() { + let $ = selector => document.querySelector(selector); + this.container = $("#tracking-protection-container"); + this.content = $("#tracking-protection-content"); + + this.updateEnabled(); + Services.prefs.addObserver(this.PREF_ENABLED, this, false); + + this.enabledHistogram.add(this.enabled); + }, + + uninit() { + Services.prefs.removeObserver(this.PREF_ENABLED, this); + }, + + observe() { + this.updateEnabled(); + }, + + updateEnabled() { + this.enabled = Services.prefs.getBoolPref(this.PREF_ENABLED); + this.container.hidden = !this.enabled; + }, + + get enabledHistogram() { + return Services.telemetry.getHistogramById("TRACKING_PROTECTION_ENABLED"); + }, + + get eventsHistogram() { + return Services.telemetry.getHistogramById("TRACKING_PROTECTION_EVENTS"); + }, + + onSecurityChange(state) { + if (!this.enabled) { + return; + } + + let { + STATE_BLOCKED_TRACKING_CONTENT, STATE_LOADED_TRACKING_CONTENT + } = Ci.nsIWebProgressListener; + + if (state & STATE_BLOCKED_TRACKING_CONTENT) { + this.content.setAttribute("block-active", true); + this.content.removeAttribute("block-disabled"); + } else if (state & STATE_LOADED_TRACKING_CONTENT) { + this.content.setAttribute("block-disabled", true); + this.content.removeAttribute("block-active"); + } else { + this.content.removeAttribute("block-disabled"); + this.content.removeAttribute("block-active"); + } + + // Telemetry for state change. + this.eventsHistogram.add(0); + }, + + disableForCurrentPage() { + // Convert document URI into the format used by + // nsChannelClassifier::ShouldEnableTrackingProtection. + // Any scheme turned into https is correct. + let normalizedUrl = Services.io.newURI( + "https://" + gBrowser.selectedBrowser.currentURI.hostPort, + null, null); + + // Add the current host in the 'trackingprotection' consumer of + // the permission manager using a normalized URI. 
This effectively + // places this host on the tracking protection allowlist. + Services.perms.add(normalizedUrl, + "trackingprotection", Services.perms.ALLOW_ACTION); + + // Telemetry for disable protection. + this.eventsHistogram.add(1); + + BrowserReload(); + }, + + enableForCurrentPage() { + // Remove the current host from the 'trackingprotection' consumer + // of the permission manager. This effectively removes this host + // from the tracking protection allowlist. + Services.perms.remove(gBrowser.selectedBrowser.currentURI.host, + "trackingprotection"); + + // Telemetry for enable protection. + this.eventsHistogram.add(2); + + BrowserReload(); + }, +}; diff --git a/browser/base/content/browser.js b/browser/base/content/browser.js index 7f4d699cb0a..0d79962b725 100644 --- a/browser/base/content/browser.js +++ b/browser/base/content/browser.js @@ -280,6 +280,7 @@ let gInitialPages = [ #include browser-social.js #include browser-tabview.js #include browser-thumbnails.js +#include browser-trackingprotection.js #ifdef MOZ_DATA_REPORTING #include browser-data-submission-info-bar.js @@ -964,6 +965,7 @@ var gBrowserInit = { BrowserOnClick.init(); DevEdition.init(); AboutPrivateBrowsingListener.init(); + TrackingProtection.init(); let mm = window.getGroupMessageManager("browsers"); mm.loadFrameScript("chrome://browser/content/tab-content.js", true); @@ -1446,12 +1448,6 @@ var gBrowserInit = { } }, 5000); - // Telemetry for tracking protection. 
- let tpEnabled = gPrefService - .getBoolPref("privacy.trackingprotection.enabled"); - Services.telemetry.getHistogramById("TRACKING_PROTECTION_ENABLED") - .add(tpEnabled); - PanicButtonNotifier.init(); }); this.delayedStartupFinished = true; @@ -1534,6 +1530,8 @@ var gBrowserInit = { DevEdition.uninit(); + TrackingProtection.uninit(); + gMenuButtonUpdateBadge.uninit(); ReadingListUI.uninit(); @@ -4383,6 +4381,7 @@ var XULBrowserWindow = { uri = Services.uriFixup.createExposableURI(uri); } catch (e) {} gIdentityHandler.checkIdentity(this._state, uri); + TrackingProtection.onSecurityChange(this._state); }, // simulate all change notifications after switching tabs @@ -6781,7 +6780,7 @@ var gIdentityHandler = { nsIWebProgressListener.STATE_BLOCKED_TRACKING_CONTENT | nsIWebProgressListener.STATE_LOADED_TRACKING_CONTENT)) { this.showBadContentDoorhanger(state); - } else if (gPrefService.getBoolPref("privacy.trackingprotection.enabled")) { + } else if (TrackingProtection.enabled) { // We didn't show the shield Services.telemetry.getHistogramById("TRACKING_PROTECTION_SHIELD") .add(0); diff --git a/browser/base/content/test/general/browser_trackingUI_1.js b/browser/base/content/test/general/browser_trackingUI_1.js index 00555c9c549..9c67f39c9e8 100644 --- a/browser/base/content/test/general/browser_trackingUI_1.js +++ b/browser/base/content/test/general/browser_trackingUI_1.js @@ -2,118 +2,100 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -// Test that the Tracking Protection Doorhanger appears -// and has the correct state when tracking content is blocked (Bug 1043801) +// Test that the Tracking Protection section is visible in the Control Center +// and has the correct state for the cases when: +// * A page with no tracking elements is loaded. +// * A page with tracking elements is loaded and they are blocked. +// * A page with tracking elements is loaded and they are not blocked. 
+// See also Bugs 1175327 and 1043801. -var PREF = "privacy.trackingprotection.enabled"; -var BENIGN_PAGE = "http://tracking.example.org/browser/browser/base/content/test/general/benignPage.html"; -var TRACKING_PAGE = "http://tracking.example.org/browser/browser/base/content/test/general/trackingPage.html"; +let PREF = "privacy.trackingprotection.enabled"; +let BENIGN_PAGE = "http://tracking.example.org/browser/browser/base/content/test/general/benignPage.html"; +let TRACKING_PAGE = "http://tracking.example.org/browser/browser/base/content/test/general/trackingPage.html"; +let TrackingProtection = null; -function testBenignPage(gTestBrowser) -{ - // Make sure the doorhanger does NOT appear - var notification = PopupNotifications.getNotification("bad-content", gTestBrowser); - is(notification, null, "Tracking Content Doorhanger did NOT appear when protection was ON and tracking was NOT present"); +registerCleanupFunction(function() { + TrackingProtection = null; + Services.prefs.clearUserPref(PREF); + gBrowser.removeCurrentTab(); +}); + +function hidden(sel) { + let win = gBrowser.ownerGlobal; + let el = win.document.querySelector(sel); + let display = win.getComputedStyle(el).getPropertyValue("display", null); + return display === "none"; } -function* testTrackingPage(gTestBrowser) -{ - // Make sure the doorhanger appears - var notification = PopupNotifications.getNotification("bad-content", gTestBrowser); - isnot(notification, null, "Tracking Content Doorhanger did appear when protection was ON and tracking was present"); - notification.reshow(); - var notificationElement = PopupNotifications.panel.firstChild; +function testBenignPage() { + ok (!TrackingProtection.content.hasAttribute("block-disabled"), "blocking not disabled"); + ok (!TrackingProtection.content.hasAttribute("block-active"), "blocking is not active"); - // Wait for the method to be attached after showing the popup - yield promiseWaitForCondition(() => { - return 
notificationElement.disableTrackingContentProtection; - }); - - // Make sure the state of the doorhanger includes blocking tracking elements - ok(notificationElement.isTrackingContentBlocked, - "Tracking Content is being blocked"); - - // Make sure the notification has no trackingblockdisabled attribute - ok(!notificationElement.hasAttribute("trackingblockdisabled"), - "Doorhanger must have no trackingblockdisabled attribute"); + // Make sure that the no tracking elements message appears + ok (!hidden("#tracking-not-detected"), "labelNoTracking is visible"); + ok (hidden("#tracking-loaded"), "labelTrackingLoaded is hidden"); + ok (hidden("#tracking-blocked"), "labelTrackingBlocked is hidden"); } -function* testTrackingPageWhitelisted(gTestBrowser) -{ - // Make sure the doorhanger appears - var notification = PopupNotifications.getNotification("bad-content", gTestBrowser); - isnot(notification, null, "Tracking Content Doorhanger did appear when protection was ON and tracking was present but white-listed"); - notification.reshow(); - var notificationElement = PopupNotifications.panel.firstChild; +function testTrackingPage() { + ok (!TrackingProtection.content.hasAttribute("block-disabled"), "blocking not disabled"); + ok (TrackingProtection.content.hasAttribute("block-active"), "blocking is active"); - // Wait for the method to be attached after showing the popup - yield promiseWaitForCondition(() => { - return notificationElement.disableTrackingContentProtection; - }); - - var notificationElement = PopupNotifications.panel.firstChild; - - // Make sure the state of the doorhanger does NOT include blocking tracking elements - ok(!notificationElement.isTrackingContentBlocked, - "Tracking Content is NOT being blocked"); - - // Make sure the notification has the trackingblockdisabled attribute set to true - is(notificationElement.getAttribute("trackingblockdisabled"), "true", - "Doorhanger must have [trackingblockdisabled='true'] attribute"); + // Make sure that the 
blocked tracking elements message appears + ok (hidden("#tracking-not-detected"), "labelNoTracking is hidden"); + ok (hidden("#tracking-loaded"), "labelTrackingLoaded is hidden"); + ok (!hidden("#tracking-blocked"), "labelTrackingBlocked is visible"); } -function testTrackingPageOFF(gTestBrowser) -{ - // Make sure the doorhanger does NOT appear - var notification = PopupNotifications.getNotification("bad-content", gTestBrowser); - is(notification, null, "Tracking Content Doorhanger did NOT appear when protection was OFF and tracking was present"); -} +function testTrackingPageWhitelisted() { + ok (TrackingProtection.content.hasAttribute("block-disabled"), "blocking is disabled"); + ok (!TrackingProtection.content.hasAttribute("block-active"), "blocking is not active"); -function testBenignPageOFF(gTestBrowser) -{ - // Make sure the doorhanger does NOT appear - var notification = PopupNotifications.getNotification("bad-content", gTestBrowser); - is(notification, null, "Tracking Content Doorhanger did NOT appear when protection was OFF and tracking was NOT present"); + // Make sure that the loaded tracking elements message appears + ok (hidden("#tracking-not-detected"), "labelNoTracking is hidden"); + ok (!hidden("#tracking-loaded"), "labelTrackingLoaded is visible"); + ok (hidden("#tracking-blocked"), "labelTrackingBlocked is hidden"); } add_task(function* () { - registerCleanupFunction(function() { - Services.prefs.clearUserPref(PREF); - gBrowser.removeCurrentTab(); - }); - yield updateTrackingProtectionDatabase(); let tab = gBrowser.selectedTab = gBrowser.addTab(); - // Enable Tracking Protection + TrackingProtection = gBrowser.ownerGlobal.TrackingProtection; + ok (TrackingProtection, "Functionality is attached to the browser window"); + is (TrackingProtection.enabled, Services.prefs.getBoolPref(PREF), + "The initial enabled value is based on the default pref value"); + + info("Enable Tracking Protection"); Services.prefs.setBoolPref(PREF, true); + ok
(TrackingProtection.enabled, "Functionality is enabled after setting the pref"); - // Point tab to a test page NOT containing tracking elements + info("Point tab to a test page NOT containing tracking elements"); yield promiseTabLoadEvent(tab, BENIGN_PAGE); - testBenignPage(gBrowser.getBrowserForTab(tab)); + testBenignPage(); - // Point tab to a test page containing tracking elements + info("Point tab to a test page containing tracking elements"); yield promiseTabLoadEvent(tab, TRACKING_PAGE); - // Tracking content must be blocked - yield testTrackingPage(gBrowser.getBrowserForTab(tab)); + info("Tracking content must be blocked"); + testTrackingPage(); - // Disable Tracking Content Protection for the page (which reloads the page) - PopupNotifications.panel.firstChild.disableTrackingContentProtection(); + info("Disable Tracking Content Protection for the page (which reloads the page)"); + TrackingProtection.disableForCurrentPage(); - // Wait for tab to reload following tracking-protection page white-listing + info("Wait for tab to reload following tracking-protection page white-listing"); yield promiseTabLoadEvent(tab); - // Tracking content must be white-listed (NOT blocked) - yield testTrackingPageWhitelisted(gBrowser.getBrowserForTab(tab)); + info("Tracking content must be white-listed (NOT blocked)"); + testTrackingPageWhitelisted(); - // Re-enable Tracking Content Protection for the page (which reloads the page) - PopupNotifications.panel.firstChild.enableTrackingContentProtection(); + info("Re-enable Tracking Content Protection for the page (which reloads the page)"); + TrackingProtection.enableForCurrentPage(); - // Wait for tab to reload following tracking-protection page white-listing + info("Wait for tab to reload following tracking-protection page white-listing"); yield promiseTabLoadEvent(tab); - // Tracking content must be blocked - yield testTrackingPage(gBrowser.getBrowserForTab(tab)); + info("Tracking content must be blocked"); + testTrackingPage(); 
}); diff --git a/browser/base/content/test/general/browser_trackingUI_2.js b/browser/base/content/test/general/browser_trackingUI_2.js index a466bb9d4c5..556558c8826 100644 --- a/browser/base/content/test/general/browser_trackingUI_2.js +++ b/browser/base/content/test/general/browser_trackingUI_2.js @@ -2,45 +2,48 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -// Test that the Tracking Protection Doorhanger does not ever appear -// when the feature is off (Bug 1043801) +// Test that the Tracking Protection section is never visible in the +// Control Center when the feature is off. +// See also Bugs 1175327 and 1043801. -var PREF = "privacy.trackingprotection.enabled"; -var BENIGN_PAGE = "http://tracking.example.org/browser/browser/base/content/test/general/benignPage.html"; -var TRACKING_PAGE = "http://tracking.example.org/browser/browser/base/content/test/general/trackingPage.html"; +let PREF = "privacy.trackingprotection.enabled"; +let BENIGN_PAGE = "http://tracking.example.org/browser/browser/base/content/test/general/benignPage.html"; +let TRACKING_PAGE = "http://tracking.example.org/browser/browser/base/content/test/general/trackingPage.html"; +let TrackingProtection = null; -function testTrackingPageOFF(gTestBrowser) -{ - // Make sure the doorhanger does NOT appear - var notification = PopupNotifications.getNotification("bad-content", gTestBrowser); - is(notification, null, "Tracking Content Doorhanger did NOT appear when protection was OFF and tracking was present"); +registerCleanupFunction(function() { + TrackingProtection = null; + Services.prefs.clearUserPref(PREF); + gBrowser.removeCurrentTab(); +}); + +function testTrackingPageOFF() { + ok (TrackingProtection.container.hidden, "The container is hidden"); } -function testBenignPageOFF(gTestBrowser) -{ - // Make sure the doorhanger does NOT appear - var notification = PopupNotifications.getNotification("bad-content", 
gTestBrowser); - is(notification, null, "Tracking Content Doorhanger did NOT appear when protection was OFF and tracking was NOT present"); +function testBenignPageOFF() { + ok (TrackingProtection.container.hidden, "The container is hidden"); } add_task(function* () { - registerCleanupFunction(function() { - Services.prefs.clearUserPref(PREF); - gBrowser.removeCurrentTab(); - }); - yield updateTrackingProtectionDatabase(); let tab = gBrowser.selectedTab = gBrowser.addTab(); - // Disable Tracking Protection + TrackingProtection = gBrowser.ownerGlobal.TrackingProtection; + ok (TrackingProtection, "Functionality is attached to the browser window"); + is (TrackingProtection.enabled, Services.prefs.getBoolPref(PREF), + "The initial enabled value is based on the default pref value"); + + info ("Disable Tracking Protection"); Services.prefs.setBoolPref(PREF, false); + ok (!TrackingProtection.enabled, "Functionality is disabled after setting the pref"); - // Point tab to a test page containing tracking elements + info ("Point tab to a test page containing tracking elements"); yield promiseTabLoadEvent(tab, TRACKING_PAGE); - testTrackingPageOFF(gBrowser.getBrowserForTab(tab)); + testTrackingPageOFF(); - // Point tab to a test page NOT containing tracking elements + info ("Point tab to a test page NOT containing tracking elements"); yield promiseTabLoadEvent(tab, BENIGN_PAGE); - testBenignPageOFF(gBrowser.getBrowserForTab(tab)); + testBenignPageOFF(); }); diff --git a/browser/base/content/urlbarBindings.xml b/browser/base/content/urlbarBindings.xml index 519101caaaf..b537739befe 100644 --- a/browser/base/content/urlbarBindings.xml +++ b/browser/base/content/urlbarBindings.xml @@ -2377,10 +2377,6 @@ file, You can obtain one at http://mozilla.org/MPL/2.0/. 
Services.urlFormatter.formatURLPref("app.support.baseURL") + "tracking-protection"; } - if (Services.prefs.getBoolPref("privacy.trackingprotection.enabled")) { - let histogram = Services.telemetry.getHistogramById("TRACKING_PROTECTION_EVENTS"); - histogram.add(0); - } ]]> @@ -2429,10 +2421,6 @@ file, You can obtain one at http://mozilla.org/MPL/2.0/. // from the tracking protection allowlist. Services.perms.remove(gBrowser.selectedBrowser.currentURI.host, "trackingprotection"); - // Telemetry for enable protection - let histogram = Services.telemetry.getHistogramById( - "TRACKING_PROTECTION_EVENTS"); - histogram.add(2); BrowserReload(); ]]> diff --git a/browser/components/controlcenter/content/panel.inc.xul b/browser/components/controlcenter/content/panel.inc.xul index d91bac14b3e..c8e97c29aa6 100644 --- a/browser/components/controlcenter/content/panel.inc.xul +++ b/browser/components/controlcenter/content/panel.inc.xul @@ -36,6 +36,42 @@ oncommand="gIdentityHandler.showSubView('security', this)"/> + + + + + + + + + + + + + diff --git a/browser/locales/en-US/chrome/browser/browser.dtd b/browser/locales/en-US/chrome/browser/browser.dtd index d34cc4a4caa..2c8089b2711 100644 --- a/browser/locales/en-US/chrome/browser/browser.dtd +++ b/browser/locales/en-US/chrome/browser/browser.dtd @@ -768,6 +768,15 @@ you can use these alternative items. Otherwise, their values should be empty. 
- + + + + + + + + + diff --git a/browser/themes/linux/jar.mn b/browser/themes/linux/jar.mn index 6c09a4b3490..b4a65d5e91a 100644 --- a/browser/themes/linux/jar.mn +++ b/browser/themes/linux/jar.mn @@ -150,6 +150,8 @@ browser.jar: skin/classic/browser/controlcenter/conn-secure-dv.svg (../shared/controlcenter/conn-secure-dv.svg) skin/classic/browser/controlcenter/conn-secure-ev.svg (../shared/controlcenter/conn-secure-ev.svg) skin/classic/browser/controlcenter/permissions.svg (../shared/controlcenter/permissions.svg) + skin/classic/browser/controlcenter/tracking-protection.svg (../shared/controlcenter/tracking-protection.svg) + skin/classic/browser/controlcenter/tracking-protection-disabled.svg (../shared/controlcenter/tracking-protection-disabled.svg) skin/classic/browser/customizableui/background-noise-toolbar.png (customizableui/background-noise-toolbar.png) skin/classic/browser/customizableui/customize-illustration.png (../shared/customizableui/customize-illustration.png) skin/classic/browser/customizableui/customize-illustration-rtl.png (../shared/customizableui/customize-illustration-rtl.png) diff --git a/browser/themes/osx/jar.mn b/browser/themes/osx/jar.mn index e5c7749c10e..5d4d0389c1f 100644 --- a/browser/themes/osx/jar.mn +++ b/browser/themes/osx/jar.mn @@ -194,6 +194,8 @@ browser.jar: skin/classic/browser/controlcenter/conn-secure-dv.svg (../shared/controlcenter/conn-secure-dv.svg) skin/classic/browser/controlcenter/conn-secure-ev.svg (../shared/controlcenter/conn-secure-ev.svg) skin/classic/browser/controlcenter/permissions.svg (../shared/controlcenter/permissions.svg) + skin/classic/browser/controlcenter/tracking-protection.svg (../shared/controlcenter/tracking-protection.svg) + skin/classic/browser/controlcenter/tracking-protection-disabled.svg (../shared/controlcenter/tracking-protection-disabled.svg) skin/classic/browser/customizableui/background-noise-toolbar.png (customizableui/background-noise-toolbar.png) 
skin/classic/browser/customizableui/customize-titleBar-toggle.png (customizableui/customize-titleBar-toggle.png) skin/classic/browser/customizableui/customize-titleBar-toggle@2x.png (customizableui/customize-titleBar-toggle@2x.png) diff --git a/browser/themes/shared/controlcenter/panel.inc.css b/browser/themes/shared/controlcenter/panel.inc.css index ca456a1e384..371b6bdbec4 100644 --- a/browser/themes/shared/controlcenter/panel.inc.css +++ b/browser/themes/shared/controlcenter/panel.inc.css @@ -55,7 +55,8 @@ #identity-popup-securityView, #identity-popup-security-content, -#identity-popup-permissions-content { +#identity-popup-permissions-content, +#tracking-protection-content { padding: 0.75em 0 1em; -moz-padding-start: calc(2em + 24px); -moz-padding-end: 1em; @@ -66,7 +67,8 @@ #identity-popup-securityView:-moz-locale-dir(rtl), #identity-popup-security-content:-moz-locale-dir(rtl), -#identity-popup-permissions-content:-moz-locale-dir(rtl) { +#identity-popup-permissions-content:-moz-locale-dir(rtl), +#tracking-protection-content:-moz-locale-dir(rtl) { background-position: calc(100% - 1em) 1em; } @@ -199,6 +201,30 @@ margin-top: 1em; } +/* TRACKING PROTECTION */ + +#tracking-protection-content { + background-image: url("chrome://browser/skin/controlcenter/tracking-protection.svg"); +} + +#tracking-protection-content[block-disabled] { + background-image: url("chrome://browser/skin/controlcenter/tracking-protection-disabled.svg"); +} + +#tracking-actions { + margin: 1em 0 0; +} + +#tracking-protection-content[block-active] > #tracking-not-detected, +#tracking-protection-content[block-disabled] > #tracking-not-detected, +#tracking-protection-content:not([block-active]) > #tracking-blocked, +#tracking-protection-content:not([block-active]) #tracking-action-unblock, +#tracking-protection-content:not([block-disabled]) > #tracking-loaded, +#tracking-protection-content:not([block-disabled]) #tracking-action-block, 
+#tracking-protection-content:not([block-active]):not([block-disabled]) > #tracking-actions { + display: none; +} + /* PERMISSIONS */ #identity-popup-permissions-content { diff --git a/browser/themes/shared/controlcenter/tracking-protection-disabled.svg b/browser/themes/shared/controlcenter/tracking-protection-disabled.svg new file mode 100644 index 00000000000..955dfe23d0c --- /dev/null +++ b/browser/themes/shared/controlcenter/tracking-protection-disabled.svg @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + diff --git a/browser/themes/shared/controlcenter/tracking-protection.svg b/browser/themes/shared/controlcenter/tracking-protection.svg new file mode 100644 index 00000000000..bf3f7806967 --- /dev/null +++ b/browser/themes/shared/controlcenter/tracking-protection.svg @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + diff --git a/browser/themes/windows/jar.mn b/browser/themes/windows/jar.mn index df3b34dcd65..31abf45210e 100644 --- a/browser/themes/windows/jar.mn +++ b/browser/themes/windows/jar.mn @@ -197,6 +197,8 @@ browser.jar: skin/classic/browser/controlcenter/conn-secure-dv.svg (../shared/controlcenter/conn-secure-dv.svg) skin/classic/browser/controlcenter/conn-secure-ev.svg (../shared/controlcenter/conn-secure-ev.svg) skin/classic/browser/controlcenter/permissions.svg (../shared/controlcenter/permissions.svg) + skin/classic/browser/controlcenter/tracking-protection.svg (../shared/controlcenter/tracking-protection.svg) + skin/classic/browser/controlcenter/tracking-protection-disabled.svg (../shared/controlcenter/tracking-protection-disabled.svg) skin/classic/browser/customizableui/background-noise-toolbar.png (customizableui/background-noise-toolbar.png) skin/classic/browser/customizableui/customizeFavicon.ico (../shared/customizableui/customizeFavicon.ico) skin/classic/browser/customizableui/customize-illustration.png (../shared/customizableui/customize-illustration.png) From 1a97339daf5f5eb1a5e8326cc3531cee04312f5a Mon Sep 17 00:00:00 2001 From: Dave 
Townsend Date: Tue, 30 Jun 2015 14:06:38 -0700 Subject: [PATCH 57/61] Bug 1176205: Fix typo breaking tests on aurora. r=RyanVM --- .../preferences/in-content/tests/browser_privacypane_5.js | 2 +- browser/components/preferences/tests/browser_privacypane_5.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/browser/components/preferences/in-content/tests/browser_privacypane_5.js b/browser/components/preferences/in-content/tests/browser_privacypane_5.js index 1a2b572d2d5..4f549bb3430 100644 --- a/browser/components/preferences/in-content/tests/browser_privacypane_5.js +++ b/browser/components/preferences/in-content/tests/browser_privacypane_5.js @@ -21,7 +21,7 @@ function test() { ]; if (AppConstants.NIGHTLY_BUILD) - tests.push(test_locbar_suggestion_retention("searches", true)), + tests.push(test_locbar_suggestion_retention("searches", true)); run_test_subset(tests); } diff --git a/browser/components/preferences/tests/browser_privacypane_5.js b/browser/components/preferences/tests/browser_privacypane_5.js index 1a2b572d2d5..4f549bb3430 100644 --- a/browser/components/preferences/tests/browser_privacypane_5.js +++ b/browser/components/preferences/tests/browser_privacypane_5.js @@ -21,7 +21,7 @@ function test() { ]; if (AppConstants.NIGHTLY_BUILD) - tests.push(test_locbar_suggestion_retention("searches", true)), + tests.push(test_locbar_suggestion_retention("searches", true)); run_test_subset(tests); } From 6943c5562c442479485824d8c57bb5a14401e1ad Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Tue, 30 Jun 2015 16:19:51 -0700 Subject: [PATCH 58/61] No bug - Don't write artifacts twice. r=me DONTBUILD NPOTB This was just an oversight during the initial landing, leading to two copies of artifact libraries being appended to the same destination file. 
--- python/mozbuild/mozbuild/artifacts.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/mozbuild/mozbuild/artifacts.py b/python/mozbuild/mozbuild/artifacts.py index fb0e2bce8c5..49b6612f232 100644 --- a/python/mozbuild/mozbuild/artifacts.py +++ b/python/mozbuild/mozbuild/artifacts.py @@ -325,7 +325,6 @@ class Artifacts(object): n = os.path.join(distdir, 'bin', os.path.basename(info.filename)) fh = FileAvoidWrite(n, mode='r') shutil.copyfileobj(zf.open(info), fh) - fh.write(zf.open(info).read()) file_existed, file_updated = fh.close() self.log(logging.INFO, 'artifact', {'updating': 'Updating' if file_updated else 'Not updating', 'filename': n}, From 5b362f8911a6ede0159f32a6d966978e427afee7 Mon Sep 17 00:00:00 2001 From: vivek Date: Tue, 30 Jun 2015 21:09:44 -0700 Subject: [PATCH 59/61] Bug 1177855: Fetch and show avatar image as preference icon. r=nalexander The profile JSON is stored in the Account bundle. There's no need to bump the bundle version, since missing (i.e., null) profile JSON is legal. This introduces and uses a general-purpose PicassoPreferenceIcon Picasso Target that, on API 11+ devices, dynamically loads a preference icon. 
--- mobile/android/base/android-services.mozbuild | 1 + .../android/base/fxa/FxAccountConstants.java | 2 +- .../activities/FxAccountStatusFragment.java | 130 ++++++++------- .../PicassoPreferenceIconTarget.java | 76 +++++++++ .../fxa/authenticator/AndroidFxAccount.java | 148 +++++------------- .../fxa/sync/FxAccountProfileService.java | 8 +- .../base/fxa/sync/FxAccountSyncAdapter.java | 7 + .../resources/values/fxaccount_dimens.xml | 5 + 8 files changed, 209 insertions(+), 168 deletions(-) create mode 100644 mobile/android/base/fxa/activities/PicassoPreferenceIconTarget.java diff --git a/mobile/android/base/android-services.mozbuild b/mobile/android/base/android-services.mozbuild index ac6af72c16d..bec1fac3b0e 100644 --- a/mobile/android/base/android-services.mozbuild +++ b/mobile/android/base/android-services.mozbuild @@ -863,6 +863,7 @@ sync_java_files = [ 'fxa/activities/FxAccountStatusFragment.java', 'fxa/activities/FxAccountUpdateCredentialsActivity.java', 'fxa/activities/FxAccountVerifiedAccountActivity.java', + 'fxa/activities/PicassoPreferenceIconTarget.java', 'fxa/authenticator/AccountPickler.java', 'fxa/authenticator/AndroidFxAccount.java', 'fxa/authenticator/FxAccountAuthenticator.java', diff --git a/mobile/android/base/fxa/FxAccountConstants.java b/mobile/android/base/fxa/FxAccountConstants.java index c3e3d063741..b5aed319cb7 100644 --- a/mobile/android/base/fxa/FxAccountConstants.java +++ b/mobile/android/base/fxa/FxAccountConstants.java @@ -24,7 +24,7 @@ public class FxAccountConstants { public static final String STAGE_PROFILE_SERVER_ENDPOINT = "https://latest.dev.lcip.org/profile/v1"; // Action to update on cached profile information. - public static final String ACCOUNT_PROFILE_AVATAR_UPDATED_ACTION = "org.mozilla.gecko.fxa.profile.cached"; + public static final String ACCOUNT_PROFILE_JSON_UPDATED_ACTION = "org.mozilla.gecko.fxa.profile.JSON.updated"; // You must be at least 13 years old, on the day of creation, to create a Firefox Account. 
public static final int MINIMUM_AGE_TO_CREATE_AN_ACCOUNT = 13; diff --git a/mobile/android/base/fxa/activities/FxAccountStatusFragment.java b/mobile/android/base/fxa/activities/FxAccountStatusFragment.java index e90766a99aa..709163cbb2b 100644 --- a/mobile/android/base/fxa/activities/FxAccountStatusFragment.java +++ b/mobile/android/base/fxa/activities/FxAccountStatusFragment.java @@ -50,6 +50,8 @@ import android.text.TextUtils; import android.text.format.DateUtils; import android.widget.Toast; +import com.squareup.picasso.Picasso; +import com.squareup.picasso.Target; /** * A fragment that displays the status of an AndroidFxAccount. @@ -140,13 +142,11 @@ public class FxAccountStatusFragment // Runnable to update last synced time. protected Runnable lastSyncedTimeUpdateRunnable; - // Runnable to retry fetching profile information. - protected Runnable profileFetchRunnable; - // Broadcast Receiver to update profile Information. protected FxAccountProfileInformationReceiver accountProfileInformationReceiver; protected final InnerSyncStatusDelegate syncStatusDelegate = new InnerSyncStatusDelegate(); + private Target profileAvatarTarget; protected Preference ensureFindPreference(String key) { Preference preference = findPreference(key); @@ -485,6 +485,18 @@ public class FxAccountStatusFragment // register/unregister calls. FxAccountSyncStatusHelper.getInstance().startObserving(syncStatusDelegate); + if (AppConstants.MOZ_ANDROID_FIREFOX_ACCOUNT_PROFILES) { + // Register a local broadcast receiver to get profile cached notification. + final IntentFilter intentFilter = new IntentFilter(); + intentFilter.addAction(FxAccountConstants.ACCOUNT_PROFILE_JSON_UPDATED_ACTION); + accountProfileInformationReceiver = new FxAccountProfileInformationReceiver(); + LocalBroadcastManager.getInstance(getActivity()).registerReceiver(accountProfileInformationReceiver, intentFilter); + + // profilePreference is set during onCreate, so it's definitely not null here. 
+ final float cornerRadius = getResources().getDimension(R.dimen.fxaccount_profile_image_width) / 2; + profileAvatarTarget = new PicassoPreferenceIconTarget(getResources(), profilePreference, cornerRadius); + } + refresh(); } @@ -498,14 +510,15 @@ public class FxAccountStatusFragment handler.removeCallbacks(lastSyncedTimeUpdateRunnable); } - if (profileFetchRunnable != null) { - handler.removeCallbacks(profileFetchRunnable); - } - // Focus lost, unregister broadcast receiver. if (accountProfileInformationReceiver != null) { LocalBroadcastManager.getInstance(getActivity()).unregisterReceiver(accountProfileInformationReceiver); } + + if (profileAvatarTarget != null) { + Picasso.with(getActivity()).cancelRequest(profileAvatarTarget); + profileAvatarTarget = null; + } } protected void hardRefresh() { @@ -606,53 +619,60 @@ public class FxAccountStatusFragment return; } - final ExtendedJSONObject cachedProfileJSON = fxAccount.getCachedProfileJSON(); - if (cachedProfileJSON != null) { - // Update profile information from the cached Json. - updateProfileInformation(cachedProfileJSON); + final ExtendedJSONObject profileJSON = fxAccount.getProfileJSON(); + if (profileJSON == null) { + // Update the profile title with email as the fallback. + // Profile icon by default use the default avatar as the fallback. + profilePreference.setTitle(fxAccount.getEmail()); return; } - // Update the profile title with email as the fallback. - // Profile icon by default use the default avatar as the fallback. - profilePreference.setTitle(fxAccount.getEmail()); - - // Register a local broadcast receiver to get profile cached notification. 
- final IntentFilter intentFilter = new IntentFilter(); - intentFilter.addAction(FxAccountConstants.ACCOUNT_PROFILE_AVATAR_UPDATED_ACTION); - accountProfileInformationReceiver = new FxAccountProfileInformationReceiver(); - LocalBroadcastManager.getInstance(getActivity()).registerReceiver(accountProfileInformationReceiver, intentFilter); - - // Fetch the profile from the server. - fxAccount.maybeUpdateProfileJSON(false); - - // Schedule an runnable to retry fetching profile. - profileFetchRunnable = new ProfileFetchUpdateRunnable(); - handler.postDelayed(profileFetchRunnable, PROFILE_FETCH_RETRY_INTERVAL_IN_MILLISECONDS); + updateProfileInformation(profileJSON); } /** * Update profile information from json on UI thread. * - * @param profileJson json fetched from server. + * @param profileJSON json fetched from server. */ - protected void updateProfileInformation(final ExtendedJSONObject profileJson) { - // Remove the scheduled runnable for fetching the profile information. - if (profileFetchRunnable != null) { - handler.removeCallbacks(profileFetchRunnable); + protected void updateProfileInformation(final ExtendedJSONObject profileJSON) { + // View changes must always be done on UI thread. + ThreadUtils.assertOnUiThread(); + + FxAccountUtils.pii(LOG_TAG, "Profile JSON is: " + profileJSON.toJSONString()); + + final String userName = profileJSON.getString(FxAccountConstants.KEY_PROFILE_JSON_USERNAME); + // Update the profile username and email if available. + if (!TextUtils.isEmpty(userName)) { + profilePreference.setTitle(userName); + profilePreference.setSummary(fxAccount.getEmail()); + } else { + profilePreference.setTitle(fxAccount.getEmail()); } - // Read the profile information from json and Update the UI elements. - ThreadUtils.postToUiThread(new Runnable() { - @Override - public void run() { - // Icon update from java is not supported prior to API 11, skip the avatar update for older device. 
- if (AppConstants.Versions.feature11Plus) { - profilePreference.setIcon(getResources().getDrawable(R.drawable.sync_avatar_default)); - } - profilePreference.setTitle(fxAccount.getAndroidAccount().name); - } - }); + // Icon update from java is not supported prior to API 11, skip the avatar image fetch and update for older device. + if (!AppConstants.Versions.feature11Plus) { + Logger.info(LOG_TAG, "Skipping profile image fetch for older pre-API 11 devices."); + return; + } + + // Avatar URI empty, skip profile image fetch. + final String avatarURI = profileJSON.getString(FxAccountConstants.KEY_PROFILE_JSON_AVATAR); + if (TextUtils.isEmpty(avatarURI)) { + Logger.info(LOG_TAG, "AvatarURI is empty, skipping profile image fetch."); + return; + } + + // Using noPlaceholder would avoid a pop of the default image, but it's not available in the version of Picasso + // we ship in the tree. + Picasso + .with(getActivity()) + .load(avatarURI) + .centerInside() + .resizeDimen(R.dimen.fxaccount_profile_image_width, R.dimen.fxaccount_profile_image_height) + .placeholder(R.drawable.sync_avatar_default) + .error(R.drawable.sync_avatar_default) + .into(profileAvatarTarget); } private void scheduleAndUpdateLastSyncedTime() { @@ -830,26 +850,24 @@ public class FxAccountStatusFragment } } - /** - * The Runnable that schedules a future to fetch profile information. - */ - protected class ProfileFetchUpdateRunnable implements Runnable { - @Override - public void run() { - updateProfileInformation(); - } - } - /** * Broadcast receiver to receive updates for the cached profile action. */ public class FxAccountProfileInformationReceiver extends BroadcastReceiver { @Override public void onReceive(Context context, Intent intent) { - if (intent.getAction().equals(FxAccountConstants.ACCOUNT_PROFILE_AVATAR_UPDATED_ACTION)) { - // We should have a cached profile json here. 
- updateProfileInformation(fxAccount.getCachedProfileJSON()); + if (!intent.getAction().equals(FxAccountConstants.ACCOUNT_PROFILE_JSON_UPDATED_ACTION)) { + return; } + + Logger.info(LOG_TAG, "Profile avatar cache update action broadcast received."); + // Update the UI from cached profile json on the main thread. + getActivity().runOnUiThread(new Runnable() { + @Override + public void run() { + updateProfileInformation(); + } + }); } } diff --git a/mobile/android/base/fxa/activities/PicassoPreferenceIconTarget.java b/mobile/android/base/fxa/activities/PicassoPreferenceIconTarget.java new file mode 100644 index 00000000000..bc15f085ae8 --- /dev/null +++ b/mobile/android/base/fxa/activities/PicassoPreferenceIconTarget.java @@ -0,0 +1,76 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +package org.mozilla.gecko.fxa.activities; + +import android.content.res.Resources; +import android.graphics.Bitmap; +import android.graphics.drawable.BitmapDrawable; +import android.graphics.drawable.Drawable; +import android.preference.Preference; +import android.support.v4.graphics.drawable.RoundedBitmapDrawable; +import android.support.v4.graphics.drawable.RoundedBitmapDrawableFactory; +import com.squareup.picasso.Picasso; +import com.squareup.picasso.Target; +import org.mozilla.gecko.AppConstants; + +/** + * A Picasso Target that updates a preference icon. + * + * Nota bene: Android grew support for updating preference icons programatically + * only in API 11. This class silently ignores requests before API 11. 
+ */ +public class PicassoPreferenceIconTarget implements Target { + private final Preference preference; + private final Resources resources; + private final float cornerRadius; + + public PicassoPreferenceIconTarget(Resources resources, Preference preference) { + this(resources, preference, 0); + } + + public PicassoPreferenceIconTarget(Resources resources, Preference preference, float cornerRadius) { + this.resources = resources; + this.preference = preference; + this.cornerRadius = cornerRadius; + } + + @Override + public void onBitmapLoaded(Bitmap bitmap, Picasso.LoadedFrom from) { + // Updating icons from Java is not supported prior to API 11. + if (!AppConstants.Versions.feature11Plus) { + return; + } + + final Drawable drawable; + if (cornerRadius > 0) { + final RoundedBitmapDrawable roundedBitmapDrawable; + roundedBitmapDrawable = RoundedBitmapDrawableFactory.create(resources, bitmap); + roundedBitmapDrawable.setCornerRadius(cornerRadius); + roundedBitmapDrawable.setAntiAlias(true); + drawable = roundedBitmapDrawable; + } else { + drawable = new BitmapDrawable(resources, bitmap); + } + preference.setIcon(drawable); + } + + @Override + public void onBitmapFailed(Drawable errorDrawable) { + // Updating icons from Java is not supported prior to API 11. + if (!AppConstants.Versions.feature11Plus) { + return; + } + preference.setIcon(errorDrawable); + } + + @Override + public void onPrepareLoad(Drawable placeHolderDrawable) { + // Updating icons from Java is not supported prior to API 11. 
+ if (!AppConstants.Versions.feature11Plus) { + return; + } + preference.setIcon(placeHolderDrawable); + } +} diff --git a/mobile/android/base/fxa/authenticator/AndroidFxAccount.java b/mobile/android/base/fxa/authenticator/AndroidFxAccount.java index c056f211ed5..5232b7676ec 100644 --- a/mobile/android/base/fxa/authenticator/AndroidFxAccount.java +++ b/mobile/android/base/fxa/authenticator/AndroidFxAccount.java @@ -69,12 +69,12 @@ public class AndroidFxAccount { public static final String ACCOUNT_KEY_TOKEN_SERVER = "tokenServerURI"; // Sync-specific. public static final String ACCOUNT_KEY_DESCRIPTOR = "descriptor"; - public static final String ACCOUNT_KEY_PROFILE_AVATAR = "avatar"; public static final int CURRENT_BUNDLE_VERSION = 2; public static final String BUNDLE_KEY_BUNDLE_VERSION = "version"; public static final String BUNDLE_KEY_STATE_LABEL = "stateLabel"; public static final String BUNDLE_KEY_STATE = "state"; + public static final String BUNDLE_KEY_PROFILE_JSON = "profile"; // Account authentication token type for fetching account profile. public static final String PROFILE_OAUTH_TOKEN_TYPE = "oauth::profile"; @@ -105,13 +105,6 @@ public class AndroidFxAccount { } private static final String PREF_KEY_LAST_SYNCED_TIMESTAMP = "lastSyncedTimestamp"; - public static final String PREF_KEY_LAST_PROFILE_FETCH_TIME = "lastProfilefetchTime"; - public static final String PREF_KEY_NUMBER_OF_PROFILE_FETCH = "numProfileFetch"; - - // Max wait time between successful profile avatar network fetch. - public static final long PROFILE_FETCH_RETRY_BACKOFF_DELTA_IN_MILLISECONDS = 24 * 60 * 60 * 1000; - // Max attempts allowed for retrying profile avatar network fetch. 
- public static final int MAX_PROFILE_FETCH_RETRIES = 5; protected final Context context; protected final AccountManager accountManager; @@ -127,7 +120,6 @@ public class AndroidFxAccount { */ protected static final ConcurrentHashMap perAccountBundleCache = new ConcurrentHashMap<>(); - private ExtendedJSONObject profileJson; public static void invalidateCaches() { perAccountBundleCache.clear(); @@ -667,39 +659,17 @@ public class AndroidFxAccount { return intent; } - private void setLastProfileFetchTimestampAndAttempts(long now, int attempts) { - try { - getSyncPrefs().edit().putLong(PREF_KEY_LAST_PROFILE_FETCH_TIME, now).commit(); - getSyncPrefs().edit().putInt(PREF_KEY_NUMBER_OF_PROFILE_FETCH, attempts); - } catch (Exception e) { - Logger.warn(LOG_TAG, "Got exception setting last profile fetch time & attempts; ignoring.", e); - } - } - - private long getLastProfileFetchTimestamp() { - final long neverFetched = -1L; - try { - return getSyncPrefs().getLong(PREF_KEY_LAST_PROFILE_FETCH_TIME, neverFetched); - } catch (Exception e) { - Logger.warn(LOG_TAG, "Got exception getting last profile fetch time; ignoring.", e); - return neverFetched; - } - } - - private int getNumberOfProfileFetch() { - final int neverFetched = 0; - try { - return getSyncPrefs().getInt(PREF_KEY_NUMBER_OF_PROFILE_FETCH, neverFetched); - } catch (Exception e) { - Logger.warn(LOG_TAG, "Got exception getting number of profile fetch; ignoring.", e); - return neverFetched; - } - } - - private boolean canScheduleProfileFetch() { - final int attempts = getNumberOfProfileFetch(); - final long delta = System.currentTimeMillis() - getLastProfileFetchTimestamp(); - return delta > PROFILE_FETCH_RETRY_BACKOFF_DELTA_IN_MILLISECONDS || attempts < MAX_PROFILE_FETCH_RETRIES; + /** + * Create an intent announcing that the profile JSON attached to this Firefox Account has been updated. + *

+ * It is not guaranteed that the profile JSON has changed. + * + * @return Intent to broadcast. + */ + private Intent makeProfileJSONUpdatedIntent() { + final Intent intent = new Intent(); + intent.setAction(FxAccountConstants.ACCOUNT_PROFILE_JSON_UPDATED_ACTION); + return intent; } public void setLastSyncedTimestamp(long now) { @@ -755,60 +725,31 @@ public class AndroidFxAccount { ContentResolver.setIsSyncable(account, BrowserContract.READING_LIST_AUTHORITY, 1); } - // Helper function to create intent for profile avatar updated event. - private Intent getProfileAvatarUpdatedIntent() { - final Intent profileCachedIntent = new Intent(); - profileCachedIntent.setAction(FxAccountConstants.ACCOUNT_PROFILE_AVATAR_UPDATED_ACTION); - return profileCachedIntent; - } - /** - * Returns the cached profile JSON object if available or null. + * Returns the current profile JSON if available, or null. * - * @return profile JSON Object. + * @return profile JSON object. */ - public ExtendedJSONObject getCachedProfileJSON() { - if (profileJson == null) { - // Try to retrieve and parse the json string from account manager. - final String profileJsonString = accountManager.getUserData(account, ACCOUNT_KEY_PROFILE_AVATAR); - if (profileJsonString != null) { - Logger.info(LOG_TAG, "Cached Profile information retrieved from AccountManager."); - try { - profileJson = ExtendedJSONObject.parseJSONObject(profileJsonString); - } catch (Exception e) { - Logger.error(LOG_TAG, "Failed to parse profile json; ignoring.", e); - } - } + public ExtendedJSONObject getProfileJSON() { + final String profileString = getBundleData(BUNDLE_KEY_PROFILE_JSON); + if (profileString == null) { + return null; } - return profileJson; + + try { + return new ExtendedJSONObject(profileString); + } catch (Exception e) { + Logger.error(LOG_TAG, "Failed to parse profile JSON; ignoring and returning null.", e); + } + return null; } /** - * Fetches the profile json from the server and updates the local cache. 
- * + * Fetch the profile JSON associated to the underlying Firefox Account from the server and update the local store. *

- * On successful fetch and cache, LocalBroadcastManager is used to notify the receivers asynchronously. - *

- * - * @param isForceFetch boolean to isForceFetch fetch from the server. + * The LocalBroadcastManager is used to notify the receivers asynchronously after a successful fetch. */ - public void maybeUpdateProfileJSON(final boolean isForceFetch) { - final ExtendedJSONObject profileJson = getCachedProfileJSON(); - final Intent profileAvatarUpdatedIntent = getProfileAvatarUpdatedIntent(); - - if (!isForceFetch && profileJson != null && !profileJson.keySet().isEmpty()) { - // Second line of defense, cache may have been updated in between. - Logger.info(LOG_TAG, "Profile already cached."); - LocalBroadcastManager.getInstance(context).sendBroadcast(profileAvatarUpdatedIntent); - return; - } - - if (!isForceFetch && !canScheduleProfileFetch()) { - // Rate limiting repeated attempts to fetch the profile information. - Logger.info(LOG_TAG, "Too many attempts to fetch the profile information."); - return; - } - + public void fetchProfileJSON() { ThreadUtils.postToBackgroundThread(new Runnable() { @Override public void run() { @@ -828,24 +769,15 @@ public class AndroidFxAccount { final Intent intent = new Intent(context, FxAccountProfileService.class); intent.putExtra(FxAccountProfileService.KEY_AUTH_TOKEN, authToken); intent.putExtra(FxAccountProfileService.KEY_PROFILE_SERVER_URI, getProfileServerURI()); - intent.putExtra(FxAccountProfileService.KEY_RESULT_RECEIVER, new ProfileResultReceiver(profileAvatarUpdatedIntent)); + intent.putExtra(FxAccountProfileService.KEY_RESULT_RECEIVER, new ProfileResultReceiver(new Handler())); context.startService(intent); - - // Update the profile fetch time and attempts, resetting the attempts if last fetch was over a day old. - final int attempts = getNumberOfProfileFetch(); - final long now = System.currentTimeMillis(); - final long delta = now - getLastProfileFetchTimestamp(); - setLastProfileFetchTimestampAndAttempts(now, delta < PROFILE_FETCH_RETRY_BACKOFF_DELTA_IN_MILLISECONDS ? 
attempts + 1 : 1); } }); } private class ProfileResultReceiver extends ResultReceiver { - private final Intent profileAvatarUpdatedIntent; - - public ProfileResultReceiver(Intent broadcastIntent) { - super(new Handler()); - this.profileAvatarUpdatedIntent = broadcastIntent; + public ProfileResultReceiver(Handler handler) { + super(handler); } @Override @@ -853,21 +785,17 @@ public class AndroidFxAccount { super.onReceiveResult(resultCode, bundle); switch (resultCode) { case Activity.RESULT_OK: - try { - final String resultData = bundle.getString(FxAccountProfileService.KEY_RESULT_STRING); - profileJson = ExtendedJSONObject.parseJSONObject(resultData); - accountManager.setUserData(account, ACCOUNT_KEY_PROFILE_AVATAR, resultData); - Logger.pii(LOG_TAG, "Profile fetch successful." + resultData); - LocalBroadcastManager.getInstance(context).sendBroadcast(profileAvatarUpdatedIntent); - } catch (Exception e) { - Logger.error(LOG_TAG, "Failed to parse profile json; ignoring.", e); - } + final String resultData = bundle.getString(FxAccountProfileService.KEY_RESULT_STRING); + updateBundleValues(BUNDLE_KEY_PROFILE_JSON, resultData); + Logger.info(LOG_TAG, "Profile JSON fetch succeeeded!"); + FxAccountUtils.pii(LOG_TAG, "Profile JSON fetch returned: " + resultData); + LocalBroadcastManager.getInstance(context).sendBroadcast(makeProfileJSONUpdatedIntent()); break; case Activity.RESULT_CANCELED: - Logger.warn(LOG_TAG, "Failed to fetch profile; ignoring."); + Logger.warn(LOG_TAG, "Failed to fetch profile JSON; ignoring."); break; default: - Logger.warn(LOG_TAG, "Invalid Result code received; ignoring."); + Logger.warn(LOG_TAG, "Invalid result code received; ignoring."); break; } } diff --git a/mobile/android/base/fxa/sync/FxAccountProfileService.java b/mobile/android/base/fxa/sync/FxAccountProfileService.java index 723b08219be..1548c3ff0c9 100644 --- a/mobile/android/base/fxa/sync/FxAccountProfileService.java +++ b/mobile/android/base/fxa/sync/FxAccountProfileService.java @@ -11,6
+11,7 @@ import android.os.Bundle; import android.os.ResultReceiver; import org.mozilla.gecko.background.common.log.Logger; +import org.mozilla.gecko.background.fxa.FxAccountUtils; import org.mozilla.gecko.background.fxa.oauth.FxAccountAbstractClient; import org.mozilla.gecko.background.fxa.oauth.FxAccountAbstractClientException; import org.mozilla.gecko.background.fxa.profile.FxAccountProfileClient10; @@ -37,6 +38,11 @@ public class FxAccountProfileService extends IntentService { final String profileServerURI = intent.getStringExtra(KEY_PROFILE_SERVER_URI); final ResultReceiver resultReceiver = intent.getParcelableExtra(KEY_RESULT_RECEIVER); + if (resultReceiver == null) { + Logger.warn(LOG_TAG, "Result receiver must not be null; ignoring intent."); + return; + } + if (authToken == null || authToken.length() == 0) { Logger.warn(LOG_TAG, "Invalid Auth Token"); sendResult("Invalid Auth Token", resultReceiver, Activity.RESULT_CANCELED); @@ -66,7 +72,7 @@ public class FxAccountProfileService extends IntentService { @Override public void handleSuccess(ExtendedJSONObject result) { if (result != null){ - Logger.pii(LOG_TAG, "Profile Server response : " + result.toJSONString()); + FxAccountUtils.pii(LOG_TAG, "Profile server return profile: " + result.toJSONString()); sendResult(result.toJSONString(), resultReceiver, Activity.RESULT_OK); } } diff --git a/mobile/android/base/fxa/sync/FxAccountSyncAdapter.java b/mobile/android/base/fxa/sync/FxAccountSyncAdapter.java index fef11a114ec..c6ac45fd892 100644 --- a/mobile/android/base/fxa/sync/FxAccountSyncAdapter.java +++ b/mobile/android/base/fxa/sync/FxAccountSyncAdapter.java @@ -14,6 +14,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; +import org.mozilla.gecko.AppConstants; import org.mozilla.gecko.background.common.log.Logger; import org.mozilla.gecko.background.fxa.FxAccountUtils; import org.mozilla.gecko.background.fxa.SkewHandler; @@ 
-532,6 +533,12 @@ public class FxAccountSyncAdapter extends AbstractThreadedSyncAdapter { final KeyBundle syncKeyBundle = married.getSyncKeyBundle(); final String clientState = married.getClientState(); syncWithAssertion(audience, assertion, tokenServerEndpointURI, tokenBackoffHandler, sharedPrefs, syncKeyBundle, clientState, sessionCallback, extras, fxAccount); + + if (AppConstants.MOZ_ANDROID_FIREFOX_ACCOUNT_PROFILES) { + // Force fetch the profile avatar information. + Logger.info(LOG_TAG, "Fetching profile avatar information."); + fxAccount.fetchProfileJSON(); + } } catch (Exception e) { syncDelegate.handleError(e); return; diff --git a/mobile/android/base/resources/values/fxaccount_dimens.xml b/mobile/android/base/resources/values/fxaccount_dimens.xml index 0298be25065..f355141b4d4 100644 --- a/mobile/android/base/resources/values/fxaccount_dimens.xml +++ b/mobile/android/base/resources/values/fxaccount_dimens.xml @@ -25,4 +25,9 @@ 16dp 0x02000000 + + + 48dp + + 48dp From 2684c9ea8e8b451a9e98f4eb8e974dec7d300949 Mon Sep 17 00:00:00 2001 From: "Carsten \"Tomcat\" Book" Date: Wed, 1 Jul 2015 08:18:58 +0200 Subject: [PATCH 60/61] Backed out changeset 981a1dbe042b (bug 1166910) --- dom/base/test/img_referrer_testserver.sjs | 218 ---------------------- dom/base/test/mochitest.ini | 2 - dom/base/test/test_img_referrer.html | 171 ----------------- 3 files changed, 391 deletions(-) delete mode 100644 dom/base/test/img_referrer_testserver.sjs delete mode 100644 dom/base/test/test_img_referrer.html diff --git a/dom/base/test/img_referrer_testserver.sjs b/dom/base/test/img_referrer_testserver.sjs deleted file mode 100644 index 173186e6584..00000000000 --- a/dom/base/test/img_referrer_testserver.sjs +++ /dev/null @@ -1,218 +0,0 @@ -var BASE_URL = 'example.com/tests/dom/base/test/img_referrer_testserver.sjs'; - -function createTestUrl(aPolicy, aAction, aName) { - return 'http://' + BASE_URL + '?' 
+ - 'action=' + aAction + '&' + - 'policy=' + aPolicy + '&' + - 'name=' + aName; -} - -function createTestPage(aHead, aImgPolicy, aName) { - var _createTestUrl = createTestUrl.bind(null, aImgPolicy, 'test', aName); - - return '\n\ - '+ - aHead + - '\n\ - \n\ - \n\ - \n\ - '; -} - -// Creates the following test cases for the specified referrer -// policy combination: -// with referrer -function createTest(aPolicy, aImgPolicy, aName) { - var headString = ''; - if (aPolicy) { - headString += ''; - } - - headString += ''; - - return createTestPage(headString, aImgPolicy, aName); -} - -// testing regular load img with referrer policy -// speculative parser should not kick in here -function createTest2(aImgPolicy, name) { - return createTestPage('', aImgPolicy, name); -} - -function createTest3(aImgPolicy1, aImgPolicy2, aImgPolicy3, aName) { - return '\n\ - \n\ - \n\ - \n\ - \n\ - \n\ - \n\ - \n\ - '; -} - -function createTestPage2(aHead, aPolicy, aName) { - return '\n\ - '+ - aHead + - '\n\ - \n\ - \n\ - \n\ - '; -} - -function createTest4(aPolicy, aName) { - var headString = ''; - headString += ''; - headString += ''; - - return createTestPage2(headString, aPolicy, aName); -} - -function createTest5(aPolicy, aName) { - var headString = ''; - headString += ''; - - return createTestPage2(headString, aPolicy, aName); -} - -function handleRequest(request, response) { - var sharedKey = 'img_referrer_testserver.sjs'; - var params = request.queryString.split('&'); - var action = params[0].split('=')[1]; - - if (action === 'resetState') { - var state = getSharedState(sharedKey); - state = {}; - setSharedState(sharedKey, JSON.stringify(state)); - response.write(""); - return; - } else if (action === 'test') { - // ?action=test&policy=origin&name=name - var policy = params[1].split('=')[1]; - var name = params[2].split('=')[1]; - var result = getSharedState(sharedKey); - - if (result === '') { - result = {}; - } else { - result = JSON.parse(result); - } - - if (!result["tests"]) 
{ - result["tests"] = {}; - } - - if (name === "setAttribute") { - result["tests"][name] = policy; - } else { - var referrerLevel = "none"; - var test = {} - if (request.hasHeader('Referer')) { - let referrer = request.getHeader('Referer'); - if (referrer.indexOf("img_referrer_testserver") > 0) { - referrerLevel = "full"; - } else if (referrer == "http://mochi.test:8888") { - referrerLevel = "origin"; - } - test.referrer = request.getHeader('Referer'); - } else { - test.referrer = ''; - } - test.policy = referrerLevel; - test.expected = policy; - - result["tests"][name] = test; - - setSharedState(sharedKey, JSON.stringify(result)); - } - return; - } else if (action === 'get-test-results') { - // ?action=get-result - response.setHeader('Cache-Control', 'no-cache', false); - response.setHeader('Content-Type', 'text/plain', false); - response.write(getSharedState(sharedKey)); - return; - } else if (action === 'generate-img-policy-test') { - // ?action=generate-img-policy-test&imgPolicy=b64-encoded-string&name=name&policy=b64-encoded-string - response.setHeader('Cache-Control', 'no-cache', false); - response.setHeader('Content-Type', 'text/html; charset=utf-8', false); - var imgPolicy = unescape(params[1].split('=')[1]); - var name = unescape(params[2].split('=')[1]); - var metaPolicy = ''; - if (params[3]) { - metaPolicy = params[3].split('=')[1]; - } - - response.write(createTest(metaPolicy, imgPolicy, name)); - return; - } else if (action === 'generate-img-policy-test2') { - // ?action=generate-img-policy-test2&imgPolicy=b64-encoded-string&name=name - response.setHeader('Cache-Control', 'no-cache', false); - response.setHeader('Content-Type', 'text/html; charset=utf-8', false); - var imgPolicy = unescape(params[1].split('=')[1]); - var name = unescape(params[2].split('=')[1]); - - response.write(createTest2(imgPolicy, name)); - return; - } else if (action === 'generate-img-policy-test3') { - // 
?action=generate-img-policy-test3&imgPolicy1=b64-encoded-string&imgPolicy2=b64-encoded-string&imgPolicy3=b64-encoded-string&name=name - response.setHeader('Cache-Control', 'no-cache', false); - response.setHeader('Content-Type', 'text/html; charset=utf-8', false); - var imgPolicy1 = unescape(params[1].split('=')[1]); - var imgPolicy2 = unescape(params[2].split('=')[1]); - var imgPolicy3 = unescape(params[3].split('=')[1]); - var name = unescape(params[4].split('=')[1]); - - response.write(createTest3(imgPolicy1, imgPolicy2, imgPolicy3, name)); - return; - } else if (action === 'generate-img-policy-test4') { - // ?action=generate-img-policy-test4&imgPolicy=b64-encoded-string&name=name - response.setHeader('Cache-Control', 'no-cache', false); - response.setHeader('Content-Type', 'text/html; charset=utf-8', false); - var policy = unescape(params[1].split('=')[1]); - var name = unescape(params[2].split('=')[1]); - - response.write(createTest4(policy, name)); - return; - } else if (action === 'generate-img-policy-test5') { - // ?action=generate-img-policy-test5&policy=b64-encoded-string&name=name - response.setHeader('Cache-Control', 'no-cache', false); - response.setHeader('Content-Type', 'text/html; charset=utf-8', false); - var policy = unescape(params[1].split('=')[1]); - var name = unescape(params[2].split('=')[1]); - - response.write(createTest5(policy, name)); - return; - } else { - response.write("I don't know action "+action); - return; - } -} diff --git a/dom/base/test/mochitest.ini b/dom/base/test/mochitest.ini index dde17855ecd..d62b05c2a79 100644 --- a/dom/base/test/mochitest.ini +++ b/dom/base/test/mochitest.ini @@ -236,7 +236,6 @@ support-files = file_nonascii_blob_url.html referrerHelper.js test_performance_user_timing.js - img_referrer_testserver.sjs [test_anonymousContent_api.html] [test_anonymousContent_append_after_reflow.html] @@ -662,7 +661,6 @@ skip-if = buildapp == 'mulet' || buildapp == 'b2g' || toolkit == 'android' support-files = 
referrerHelper.js [test_bug1165501.html] support-files = referrerHelper.js -[test_img_referrer.html] [test_caretPositionFromPoint.html] [test_classList.html] # This test fails on the Mac for some reason diff --git a/dom/base/test/test_img_referrer.html b/dom/base/test/test_img_referrer.html deleted file mode 100644 index 16b88b07fdb..00000000000 --- a/dom/base/test/test_img_referrer.html +++ /dev/null @@ -1,171 +0,0 @@ - - - - - Test img policy attribute for Bug 1166910 - - - - - - - - - - - - - - From d4735903b0e851aff3ffef341449e0facb8a494d Mon Sep 17 00:00:00 2001 From: "Carsten \"Tomcat\" Book" Date: Wed, 1 Jul 2015 08:19:28 +0200 Subject: [PATCH 61/61] Backed out changeset f5f3827ffcf1 (bug 1166910) for bustage --- dom/base/nsImageLoadingContent.cpp | 25 ++----------------- dom/base/nsImageLoadingContent.h | 3 --- dom/html/HTMLImageElement.cpp | 1 - dom/html/HTMLImageElement.h | 10 -------- dom/html/nsGenericHTMLElement.cpp | 19 -------------- dom/html/nsGenericHTMLElement.h | 15 ----------- .../html/nsIDOMHTMLImageElement.idl | 3 +-- dom/webidl/HTMLImageElement.webidl | 2 -- image/imgLoader.cpp | 3 --- modules/libpref/init/all.js | 3 --- netwerk/base/ReferrerPolicy.h | 5 +--- parser/html/nsHtml5SpeculativeLoad.cpp | 4 +-- parser/html/nsHtml5SpeculativeLoad.h | 7 ++---- parser/html/nsHtml5TreeBuilderCppSupplement.h | 5 ---- parser/html/nsHtml5TreeOpExecutor.cpp | 16 ++---------- parser/html/nsHtml5TreeOpExecutor.h | 3 +-- 16 files changed, 11 insertions(+), 113 deletions(-) diff --git a/dom/base/nsImageLoadingContent.cpp b/dom/base/nsImageLoadingContent.cpp index bef2ed691be..18205ab5d31 100644 --- a/dom/base/nsImageLoadingContent.cpp +++ b/dom/base/nsImageLoadingContent.cpp @@ -46,7 +46,6 @@ #include "mozilla/EventStates.h" #include "mozilla/dom/Element.h" #include "mozilla/dom/ScriptSettings.h" -#include "mozilla/Preferences.h" #ifdef LoadImage // Undefine LoadImage to prevent naming conflict with Windows. 
@@ -931,27 +930,15 @@ nsImageLoadingContent::LoadImage(nsIURI* aNewURI, loadFlags |= imgILoader::LOAD_CORS_USE_CREDENTIALS; } - // get document wide referrer policy - mozilla::net::ReferrerPolicy referrerPolicy = aDocument->GetReferrerPolicy(); - bool referrerAttributeEnabled = Preferences::GetBool("network.http.enablePerElementReferrer", false); - // if referrer attributes are enabled in preferences, load img referrer attribute - nsresult rv; - if (referrerAttributeEnabled) { - mozilla::net::ReferrerPolicy imgReferrerPolicy = GetImageReferrerPolicy(); - // if the image does not provide a referrer attribute, ignore this - if (imgReferrerPolicy != mozilla::net::RP_Unset) { - referrerPolicy = imgReferrerPolicy; - } - } - // Not blocked. Do the load. nsRefPtr& req = PrepareNextRequest(aImageLoadType); nsCOMPtr content = do_QueryInterface(static_cast(this)); + nsresult rv; rv = nsContentUtils::LoadImage(aNewURI, aDocument, aDocument->NodePrincipal(), aDocument->GetDocumentURI(), - referrerPolicy, + aDocument->GetReferrerPolicy(), this, loadFlags, content->LocalName(), getter_AddRefs(req), @@ -1579,11 +1566,3 @@ nsImageLoadingContent::ImageObserver::~ImageObserver() MOZ_COUNT_DTOR(ImageObserver); NS_CONTENT_DELETE_LIST_MEMBER(ImageObserver, this, mNext); } - -// Only HTMLInputElement.h overrides this for tags -// all other subclasses use this one, i.e. 
ignore referrer attributes -mozilla::net::ReferrerPolicy -nsImageLoadingContent::GetImageReferrerPolicy() -{ - return mozilla::net::RP_Unset; -}; diff --git a/dom/base/nsImageLoadingContent.h b/dom/base/nsImageLoadingContent.h index 78f48404ef8..8d23d379557 100644 --- a/dom/base/nsImageLoadingContent.h +++ b/dom/base/nsImageLoadingContent.h @@ -24,7 +24,6 @@ #include "nsAutoPtr.h" #include "nsIContentPolicy.h" #include "mozilla/dom/BindingDeclarations.h" -#include "mozilla/net/ReferrerPolicy.h" class nsIURI; class nsIDocument; @@ -199,8 +198,6 @@ protected: */ virtual mozilla::CORSMode GetCORSMode(); - virtual mozilla::net::ReferrerPolicy GetImageReferrerPolicy(); - // Subclasses are *required* to call BindToTree/UnbindFromTree. void BindToTree(nsIDocument* aDocument, nsIContent* aParent, nsIContent* aBindingParent, bool aCompileEventHandlers); diff --git a/dom/html/HTMLImageElement.cpp b/dom/html/HTMLImageElement.cpp index a345817f09a..aac2e44afbc 100644 --- a/dom/html/HTMLImageElement.cpp +++ b/dom/html/HTMLImageElement.cpp @@ -145,7 +145,6 @@ NS_IMPL_BOOL_ATTR(HTMLImageElement, IsMap, ismap) NS_IMPL_URI_ATTR(HTMLImageElement, LongDesc, longdesc) NS_IMPL_STRING_ATTR(HTMLImageElement, Sizes, sizes) NS_IMPL_STRING_ATTR(HTMLImageElement, Lowsrc, lowsrc) -NS_IMPL_STRING_ATTR(HTMLImageElement, Referrer, referrer) NS_IMPL_URI_ATTR(HTMLImageElement, Src, src) NS_IMPL_STRING_ATTR(HTMLImageElement, Srcset, srcset) NS_IMPL_STRING_ATTR(HTMLImageElement, UseMap, usemap) diff --git a/dom/html/HTMLImageElement.h b/dom/html/HTMLImageElement.h index 3036835e3ec..1841ec8f16a 100644 --- a/dom/html/HTMLImageElement.h +++ b/dom/html/HTMLImageElement.h @@ -189,16 +189,6 @@ public: { SetHTMLAttr(nsGkAtoms::border, aBorder, aError); } - void SetReferrer(const nsAString& aReferrer, ErrorResult& aError) - { - SetHTMLAttr(nsGkAtoms::referrer, aReferrer, aError); - } - - mozilla::net::ReferrerPolicy - GetImageReferrerPolicy() - { - return GetReferrerPolicy(); - } int32_t X(); int32_t Y(); 
diff --git a/dom/html/nsGenericHTMLElement.cpp b/dom/html/nsGenericHTMLElement.cpp index a4d961f7524..3a60eb3d42a 100644 --- a/dom/html/nsGenericHTMLElement.cpp +++ b/dom/html/nsGenericHTMLElement.cpp @@ -106,8 +106,6 @@ #include "mozilla/dom/HTMLBodyElement.h" #include "imgIContainer.h" -#include "mozilla/net/ReferrerPolicy.h" - using namespace mozilla; using namespace mozilla::dom; @@ -997,10 +995,6 @@ nsGenericHTMLElement::ParseAttribute(int32_t aNamespaceID, return aResult.ParseIntValue(aValue); } - if (aAttribute == nsGkAtoms::referrer) { - return ParseReferrerAttribute(aValue, aResult); - } - if (aAttribute == nsGkAtoms::name) { // Store name as an atom. name="" means that the element has no name, // not that it has an emptystring as the name. @@ -1268,19 +1262,6 @@ nsGenericHTMLElement::ParseImageAttribute(nsIAtom* aAttribute, return false; } -bool -nsGenericHTMLElement::ParseReferrerAttribute(const nsAString& aString, - nsAttrValue& aResult) -{ - static const nsAttrValue::EnumTable kReferrerTable[] = { - { "no-referrer", net::RP_No_Referrer }, - { "origin", net::RP_Origin }, - { "unsafe-url", net::RP_Unsafe_URL }, - { 0 } - }; - return aResult.ParseEnumValue(aString, kReferrerTable, false); -} - bool nsGenericHTMLElement::ParseFrameborderValue(const nsAString& aString, nsAttrValue& aResult) diff --git a/dom/html/nsGenericHTMLElement.h b/dom/html/nsGenericHTMLElement.h index 30bd71bdd7d..82cc1cf25bb 100644 --- a/dom/html/nsGenericHTMLElement.h +++ b/dom/html/nsGenericHTMLElement.h @@ -233,17 +233,6 @@ public: mScrollgrab = aValue; } - mozilla::net::ReferrerPolicy - GetReferrerPolicy() - { - nsAutoString aPolicyString; - GetEnumAttr(nsGkAtoms::referrer, nullptr, aPolicyString); - if (aPolicyString.IsEmpty()) { - return mozilla::net::RP_Unset; - } - return mozilla::net::ReferrerPolicyFromString(aPolicyString); - } - /** * Determine whether an attribute is an event (onclick, etc.) 
* @param aName the attribute @@ -722,10 +711,6 @@ public: static bool ParseImageAttribute(nsIAtom* aAttribute, const nsAString& aString, nsAttrValue& aResult); - - static bool ParseReferrerAttribute(const nsAString& aString, - nsAttrValue& aResult); - /** * Convert a frameborder string to value (yes/no/1/0) * diff --git a/dom/interfaces/html/nsIDOMHTMLImageElement.idl b/dom/interfaces/html/nsIDOMHTMLImageElement.idl index 6c5c915ffaa..64574ca794c 100644 --- a/dom/interfaces/html/nsIDOMHTMLImageElement.idl +++ b/dom/interfaces/html/nsIDOMHTMLImageElement.idl @@ -16,7 +16,7 @@ * http://www.whatwg.org/specs/web-apps/current-work/ */ -[uuid(a640d8af-3c0e-4926-8102-5ab52053c280)] +[uuid(ec18e71c-4f5c-4cc3-aa36-5273168644dc)] interface nsIDOMHTMLImageElement : nsISupports { attribute DOMString alt; @@ -24,7 +24,6 @@ interface nsIDOMHTMLImageElement : nsISupports attribute DOMString srcset; attribute DOMString sizes; attribute DOMString useMap; - attribute DOMString referrer; attribute boolean isMap; attribute unsigned long width; attribute unsigned long height; diff --git a/dom/webidl/HTMLImageElement.webidl b/dom/webidl/HTMLImageElement.webidl index 55f5a851b37..fce3c7cdf48 100644 --- a/dom/webidl/HTMLImageElement.webidl +++ b/dom/webidl/HTMLImageElement.webidl @@ -30,8 +30,6 @@ interface HTMLImageElement : HTMLElement { [SetterThrows] attribute DOMString useMap; [SetterThrows] - attribute DOMString referrer; - [SetterThrows] attribute boolean isMap; [SetterThrows] attribute unsigned long width; diff --git a/image/imgLoader.cpp b/image/imgLoader.cpp index 4398c010991..b4b444dc82e 100644 --- a/image/imgLoader.cpp +++ b/image/imgLoader.cpp @@ -670,9 +670,6 @@ ValidateSecurityInfo(imgRequest* request, bool forcePrincipalCheck, nsISupports* aCX, ReferrerPolicy referrerPolicy) { // If the entry's Referrer Policy doesn't match, we can't use this request. - // XXX: this will return false if an image has different referrer attributes, - // i.e. 
we currently don't use the cached image but reload the image with - // the new referrer policy if (referrerPolicy != request->GetReferrerPolicy()) { return false; } diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index a83ab8f4905..4e18b6ba615 100644 --- a/modules/libpref/init/all.js +++ b/modules/libpref/init/all.js @@ -1273,9 +1273,6 @@ pref("network.http.referer.XOriginPolicy", 0); // By default this is enabled for compatibility (see bug 141641) pref("network.http.sendSecureXSiteReferrer", true); -// Controls whether referrer attributes in
, , , and