From 51418f0b1899c331dc4c02f706e225eb55cd5e64 Mon Sep 17 00:00:00 2001 From: Daniel Holbert Date: Thu, 2 Oct 2014 07:48:59 -0700 Subject: [PATCH 001/146] Bug 1076443: Remove no-longer-used variable 'gBrowserTabsRemoteInitialized' from nsAppRunner.cpp. r=jimm --- toolkit/xre/nsAppRunner.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/toolkit/xre/nsAppRunner.cpp b/toolkit/xre/nsAppRunner.cpp index fcbf1ba1943..799364a983b 100644 --- a/toolkit/xre/nsAppRunner.cpp +++ b/toolkit/xre/nsAppRunner.cpp @@ -841,7 +841,6 @@ nsXULAppInfo::GetProcessID(uint32_t* aResult) return NS_OK; } -static bool gBrowserTabsRemoteInitialized = false; static bool gBrowserTabsRemoteAutostart = false; static bool gBrowserTabsRemoteAutostartInitialized = false; From d2772198b42799d2b4261460ebde75e427d3f5f4 Mon Sep 17 00:00:00 2001 From: Patrick McManus Date: Sat, 27 Sep 2014 14:51:19 -0400 Subject: [PATCH 002/146] bug 1073747 - nsurlparser xpcom methods don't validate input r=sworkman --- netwerk/base/src/nsURLParsers.cpp | 36 +++++++++++++++++++++++-------- netwerk/test/unit/test_1073747.js | 30 ++++++++++++++++++++++++++ netwerk/test/unit/xpcshell.ini | 1 + 3 files changed, 58 insertions(+), 9 deletions(-) create mode 100644 netwerk/test/unit/test_1073747.js diff --git a/netwerk/base/src/nsURLParsers.cpp b/netwerk/base/src/nsURLParsers.cpp index 81aadd345f2..37f3e722c72 100644 --- a/netwerk/base/src/nsURLParsers.cpp +++ b/netwerk/base/src/nsURLParsers.cpp @@ -52,7 +52,9 @@ nsBaseURLParser::ParseURL(const char *spec, int32_t specLen, uint32_t *authorityPos, int32_t *authorityLen, uint32_t *pathPos, int32_t *pathLen) { - NS_PRECONDITION(spec, "null pointer"); + if (NS_WARN_IF(!spec)) { + return NS_ERROR_INVALID_POINTER; + } if (specLen < 0) specLen = strlen(spec); @@ -165,7 +167,9 @@ nsBaseURLParser::ParseAuthority(const char *auth, int32_t authLen, uint32_t *hostnamePos, int32_t *hostnameLen, int32_t *port) { - NS_PRECONDITION(auth, "null pointer"); + if (NS_WARN_IF(!auth)) { + return 
NS_ERROR_INVALID_POINTER; + } if (authLen < 0) authLen = strlen(auth); @@ -205,7 +209,9 @@ nsBaseURLParser::ParsePath(const char *path, int32_t pathLen, uint32_t *queryPos, int32_t *queryLen, uint32_t *refPos, int32_t *refLen) { - NS_PRECONDITION(path, "null pointer"); + if (NS_WARN_IF(!path)) { + return NS_ERROR_INVALID_POINTER; + } if (pathLen < 0) pathLen = strlen(path); @@ -266,7 +272,9 @@ nsBaseURLParser::ParseFilePath(const char *filepath, int32_t filepathLen, uint32_t *basenamePos, int32_t *basenameLen, uint32_t *extensionPos, int32_t *extensionLen) { - NS_PRECONDITION(filepath, "null pointer"); + if (NS_WARN_IF(!filepath)) { + return NS_ERROR_INVALID_POINTER; + } if (filepathLen < 0) filepathLen = strlen(filepath); @@ -312,7 +320,9 @@ nsBaseURLParser::ParseFileName(const char *filename, int32_t filenameLen, uint32_t *basenamePos, int32_t *basenameLen, uint32_t *extensionPos, int32_t *extensionLen) { - NS_PRECONDITION(filename, "null pointer"); + if (NS_WARN_IF(!filename)) { + return NS_ERROR_INVALID_POINTER; + } if (filenameLen < 0) filenameLen = strlen(filename); @@ -408,7 +418,9 @@ nsNoAuthURLParser::ParseFilePath(const char *filepath, int32_t filepathLen, uint32_t *basenamePos, int32_t *basenameLen, uint32_t *extensionPos, int32_t *extensionLen) { - NS_PRECONDITION(filepath, "null pointer"); + if (NS_WARN_IF(!filepath)) { + return NS_ERROR_INVALID_POINTER; + } if (filepathLen < 0) filepathLen = strlen(filepath); @@ -450,7 +462,9 @@ nsAuthURLParser::ParseAuthority(const char *auth, int32_t authLen, { nsresult rv; - NS_PRECONDITION(auth, "null pointer"); + if (NS_WARN_IF(!auth)) { + return NS_ERROR_INVALID_POINTER; + } if (authLen < 0) authLen = strlen(auth); @@ -498,7 +512,9 @@ nsAuthURLParser::ParseUserInfo(const char *userinfo, int32_t userinfoLen, uint32_t *usernamePos, int32_t *usernameLen, uint32_t *passwordPos, int32_t *passwordLen) { - NS_PRECONDITION(userinfo, "null pointer"); + if (NS_WARN_IF(!userinfo)) { + return NS_ERROR_INVALID_POINTER; + } 
if (userinfoLen < 0) userinfoLen = strlen(userinfo); @@ -532,7 +548,9 @@ nsAuthURLParser::ParseServerInfo(const char *serverinfo, int32_t serverinfoLen, uint32_t *hostnamePos, int32_t *hostnameLen, int32_t *port) { - NS_PRECONDITION(serverinfo, "null pointer"); + if (NS_WARN_IF(!serverinfo)) { + return NS_ERROR_INVALID_POINTER; + } if (serverinfoLen < 0) serverinfoLen = strlen(serverinfo); diff --git a/netwerk/test/unit/test_1073747.js b/netwerk/test/unit/test_1073747.js new file mode 100644 index 00000000000..c930514e7e4 --- /dev/null +++ b/netwerk/test/unit/test_1073747.js @@ -0,0 +1,30 @@ +// Test based on submitted one from Peter B Shalimoff + +var test = function(s, funcName){ + function Arg(){}; + Arg.prototype.toString = function(){ + do_print("Testing " + funcName + " with null args"); + return this.value; + }; + // create a generic arg lits of null, -1, and 10 nulls + var args = [s, -1]; + for (var i = 0; i < 10; ++i) { + args.push(new Arg()); + } + var up = Components.classes["@mozilla.org/network/url-parser;1?auth=maybe"].getService(Components.interfaces.nsIURLParser); + try { + up[funcName].apply(up, args); + return args; + } catch (x) { + do_check_true(true); // make sure it throws an exception instead of crashing + return x; + } + // should always have an exception to catch + do_check_true(false); +}; +var s = null; +var funcs = ["parseAuthority", "parseFileName", "parseFilePath", "parsePath", "parseServerInfo", "parseURL", "parseUserInfo"]; + +function run_test() { + funcs.forEach(function(f){test(s, f);}); +} diff --git a/netwerk/test/unit/xpcshell.ini b/netwerk/test/unit/xpcshell.ini index 0f4c5903afa..07f7ba67399 100644 --- a/netwerk/test/unit/xpcshell.ini +++ b/netwerk/test/unit/xpcshell.ini @@ -303,3 +303,4 @@ run-if = os == "win" [test_tls_server.js] # The local cert service used by this test is not currently shipped on Android skip-if = os == "android" +[test_1073747.js] From 1a57f38c68ed4fc6de206d556baf61456ddf0e11 Mon Sep 17 00:00:00 2001 
From: Hannes Verschore Date: Thu, 2 Oct 2014 17:11:28 +0200 Subject: [PATCH 003/146] Bug 1073861 - IonMonkey: Don't update types during type policy, r=jandem --- js/src/jit-test/tests/ion/bug1073861.js | 69 +++++++++++++++++++ js/src/jit/IonBuilder.cpp | 36 ++++++---- js/src/jit/TypePolicy.cpp | 90 +++++++++++++++---------- 3 files changed, 146 insertions(+), 49 deletions(-) create mode 100644 js/src/jit-test/tests/ion/bug1073861.js diff --git a/js/src/jit-test/tests/ion/bug1073861.js b/js/src/jit-test/tests/ion/bug1073861.js new file mode 100644 index 00000000000..fe837c45c72 --- /dev/null +++ b/js/src/jit-test/tests/ion/bug1073861.js @@ -0,0 +1,69 @@ +function a(a, b, c, g) { + for (;;) { + if (0 > c) return a; + a: { + for (;;) { + var k = a.forward[c]; + if (t(k)) + if (k.key < b) a = k; + else break a; + else break a + } + a = void 0 + } + null != + g && (g[c] = a); + c -= 1 + } +} + +function t(a) { + return null != a && !1 !== a +} + + +var d = {forward: [{},null,{}]} +for (var i=0; i < 1000; i++) { + a(d, 0, 1, null); + a(d, 0, 0, null); +} + + + + +function test(a) { + var t = a[0] + if (t) { + return t.test; + } +} + +function test2(a) { + var t = a[0] + if (t) { + if (t) { + return t.test; + } + } +} + +function test3(a) { + var t = a[0] + if (t !== null) { + if (t !== undefined) { + return t.test; + } + } +} + +var a = [{test:1}] +var b = [undefined] +assertEq(test(b), undefined) +assertEq(test(a), 1) +assertEq(test(a), 1) +assertEq(test2(b), undefined) +assertEq(test2(a), 1) +assertEq(test2(a), 1) +assertEq(test3(b), undefined) +assertEq(test3(a), 1) +assertEq(test3(a), 1) diff --git a/js/src/jit/IonBuilder.cpp b/js/src/jit/IonBuilder.cpp index 1e51dab31b9..25bce093457 100644 --- a/js/src/jit/IonBuilder.cpp +++ b/js/src/jit/IonBuilder.cpp @@ -3144,7 +3144,7 @@ IonBuilder::replaceTypeSet(MDefinition *subject, types::TemporaryTypeSet *type, if (type->unknown()) return true; - MFilterTypeSet *replace = nullptr; + MInstruction *replace = nullptr; 
MDefinition *ins; for (uint32_t i = 0; i < current->stackDepth(); i++) { @@ -3161,27 +3161,32 @@ IonBuilder::replaceTypeSet(MDefinition *subject, types::TemporaryTypeSet *type, ins->toFilterTypeSet()->setResultType(intersect->getKnownMIRType()); ins->toFilterTypeSet()->setResultTypeSet(intersect); + + if (ins->type() == MIRType_Undefined) + current->setSlot(i, constant(UndefinedValue())); + if (ins->type() == MIRType_Null) + current->setSlot(i, constant(NullValue())); continue; } if (ins == subject) { if (!replace) { replace = MFilterTypeSet::New(alloc(), subject, type); - if (!replace) return false; - if (replace == subject) - break; current->add(replace); - if (replace != subject) { - // Make sure we don't hoist it above the MTest, we can use the - // 'dependency' of an MInstruction. This is normally used by - // Alias Analysis, but won't get overwritten, since this - // instruction doesn't have an AliasSet. - replace->setDependency(test); - } + // Make sure we don't hoist it above the MTest, we can use the + // 'dependency' of an MInstruction. This is normally used by + // Alias Analysis, but won't get overwritten, since this + // instruction doesn't have an AliasSet. 
+ replace->setDependency(test); + + if (replace->type() == MIRType_Undefined) + replace = constant(UndefinedValue()); + if (replace->type() == MIRType_Null) + replace = constant(NullValue()); } current->setSlot(i, replace); } @@ -3312,7 +3317,7 @@ IonBuilder::improveTypesAtTest(MDefinition *ins, bool trueBranch, MTest *test) if (!ins) return true; - switch(ins->op()) { + switch (ins->op()) { case MDefinition::Op_Not: return improveTypesAtTest(ins->toNot()->getOperand(0), !trueBranch, test); case MDefinition::Op_IsObject: { @@ -4245,7 +4250,12 @@ IonBuilder::inlineScriptedCall(CallInfo &callInfo, JSFunction *target) return oom(); MTypeBarrier *barrier = MTypeBarrier::New(alloc(), callInfo.thisArg(), clonedTypes); current->add(barrier); - callInfo.setThis(barrier); + if (barrier->type() == MIRType_Undefined) + callInfo.setThis(constant(UndefinedValue())); + else if (barrier->type() == MIRType_Null) + callInfo.setThis(constant(NullValue())); + else + callInfo.setThis(barrier); } } diff --git a/js/src/jit/TypePolicy.cpp b/js/src/jit/TypePolicy.cpp index 89818c47b66..8757f68e781 100644 --- a/js/src/jit/TypePolicy.cpp +++ b/js/src/jit/TypePolicy.cpp @@ -259,34 +259,35 @@ TypeBarrierPolicy::adjustInputs(TempAllocator &alloc, MInstruction *def) return true; } - // Input is a value. Unbox the input to the requested type. - if (inputType == MIRType_Value) { - MOZ_ASSERT(outputType != MIRType_Value); + // Box input if needed. + if (inputType != MIRType_Value) { + MOZ_ASSERT(ins->alwaysBails()); + ins->replaceOperand(0, boxAt(alloc, ins, ins->getOperand(0))); + } - // We can't unbox a value to null/undefined/lazyargs. So keep output - // also a value. 
- if (IsNullOrUndefined(outputType) || outputType == MIRType_MagicOptimizedArguments) { - MOZ_ASSERT(!ins->hasDefUses()); - ins->setResultType(MIRType_Value); - return true; - } - - MUnbox *unbox = MUnbox::New(alloc, ins->getOperand(0), outputType, MUnbox::TypeBarrier); - ins->block()->insertBefore(ins, unbox); - - // The TypeBarrier is equivalent to removing branches with unexpected - // types. The unexpected types would have changed Range Analysis - // predictions. As such, we need to prevent destructive optimizations. - ins->block()->flagOperandsOfPrunedBranches(unbox); - - ins->replaceOperand(0, unbox); + // We can't unbox a value to null/undefined/lazyargs. So keep output + // also a value. + // Note: Using setResultType shouldn't be done in TypePolicies, + // Here it is fine, since the type barrier has no uses. + if (IsNullOrUndefined(outputType) || outputType == MIRType_MagicOptimizedArguments) { + MOZ_ASSERT(!ins->hasDefUses()); + ins->setResultType(MIRType_Value); return true; } - // In the remaining cases we will alway bail. OutputType doesn't matter. - // Take inputType so we can use redefine during lowering. - MOZ_ASSERT(ins->alwaysBails()); - ins->setResultType(inputType); + // Unbox / propagate the right type. + MUnbox::Mode mode = MUnbox::TypeBarrier; + MInstruction *replace = MUnbox::New(alloc, ins->getOperand(0), ins->type(), mode); + + ins->block()->insertBefore(ins, replace); + ins->replaceOperand(0, replace); + if (!replace->typePolicy()->adjustInputs(alloc, replace)) + return false; + + // The TypeBarrier is equivalent to removing branches with unexpected + // types. The unexpected types would have changed Range Analysis + // predictions. As such, we need to prevent destructive optimizations. 
+ ins->block()->flagOperandsOfPrunedBranches(replace); return true; } @@ -851,29 +852,46 @@ bool FilterTypeSetPolicy::adjustInputs(TempAllocator &alloc, MInstruction *ins) { MOZ_ASSERT(ins->numOperands() == 1); + MIRType inputType = ins->getOperand(0)->type(); + MIRType outputType = ins->type(); - // Do nothing if already same type. - if (ins->type() == ins->getOperand(0)->type()) + // Input and output type are already in accordance. + if (inputType == outputType) return true; - // Box input if ouput type is MIRType_Value - if (ins->type() == MIRType_Value) { + // Output is a value, box the input. + if (outputType == MIRType_Value) { + MOZ_ASSERT(inputType != MIRType_Value); ins->replaceOperand(0, boxAt(alloc, ins, ins->getOperand(0))); return true; } - // For simplicity just mark output type as MIRType_Value if input type - // is MIRType_Value. It should be possible to unbox, but we need to - // add extra code for Undefined/Null. - if (ins->getOperand(0)->type() == MIRType_Value) { + // The outputType should always be a subset of the inputType. + // So if types don't equal, the input type is definitely a MIRType_Value. + if (inputType != MIRType_Value) + MOZ_CRASH("Types should be in accordance."); + + // We can't unbox a value to null/undefined/lazyargs. So keep output + // also a value. + // Note: Using setResultType shouldn't be done in TypePolicies, + // Here it is fine, since the type barrier has no uses. + if (IsNullOrUndefined(outputType) || outputType == MIRType_MagicOptimizedArguments) { + MOZ_ASSERT(!ins->hasDefUses()); ins->setResultType(MIRType_Value); return true; } - // In all other cases we will definitely bail, since types don't - // correspond. Just box and mark output as MIRType_Value. - ins->replaceOperand(0, boxAt(alloc, ins, ins->getOperand(0))); - ins->setResultType(MIRType_Value); + // Unbox / propagate the right type. 
+ MUnbox::Mode mode = MUnbox::Infallible; + MInstruction *replace = MUnbox::New(alloc, ins->getOperand(0), ins->type(), mode); + + ins->block()->insertBefore(ins, replace); + ins->replaceOperand(0, replace); + if (!replace->typePolicy()->adjustInputs(alloc, replace)) + return false; + + // Carry over the dependency the MFilterTypeSet had. + replace->setDependency(ins->dependency()); return true; } From 02871bd690c1e8eceb1e021a8ebe71ecc0e765bf Mon Sep 17 00:00:00 2001 From: Mukilan Thiyagarajan Date: Thu, 2 Oct 2014 17:21:39 +0200 Subject: [PATCH 004/146] Bug 1073016 - Optimize LRound and LRoundF on x86/x64. r=nbp --- .../jit/shared/CodeGenerator-x86-shared.cpp | 56 ++++++++++++------- .../jit/shared/MacroAssembler-x86-shared.cpp | 15 +++-- js/src/jit/shared/MacroAssembler-x86-shared.h | 2 +- 3 files changed, 47 insertions(+), 26 deletions(-) diff --git a/js/src/jit/shared/CodeGenerator-x86-shared.cpp b/js/src/jit/shared/CodeGenerator-x86-shared.cpp index 6e2406b687c..2daa5d21e7a 100644 --- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp +++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp @@ -1903,18 +1903,13 @@ CodeGeneratorX86Shared::visitRound(LRound *lir) FloatRegister scratch = ScratchDoubleReg; Register output = ToRegister(lir->output()); - Label negative, end, bailout; + Label negativeOrZero, negative, end, bailout; - // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. + // Branch to a slow path for non-positive inputs. Doesn't catch NaN. masm.xorpd(scratch, scratch); - masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative); + masm.branchDouble(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero); - // Bail on negative-zero. - masm.branchNegativeZero(input, output, &bailout); - if (!bailoutFrom(&bailout, lir->snapshot())) - return false; - - // Input is non-negative. Add the biggest double less than 0.5 and + // Input is positive. 
Add the biggest double less than 0.5 and // truncate, rounding down (because if the input is the biggest double less // than 0.5, adding 0.5 would undesirably round up to 1). Note that we have // to add the input to the temp register because we're not allowed to @@ -1926,7 +1921,21 @@ CodeGeneratorX86Shared::visitRound(LRound *lir) masm.jump(&end); - // Input is negative, but isn't -0. + // Input is negative, +0 or -0. + masm.bind(&negativeOrZero); + // Branch on negative input. + masm.j(Assembler::NotEqual, &negative); + + // Bail on negative-zero. + masm.branchNegativeZero(input, output, &bailout, /* maybeNonZero = */ false); + if (!bailoutFrom(&bailout, lir->snapshot())) + return false; + + // Input is +0 + masm.xor32(output, output); + masm.jump(&end); + + // Input is negative. masm.bind(&negative); masm.loadConstantDouble(0.5, temp); @@ -1984,16 +1993,11 @@ CodeGeneratorX86Shared::visitRoundF(LRoundF *lir) FloatRegister scratch = ScratchFloat32Reg; Register output = ToRegister(lir->output()); - Label negative, end, bailout; + Label negativeOrZero, negative, end, bailout; - // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. + // Branch to a slow path for non-positive inputs. Doesn't catch NaN. masm.xorps(scratch, scratch); - masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative); - - // Bail on negative-zero. - masm.branchNegativeZeroFloat32(input, output, &bailout); - if (!bailoutFrom(&bailout, lir->snapshot())) - return false; + masm.branchFloat(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero); // Input is non-negative. Add the biggest float less than 0.5 and truncate, // rounding down (because if the input is the biggest float less than 0.5, @@ -2008,7 +2012,21 @@ CodeGeneratorX86Shared::visitRoundF(LRoundF *lir) masm.jump(&end); - // Input is negative, but isn't -0. + // Input is negative, +0 or -0. + masm.bind(&negativeOrZero); + // Branch on negative input. 
+ masm.j(Assembler::NotEqual, &negative); + + // Bail on negative-zero. + masm.branchNegativeZeroFloat32(input, output, &bailout); + if (!bailoutFrom(&bailout, lir->snapshot())) + return false; + + // Input is +0. + masm.xor32(output, output); + masm.jump(&end); + + // Input is negative. masm.bind(&negative); masm.loadConstantFloat32(0.5f, temp); diff --git a/js/src/jit/shared/MacroAssembler-x86-shared.cpp b/js/src/jit/shared/MacroAssembler-x86-shared.cpp index 820ff61708c..1684d6791de 100644 --- a/js/src/jit/shared/MacroAssembler-x86-shared.cpp +++ b/js/src/jit/shared/MacroAssembler-x86-shared.cpp @@ -197,7 +197,8 @@ MacroAssemblerX86Shared::buildOOLFakeExitFrame(void *fakeReturnAddr) void MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg, Register scratch, - Label *label) + Label *label, + bool maybeNonZero) { // Determines whether the low double contained in the XMM register reg // is equal to -0.0. @@ -205,12 +206,14 @@ MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg, #if defined(JS_CODEGEN_X86) Label nonZero; - // Compare to zero. Lets through {0, -0}. - xorpd(ScratchDoubleReg, ScratchDoubleReg); - - // If reg is non-zero, jump to nonZero. - branchDouble(DoubleNotEqual, reg, ScratchDoubleReg, &nonZero); + // if not already compared to zero + if (maybeNonZero) { + // Compare to zero. Lets through {0, -0}. + xorpd(ScratchDoubleReg, ScratchDoubleReg); + // If reg is non-zero, jump to nonZero. + branchDouble(DoubleNotEqual, reg, ScratchDoubleReg, &nonZero); + } // Input register is either zero or negative zero. Retrieve sign of input. 
movmskpd(reg, scratch); diff --git a/js/src/jit/shared/MacroAssembler-x86-shared.h b/js/src/jit/shared/MacroAssembler-x86-shared.h index 7ac03f1f397..961777384a7 100644 --- a/js/src/jit/shared/MacroAssembler-x86-shared.h +++ b/js/src/jit/shared/MacroAssembler-x86-shared.h @@ -90,7 +90,7 @@ class MacroAssemblerX86Shared : public Assembler j(ConditionFromDoubleCondition(cond), label); } - void branchNegativeZero(FloatRegister reg, Register scratch, Label *label); + void branchNegativeZero(FloatRegister reg, Register scratch, Label *label, bool maybeNonZero = true); void branchNegativeZeroFloat32(FloatRegister reg, Register scratch, Label *label); void move32(Imm32 imm, Register dest) { From 02e82059088c94a2d15ca5d7c544ca2799d6cf9e Mon Sep 17 00:00:00 2001 From: Andrew McCreight Date: Thu, 2 Oct 2014 08:40:21 -0700 Subject: [PATCH 005/146] Bug 1068276, part 3 - Set some defaults for leak parameters on B2G reftests. r=jmaher --- layout/tools/reftest/runreftestb2g.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/layout/tools/reftest/runreftestb2g.py b/layout/tools/reftest/runreftestb2g.py index 05a2678c361..7afef674949 100644 --- a/layout/tools/reftest/runreftestb2g.py +++ b/layout/tools/reftest/runreftestb2g.py @@ -203,6 +203,10 @@ class B2GOptions(ReftestOptions): if not options.httpdPath: options.httpdPath = os.path.join(options.xrePath, "components") + # B2G reftests do not do leak checking, but set some reasonable defaults to avoid errors. + options.leakThresholds = {} + options.ignoreMissingLeaks = [] + return options From 0f460a11cdf06fccacb6a857b0d472b45065bd60 Mon Sep 17 00:00:00 2001 From: Andrew McCreight Date: Thu, 2 Oct 2014 08:42:11 -0700 Subject: [PATCH 006/146] Bug 1068276, part 4 - Fix indentation of leak option setting in reftests. 
r=ted --- layout/tools/reftest/runreftest.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/layout/tools/reftest/runreftest.py b/layout/tools/reftest/runreftest.py index a9f22ffda41..1d543c51f65 100644 --- a/layout/tools/reftest/runreftest.py +++ b/layout/tools/reftest/runreftest.py @@ -511,9 +511,8 @@ class ReftestOptions(OptionParser): if options.debugger is not None: self.error("cannot specify a debugger with parallel tests") - options.leakThresholds = {"default": options.defaultLeakThreshold} - - options.ignoreMissingLeaks = [] + options.leakThresholds = {"default": options.defaultLeakThreshold} + options.ignoreMissingLeaks = [] return options From 9f0c7267f908ef0546accd147cf9528033d92acd Mon Sep 17 00:00:00 2001 From: Till Schneidereit Date: Thu, 2 Oct 2014 17:37:48 +0200 Subject: [PATCH 007/146] Bug 1075059 - Part 1: Backout of Array.prototpype.contains usages. r=me --HG-- extra : rebase_source : c40bca2849fdfee824560253c7ffe8fb93f9b774 --- browser/base/content/browser-addons.js | 2 +- browser/base/content/browser-places.js | 4 ++-- browser/base/content/browser.js | 2 +- browser/base/content/newtab/updater.js | 2 +- browser/base/content/pageinfo/permissions.js | 2 +- browser/base/content/sync/setup.js | 6 +++--- browser/base/content/tabbrowser.xml | 12 ++++++------ .../base/content/test/general/browser_bug521216.js | 2 +- .../content/test/general/browser_contentAreaClick.js | 4 ++-- .../test/general/browser_devices_get_user_media.js | 4 ++-- .../browser_devices_get_user_media_about_urls.js | 2 +- browser/base/content/test/general/head.js | 2 +- .../base/content/test/general/test_contextmenu.html | 2 +- browser/base/content/urlbarBindings.xml | 2 +- 14 files changed, 24 insertions(+), 24 deletions(-) diff --git a/browser/base/content/browser-addons.js b/browser/base/content/browser-addons.js index 2c25d343881..542f62f7ff1 100644 --- a/browser/base/content/browser-addons.js +++ b/browser/base/content/browser-addons.js @@ -46,7 +46,7 @@ 
const gXPInstallObserver = { } // Note that the above try/catch will pass through dead object proxies and // other degenerate objects. Make sure the browser is bonafide. - if (!browser || !gBrowser.browsers.contains(browser)) + if (!browser || gBrowser.browsers.indexOf(browser) == -1) return; const anchorID = "addons-notification-icon"; diff --git a/browser/base/content/browser-places.js b/browser/base/content/browser-places.js index 2cec42271b6..a1706677f6d 100644 --- a/browser/base/content/browser-places.js +++ b/browser/base/content/browser-places.js @@ -1371,7 +1371,7 @@ let BookmarkingUI = { // calls back. For such an edge case, retain all unique entries from both // arrays. this._itemIds = this._itemIds.filter( - function (id) !aItemIds.contains(id) + function (id) aItemIds.indexOf(id) == -1 ).concat(aItemIds); this._updateStar(); @@ -1592,7 +1592,7 @@ let BookmarkingUI = { aURI) { if (aURI && aURI.equals(this._uri)) { // If a new bookmark has been added to the tracked uri, register it. - if (!this._itemIds.contains(aItemId)) { + if (this._itemIds.indexOf(aItemId) == -1) { this._itemIds.push(aItemId); // Only need to update the UI if it wasn't marked as starred before: if (this._itemIds.length == 1) { diff --git a/browser/base/content/browser.js b/browser/base/content/browser.js index 127327b17f1..8e004763eec 100644 --- a/browser/base/content/browser.js +++ b/browser/base/content/browser.js @@ -2233,7 +2233,7 @@ function URLBarSetURI(aURI) { // Replace initial page URIs with an empty string // only if there's no opener (bug 370555). // Bug 863515 - Make content.opener checks work in electrolysis. - if (gInitialPages.contains(uri.spec)) + if (gInitialPages.indexOf(uri.spec) != -1) value = !gMultiProcessBrowser && content.opener ? 
uri.spec : ""; else value = losslessDecodeURI(uri); diff --git a/browser/base/content/newtab/updater.js b/browser/base/content/newtab/updater.js index 24fa82025e0..f22c1473e07 100644 --- a/browser/base/content/newtab/updater.js +++ b/browser/base/content/newtab/updater.js @@ -129,7 +129,7 @@ let gUpdater = { // Delete sites that were removed from the grid. gGrid.sites.forEach(function (aSite) { // The site must be valid and not in the current grid. - if (!aSite || aSites.contains(aSite)) + if (!aSite || aSites.indexOf(aSite) != -1) return; batch.push(new Promise(resolve => { diff --git a/browser/base/content/pageinfo/permissions.js b/browser/base/content/pageinfo/permissions.js index a183e142fcd..36d27d41bad 100644 --- a/browser/base/content/pageinfo/permissions.js +++ b/browser/base/content/pageinfo/permissions.js @@ -18,7 +18,7 @@ var permissionObserver = { if (aTopic == "perm-changed") { var permission = aSubject.QueryInterface(Components.interfaces.nsIPermission); if (permission.host == gPermURI.host) { - if (gPermissions.contains(permission.type)) + if (gPermissions.indexOf(permission.type) > -1) initRow(permission.type); else if (permission.type.startsWith("plugin")) setPluginsRadioState(); diff --git a/browser/base/content/sync/setup.js b/browser/base/content/sync/setup.js index 37c04c2e7bc..d8092760b77 100644 --- a/browser/base/content/sync/setup.js +++ b/browser/base/content/sync/setup.js @@ -152,9 +152,9 @@ var gSyncSetup = { // Only open the dialog if username + password are actually correct. 
Weave.Service.login(); - if (![Weave.LOGIN_FAILED_INVALID_PASSPHRASE, - Weave.LOGIN_FAILED_NO_PASSPHRASE, - Weave.LOGIN_SUCCEEDED].contains(Weave.Status.login)) { + if ([Weave.LOGIN_FAILED_INVALID_PASSPHRASE, + Weave.LOGIN_FAILED_NO_PASSPHRASE, + Weave.LOGIN_SUCCEEDED].indexOf(Weave.Status.login) == -1) { return; } diff --git a/browser/base/content/tabbrowser.xml b/browser/base/content/tabbrowser.xml index 660fcf22aec..cfdd97bedf4 100644 --- a/browser/base/content/tabbrowser.xml +++ b/browser/base/content/tabbrowser.xml @@ -1668,7 +1668,7 @@ if (!docShellsSwapped && !uriIsAboutBlank) { // pretend the user typed this so it'll be available till // the document successfully loads - if (aURI && !gInitialPages.contains(aURI)) + if (aURI && gInitialPages.indexOf(aURI) == -1) b.userTypedValue = aURI; let flags = Ci.nsIWebNavigation.LOAD_FLAGS_NONE; @@ -2167,14 +2167,14 @@ var tab = aTab; do { tab = tab.nextSibling; - } while (tab && !remainingTabs.contains(tab)); + } while (tab && remainingTabs.indexOf(tab) == -1); if (!tab) { tab = aTab; do { tab = tab.previousSibling; - } while (tab && !remainingTabs.contains(tab)); + } while (tab && remainingTabs.indexOf(tab) == -1); } this.selectedTab = tab; @@ -2422,10 +2422,10 @@ Date: Thu, 2 Oct 2014 17:38:46 +0200 Subject: [PATCH 008/146] Bug 1075059 - Part 2: Backout of Array.prototpype.contains. 
r=jorendorff --HG-- extra : rebase_source : 088d2cc999b55eb2c6c0d43b47bc5f3cc6d9d69a --- js/src/builtin/Array.js | 48 ----- js/src/builtin/Utilities.js | 7 +- js/src/jsarray.cpp | 6 - js/src/tests/Makefile.in | 1 - js/src/tests/ecma_7/Array/browser.js | 0 js/src/tests/ecma_7/Array/contains.js | 61 ------ js/src/tests/ecma_7/Array/shell.js | 0 js/src/tests/ecma_7/browser.js | 0 js/src/tests/ecma_7/shell.js | 197 -------------------- js/xpconnect/tests/chrome/test_xrayToJS.xul | 1 - 10 files changed, 1 insertion(+), 320 deletions(-) delete mode 100644 js/src/tests/ecma_7/Array/browser.js delete mode 100644 js/src/tests/ecma_7/Array/contains.js delete mode 100644 js/src/tests/ecma_7/Array/shell.js delete mode 100644 js/src/tests/ecma_7/browser.js delete mode 100644 js/src/tests/ecma_7/shell.js diff --git a/js/src/builtin/Array.js b/js/src/builtin/Array.js index 937c3bf9806..eaaf2c3b0eb 100644 --- a/js/src/builtin/Array.js +++ b/js/src/builtin/Array.js @@ -581,54 +581,6 @@ function ArrayFill(value, start = 0, end = undefined) { return O; } -// Proposed for ES7: -// https://github.com/domenic/Array.prototype.contains/blob/master/spec.md -function ArrayContains(searchElement, fromIndex = 0) { - // Steps 1-2. - var O = ToObject(this); - - // Steps 3-4. - var len = ToLength(O.length); - - // Step 5. - if (len === 0) - return false; - - // Steps 6-7. - var n = ToInteger(fromIndex); - - // Step 8. - if (n >= len) - return false; - - // Step 9. - var k; - if (n >= 0) { - k = n; - } - // Step 10. - else { - // Step a. - k = len + n; - // Step b. - if (k < 0) - k = 0; - } - - // Step 11. - while (k < len) { - // Steps a-c. - if (SameValueZero(searchElement, O[k])) - return true; - - // Step d. - k++; - } - - // Step 12. 
- return false; -} - #define ARRAY_ITERATOR_SLOT_ITERATED_OBJECT 0 #define ARRAY_ITERATOR_SLOT_NEXT_INDEX 1 #define ARRAY_ITERATOR_SLOT_ITEM_KIND 2 diff --git a/js/src/builtin/Utilities.js b/js/src/builtin/Utilities.js index d620d5a6c6d..1dd5a3dfd50 100644 --- a/js/src/builtin/Utilities.js +++ b/js/src/builtin/Utilities.js @@ -92,7 +92,7 @@ function CheckObjectCoercible(v) { ThrowError(JSMSG_CANT_CONVERT_TO, ToString(v), "object"); } -// Spec: ECMAScript Draft, 6th edition May 22, 2014, 7.1.15. +/* Spec: ECMAScript Draft, 6 edition May 22, 2014, 7.1.15 */ function ToLength(v) { v = ToInteger(v); @@ -103,11 +103,6 @@ function ToLength(v) { return v < 0x1fffffffffffff ? v : 0x1fffffffffffff; } -// Spec: ECMAScript Draft, 6th edition Aug 24, 2014, 7.2.4. -function SameValueZero(x, y) { - return x === y || (x !== x && y !== y); -} - /********** Testing code **********/ #ifdef ENABLE_PARALLEL_JS diff --git a/js/src/jsarray.cpp b/js/src/jsarray.cpp index ee5e5411681..7046ae2d40f 100644 --- a/js/src/jsarray.cpp +++ b/js/src/jsarray.cpp @@ -3023,12 +3023,6 @@ static const JSFunctionSpec array_methods[] = { JS_SELF_HOSTED_FN("@@iterator", "ArrayValues", 0,0), JS_SELF_HOSTED_FN("entries", "ArrayEntries", 0,0), JS_SELF_HOSTED_FN("keys", "ArrayKeys", 0,0), - - /* ES7 additions */ -#ifdef NIGHTLY_BUILD - JS_SELF_HOSTED_FN("contains", "ArrayContains", 2,0), -#endif - JS_FS_END }; diff --git a/js/src/tests/Makefile.in b/js/src/tests/Makefile.in index 817d545787a..1d67364c947 100644 --- a/js/src/tests/Makefile.in +++ b/js/src/tests/Makefile.in @@ -18,7 +18,6 @@ TEST_FILES = \ ecma_3_1/ \ ecma_5/ \ ecma_6/ \ - ecma_7/ \ Intl/ \ js1_1/ \ js1_2/ \ diff --git a/js/src/tests/ecma_7/Array/browser.js b/js/src/tests/ecma_7/Array/browser.js deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/js/src/tests/ecma_7/Array/contains.js b/js/src/tests/ecma_7/Array/contains.js deleted file mode 100644 index 90746a7fe09..00000000000 --- a/js/src/tests/ecma_7/Array/contains.js +++ 
/dev/null @@ -1,61 +0,0 @@ -/* - * Any copyright is dedicated to the Public Domain. - * https://creativecommons.org/publicdomain/zero/1.0/ - */ - -var BUGNUMBER = 1069063; -var summary = "Implement Array.prototype.contains"; - -print(BUGNUMBER + ": " + summary); - -if ('contains' in Array.prototype) { - assertEq(typeof [].contains, "function"); - assertEq([].contains.length, 1); - - assertTrue([1, 2, 3].contains(2)); - assertTrue([1,,2].contains(2)); - assertTrue([1, 2, 3].contains(2, 1)); - assertTrue([1, 2, 3].contains(2, -2)); - assertTrue([1, 2, 3].contains(2, -100)); - assertTrue([Object, Function, Array].contains(Function)); - assertTrue([-0].contains(0)); - assertTrue([NaN].contains(NaN)); - assertTrue([,].contains()); - assertTrue(staticContains("123", "2")); - assertTrue(staticContains({length: 3, 1: 2}, 2)); - assertTrue(staticContains({length: 3, 1: 2, get 3(){throw ""}}, 2)); - assertTrue(staticContains({length: 3, get 1() {return 2}}, 2)); - assertTrue(staticContains({__proto__: {1: 2}, length: 3}, 2)); - assertTrue(staticContains(new Proxy([1], {get(){return 2}}), 2)); - - assertFalse([1, 2, 3].contains("2")); - assertFalse([1, 2, 3].contains(2, 2)); - assertFalse([1, 2, 3].contains(2, -1)); - assertFalse([undefined].contains(NaN)); - assertFalse([{}].contains({})); - assertFalse(staticContains({length: 3, 1: 2}, 2, 2)); - assertFalse(staticContains({length: 3, get 0(){delete this[1]}, 1: 2}, 2)); - assertFalse(staticContains({length: -100, 0: 1}, 1)); - - assertThrowsInstanceOf(() => staticContains(), TypeError); - assertThrowsInstanceOf(() => staticContains(null), TypeError); - assertThrowsInstanceOf(() => staticContains({get length(){throw TypeError()}}), TypeError); - assertThrowsInstanceOf(() => staticContains({length: 3, get 1() {throw TypeError()}}, 2), TypeError); - assertThrowsInstanceOf(() => staticContains({__proto__: {get 1() {throw TypeError()}}, length: 3}, 2), TypeError); - assertThrowsInstanceOf(() => staticContains(new Proxy([1], 
{get(){throw TypeError()}})), TypeError); -} - -function assertTrue(v){ - assertEq(v, true) -} - -function assertFalse(v){ - assertEq(v, false) -} - -function staticContains(o, v, f){ - return [].contains.call(o, v, f) -} - -if (typeof reportCompare === "function") - reportCompare(true, true); diff --git a/js/src/tests/ecma_7/Array/shell.js b/js/src/tests/ecma_7/Array/shell.js deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/js/src/tests/ecma_7/browser.js b/js/src/tests/ecma_7/browser.js deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/js/src/tests/ecma_7/shell.js b/js/src/tests/ecma_7/shell.js deleted file mode 100644 index 8334db7443a..00000000000 --- a/js/src/tests/ecma_7/shell.js +++ /dev/null @@ -1,197 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - - -if (typeof assertThrowsInstanceOf === 'undefined') { - var assertThrowsInstanceOf = function assertThrowsInstanceOf(f, ctor, msg) { - var fullmsg; - try { - f(); - } catch (exc) { - if (exc instanceof ctor) - return; - fullmsg = "Assertion failed: expected exception " + ctor.name + ", got " + exc; - } - if (fullmsg === undefined) - fullmsg = "Assertion failed: expected exception " + ctor.name + ", no exception thrown"; - if (msg !== undefined) - fullmsg += " - " + msg; - throw new Error(fullmsg); - }; -} - -if (typeof assertThrowsValue === 'undefined') { - var assertThrowsValue = function assertThrowsValue(f, val, msg) { - var fullmsg; - try { - f(); - } catch (exc) { - if ((exc === val) === (val === val) && (val !== 0 || 1 / exc === 1 / val)) - return; - fullmsg = "Assertion failed: expected exception " + val + ", got " + exc; - } - if (fullmsg === undefined) - fullmsg = "Assertion failed: expected exception " + val + ", no exception thrown"; - if (msg !== undefined) - fullmsg += " - " + msg; - throw new 
Error(fullmsg); - }; -} - -if (typeof assertDeepEq === 'undefined') { - var assertDeepEq = (function(){ - var call = Function.prototype.call, - Map_ = Map, - Error_ = Error, - Map_has = call.bind(Map.prototype.has), - Map_get = call.bind(Map.prototype.get), - Map_set = call.bind(Map.prototype.set), - Object_toString = call.bind(Object.prototype.toString), - Function_toString = call.bind(Function.prototype.toString), - Object_getPrototypeOf = Object.getPrototypeOf, - Object_hasOwnProperty = call.bind(Object.prototype.hasOwnProperty), - Object_getOwnPropertyDescriptor = Object.getOwnPropertyDescriptor, - Object_isExtensible = Object.isExtensible, - Object_getOwnPropertyNames = Object.getOwnPropertyNames, - uneval_ = uneval; - - // Return true iff ES6 Type(v) isn't Object. - // Note that `typeof document.all === "undefined"`. - function isPrimitive(v) { - return (v === null || - v === undefined || - typeof v === "boolean" || - typeof v === "number" || - typeof v === "string" || - typeof v === "symbol"); - } - - function assertSameValue(a, b, msg) { - try { - assertEq(a, b); - } catch (exc) { - throw Error_(exc.message + (msg ? " " + msg : "")); - } - } - - function assertSameClass(a, b, msg) { - var ac = Object_toString(a), bc = Object_toString(b); - assertSameValue(ac, bc, msg); - switch (ac) { - case "[object Function]": - assertSameValue(Function_toString(a), Function_toString(b), msg); - } - } - - function at(prevmsg, segment) { - return prevmsg ? prevmsg + segment : "at _" + segment; - } - - // Assert that the arguments a and b are thoroughly structurally equivalent. - // - // For the sake of speed, we cut a corner: - // var x = {}, y = {}, ax = [x]; - // assertDeepEq([ax, x], [ax, y]); // passes (?!) - // - // Technically this should fail, since the two object graphs are different. - // (The graph of [ax, y] contains one more object than the graph of [ax, x].) - // - // To get technically correct behavior, pass {strictEquivalence: true}. 
- // This is slower because we have to walk the entire graph, and Object.prototype - // is big. - // - return function assertDeepEq(a, b, options) { - var strictEquivalence = options ? options.strictEquivalence : false; - - function assertSameProto(a, b, msg) { - check(Object_getPrototypeOf(a), Object_getPrototypeOf(b), at(msg, ".__proto__")); - } - - function failPropList(na, nb, msg) { - throw Error_("got own properties " + uneval_(na) + ", expected " + uneval_(nb) + - (msg ? " " + msg : "")); - } - - function assertSameProps(a, b, msg) { - var na = Object_getOwnPropertyNames(a), - nb = Object_getOwnPropertyNames(b); - if (na.length !== nb.length) - failPropList(na, nb, msg); - for (var i = 0; i < na.length; i++) { - var name = na[i]; - if (name !== nb[i]) - failPropList(na, nb, msg); - var da = Object_getOwnPropertyDescriptor(a, name), - db = Object_getOwnPropertyDescriptor(b, name); - var pmsg = at(msg, /^[_$A-Za-z0-9]+$/.test(name) - ? /0|[1-9][0-9]*/.test(name) ? "[" + name + "]" : "." + name - : "[" + uneval_(name) + "]"); - assertSameValue(da.configurable, db.configurable, at(pmsg, ".[[Configurable]]")); - assertSameValue(da.enumerable, db.enumerable, at(pmsg, ".[[Enumerable]]")); - if (Object_hasOwnProperty(da, "value")) { - if (!Object_hasOwnProperty(db, "value")) - throw Error_("got data property, expected accessor property" + pmsg); - check(da.value, db.value, pmsg); - } else { - if (Object_hasOwnProperty(db, "value")) - throw Error_("got accessor property, expected data property" + pmsg); - check(da.get, db.get, at(pmsg, ".[[Get]]")); - check(da.set, db.set, at(pmsg, ".[[Set]]")); - } - } - }; - - var ab = Map_(); - var bpath = Map_(); - - function check(a, b, path) { - if (typeof a === "symbol") { - // Symbols are primitives, but they have identity. - // Symbol("x") !== Symbol("x") but - // assertDeepEq(Symbol("x"), Symbol("x")) should pass. 
- if (typeof b !== "symbol") { - throw Error_("got " + uneval_(a) + ", expected " + uneval_(b) + " " + path); - } else if (uneval_(a) !== uneval_(b)) { - // We lamely use uneval_ to distinguish well-known symbols - // from user-created symbols. The standard doesn't offer - // a convenient way to do it. - throw Error_("got " + uneval_(a) + ", expected " + uneval_(b) + " " + path); - } else if (Map_has(ab, a)) { - assertSameValue(Map_get(ab, a), b, path); - } else if (Map_has(bpath, b)) { - var bPrevPath = Map_get(bpath, b) || "_"; - throw Error_("got distinct symbols " + at(path, "") + " and " + - at(bPrevPath, "") + ", expected the same symbol both places"); - } else { - Map_set(ab, a, b); - Map_set(bpath, b, path); - } - } else if (isPrimitive(a)) { - assertSameValue(a, b, path); - } else if (isPrimitive(b)) { - throw Error_("got " + Object_toString(a) + ", expected " + uneval_(b) + " " + path); - } else if (Map_has(ab, a)) { - assertSameValue(Map_get(ab, a), b, path); - } else if (Map_has(bpath, b)) { - var bPrevPath = Map_get(bpath, b) || "_"; - throw Error_("got distinct objects " + at(path, "") + " and " + at(bPrevPath, "") + - ", expected the same object both places"); - } else { - Map_set(ab, a, b); - Map_set(bpath, b, path); - if (a !== b || strictEquivalence) { - assertSameClass(a, b, path); - assertSameProto(a, b, path); - assertSameProps(a, b, path); - assertSameValue(Object_isExtensible(a), - Object_isExtensible(b), - at(path, ".[[Extensible]]")); - } - } - } - - check(a, b, ""); - }; - })(); -} diff --git a/js/xpconnect/tests/chrome/test_xrayToJS.xul b/js/xpconnect/tests/chrome/test_xrayToJS.xul index 38e11d323a1..a06901c4cd1 100644 --- a/js/xpconnect/tests/chrome/test_xrayToJS.xul +++ b/js/xpconnect/tests/chrome/test_xrayToJS.xul @@ -176,7 +176,6 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=933681 "forEach", "map", "reduce", "reduceRight", "filter", "some", "every", "find", "findIndex", "copyWithin", "fill", "@@iterator", "entries", "keys", 
"constructor"]; if (isNightlyBuild) { - gPrototypeProperties['Array'].push('contains'); let pjsMethods = ['mapPar', 'reducePar', 'scanPar', 'scatterPar', 'filterPar']; gPrototypeProperties['Array'] = gPrototypeProperties['Array'].concat(pjsMethods); } From b67610d4195ca9e951c53102f8aa5550f07347f3 Mon Sep 17 00:00:00 2001 From: Robert Strong Date: Thu, 2 Oct 2014 09:12:36 -0700 Subject: [PATCH 009/146] Mac v2 signing - Bug 1076370 - Fix Firefox.app l10n repacks for Mac v2 signing. r=jmaher --- b2g/installer/removed-files.in | 4 ++-- browser/installer/removed-files.in | 4 ++-- toolkit/locales/l10n.mk | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/b2g/installer/removed-files.in b/b2g/installer/removed-files.in index 8b4a451dfc3..557a379dadc 100644 --- a/b2g/installer/removed-files.in +++ b/b2g/installer/removed-files.in @@ -4,10 +4,10 @@ # location which also work on other platforms. # # @DIR_MACOS@ -# Equals Contents/MacOS/ on Mac OX X and is an empty string on other platforms. +# Equals Contents/MacOS/ on Mac OS X and is an empty string on other platforms. # # @DIR_RESOURCES@ -# Equals Contents/Resources/ on Mac OX X and is an empty string on other +# Equals Contents/Resources/ on Mac OS X and is an empty string on other # platforms. # Mac OS X v2 signing removals diff --git a/browser/installer/removed-files.in b/browser/installer/removed-files.in index e8f336f2769..87b9bb45184 100644 --- a/browser/installer/removed-files.in +++ b/browser/installer/removed-files.in @@ -58,10 +58,10 @@ # location which also work on other platforms. # # @DIR_MACOS@ -# Equals Contents/MacOS/ on Mac OX X and is an empty string on other platforms. +# Equals Contents/MacOS/ on Mac OS X and is an empty string on other platforms. # # @DIR_RESOURCES@ -# Equals Contents/Resources/ on Mac OX X and is an empty string on other +# Equals Contents/Resources/ on Mac OS X and is an empty string on other # platforms. 
# Common File Removals diff --git a/toolkit/locales/l10n.mk b/toolkit/locales/l10n.mk index 1369ba5ee3a..13a44f88fe7 100644 --- a/toolkit/locales/l10n.mk +++ b/toolkit/locales/l10n.mk @@ -62,7 +62,7 @@ clobber-%: PACKAGER_NO_LIBS = 1 ifeq (cocoa,$(MOZ_WIDGET_TOOLKIT)) -STAGEDIST = $(_ABS_DIST)/l10n-stage/$(MOZ_PKG_DIR)/$(_APPNAME)/Contents/MacOS +STAGEDIST = $(_ABS_DIST)/l10n-stage/$(MOZ_PKG_DIR)/$(_APPNAME)/Contents/Resources else STAGEDIST = $(_ABS_DIST)/l10n-stage/$(MOZ_PKG_DIR) endif @@ -115,7 +115,7 @@ endif $(if $(filter omni,$(MOZ_PACKAGER_FORMAT)),$(if $(NON_OMNIJAR_FILES),--non-resource $(NON_OMNIJAR_FILES))) ifneq (en,$(AB)) ifeq (cocoa,$(MOZ_WIDGET_TOOLKIT)) - mv $(_ABS_DIST)/l10n-stage/$(MOZ_PKG_DIR)/$(_APPNAME)/Contents/Resources/en.lproj $(_ABS_DIST)/l10n-stage/$(MOZ_PKG_DIR)/$(_APPNAME)/Contents/Resources/$(AB).lproj + mv $(STAGEDIST)/en.lproj $(STAGEDIST)/$(AB).lproj endif endif $(NSINSTALL) -D $(DIST)/l10n-stage/$(PKG_PATH) @@ -130,7 +130,7 @@ endif # packaging done, undo l10n stuff ifneq (en,$(AB)) ifeq (cocoa,$(MOZ_WIDGET_TOOLKIT)) - mv $(_ABS_DIST)/l10n-stage/$(MOZ_PKG_DIR)/$(_APPNAME)/Contents/Resources/$(AB).lproj $(_ABS_DIST)/l10n-stage/$(MOZ_PKG_DIR)/$(_APPNAME)/Contents/Resources/en.lproj + mv $(STAGEDIST)/$(AB).lproj $(STAGEDIST)/en.lproj endif endif $(NSINSTALL) -D $(DIST)/$(PKG_PATH) From b94b1a89df1deafabb0e5134589d31fdc15c1f45 Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Thu, 2 Oct 2014 18:34:16 +0200 Subject: [PATCH 010/146] Bug 987794 - Don't unwrap XrayWrappers in HasPropertyOnPrototype. 
r=bz --- dom/bindings/BindingUtils.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/dom/bindings/BindingUtils.cpp b/dom/bindings/BindingUtils.cpp index 21be713c3a3..940dc1a3574 100644 --- a/dom/bindings/BindingUtils.cpp +++ b/dom/bindings/BindingUtils.cpp @@ -1595,17 +1595,10 @@ bool HasPropertyOnPrototype(JSContext* cx, JS::Handle proxy, JS::Handle id) { - JS::Rooted obj(cx, proxy); - Maybe ac; - if (xpc::WrapperFactory::IsXrayWrapper(obj)) { - obj = js::UncheckedUnwrap(obj); - ac.emplace(cx, obj); - } - bool found; // We ignore an error from GetPropertyOnPrototype. We pass nullptr // for vp so that GetPropertyOnPrototype won't actually do a get. - return !GetPropertyOnPrototype(cx, obj, id, &found, nullptr) || found; + return !GetPropertyOnPrototype(cx, proxy, id, &found, nullptr) || found; } bool From e675583d7e44fef89ace47a561a1b688a7cfccfd Mon Sep 17 00:00:00 2001 From: Martin Thomson Date: Thu, 2 Oct 2014 10:03:30 -0700 Subject: [PATCH 011/146] Bug 1072382 - Remove version intolerance marker on inappropriate_fallback alert, r=keeler --- security/manager/ssl/src/nsNSSIOLayer.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/security/manager/ssl/src/nsNSSIOLayer.cpp b/security/manager/ssl/src/nsNSSIOLayer.cpp index 5e628bde718..7f915e7eb15 100644 --- a/security/manager/ssl/src/nsNSSIOLayer.cpp +++ b/security/manager/ssl/src/nsNSSIOLayer.cpp @@ -1030,6 +1030,17 @@ retryDueToTLSIntolerance(PRErrorCode err, nsNSSSocketInfo* socketInfo) uint32_t reason; switch (err) { + case SSL_ERROR_INAPPROPRIATE_FALLBACK_ALERT: + // This is a clear signal that we've fallen back too many versions. Treat + // this as a hard failure now, but also mark the next higher version as + // being tolerant so that later attempts don't use this version (i.e., + // range.max), which makes the error unrecoverable without a full restart. 
+ socketInfo->SharedState().IOLayerHelpers() + .rememberTolerantAtVersion(socketInfo->GetHostName(), + socketInfo->GetPort(), + range.max + 1); + return false; + case SSL_ERROR_BAD_MAC_ALERT: reason = 1; break; case SSL_ERROR_BAD_MAC_READ: reason = 2; break; case SSL_ERROR_HANDSHAKE_FAILURE_ALERT: reason = 3; break; From fe7d8a461192f348c89fe3f365004680cb72b836 Mon Sep 17 00:00:00 2001 From: William Lachance Date: Tue, 30 Sep 2014 11:28:38 -0400 Subject: [PATCH 012/146] Bug 1073697 - mozversion should accept binary_path without .exe extension on windows;r=davehunt --- .../mozbase/mozversion/mozversion/mozversion.py | 5 ++++- testing/mozbase/mozversion/setup.py | 2 +- testing/mozbase/mozversion/tests/test_binary.py | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/testing/mozbase/mozversion/mozversion/mozversion.py b/testing/mozbase/mozversion/mozversion/mozversion.py index ca5d76bf884..6d3d53821de 100644 --- a/testing/mozbase/mozversion/mozversion/mozversion.py +++ b/testing/mozbase/mozversion/mozversion/mozversion.py @@ -92,7 +92,10 @@ class LocalVersion(Version): return None if binary: - if not os.path.exists(binary): + # on Windows, the binary may be specified with or without the + # .exe extension + if not os.path.exists(binary) and not os.path.exists(binary + + '.exe'): raise IOError('Binary path does not exist: %s' % binary) path = find_location(os.path.dirname(os.path.realpath(binary))) else: diff --git a/testing/mozbase/mozversion/setup.py b/testing/mozbase/mozversion/setup.py index c3e4d65852a..b12b64bd467 100644 --- a/testing/mozbase/mozversion/setup.py +++ b/testing/mozbase/mozversion/setup.py @@ -4,7 +4,7 @@ from setuptools import setup -PACKAGE_VERSION = '0.7' +PACKAGE_VERSION = '0.8' dependencies = ['mozdevice >= 0.29', 'mozfile >= 1.0', diff --git a/testing/mozbase/mozversion/tests/test_binary.py b/testing/mozbase/mozversion/tests/test_binary.py index 9a199cdb0f5..b677d958ee3 100644 --- 
a/testing/mozbase/mozversion/tests/test_binary.py +++ b/testing/mozbase/mozversion/tests/test_binary.py @@ -83,6 +83,20 @@ SourceRepository = PlatformSourceRepo v = get_version(self.binary) self.assertTrue(isinstance(v, dict)) + def test_with_exe(self): + """Test that we can resolve .exe files""" + with open(os.path.join(self.tempdir, 'application.ini'), 'w') as f: + f.writelines(self.application_ini) + + with open(os.path.join(self.tempdir, 'platform.ini'), 'w') as f: + f.writelines(self.platform_ini) + + exe_name_unprefixed = self.binary + '1' + exe_name = exe_name_unprefixed + '.exe' + with open(exe_name, 'w') as f: + f.write('foobar') + self._check_version(get_version(exe_name_unprefixed)) + def _check_version(self, version): self.assertEqual(version.get('application_name'), 'AppName') self.assertEqual(version.get('application_display_name'), 'AppCodeName') From 867add49654dd74e405a85dfc02663e47c2448ce Mon Sep 17 00:00:00 2001 From: Jonathan Griffin Date: Thu, 2 Oct 2014 10:09:33 -0700 Subject: [PATCH 013/146] Bug 997909 - Make waitFor actually terminate if condition is never true, r=mdas --- testing/marionette/marionette-simpletest.js | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/testing/marionette/marionette-simpletest.js b/testing/marionette/marionette-simpletest.js index d07794b020d..10cd5055107 100644 --- a/testing/marionette/marionette-simpletest.js +++ b/testing/marionette/marionette-simpletest.js @@ -170,18 +170,19 @@ Marionette.prototype = { waitFor: function test_waitFor(callback, test, timeout) { this.heartbeatCallback(); if (test()) { - callback(); - return; + callback(); + return; } - var now = Date.now(); - var deadline = now + (typeof(timeout) == "undefined" ? this.timeout : timeout); + var now = new Date(); + var deadline = (timeout instanceof Date) ? timeout : + new Date(now.valueOf + (typeof(timeout) == "undefined" ? 
this.timeout : timeout)) if (deadline <= now) { dump("waitFor timeout: " + test.toString() + "\n"); // the script will timeout here, so no need to raise a separate // timeout exception return; } - this.window.setTimeout(this.waitFor.bind(this), 100, callback, test, deadline - now); + this.window.setTimeout(this.waitFor.bind(this), 100, callback, test, deadline); }, runEmulatorCmd: function runEmulatorCmd(cmd, callback) { From 965965ec6a20b531fbf595091bef769348fafcbb Mon Sep 17 00:00:00 2001 From: Jonathan Griffin Date: Thu, 2 Oct 2014 10:10:52 -0700 Subject: [PATCH 014/146] Bug 1074508 - Add non-universal mozconfig for OSX mulet, r=mshal --- b2g/dev/config/mozconfigs/macosx64/mulet | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 b2g/dev/config/mozconfigs/macosx64/mulet diff --git a/b2g/dev/config/mozconfigs/macosx64/mulet b/b2g/dev/config/mozconfigs/macosx64/mulet new file mode 100644 index 00000000000..10b89e0d6ad --- /dev/null +++ b/b2g/dev/config/mozconfigs/macosx64/mulet @@ -0,0 +1,25 @@ +MOZ_AUTOMATION_TALOS_SENDCHANGE=0 +. $topsrcdir/build/macosx/mozconfig.common + +ac_add_options --enable-application=b2g/dev +ac_add_options --disable-install-strip +ac_add_options --enable-signmar +ac_add_options --enable-profiling +ac_add_options --enable-instruments +ac_add_options --enable-dtrace + +# Nightlies only since this has a cost in performance +ac_add_options --enable-js-diagnostics + +# Needed to enable breakpad in application.ini +export MOZILLA_OFFICIAL=1 + +ac_add_options --with-macbundlename-prefix=Firefox + +# Treat warnings as errors in directories with FAIL_ON_WARNINGS. +ac_add_options --enable-warnings-as-errors + +# Package js shell. +export MOZ_PACKAGE_JSSHELL=1 + +. 
"$topsrcdir/build/mozconfig.common.override" From d59eea946bb8c789cea5290f2be7a9fe61fc83f4 Mon Sep 17 00:00:00 2001 From: Patrick McManus Date: Thu, 2 Oct 2014 00:47:28 -0400 Subject: [PATCH 015/146] bug 1076129 generate event on socket transport cancelation r=sworkman --- netwerk/base/src/nsSocketTransport2.cpp | 4 +++- netwerk/protocol/http/nsHttpConnectionMgr.cpp | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/netwerk/base/src/nsSocketTransport2.cpp b/netwerk/base/src/nsSocketTransport2.cpp index 19233f554e0..3b149ecca55 100644 --- a/netwerk/base/src/nsSocketTransport2.cpp +++ b/netwerk/base/src/nsSocketTransport2.cpp @@ -1239,7 +1239,9 @@ nsSocketTransport::InitiateSocket() netAddrCString.get())); } #endif - return NS_ERROR_CONNECTION_REFUSED; + mCondition = NS_ERROR_CONNECTION_REFUSED; + OnSocketDetached(nullptr); + return mCondition; } // diff --git a/netwerk/protocol/http/nsHttpConnectionMgr.cpp b/netwerk/protocol/http/nsHttpConnectionMgr.cpp index 65ee70d7d94..dd6f103dfdc 100644 --- a/netwerk/protocol/http/nsHttpConnectionMgr.cpp +++ b/netwerk/protocol/http/nsHttpConnectionMgr.cpp @@ -1467,7 +1467,15 @@ nsHttpConnectionMgr::MakeNewConnection(nsConnectionEntry *ent, LOG(("nsHttpConnectionMgr::MakeNewConnection [ci = %s]\n" "Found a speculative half open connection\n", ent->mConnInfo->HashKey().get())); + + uint32_t flags; ent->mHalfOpens[i]->SetSpeculative(false); + nsISocketTransport *transport = ent->mHalfOpens[i]->SocketTransport(); + if (NS_SUCCEEDED(transport->GetConnectionFlags(&flags))) { + flags &= ~nsISocketTransport::DISABLE_RFC1918; + transport->SetConnectionFlags(flags); + } + Telemetry::AutoCounter usedSpeculativeConn; ++usedSpeculativeConn; From 1239a3e8979243b81b1d31bcbffd51349a883ef4 Mon Sep 17 00:00:00 2001 From: Patrick McManus Date: Wed, 20 Aug 2014 16:30:16 -0400 Subject: [PATCH 016/146] bug 1003448 - HTTP/2 Alternate Service and Opportunistic Security [1/2 PSM] r=keeler --- 
netwerk/socket/nsISSLSocketControl.idl | 28 +++++- .../ssl/src/SSLServerCertVerification.cpp | 12 ++- security/manager/ssl/src/nsNSSIOLayer.cpp | 92 +++++++++++++++---- security/manager/ssl/src/nsNSSIOLayer.h | 18 ++++ 4 files changed, 131 insertions(+), 19 deletions(-) diff --git a/netwerk/socket/nsISSLSocketControl.idl b/netwerk/socket/nsISSLSocketControl.idl index 0288172dd0f..4bbf08af638 100644 --- a/netwerk/socket/nsISSLSocketControl.idl +++ b/netwerk/socket/nsISSLSocketControl.idl @@ -15,7 +15,7 @@ class nsCString; %} [ref] native nsCStringTArrayRef(nsTArray); -[scriptable, builtinclass, uuid(89b819dc-31b0-4d09-915a-66f8a3703483)] +[scriptable, builtinclass, uuid(f160ec31-01f3-47f2-b542-0e12a647b07f)] interface nsISSLSocketControl : nsISupports { attribute nsIInterfaceRequestor notificationCallbacks; @@ -53,6 +53,11 @@ interface nsISSLSocketControl : nsISupports { in ACString hostname, in long port); + /* Determine if existing connection should be trusted to convey information about + * a hostname. + */ + boolean isAcceptableForHost(in ACString hostname); + /* The Key Exchange Algorithm is used when determining whether or not to do false start and whether or not HTTP/2 can be used. @@ -103,5 +108,26 @@ interface nsISSLSocketControl : nsISupports { * the user or searching the set of rememebered user cert decisions. */ attribute nsIX509Cert clientCert; + + /** + * If you wish to verify the host certificate using a different name than + * was used for the tcp connection, but without using proxy semantics, you + * can set authenticationName and authenticationPort + */ + attribute ACString authenticationName; + [infallible] attribute long authenticationPort; + + /** + * set bypassAuthentication to true if the server certificate checks should + * not be enforced. This is to enable non-secure transport over TLS. + */ + [infallible] attribute boolean bypassAuthentication; + + /* + * failedVerification is true if any enforced certificate checks have failed. 
+ * Connections that have not yet tried to verify, have verifications bypassed, + * or are using acceptable exceptions will all return false. + */ + [infallible] readonly attribute boolean failedVerification; }; diff --git a/security/manager/ssl/src/SSLServerCertVerification.cpp b/security/manager/ssl/src/SSLServerCertVerification.cpp index 41ee6b9fd83..224d008480b 100644 --- a/security/manager/ssl/src/SSLServerCertVerification.cpp +++ b/security/manager/ssl/src/SSLServerCertVerification.cpp @@ -400,6 +400,16 @@ CertErrorRunnable::CheckCertOverrides() mDefaultErrorCodeToReport); } + nsCOMPtr sslSocketControl = do_QueryInterface( + NS_ISUPPORTS_CAST(nsITransportSecurityInfo*, mInfoObject)); + if (sslSocketControl && + sslSocketControl->GetBypassAuthentication()) { + PR_LOG(gPIPNSSLog, PR_LOG_DEBUG, + ("[%p][%p] Bypass Auth in CheckCertOverrides\n", + mFdForLogging, this)); + return new SSLServerCertVerificationResult(mInfoObject, 0); + } + int32_t port; mInfoObject->GetPort(&port); @@ -490,8 +500,6 @@ CertErrorRunnable::CheckCertOverrides() // First, deliver the technical details of the broken SSL status. // Try to get a nsIBadCertListener2 implementation from the socket consumer. 
- nsCOMPtr sslSocketControl = do_QueryInterface( - NS_ISUPPORTS_CAST(nsITransportSecurityInfo*, mInfoObject)); if (sslSocketControl) { nsCOMPtr cb; sslSocketControl->GetNotificationCallbacks(getter_AddRefs(cb)); diff --git a/security/manager/ssl/src/nsNSSIOLayer.cpp b/security/manager/ssl/src/nsNSSIOLayer.cpp index 7f915e7eb15..630885f8e81 100644 --- a/security/manager/ssl/src/nsNSSIOLayer.cpp +++ b/security/manager/ssl/src/nsNSSIOLayer.cpp @@ -132,11 +132,13 @@ nsNSSSocketInfo::nsNSSSocketInfo(SharedSSLState& aState, uint32_t providerFlags) mJoined(false), mSentClientCert(false), mNotedTimeUntilReady(false), + mFailedVerification(false), mKEAUsed(nsISSLSocketControl::KEY_EXCHANGE_UNKNOWN), mKEAExpected(nsISSLSocketControl::KEY_EXCHANGE_UNKNOWN), mKEAKeyBits(0), mSSLVersionUsed(nsISSLSocketControl::SSL_VERSION_UNKNOWN), mMACAlgorithmUsed(nsISSLSocketControl::SSL_MAC_UNKNOWN), + mBypassAuthentication(false), mProviderFlags(providerFlags), mSocketCreationTimestamp(TimeStamp::Now()), mPlaintextBytesRead(0), @@ -226,6 +228,52 @@ nsNSSSocketInfo::SetClientCert(nsIX509Cert* aClientCert) return NS_OK; } +NS_IMETHODIMP +nsNSSSocketInfo::GetBypassAuthentication(bool* arg) +{ + *arg = mBypassAuthentication; + return NS_OK; +} + +NS_IMETHODIMP +nsNSSSocketInfo::SetBypassAuthentication(bool arg) +{ + mBypassAuthentication = arg; + return NS_OK; +} + +NS_IMETHODIMP +nsNSSSocketInfo::GetFailedVerification(bool* arg) +{ + *arg = mFailedVerification; + return NS_OK; +} + +NS_IMETHODIMP +nsNSSSocketInfo::GetAuthenticationName(nsACString& aAuthenticationName) +{ + aAuthenticationName = GetHostName(); + return NS_OK; +} + +NS_IMETHODIMP +nsNSSSocketInfo::SetAuthenticationName(const nsACString& aAuthenticationName) +{ + return SetHostName(PromiseFlatCString(aAuthenticationName).get()); +} + +NS_IMETHODIMP +nsNSSSocketInfo::GetAuthenticationPort(int32_t* aAuthenticationPort) +{ + return GetPort(aAuthenticationPort); +} + +NS_IMETHODIMP +nsNSSSocketInfo::SetAuthenticationPort(int32_t 
aAuthenticationPort) +{ + return SetPort(aAuthenticationPort); +} + NS_IMETHODIMP nsNSSSocketInfo::GetRememberClientAuthCertificate(bool* aRemember) { @@ -378,21 +426,8 @@ nsNSSSocketInfo::GetNegotiatedNPN(nsACString& aNegotiatedNPN) } NS_IMETHODIMP -nsNSSSocketInfo::JoinConnection(const nsACString& npnProtocol, - const nsACString& hostname, - int32_t port, - bool* _retval) +nsNSSSocketInfo::IsAcceptableForHost(const nsACString& hostname, bool* _retval) { - *_retval = false; - - // Different ports may not be joined together - if (port != GetPort()) - return NS_OK; - - // Make sure NPN has been completed and matches requested npnProtocol - if (!mNPNCompleted || !mNegotiatedNPN.Equals(npnProtocol)) - return NS_OK; - // If this is the same hostname then the certicate status does not // need to be considered. They are joinable. if (hostname.Equals(GetHostName())) { @@ -462,12 +497,36 @@ nsNSSSocketInfo::JoinConnection(const nsACString& npnProtocol, return NS_OK; } - // All tests pass - this is joinable - mJoined = true; + // All tests pass *_retval = true; return NS_OK; } +NS_IMETHODIMP +nsNSSSocketInfo::JoinConnection(const nsACString& npnProtocol, + const nsACString& hostname, + int32_t port, + bool* _retval) +{ + *_retval = false; + + // Different ports may not be joined together + if (port != GetPort()) + return NS_OK; + + // Make sure NPN has been completed and matches requested npnProtocol + if (!mNPNCompleted || !mNegotiatedNPN.Equals(npnProtocol)) + return NS_OK; + + IsAcceptableForHost(hostname, _retval); + + if (*_retval) { + // All tests pass - this is joinable + mJoined = true; + } + return NS_OK; +} + bool nsNSSSocketInfo::GetForSTARTTLS() { @@ -632,6 +691,7 @@ nsNSSSocketInfo::SetCertVerificationResult(PRErrorCode errorCode, } if (errorCode) { + mFailedVerification = true; SetCanceled(errorCode, errorMessageType); } diff --git a/security/manager/ssl/src/nsNSSIOLayer.h b/security/manager/ssl/src/nsNSSIOLayer.h index 1d3d735ded2..6624881a8fe 100644 --- 
a/security/manager/ssl/src/nsNSSIOLayer.h +++ b/security/manager/ssl/src/nsNSSIOLayer.h @@ -113,6 +113,22 @@ public: void SetMACAlgorithmUsed(int16_t mac) { mMACAlgorithmUsed = mac; } + inline bool GetBypassAuthentication() + { + bool result = false; + mozilla::DebugOnly rv = GetBypassAuthentication(&result); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + return result; + } + + inline int32_t GetAuthenticationPort() + { + int32_t result = -1; + mozilla::DebugOnly rv = GetAuthenticationPort(&result); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + return result; + } + protected: virtual ~nsNSSSocketInfo(); @@ -139,6 +155,7 @@ private: bool mJoined; bool mSentClientCert; bool mNotedTimeUntilReady; + bool mFailedVerification; // mKEA* are used in false start and http/2 detetermination // Values are from nsISSLSocketControl @@ -147,6 +164,7 @@ private: uint32_t mKEAKeyBits; int16_t mSSLVersionUsed; int16_t mMACAlgorithmUsed; + bool mBypassAuthentication; uint32_t mProviderFlags; mozilla::TimeStamp mSocketCreationTimestamp; From e6ca8478f36aa7b17740cba0ffede05a8063ac9f Mon Sep 17 00:00:00 2001 From: Patrick McManus Date: Thu, 21 Aug 2014 10:50:17 -0400 Subject: [PATCH 017/146] bug 1003448 - HTTP/2 Alternate Service and Opportunistic Security [2/2 necko] r=hurley --- modules/libpref/init/all.js | 10 + netwerk/base/public/nsISpeculativeConnect.idl | 7 +- netwerk/base/src/Predictor.cpp | 7 + netwerk/protocol/http/AlternateServices.cpp | 452 ++++++++++++++++++ netwerk/protocol/http/AlternateServices.h | 128 +++++ netwerk/protocol/http/Http2Session.cpp | 221 ++++++++- netwerk/protocol/http/Http2Session.h | 24 +- netwerk/protocol/http/NullHttpTransaction.cpp | 4 +- netwerk/protocol/http/NullHttpTransaction.h | 8 +- netwerk/protocol/http/SpdySession3.cpp | 30 +- netwerk/protocol/http/SpdySession31.cpp | 31 +- netwerk/protocol/http/moz.build | 1 + netwerk/protocol/http/nsAHttpConnection.h | 9 + netwerk/protocol/http/nsAHttpTransaction.h | 2 +- netwerk/protocol/http/nsHttp.cpp | 122 +++++ 
netwerk/protocol/http/nsHttp.h | 51 ++ netwerk/protocol/http/nsHttpAtomList.h | 2 + netwerk/protocol/http/nsHttpChannel.cpp | 211 +++++++- netwerk/protocol/http/nsHttpChannel.h | 3 + netwerk/protocol/http/nsHttpConnection.cpp | 69 ++- netwerk/protocol/http/nsHttpConnection.h | 2 + .../protocol/http/nsHttpConnectionInfo.cpp | 98 +++- netwerk/protocol/http/nsHttpConnectionInfo.h | 40 +- netwerk/protocol/http/nsHttpConnectionMgr.cpp | 65 ++- netwerk/protocol/http/nsHttpConnectionMgr.h | 13 +- netwerk/protocol/http/nsHttpHandler.cpp | 34 +- netwerk/protocol/http/nsHttpHandler.h | 21 + netwerk/protocol/http/nsHttpRequestHead.cpp | 12 + netwerk/protocol/http/nsHttpRequestHead.h | 4 + netwerk/protocol/http/nsHttpTransaction.cpp | 28 +- netwerk/protocol/http/nsHttpTransaction.h | 1 + netwerk/test/unit/test_http2.js | 55 +++ toolkit/components/telemetry/Histograms.json | 10 + 33 files changed, 1647 insertions(+), 128 deletions(-) create mode 100644 netwerk/protocol/http/AlternateServices.cpp create mode 100644 netwerk/protocol/http/AlternateServices.h diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index 1d181afe001..1688fd90d75 100644 --- a/modules/libpref/init/all.js +++ b/modules/libpref/init/all.js @@ -1246,6 +1246,16 @@ pref("network.http.spdy.send-buffer-size", 131072); pref("network.http.spdy.allow-push", true); pref("network.http.spdy.push-allowance", 131072); +// alt-svc allows separation of transport routing from +// the origin host without using a proxy. 
+#ifdef RELEASE_BUILD +pref("network.http.altsvc.enabled", false); +pref("network.http.altsvc.oe", false); +#else +pref("network.http.altsvc.enabled", true); +pref("network.http.altsvc.oe", true); +#endif + pref("network.http.diagnostics", false); pref("network.http.pacing.requests.enabled", true); diff --git a/netwerk/base/public/nsISpeculativeConnect.idl b/netwerk/base/public/nsISpeculativeConnect.idl index aabe54278db..4ef559d6337 100644 --- a/netwerk/base/public/nsISpeculativeConnect.idl +++ b/netwerk/base/public/nsISpeculativeConnect.idl @@ -35,7 +35,7 @@ interface nsISpeculativeConnect : nsISupports * inline) to determine whether or not to actually make a speculative * connection. */ -[builtinclass, uuid(a9cdd875-2ef8-4d53-95d6-e4e18f65e0db)] +[builtinclass, uuid(f6a0d1e5-369f-4abc-81ae-d370d36e4006)] interface nsISpeculativeConnectionOverrider : nsISupports { /** @@ -63,4 +63,9 @@ interface nsISpeculativeConnectionOverrider : nsISupports * usage. */ [infallible] readonly attribute boolean isFromPredictor; + + /** + * by default speculative connections are not made to RFC 1918 addresses + */ + [infallible] readonly attribute boolean allow1918; }; diff --git a/netwerk/base/src/Predictor.cpp b/netwerk/base/src/Predictor.cpp index 08904e7ad0a..e827bd5e875 100644 --- a/netwerk/base/src/Predictor.cpp +++ b/netwerk/base/src/Predictor.cpp @@ -395,6 +395,13 @@ Predictor::GetIsFromPredictor(bool *isFromPredictor) return NS_OK; } +NS_IMETHODIMP +Predictor::GetAllow1918(bool *allow1918) +{ + *allow1918 = false; + return NS_OK; +} + // Predictor::nsIInterfaceRequestor NS_IMETHODIMP diff --git a/netwerk/protocol/http/AlternateServices.cpp b/netwerk/protocol/http/AlternateServices.cpp new file mode 100644 index 00000000000..542568e9145 --- /dev/null +++ b/netwerk/protocol/http/AlternateServices.cpp @@ -0,0 +1,452 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set sw=2 ts=8 et tw=80 : */ +/* This Source Code Form is subject to 
the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "HttpLog.h" + +#include "AlternateServices.h" +#include "nsHttpConnectionInfo.h" +#include "nsHttpHandler.h" +#include "nsThreadUtils.h" +#include "NullHttpTransaction.h" +#include "nsISSLStatusProvider.h" +#include "nsISSLStatus.h" +#include "nsISSLSocketControl.h" + +namespace mozilla { +namespace net { + +AltSvcMapping::AltSvcMapping(const nsACString &originScheme, + const nsACString &originHost, + int32_t originPort, + const nsACString &username, + bool privateBrowsing, + uint32_t expiresAt, + const nsACString &alternateHost, + int32_t alternatePort, + const nsACString &npnToken) + : mAlternateHost(alternateHost) + , mAlternatePort(alternatePort) + , mOriginHost(originHost) + , mOriginPort(originPort) + , mUsername(username) + , mPrivate(privateBrowsing) + , mExpiresAt(expiresAt) + , mValidated(false) + , mRunning(false) + , mNPNToken(npnToken) +{ + mHttps = originScheme.Equals("https"); + + if (mAlternatePort == -1) { + mAlternatePort = mHttps ? NS_HTTPS_DEFAULT_PORT : NS_HTTP_DEFAULT_PORT; + } + if (mOriginPort == -1) { + mOriginPort = mHttps ? NS_HTTPS_DEFAULT_PORT : NS_HTTP_DEFAULT_PORT; + } + + LOG(("AltSvcMapping ctor %p %s://%s:%d to %s:%d\n", this, + nsCString(originScheme).get(), mOriginHost.get(), mOriginPort, + mAlternateHost.get(), mAlternatePort)); + + if (mAlternateHost.IsEmpty()) { + mAlternateHost = mOriginHost; + } + MakeHashKey(mHashKey, originScheme, mOriginHost, mOriginPort, mPrivate); +} + +void +AltSvcMapping::MakeHashKey(nsCString &outKey, + const nsACString &originScheme, + const nsACString &originHost, + int32_t originPort, + bool privateBrowsing) +{ + if (originPort == -1) { + bool isHttps = originScheme.Equals("https"); + originPort = isHttps ? 
NS_HTTPS_DEFAULT_PORT : NS_HTTP_DEFAULT_PORT; + } + + outKey.Append(originScheme); + outKey.Append(':'); + outKey.Append(originHost); + outKey.Append(':'); + outKey.AppendInt(originPort); + outKey.Append(':'); + outKey.Append(privateBrowsing ? 'P' : '.'); +} + +int32_t +AltSvcMapping::TTL() +{ + return mExpiresAt - NowInSeconds(); +} + +void +AltSvcMapping::SetExpired() +{ + mExpiresAt = NowInSeconds() - 1; +} + +bool +AltSvcMapping::RouteEquals(AltSvcMapping *map) +{ + MOZ_ASSERT(map->mHashKey.Equals(mHashKey)); + return mAlternateHost.Equals(map->mAlternateHost) && + (mAlternatePort == map->mAlternatePort) && + mNPNToken.Equals(map->mNPNToken); + + return false; +} + +void +AltSvcMapping::GetConnectionInfo(nsHttpConnectionInfo **outCI, + nsProxyInfo *pi) +{ + nsRefPtr ci = + new nsHttpConnectionInfo(mAlternateHost, mAlternatePort, mNPNToken, + mUsername, pi, mOriginHost, mOriginPort); + if (!mHttps) { + ci->SetRelaxed(true); + } + ci->SetPrivate(mPrivate); + ci.forget(outCI); +} + +// This is the asynchronous null transaction used to validate +// an alt-svc advertisement +class AltSvcTransaction MOZ_FINAL : public NullHttpTransaction +{ +public: + AltSvcTransaction(AltSvcMapping *map, + nsHttpConnectionInfo *ci, + nsIInterfaceRequestor *callbacks, + uint32_t caps) + : NullHttpTransaction(ci, callbacks, caps) + , mMapping(map) + , mRunning(false) + , mTriedToValidate(false) + , mTriedToWrite(false) + { + MOZ_ASSERT(mMapping); + LOG(("AltSvcTransaction ctor %p map %p [%s -> %s]", + this, map, map->OriginHost().get(), map->AlternateHost().get())); + } + + ~AltSvcTransaction() + { + LOG(("AltSvcTransaction dtor %p map %p running %d", + this, mMapping.get(), mRunning)); + + if (mRunning) { + MOZ_ASSERT(mMapping->IsRunning()); + MaybeValidate(NS_OK); + } + if (!mMapping->Validated()) { + // try again later + mMapping->SetExpiresAt(NowInSeconds() + 2); + } + LOG(("AltSvcTransaction dtor %p map %p validated %d [%s]", + this, mMapping.get(), mMapping->Validated(), + 
mMapping->HashKey().get())); + mMapping->SetRunning(false); + } + + void StartTransaction() + { + LOG(("AltSvcTransaction::StartTransaction() %p", this)); + + MOZ_ASSERT(!mRunning); + MOZ_ASSERT(!mMapping->IsRunning()); + mCaps &= ~NS_HTTP_ALLOW_KEEPALIVE; + mRunning = true; + mMapping->SetRunning(true); + } + + void MaybeValidate(nsresult reason) + { + if (mTriedToValidate) { + return; + } + mTriedToValidate = true; + + LOG(("AltSvcTransaction::MaybeValidate() %p reason=%x running=%d conn=%p write=%d", + this, reason, mRunning, mConnection.get(), mTriedToWrite)); + + if (mTriedToWrite && reason == NS_BASE_STREAM_CLOSED) { + // The normal course of events is to cause the transaction to fail with CLOSED + // on a write - so that's a success that means the HTTP/2 session is setup. + reason = NS_OK; + } + + if (NS_FAILED(reason) || !mRunning || !mConnection) { + LOG(("AltSvcTransaction::MaybeValidate %p Failed due to precondition", this)); + return; + } + + // insist on spdy/3* or >= http/2 + uint32_t version = mConnection->Version(); + LOG(("AltSvcTransaction::MaybeValidate() %p version %d\n", this, version)); + if ((version < HTTP_VERSION_2) && + (version != SPDY_VERSION_31) && (version != SPDY_VERSION_3)) { + LOG(("AltSvcTransaction::MaybeValidate %p Failed due to protocol version", this)); + return; + } + + nsCOMPtr secInfo; + mConnection->GetSecurityInfo(getter_AddRefs(secInfo)); + nsCOMPtr socketControl = do_QueryInterface(secInfo); + bool bypassAuth = false; + + if (!socketControl || + NS_FAILED(socketControl->GetBypassAuthentication(&bypassAuth))) { + bypassAuth = false; + } + + LOG(("AltSvcTransaction::MaybeValidate() %p socketControl=%p bypass=%d", + this, socketControl.get(), bypassAuth)); + + if (bypassAuth) { + LOG(("AltSvcTransaction::MaybeValidate() %p " + "validating alternate service because relaxed", this)); + mMapping->SetValidated(true); + return; + } + + if (!socketControl || socketControl->GetFailedVerification()) { + LOG(("AltSvcTransaction::MaybeValidate() %p " 
+ "not validated due to auth error", this)); + return; + } + + LOG(("AltSvcTransaction::MaybeValidate() %p " + "validating alternate service with auth check", this)); + mMapping->SetValidated(true); + } + + void Close(nsresult reason) MOZ_OVERRIDE + { + LOG(("AltSvcTransaction::Close() %p reason=%x running %d", + this, reason, mRunning)); + + MaybeValidate(reason); + if (!mMapping->Validated() && mConnection) { + mConnection->DontReuse(); + } + NullHttpTransaction::Close(reason); + } + + nsresult ReadSegments(nsAHttpSegmentReader *reader, + uint32_t count, uint32_t *countRead) MOZ_OVERRIDE + { + LOG(("AltSvcTransaction::ReadSegments() %p\n", this)); + mTriedToWrite = true; + return NullHttpTransaction::ReadSegments(reader, count, countRead); + } + +private: + nsRefPtr mMapping; + uint32_t mRunning : 1; + uint32_t mTriedToValidate : 1; + uint32_t mTriedToWrite : 1; +}; + +void +AltSvcCache::UpdateAltServiceMapping(AltSvcMapping *map, nsProxyInfo *pi, + nsIInterfaceRequestor *aCallbacks, + uint32_t caps) +{ + MOZ_ASSERT(NS_IsMainThread()); + AltSvcMapping *existing = mHash.GetWeak(map->mHashKey); + LOG(("AltSvcCache::UpdateAltServiceMapping %p map %p existing %p %s", + this, map, existing, map->AlternateHost().get())); + + if (existing && (existing->TTL() <= 0)) { + LOG(("AltSvcCache::UpdateAltServiceMapping %p map %p is expired", + this, map)); + existing = nullptr; + mHash.Remove(map->mHashKey); + } + + if (existing && existing->mValidated) { + if (existing->RouteEquals(map)) { + // update expires + LOG(("AltSvcCache::UpdateAltServiceMapping %p map %p updates ttl of %p\n", + this, map, existing)); + existing->SetExpiresAt(map->GetExpiresAt()); + return; + } + + LOG(("AltSvcCache::UpdateAltServiceMapping %p map %p overwrites %p\n", + this, map, existing)); + existing = nullptr; + mHash.Remove(map->mHashKey); + } + + if (existing) { + LOG(("AltSvcCache::UpdateAltServiceMapping %p map %p ignored because %p " + "still in progress\n", this, map, existing)); + return; + } + + 
mHash.Put(map->mHashKey, map); + + nsRefPtr ci; + map->GetConnectionInfo(getter_AddRefs(ci), pi); + caps |= ci->GetAnonymous() ? NS_HTTP_LOAD_ANONYMOUS : 0; + + nsCOMPtr callbacks = new AltSvcOverride(aCallbacks); + + nsRefPtr nullTransaction = + new AltSvcTransaction(map, ci, aCallbacks, caps); + nullTransaction->StartTransaction(); + gHttpHandler->ConnMgr()->SpeculativeConnect(ci, callbacks, caps, nullTransaction); +} + +AltSvcMapping * +AltSvcCache::GetAltServiceMapping(const nsACString &scheme, const nsACString &host, + int32_t port, bool privateBrowsing) +{ + MOZ_ASSERT(NS_IsMainThread()); + if (!gHttpHandler->AllowAltSvc()) { + return nullptr; + } + if (!gHttpHandler->AllowAltSvcOE() && scheme.Equals(NS_LITERAL_CSTRING("http"))) { + return nullptr; + } + + nsAutoCString key; + AltSvcMapping::MakeHashKey(key, scheme, host, port, privateBrowsing); + AltSvcMapping *existing = mHash.GetWeak(key); + LOG(("AltSvcCache::GetAltServiceMapping %p key=%s " + "existing=%p validated=%d running=%d ttl=%d", + this, key.get(), existing, existing ? existing->mValidated : 0, + existing ? existing->mRunning : 0, + existing ? 
existing->TTL() : 0)); + if (existing && (existing->TTL() <= 0)) { + LOG(("AltSvcCache::GetAltServiceMapping %p map %p is expired", this, existing)); + mHash.Remove(existing->mHashKey); + existing = nullptr; + } + if (existing && existing->mValidated) + return existing; + return nullptr; +} + +class ProxyClearHostMapping : public nsRunnable { +public: + explicit ProxyClearHostMapping(const nsACString &host, int32_t port) + : mHost(host) + , mPort(port) + {} + + NS_IMETHOD Run() + { + MOZ_ASSERT(NS_IsMainThread()); + gHttpHandler->ConnMgr()->ClearHostMapping(mHost, mPort); + return NS_OK; + } +private: + nsCString mHost; + int32_t mPort; +}; + +void +AltSvcCache::ClearHostMapping(const nsACString &host, int32_t port) +{ + if (!NS_IsMainThread()) { + nsCOMPtr event = new ProxyClearHostMapping(host, port); + if (event) { + NS_DispatchToMainThread(event); + } + return; + } + + nsAutoCString key; + + AltSvcMapping::MakeHashKey(key, NS_LITERAL_CSTRING("http"), host, port, true); + AltSvcMapping *existing = mHash.GetWeak(key); + if (existing) { + existing->SetExpired(); + } + + AltSvcMapping::MakeHashKey(key, NS_LITERAL_CSTRING("https"), host, port, true); + existing = mHash.GetWeak(key); + if (existing) { + existing->SetExpired(); + } + + AltSvcMapping::MakeHashKey(key, NS_LITERAL_CSTRING("http"), host, port, false); + existing = mHash.GetWeak(key); + if (existing) { + existing->SetExpired(); + } + + AltSvcMapping::MakeHashKey(key, NS_LITERAL_CSTRING("https"), host, port, false); + existing = mHash.GetWeak(key); + if (existing) { + existing->SetExpired(); + } +} + +void +AltSvcCache::ClearAltServiceMappings() +{ + MOZ_ASSERT(NS_IsMainThread()); + mHash.Clear(); +} + +NS_IMETHODIMP +AltSvcOverride::GetInterface(const nsIID &iid, void **result) +{ + if (NS_SUCCEEDED(QueryInterface(iid, result)) && *result) { + return NS_OK; + } + return mCallbacks->GetInterface(iid, result); +} + +NS_IMETHODIMP +AltSvcOverride::GetIgnoreIdle(bool *ignoreIdle) +{ + *ignoreIdle = true; + 
return NS_OK; +} + +NS_IMETHODIMP +AltSvcOverride::GetIgnorePossibleSpdyConnections(bool *ignorePossibleSpdyConnections) +{ + *ignorePossibleSpdyConnections = true; + return NS_OK; +} + +NS_IMETHODIMP +AltSvcOverride::GetParallelSpeculativeConnectLimit( + uint32_t *parallelSpeculativeConnectLimit) +{ + *parallelSpeculativeConnectLimit = 32; + return NS_OK; +} + +NS_IMETHODIMP +AltSvcOverride::GetIsFromPredictor(bool *isFromPredictor) +{ + *isFromPredictor = false; + return NS_OK; +} + +NS_IMETHODIMP +AltSvcOverride::GetAllow1918(bool *allow) +{ + // normally we don't do speculative connects to 1918.. and we use + // speculative connects for the mapping validation, so override + // that default here for alt-svc + *allow = true; + return NS_OK; +} + +NS_IMPL_ISUPPORTS(AltSvcOverride, nsIInterfaceRequestor, nsISpeculativeConnectionOverrider) + +} // namespace mozilla::net +} // namespace mozilla diff --git a/netwerk/protocol/http/AlternateServices.h b/netwerk/protocol/http/AlternateServices.h new file mode 100644 index 00000000000..75d7bab6279 --- /dev/null +++ b/netwerk/protocol/http/AlternateServices.h @@ -0,0 +1,128 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set sw=2 ts=8 et tw=80 : */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* +Alt-Svc allows separation of transport routing from the origin host without +using a proxy. 
See https://httpwg.github.io/http-extensions/alt-svc.html + + Nice To Have Future Enhancements:: + * flush on network change event when we have an indicator + * use established https channel for http instead separate of conninfo hash + * pin via http-tls header + * clear based on origin when a random fail happens not just 421 + * upon establishment of channel, cancel and retry trans that have not yet written anything + * persistent storage (including private browsing filter) + * memory reporter for cache, but this is rather tiny +*/ + +#ifndef mozilla_net_AlternateServices_h +#define mozilla_net_AlternateServices_h + +#include "nsRefPtrHashtable.h" +#include "nsString.h" +#include "nsIInterfaceRequestor.h" +#include "nsISpeculativeConnect.h" + +class nsProxyInfo; + +namespace mozilla { namespace net { + +class nsHttpConnectionInfo; + +class AltSvcMapping +{ + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AltSvcMapping) + friend class AltSvcCache; + +public: + AltSvcMapping(const nsACString &originScheme, + const nsACString &originHost, + int32_t originPort, + const nsACString &username, + bool privateBrowsing, + uint32_t expiresAt, + const nsACString &alternateHost, + int32_t alternatePort, + const nsACString &npnToken); + + const nsCString &AlternateHost() const { return mAlternateHost; } + const nsCString &OriginHost() const { return mOriginHost; } + const nsCString &HashKey() const { return mHashKey; } + uint32_t AlternatePort() const { return mAlternatePort; } + bool Validated() { return mValidated; } + void SetValidated(bool val) { mValidated = val; } + bool IsRunning() { return mRunning; } + void SetRunning(bool val) { mRunning = val; } + int32_t GetExpiresAt() { return mExpiresAt; } + void SetExpiresAt(int32_t val) { mExpiresAt = val; } + void SetExpired(); + bool RouteEquals(AltSvcMapping *map); + + void GetConnectionInfo(nsHttpConnectionInfo **outCI, nsProxyInfo *pi); + int32_t TTL(); + +private: + virtual ~AltSvcMapping() {}; + static void MakeHashKey(nsCString 
&outKey, + const nsACString &originScheme, + const nsACString &originHost, + int32_t originPort, + bool privateBrowsing); + + nsCString mHashKey; + + nsCString mAlternateHost; + int32_t mAlternatePort; + + nsCString mOriginHost; + int32_t mOriginPort; + + nsCString mUsername; + bool mPrivate; + + uint32_t mExpiresAt; + + bool mValidated; + bool mRunning; + bool mHttps; + + nsCString mNPNToken; +}; + +class AltSvcOverride : public nsIInterfaceRequestor + , public nsISpeculativeConnectionOverrider +{ +public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSISPECULATIVECONNECTIONOVERRIDER + NS_DECL_NSIINTERFACEREQUESTOR + + AltSvcOverride(nsIInterfaceRequestor *aRequestor) + : mCallbacks(aRequestor) {} + +private: + virtual ~AltSvcOverride() {} + nsCOMPtr mCallbacks; +}; + +class AltSvcCache +{ +public: + void UpdateAltServiceMapping(AltSvcMapping *map, nsProxyInfo *pi, + nsIInterfaceRequestor *, uint32_t caps); // main thread + AltSvcMapping *GetAltServiceMapping(const nsACString &scheme, + const nsACString &host, + int32_t port, bool pb); + void ClearAltServiceMappings(); + void ClearHostMapping(const nsACString &host, int32_t port); + +private: + nsRefPtrHashtable mHash; +}; + +}} // namespace mozilla::net + +#endif // include guard diff --git a/netwerk/protocol/http/Http2Session.cpp b/netwerk/protocol/http/Http2Session.cpp index 547a5cd5780..8c8fe1a12c4 100644 --- a/netwerk/protocol/http/Http2Session.cpp +++ b/netwerk/protocol/http/Http2Session.cpp @@ -258,7 +258,8 @@ static Http2ControlFx sControlFunctions[] = { Http2Session::RecvPing, Http2Session::RecvGoAway, Http2Session::RecvWindowUpdate, - Http2Session::RecvContinuation + Http2Session::RecvContinuation, + Http2Session::RecvAltSvc // extension for type 0x0A }; bool @@ -442,7 +443,8 @@ Http2Session::AddStream(nsAHttpTransaction *aHttpTransaction, mQueuedStreams.Push(stream); } - if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE)) { + if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE) && + 
!aHttpTransaction->IsNullTransaction()) { LOG3(("Http2Session::AddStream %p transaction %p forces keep-alive off.\n", this, aHttpTransaction)); DontReuse(); @@ -1858,6 +1860,208 @@ Http2Session::RecvContinuation(Http2Session *self) return RecvPushPromise(self); } +class UpdateAltSvcEvent : public nsRunnable +{ +public: + UpdateAltSvcEvent(const nsCString &host, const uint16_t port, + const nsCString &npnToken, const uint32_t expires, + const nsCString &aOrigin, + nsHttpConnectionInfo *aCI, + nsIInterfaceRequestor *callbacks) + : mHost(host) + , mPort(port) + , mNPNToken(npnToken) + , mExpires(expires) + , mOrigin(aOrigin) + , mCI(aCI) + , mCallbacks(callbacks) + { + } + + NS_IMETHOD Run() MOZ_OVERRIDE + { + MOZ_ASSERT(NS_IsMainThread()); + + nsCString originScheme; + nsCString originHost; + int32_t originPort = -1; + + nsCOMPtr uri; + if (NS_FAILED(NS_NewURI(getter_AddRefs(uri), mOrigin))) { + LOG(("UpdateAltSvcEvent origin does not parse %s\n", + mOrigin.get())); + return NS_OK; + } + uri->GetScheme(originScheme); + uri->GetHost(originHost); + uri->GetPort(&originPort); + + const char *username = mCI->Username(); + const bool privateBrowsing = mCI->GetPrivate(); + + LOG(("UpdateAltSvcEvent location=%s:%u protocol=%s expires=%u " + "origin=%s://%s:%u user=%s private=%d", mHost.get(), mPort, + mNPNToken.get(), mExpires, originScheme.get(), originHost.get(), + originPort, username, privateBrowsing)); + nsRefPtr mapping = new AltSvcMapping( + nsDependentCString(originScheme.get()), + nsDependentCString(originHost.get()), + originPort, nsDependentCString(username), privateBrowsing, mExpires, + mHost, mPort, mNPNToken); + + nsProxyInfo *proxyInfo = mCI->ProxyInfo(); + gHttpHandler->UpdateAltServiceMapping(mapping, proxyInfo, mCallbacks, 0); + return NS_OK; + } + +private: + nsCString mHost; + uint16_t mPort; + nsCString mNPNToken; + uint32_t mExpires; + nsCString mOrigin; + nsRefPtr mCI; + nsCOMPtr mCallbacks; +}; + +// defined as an http2 extension - alt-svc +// 
defines receipt of frame type 0x0A.. See AlternateSevices.h +nsresult +Http2Session::RecvAltSvc(Http2Session *self) +{ + MOZ_ASSERT(self->mInputFrameType == FRAME_TYPE_ALTSVC); + LOG3(("Http2Session::RecvAltSvc %p Flags 0x%X id 0x%X\n", self, + self->mInputFrameFlags, self->mInputFrameID)); + + if (self->mInputFrameDataSize < 8) { + LOG3(("Http2Session::RecvAltSvc %p frame too small", self)); + RETURN_SESSION_ERROR(self, FRAME_SIZE_ERROR); + } + + uint32_t maxAge = + PR_ntohl(*reinterpret_cast(self->mInputFrameBuffer.get() + kFrameHeaderBytes)); + uint16_t portRoute = + PR_ntohs(*reinterpret_cast(self->mInputFrameBuffer.get() + kFrameHeaderBytes + 4)); + uint8_t protoLen = self->mInputFrameBuffer.get()[kFrameHeaderBytes + 6]; + LOG3(("Http2Session::RecvAltSvc %p maxAge=%d port=%d protoLen=%d", self, + maxAge, portRoute, protoLen)); + + if (self->mInputFrameDataSize < (8U + protoLen)) { + LOG3(("Http2Session::RecvAltSvc %p frame too small for protocol", self)); + RETURN_SESSION_ERROR(self, FRAME_SIZE_ERROR); + } + nsAutoCString protocol; + protocol.Assign(self->mInputFrameBuffer.get() + kFrameHeaderBytes + 7, protoLen); + + uint32_t spdyIndex; + SpdyInformation *spdyInfo = gHttpHandler->SpdyInfo(); + if (!(NS_SUCCEEDED(spdyInfo->GetNPNIndex(protocol, &spdyIndex)) && + spdyInfo->ProtocolEnabled(spdyIndex))) { + LOG3(("Http2Session::RecvAltSvc %p unknown protocol %s, ignoring", self, + protocol.BeginReading())); + self->ResetDownstreamState(); + return NS_OK; + } + + uint8_t hostLen = self->mInputFrameBuffer.get()[kFrameHeaderBytes + 7 + protoLen]; + if (self->mInputFrameDataSize < (8U + protoLen + hostLen)) { + LOG3(("Http2Session::RecvAltSvc %p frame too small for host", self)); + RETURN_SESSION_ERROR(self, FRAME_SIZE_ERROR); + } + + nsRefPtr ci(self->ConnectionInfo()); + if (!self->mConnection || !ci) { + LOG3(("Http2Session::RecvAltSvc %p no connection or conninfo for %d", self, + self->mInputFrameID)); + self->ResetDownstreamState(); + return NS_OK; + } + + 
nsAutoCString hostRoute; + hostRoute.Assign(self->mInputFrameBuffer.get() + kFrameHeaderBytes + 8 + protoLen, hostLen); + + uint32_t originLen = self->mInputFrameDataSize - 8 - protoLen - hostLen; + nsAutoCString specifiedOrigin; + if (originLen) { + if (self->mInputFrameID) { + LOG3(("Http2Session::RecvAltSvc %p got frame w/origin on non zero stream", self)); + self->ResetDownstreamState(); + return NS_OK; + } + specifiedOrigin.Assign( + self->mInputFrameBuffer.get() + kFrameHeaderBytes + 8 + protoLen + hostLen, + originLen); + + bool okToReroute = true; + nsCOMPtr securityInfo; + self->mConnection->GetSecurityInfo(getter_AddRefs(securityInfo)); + nsCOMPtr ssl = do_QueryInterface(securityInfo); + if (!ssl) { + okToReroute = false; + } + + // a little off main thread origin parser. This is a non critical function because + // any alternate route created has to be verified anyhow + nsAutoCString specifiedOriginHost; + if (specifiedOrigin.EqualsIgnoreCase("https://", 8)) { + specifiedOriginHost.Assign(specifiedOrigin.get() + 8, + specifiedOrigin.Length() - 8); + if (ci->GetRelaxed()) { + // technically this is ok because it will still be confirmed before being used + // but let's not support it. + okToReroute = false; + } + } else if (specifiedOrigin.EqualsIgnoreCase("http://", 7)) { + specifiedOriginHost.Assign(specifiedOrigin.get() + 7, + specifiedOrigin.Length() - 7); + } + + int32_t colonOffset = specifiedOriginHost.FindCharInSet(":", 0); + if (colonOffset != kNotFound) { + specifiedOriginHost.Truncate(colonOffset); + } + + if (okToReroute) { + ssl->IsAcceptableForHost(specifiedOriginHost, &okToReroute); + } + if (!okToReroute) { + LOG3(("Http2Session::RecvAltSvc %p can't reroute non-authoritative origin %s", + self, specifiedOrigin.BeginReading())); + self->ResetDownstreamState(); + return NS_OK; + } + } else { + // no origin specified in frame. We need to have an active pull stream to match + // this up to as if it were a response header. 
+ if (!(self->mInputFrameID & 0x1) || + NS_FAILED(self->SetInputFrameDataStream(self->mInputFrameID)) || + !self->mInputFrameDataStream->Transaction() || + !self->mInputFrameDataStream->Transaction()->RequestHead()) { + LOG3(("Http2Session::RecvAltSvc %p got frame w/o origin on invalid stream", self)); + self->ResetDownstreamState(); + return NS_OK; + } + + specifiedOrigin.Assign( + self->mInputFrameDataStream->Transaction()->RequestHead()->Origin()); + } + + nsCOMPtr callbacks; + self->mConnection->GetSecurityInfo(getter_AddRefs(callbacks)); + nsCOMPtr irCallbacks = do_QueryInterface(callbacks); + + nsRefPtr event = new UpdateAltSvcEvent( + hostRoute, portRoute, protocol, NowInSeconds() + maxAge, + specifiedOrigin, ci, irCallbacks); + NS_DispatchToMainThread(event); + + LOG3(("Http2Session::RecvAltSvc %p processed location=%s:%u protocol=%s " + "maxAge=%u origin=%s", self, hostRoute.get(), portRoute, + protocol.get(), maxAge, specifiedOrigin.get())); + self->ResetDownstreamState(); + return NS_OK; +} + //----------------------------------------------------------------------------- // nsAHttpTransaction. 
It is expected that nsHttpConnection is the caller // of these methods @@ -1983,10 +2187,17 @@ Http2Session::ReadSegments(nsAHttpSegmentReader *reader, } if (NS_FAILED(rv)) { - LOG3(("Http2Session::ReadSegments %p returning FAIL code %X", + LOG3(("Http2Session::ReadSegments %p may return FAIL code %X", this, rv)); - if (rv != NS_BASE_STREAM_WOULD_BLOCK) - CleanupStream(stream, rv, CANCEL_ERROR); + if (rv == NS_BASE_STREAM_WOULD_BLOCK) { + return rv; + } + + CleanupStream(stream, rv, CANCEL_ERROR); + if (SoftStreamError(rv)) { + LOG3(("Http2Session::ReadSegments %p soft error override\n", this)); + rv = NS_OK; + } return rv; } diff --git a/netwerk/protocol/http/Http2Session.h b/netwerk/protocol/http/Http2Session.h index e59d3828547..1c1d0bcd9da 100644 --- a/netwerk/protocol/http/Http2Session.h +++ b/netwerk/protocol/http/Http2Session.h @@ -75,17 +75,18 @@ public: */ enum frameType { - FRAME_TYPE_DATA = 0, - FRAME_TYPE_HEADERS = 1, - FRAME_TYPE_PRIORITY = 2, - FRAME_TYPE_RST_STREAM = 3, - FRAME_TYPE_SETTINGS = 4, - FRAME_TYPE_PUSH_PROMISE = 5, - FRAME_TYPE_PING = 6, - FRAME_TYPE_GOAWAY = 7, - FRAME_TYPE_WINDOW_UPDATE = 8, - FRAME_TYPE_CONTINUATION = 9, - FRAME_TYPE_LAST = 10 + FRAME_TYPE_DATA = 0x0, + FRAME_TYPE_HEADERS = 0x1, + FRAME_TYPE_PRIORITY = 0x2, + FRAME_TYPE_RST_STREAM = 0x3, + FRAME_TYPE_SETTINGS = 0x4, + FRAME_TYPE_PUSH_PROMISE = 0x5, + FRAME_TYPE_PING = 0x6, + FRAME_TYPE_GOAWAY = 0x7, + FRAME_TYPE_WINDOW_UPDATE = 0x8, + FRAME_TYPE_CONTINUATION = 0x9, + FRAME_TYPE_ALTSVC = 0xA, + FRAME_TYPE_LAST = 0xB }; // NO_ERROR is a macro defined on windows, so we'll name the HTTP2 goaway @@ -168,6 +169,7 @@ public: static nsresult RecvGoAway(Http2Session *); static nsresult RecvWindowUpdate(Http2Session *); static nsresult RecvContinuation(Http2Session *); + static nsresult RecvAltSvc(Http2Session *); char *EnsureOutputBuffer(uint32_t needed); diff --git a/netwerk/protocol/http/NullHttpTransaction.cpp b/netwerk/protocol/http/NullHttpTransaction.cpp index 
58666533085..edc1c5bee2f 100644 --- a/netwerk/protocol/http/NullHttpTransaction.cpp +++ b/netwerk/protocol/http/NullHttpTransaction.cpp @@ -23,10 +23,10 @@ NullHttpTransaction::NullHttpTransaction(nsHttpConnectionInfo *ci, : mStatus(NS_OK) , mCaps(caps | NS_HTTP_ALLOW_KEEPALIVE) , mCapsToClear(0) - , mCallbacks(callbacks) - , mConnectionInfo(ci) , mRequestHead(nullptr) , mIsDone(false) + , mCallbacks(callbacks) + , mConnectionInfo(ci) { } diff --git a/netwerk/protocol/http/NullHttpTransaction.h b/netwerk/protocol/http/NullHttpTransaction.h index 45c1f6c7ef7..d32ebf622f6 100644 --- a/netwerk/protocol/http/NullHttpTransaction.h +++ b/netwerk/protocol/http/NullHttpTransaction.h @@ -49,18 +49,22 @@ protected: private: nsresult mStatus; +protected: uint32_t mCaps; +private: // mCapsToClear holds flags that should be cleared in mCaps, e.g. unset // NS_HTTP_REFRESH_DNS when DNS refresh request has completed to avoid // redundant requests on the network. To deal with raciness, only unsetting // bitfields should be allowed: 'lost races' will thus err on the // conservative side, e.g. by going ahead with a 2nd DNS refresh. 
uint32_t mCapsToClear; + nsHttpRequestHead *mRequestHead; + bool mIsDone; + +protected: nsRefPtr mConnection; nsCOMPtr mCallbacks; nsRefPtr mConnectionInfo; - nsHttpRequestHead *mRequestHead; - bool mIsDone; }; NS_DEFINE_STATIC_IID_ACCESSOR(NullHttpTransaction, NS_NULLHTTPTRANSACTION_IID) diff --git a/netwerk/protocol/http/SpdySession3.cpp b/netwerk/protocol/http/SpdySession3.cpp index 93a25a625b5..b556cac6610 100644 --- a/netwerk/protocol/http/SpdySession3.cpp +++ b/netwerk/protocol/http/SpdySession3.cpp @@ -386,7 +386,8 @@ SpdySession3::AddStream(nsAHttpTransaction *aHttpTransaction, mQueuedStreams.Push(stream); } - if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE)) { + if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE) && + !aHttpTransaction->IsNullTransaction()) { LOG3(("SpdySession3::AddStream %p transaction %p forces keep-alive off.\n", this, aHttpTransaction)); DontReuse(); @@ -401,12 +402,14 @@ SpdySession3::ActivateStream(SpdyStream3 *stream) MOZ_ASSERT(!stream->StreamID() || (stream->StreamID() & 1), "Do not activate pushed streams"); - ++mConcurrent; - if (mConcurrent > mConcurrentHighWater) - mConcurrentHighWater = mConcurrent; - LOG3(("SpdySession3::AddStream %p activating stream %p Currently %d " - "streams in session, high water mark is %d", - this, stream, mConcurrent, mConcurrentHighWater)); + if (!(stream->Transaction() && stream->Transaction()->IsNullTransaction())) { + ++mConcurrent; + if (mConcurrent > mConcurrentHighWater) + mConcurrentHighWater = mConcurrent; + LOG3(("SpdySession3::AddStream %p activating stream %p Currently %d " + "streams in session, high water mark is %d", + this, stream, mConcurrent, mConcurrentHighWater)); + } mReadyForWrite.Push(stream); SetWriteCallbacks(); @@ -1761,10 +1764,17 @@ SpdySession3::ReadSegments(nsAHttpSegmentReader *reader, } if (NS_FAILED(rv)) { - LOG3(("SpdySession3::ReadSegments %p returning FAIL code %X", + LOG3(("SpdySession3::ReadSegments %p may return FAIL code %X", this, rv)); - if 
(rv != NS_BASE_STREAM_WOULD_BLOCK) - CleanupStream(stream, rv, RST_CANCEL); + if (rv == NS_BASE_STREAM_WOULD_BLOCK) { + return rv; + } + + CleanupStream(stream, rv, RST_CANCEL); + if (SoftStreamError(rv)) { + LOG3(("SpdySession3::ReadSegments %p soft error override\n", this)); + rv = NS_OK; + } return rv; } diff --git a/netwerk/protocol/http/SpdySession31.cpp b/netwerk/protocol/http/SpdySession31.cpp index d570d56f671..fbe4a77d469 100644 --- a/netwerk/protocol/http/SpdySession31.cpp +++ b/netwerk/protocol/http/SpdySession31.cpp @@ -389,7 +389,8 @@ SpdySession31::AddStream(nsAHttpTransaction *aHttpTransaction, mQueuedStreams.Push(stream); } - if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE)) { + if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE) && + !aHttpTransaction->IsNullTransaction()) { LOG3(("SpdySession31::AddStream %p transaction %p forces keep-alive off.\n", this, aHttpTransaction)); DontReuse(); @@ -405,12 +406,15 @@ SpdySession31::ActivateStream(SpdyStream31 *stream) MOZ_ASSERT(!stream->StreamID() || (stream->StreamID() & 1), "Do not activate pushed streams"); - ++mConcurrent; - if (mConcurrent > mConcurrentHighWater) - mConcurrentHighWater = mConcurrent; - LOG3(("SpdySession31::AddStream %p activating stream %p Currently %d " - "streams in session, high water mark is %d", - this, stream, mConcurrent, mConcurrentHighWater)); + if (!(stream->Transaction() && stream->Transaction()->IsNullTransaction())) { + ++mConcurrent; + if (mConcurrent > mConcurrentHighWater) { + mConcurrentHighWater = mConcurrent; + } + LOG3(("SpdySession31::AddStream %p activating stream %p Currently %d " + "streams in session, high water mark is %d", + this, stream, mConcurrent, mConcurrentHighWater)); + } mReadyForWrite.Push(stream); SetWriteCallbacks(); @@ -1827,10 +1831,17 @@ SpdySession31::ReadSegments(nsAHttpSegmentReader *reader, } if (NS_FAILED(rv)) { - LOG3(("SpdySession31::ReadSegments %p returning FAIL code %X", + LOG3(("SpdySession31::ReadSegments %p may 
return FAIL code %X", this, rv)); - if (rv != NS_BASE_STREAM_WOULD_BLOCK) - CleanupStream(stream, rv, RST_CANCEL); + if (rv == NS_BASE_STREAM_WOULD_BLOCK) { + return rv; + } + + CleanupStream(stream, rv, RST_CANCEL); + if (SoftStreamError(rv)) { + LOG3(("SpdySession31::ReadSegments %p soft error override\n", this)); + rv = NS_OK; + } return rv; } diff --git a/netwerk/protocol/http/moz.build b/netwerk/protocol/http/moz.build index 64f90f36f3d..b84b8eb4d9c 100644 --- a/netwerk/protocol/http/moz.build +++ b/netwerk/protocol/http/moz.build @@ -41,6 +41,7 @@ EXPORTS.mozilla.net += [ # The rest of these files cannot be built in unified mode because they want to # force NSPR logging. SOURCES += [ + 'AlternateServices.cpp', 'ASpdySession.cpp', 'ConnectionDiagnostics.cpp', 'Http2Compression.cpp', diff --git a/netwerk/protocol/http/nsAHttpConnection.h b/netwerk/protocol/http/nsAHttpConnection.h index 7060f004442..994be14e281 100644 --- a/netwerk/protocol/http/nsAHttpConnection.h +++ b/netwerk/protocol/http/nsAHttpConnection.h @@ -140,6 +140,9 @@ public: // Update the callbacks used to provide security info. May be called on // any thread. virtual void SetSecurityCallbacks(nsIInterfaceRequestor* aCallbacks) = 0; + + // nsHttp.h version + virtual uint32_t Version() = 0; }; NS_DEFINE_STATIC_IID_ACCESSOR(nsAHttpConnection, NS_AHTTPCONNECTION_IID) @@ -207,6 +210,12 @@ NS_DEFINE_STATIC_IID_ACCESSOR(nsAHttpConnection, NS_AHTTPCONNECTION_IID) return nullptr; \ return (fwdObject)->Transport(); \ } \ + uint32_t Version() \ + { \ + return (fwdObject) ? 
\ + (fwdObject)->Version() : \ + NS_HTTP_VERSION_UNKNOWN; \ + } \ bool IsProxyConnectInProgress() \ { \ return (fwdObject)->IsProxyConnectInProgress(); \ diff --git a/netwerk/protocol/http/nsAHttpTransaction.h b/netwerk/protocol/http/nsAHttpTransaction.h index 6aed565e5a5..547e7942c55 100644 --- a/netwerk/protocol/http/nsAHttpTransaction.h +++ b/netwerk/protocol/http/nsAHttpTransaction.h @@ -203,7 +203,7 @@ NS_DEFINE_STATIC_IID_ACCESSOR(nsAHttpTransaction, NS_AHTTPTRANSACTION_IID) uint64_t Available(); \ virtual nsresult ReadSegments(nsAHttpSegmentReader *, uint32_t, uint32_t *); \ virtual nsresult WriteSegments(nsAHttpSegmentWriter *, uint32_t, uint32_t *); \ - void Close(nsresult reason); \ + virtual void Close(nsresult reason); \ nsHttpConnectionInfo *ConnectionInfo(); \ void SetProxyConnectFailed(); \ virtual nsHttpRequestHead *RequestHead(); \ diff --git a/netwerk/protocol/http/nsHttp.cpp b/netwerk/protocol/http/nsHttp.cpp index ac52e0e24ae..cae91990d52 100644 --- a/netwerk/protocol/http/nsHttp.cpp +++ b/netwerk/protocol/http/nsHttp.cpp @@ -346,6 +346,128 @@ void EnsureBuffer(nsAutoArrayPtr &buf, uint32_t newSize, { localEnsureBuffer (buf, newSize, preserve, objSize); } +/// + +void +ParsedHeaderValueList::Tokenize(char *input, uint32_t inputLen, char **token, + uint32_t *tokenLen, bool *foundEquals, char **next) +{ + if (foundEquals) { + *foundEquals = false; + } + if (next) { + *next = nullptr; + } + if (inputLen < 1 || !input || !token) { + return; + } + + bool foundFirst = false; + bool inQuote = false; + bool foundToken = false; + *token = input; + *tokenLen = inputLen; + + for (uint32_t index = 0; !foundToken && index < inputLen; ++index) { + // strip leading cruft + if (!foundFirst && + (input[index] == ' ' || input[index] == '"' || input[index] == '\t')) { + (*token)++; + } else { + foundFirst = true; + } + + if (input[index] == '"') { + inQuote = !inQuote; + continue; + } + + if (inQuote) { + continue; + } + + if (input[index] == '=' || input[index] 
== ';') { + *tokenLen = (input + index) - *token; + if (next && ((index + 1) < inputLen)) { + *next = input + index + 1; + } + foundToken = true; + if (foundEquals && input[index] == '=') { + *foundEquals = true; + } + break; + } + } + + if (!foundToken) { + *tokenLen = (input + inputLen) - *token; + } + + // strip trailing cruft + for (char *index = *token + *tokenLen - 1; index >= *token; --index) { + if (*index != ' ' && *index != '\t' && *index != '"') { + break; + } + --(*tokenLen); + if (*index == '"') { + break; + } + } +} + +ParsedHeaderValueList::ParsedHeaderValueList(char *t, uint32_t len) +{ + char *name = nullptr; + uint32_t nameLen = 0; + char *value = nullptr; + uint32_t valueLen = 0; + char *next = nullptr; + bool foundEquals; + + while (t) { + Tokenize(t, len, &name, &nameLen, &foundEquals, &next); + if (next) { + len -= next - t; + } + t = next; + if (foundEquals && t) { + Tokenize(t, len, &value, &valueLen, nullptr, &next); + if (next) { + len -= next - t; + } + t = next; + } + mValues.AppendElement(ParsedHeaderPair(name, nameLen, value, valueLen)); + value = name = nullptr; + valueLen = nameLen = 0; + next = nullptr; + } +} + +ParsedHeaderValueListList::ParsedHeaderValueListList(const nsCString &fullHeader) + : mFull(fullHeader) +{ + char *t = mFull.BeginWriting(); + uint32_t len = mFull.Length(); + char *last = t; + bool inQuote = false; + for (uint32_t index = 0; index < len; ++index) { + if (t[index] == '"') { + inQuote = !inQuote; + continue; + } + if (inQuote) { + continue; + } + if (t[index] == ',') { + mValues.AppendElement(ParsedHeaderValueList(last, (t + index) - last)); + last = t + index + 1; + } + } + if (!inQuote) { + mValues.AppendElement(ParsedHeaderValueList(last, (t + len) - last)); + } +} } // namespace mozilla::net } // namespace mozilla diff --git a/netwerk/protocol/http/nsHttp.h b/netwerk/protocol/http/nsHttp.h index 9be18962d8d..c05c4a1c92d 100644 --- a/netwerk/protocol/http/nsHttp.h +++ b/netwerk/protocol/http/nsHttp.h @@ 
-12,6 +12,7 @@ #include "nsAutoPtr.h" #include "nsString.h" #include "nsError.h" +#include "nsTArray.h" // http version codes #define NS_HTTP_VERSION_UNKNOWN 0 @@ -203,6 +204,56 @@ void EnsureBuffer(nsAutoArrayPtr &buf, uint32_t newSize, void EnsureBuffer(nsAutoArrayPtr &buf, uint32_t newSize, uint32_t preserve, uint32_t &objSize); +// h2=":443"; ma=60; single +// results in 3 mValues = {{h2, :443}, {ma, 60}, {single}} + +class ParsedHeaderPair +{ +public: + ParsedHeaderPair(const char *name, int32_t nameLen, + const char *val, int32_t valLen) + { + if (nameLen > 0) { + mName.Rebind(name, name + nameLen); + } + if (valLen > 0) { + mValue.Rebind(val, val + valLen); + } + } + + ParsedHeaderPair(ParsedHeaderPair const ©) + : mName(copy.mName) + , mValue(copy.mValue) + { + } + + nsDependentCSubstring mName; + nsDependentCSubstring mValue; +}; + +class ParsedHeaderValueList +{ +public: + ParsedHeaderValueList(char *t, uint32_t len); + nsTArray mValues; + +private: + void ParsePair(char *t, uint32_t len); + void Tokenize(char *input, uint32_t inputLen, char **token, + uint32_t *tokenLen, bool *foundEquals, char **next); +}; + +class ParsedHeaderValueListList +{ +public: + explicit ParsedHeaderValueListList(const nsCString &txt); + nsTArray mValues; + +private: + nsCString mFull; +}; + + } // namespace mozilla::net } // namespace mozilla diff --git a/netwerk/protocol/http/nsHttpAtomList.h b/netwerk/protocol/http/nsHttpAtomList.h index cebe85e1771..2ad6baa006d 100644 --- a/netwerk/protocol/http/nsHttpAtomList.h +++ b/netwerk/protocol/http/nsHttpAtomList.h @@ -23,6 +23,8 @@ HTTP_ATOM(Accept_Language, "Accept-Language") HTTP_ATOM(Accept_Ranges, "Accept-Ranges") HTTP_ATOM(Age, "Age") HTTP_ATOM(Allow, "Allow") +HTTP_ATOM(Alternate_Service, "Alt-Svc") +HTTP_ATOM(Alternate_Service_Used, "Alt-Svc-Used") HTTP_ATOM(Assoc_Req, "Assoc-Req") HTTP_ATOM(Authentication, "Authentication") HTTP_ATOM(Authorization, "Authorization") diff --git a/netwerk/protocol/http/nsHttpChannel.cpp 
b/netwerk/protocol/http/nsHttpChannel.cpp index 6224a4ead92..24c2d0e1450 100644 --- a/netwerk/protocol/http/nsHttpChannel.cpp +++ b/netwerk/protocol/http/nsHttpChannel.cpp @@ -65,6 +65,7 @@ #include "nsPerformance.h" #include "CacheObserver.h" #include "mozilla/Telemetry.h" +#include "AlternateServices.h" namespace mozilla { namespace net { @@ -278,11 +279,11 @@ nsHttpChannel::Connect() // data (it is read-only). // if the connection is not using SSL and either the exact host matches or // a superdomain wants to force HTTPS, do it. - bool usingSSL = false; - rv = mURI->SchemeIs("https", &usingSSL); + bool isHttps = false; + rv = mURI->SchemeIs("https", &isHttps); NS_ENSURE_SUCCESS(rv,rv); - if (mAllowSTS && !usingSSL) { + if (mAllowSTS && !isHttps) { // enforce Strict-Transport-Security nsISiteSecurityService* sss = gHttpHandler->GetSSService(); NS_ENSURE_TRUE(sss, NS_ERROR_OUT_OF_MEMORY); @@ -326,7 +327,7 @@ nsHttpChannel::Connect() } // open a cache entry for this channel... - rv = OpenCacheEntry(usingSSL); + rv = OpenCacheEntry(isHttps); // do not continue if asyncOpenCacheEntry is in progress if (mCacheEntriesToWaitFor) { @@ -1230,6 +1231,127 @@ nsHttpChannel::ProcessSSLInformation() } } +void +nsHttpChannel::ProcessAltService() +{ + // e.g. Alt-Svc: h2=":443"; ma=60 + // e.g. 
Alt-Svc: h2="otherhost:443" + // Alt-Svc = 1#( alternative *( OWS ";" OWS parameter ) ) + // alternative = protocol-id "=" alt-authority + // protocol-id = token ; percent-encoded ALPN protocol identifier + // alt-authority = quoted-string ; containing [ uri-host ] ":" port + + if (!gHttpHandler->AllowAltSvc()) { + return; + } + + nsAutoCString scheme; + mURI->GetScheme(scheme); + bool isHttp = scheme.Equals(NS_LITERAL_CSTRING("http")); + if (!isHttp && !scheme.Equals(NS_LITERAL_CSTRING("https"))) { + return; + } + + if (isHttp && !gHttpHandler->AllowAltSvcOE()) { + return; + } + + const char *altSvc; + if (!(altSvc = mResponseHead->PeekHeader(nsHttp::Alternate_Service))) { + return; + } + + LOG(("nsHttpChannel %p Alt-Svc Response Header %s\n", this, altSvc)); + + nsCString buf(altSvc); + if (!nsHttp::IsReasonableHeaderValue(buf)) { + LOG(("Alt-Svc Response Header seems unreasonable - skipping\n")); + return; + } + + ParsedHeaderValueListList parsedAltSvc(buf); + nsRefPtr mapping; + + nsAutoCString originHost; + int32_t originPort = 80; + mURI->GetPort(&originPort); + if (NS_FAILED(mURI->GetHost(originHost))) { + return; + } + uint32_t now = NowInSeconds(), currentAge = 0; + mResponseHead->ComputeCurrentAge(now, mRequestTime, ¤tAge); + + for (uint32_t index = 0; index < parsedAltSvc.mValues.Length(); ++index) { + uint32_t maxage = 86400; // default + nsAutoCString hostname; // Always empty in the header form + nsAutoCString npnToken; + int32_t portno = originPort; + + for (uint32_t pairIndex = 0; + pairIndex < parsedAltSvc.mValues[index].mValues.Length(); + ++pairIndex) { + nsDependentCSubstring ¤tName = + parsedAltSvc.mValues[index].mValues[pairIndex].mName; + nsDependentCSubstring ¤tValue = + parsedAltSvc.mValues[index].mValues[pairIndex].mValue; + + if (!pairIndex) { + // h2=:443 + npnToken = currentName; + int32_t colonIndex = currentValue.FindChar(':'); + if (colonIndex >= 0) { + portno = + atoi(PromiseFlatCString(currentValue).get() + colonIndex + 1); + } 
else { + colonIndex = 0; + } + hostname.Assign(currentValue.BeginReading(), colonIndex); + } else if (currentName.Equals(NS_LITERAL_CSTRING("ma"))) { + maxage = atoi(PromiseFlatCString(currentValue).get()); + break; + } + } + + // unescape modifies a c string in place, so afterwards + // update nsCString length + nsUnescape(npnToken.BeginWriting()); + npnToken.SetLength(strlen(npnToken.BeginReading())); + + uint32_t spdyIndex; + SpdyInformation *spdyInfo = gHttpHandler->SpdyInfo(); + if (!(NS_SUCCEEDED(spdyInfo->GetNPNIndex(npnToken, &spdyIndex)) && + spdyInfo->ProtocolEnabled(spdyIndex))) { + LOG(("Alt Svc %p unknown protocol %s, ignoring", this, npnToken.get())); + continue; + } + + mapping = new AltSvcMapping(scheme, + originHost, originPort, + mUsername, mPrivateBrowsing, + NowInSeconds() + maxage, + hostname, portno, npnToken); + if (!mapping) { + continue; + } + + nsCOMPtr callbacks; + NS_NewNotificationCallbacksAggregation(mCallbacks, mLoadGroup, + getter_AddRefs(callbacks)); + if (!callbacks) { + return; + } + + nsCOMPtr proxyInfo; + if (mProxyInfo) { + proxyInfo = do_QueryInterface(mProxyInfo); + } + + gHttpHandler-> + UpdateAltServiceMapping(mapping, proxyInfo, callbacks, + mCaps & (NS_HTTP_ALLOW_RSA_FALSESTART | NS_HTTP_DISALLOW_SPDY)); + } +} + nsresult nsHttpChannel::ProcessResponse() { @@ -1284,6 +1406,10 @@ nsHttpChannel::ProcessResponse() LOG((" continuation state has been reset")); } + if (httpStatus < 500) { + ProcessAltService(); + } + bool successfulReval = false; // handle different server response categories. 
Note that we handle @@ -2513,7 +2639,7 @@ IsSubRangeRequest(nsHttpRequestHead &aRequestHead) } nsresult -nsHttpChannel::OpenCacheEntry(bool usingSSL) +nsHttpChannel::OpenCacheEntry(bool isHttps) { MOZ_EVENT_TRACER_EXEC(this, "net::http::OpenCacheEntry"); @@ -2840,8 +2966,8 @@ nsHttpChannel::OnCacheEntryCheck(nsICacheEntry* entry, nsIApplicationCache* appC } } - bool usingSSL = false; - rv = mURI->SchemeIs("https", &usingSSL); + bool isHttps = false; + rv = mURI->SchemeIs("https", &isHttps); NS_ENSURE_SUCCESS(rv,rv); bool doValidation = false; @@ -2880,7 +3006,7 @@ nsHttpChannel::OnCacheEntryCheck(nsICacheEntry* entry, nsIApplicationCache* appC // if no-store or if no-cache and ssl, validate cached response (see // bug 112564 for an explanation of this logic) if (mCachedResponseHead->NoStore() || - (mCachedResponseHead->NoCache() && usingSSL)) { + (mCachedResponseHead->NoCache() && isHttps)) { LOG(("Validating based on (no-store || (no-cache && ssl)) logic\n")); doValidation = true; } @@ -3435,11 +3561,11 @@ nsHttpChannel::OpenCacheInputStream(nsICacheEntry* cacheEntry, bool startBufferi { nsresult rv; - bool usingSSL = false; - rv = mURI->SchemeIs("https", &usingSSL); + bool isHttps = false; + rv = mURI->SchemeIs("https", &isHttps); NS_ENSURE_SUCCESS(rv,rv); - if (usingSSL) { + if (isHttps) { rv = cacheEntry->GetSecurityInfo( getter_AddRefs(mCachedSecurityInfo)); if (NS_FAILED(rv)) { @@ -3793,9 +3919,11 @@ nsHttpChannel::UpdateInhibitPersistentCachingFlag() mLoadFlags |= INHIBIT_PERSISTENT_CACHING; // Only cache SSL content on disk if the pref is set + bool isHttps; if (!gHttpHandler->IsPersistentHttpsCachingEnabled() && - mConnectionInfo->EndToEndSSL()) + NS_SUCCEEDED(mURI->SchemeIs("https", &isHttps)) && isHttps) { mLoadFlags |= INHIBIT_PERSISTENT_CACHING; + } } nsresult @@ -4566,17 +4694,19 @@ nsHttpChannel::BeginConnect() // Construct connection info object nsAutoCString host; + nsAutoCString scheme; int32_t port = -1; - nsAutoCString username; - bool usingSSL 
= false; + bool isHttps = false; - rv = mURI->SchemeIs("https", &usingSSL); + rv = mURI->GetScheme(scheme); + if (NS_SUCCEEDED(rv)) + rv = mURI->SchemeIs("https", &isHttps); if (NS_SUCCEEDED(rv)) rv = mURI->GetAsciiHost(host); if (NS_SUCCEEDED(rv)) rv = mURI->GetPort(&port); if (NS_SUCCEEDED(rv)) - mURI->GetUsername(username); + mURI->GetUsername(mUsername); if (NS_SUCCEEDED(rv)) rv = mURI->GetAsciiSpec(mSpec); if (NS_FAILED(rv)) @@ -4592,8 +4722,47 @@ nsHttpChannel::BeginConnect() if (mProxyInfo) proxyInfo = do_QueryInterface(mProxyInfo); - mConnectionInfo = new nsHttpConnectionInfo(host, port, username, proxyInfo, usingSSL); - mRequestHead.SetHTTPS(usingSSL); + mRequestHead.SetHTTPS(isHttps); + mRequestHead.SetOrigin(scheme, host, port); + + nsRefPtr mapping; + if ((scheme.Equals(NS_LITERAL_CSTRING("http")) || + scheme.Equals(NS_LITERAL_CSTRING("https"))) && + (mapping = gHttpHandler->GetAltServiceMapping(scheme, + host, port, + mPrivateBrowsing))) { + LOG(("nsHttpChannel %p Alt Service Mapping Found %s://%s:%d\n", this, + scheme.get(), mapping->AlternateHost().get(), + mapping->AlternatePort())); + mRequestHead.SetHeader(nsHttp::Alternate_Service_Used, NS_LITERAL_CSTRING("1")); + + nsCOMPtr consoleService = + do_GetService(NS_CONSOLESERVICE_CONTRACTID); + if (consoleService) { + nsAutoString message(NS_LITERAL_STRING("Alternate Service Mapping found: ")); + AppendASCIItoUTF16(scheme.get(), message); + message.Append(NS_LITERAL_STRING("://")); + AppendASCIItoUTF16(host.get(), message); + message.Append(NS_LITERAL_STRING(":")); + message.AppendInt(port); + message.Append(NS_LITERAL_STRING(" to ")); + AppendASCIItoUTF16(scheme.get(), message); + message.Append(NS_LITERAL_STRING("://")); + AppendASCIItoUTF16(mapping->AlternateHost().get(), message); + message.Append(NS_LITERAL_STRING(":")); + message.AppendInt(mapping->AlternatePort()); + consoleService->LogStringMessage(message.get()); + } + + LOG(("nsHttpChannel %p Using connection info from altsvc mapping", 
this)); + mapping->GetConnectionInfo(getter_AddRefs(mConnectionInfo), proxyInfo); + Telemetry::Accumulate(Telemetry::HTTP_TRANSACTION_USE_ALTSVC, true); + Telemetry::Accumulate(Telemetry::HTTP_TRANSACTION_USE_ALTSVC_OE, !isHttps); + } else { + LOG(("nsHttpChannel %p Using default connection info", this)); + mConnectionInfo = new nsHttpConnectionInfo(host, port, EmptyCString(), mUsername, proxyInfo, isHttps); + Telemetry::Accumulate(Telemetry::HTTP_TRANSACTION_USE_ALTSVC, false); + } mAuthProvider = do_CreateInstance("@mozilla.org/network/http-channel-auth-provider;1", @@ -4868,8 +5037,10 @@ nsHttpChannel::GetResponseEnd(TimeStamp* _retval) { NS_IMETHODIMP nsHttpChannel::GetIsSSL(bool *aIsSSL) { - *aIsSSL = mConnectionInfo->EndToEndSSL(); - return NS_OK; + // this attribute is really misnamed - it wants to know if + // https:// is being used. SSL might be used to cover http:// + // in some circumstances (proxies, http/2, etc..) + return mURI->SchemeIs("https", aIsSSL); } NS_IMETHODIMP diff --git a/netwerk/protocol/http/nsHttpChannel.h b/netwerk/protocol/http/nsHttpChannel.h index 06debfe44df..36f42c03961 100644 --- a/netwerk/protocol/http/nsHttpChannel.h +++ b/netwerk/protocol/http/nsHttpChannel.h @@ -205,6 +205,7 @@ private: nsresult ContinueProcessResponse(nsresult); nsresult ProcessNormal(); nsresult ContinueProcessNormal(nsresult); + void ProcessAltService(); nsresult ProcessNotModified(); nsresult AsyncProcessRedirection(uint32_t httpStatus); nsresult ContinueProcessRedirection(nsresult); @@ -427,6 +428,8 @@ private: void PushRedirectAsyncFunc(nsContinueRedirectionFunc func); void PopRedirectAsyncFunc(nsContinueRedirectionFunc func); + nsCString mUsername; + protected: virtual void DoNotifyListenerCleanup(); nsPerformance* GetPerformance(); diff --git a/netwerk/protocol/http/nsHttpConnection.cpp b/netwerk/protocol/http/nsHttpConnection.cpp index 7e5f55e0966..1774df40d95 100644 --- a/netwerk/protocol/http/nsHttpConnection.cpp +++ 
b/netwerk/protocol/http/nsHttpConnection.cpp @@ -483,25 +483,48 @@ nsHttpConnection::SetupNPNList(nsISSLSocketControl *ssl, uint32_t caps) { nsTArray protocolArray; - // The first protocol is used as the fallback if none of the - // protocols supported overlap with the server's list. - // When using ALPN the advertised preferences are protocolArray indicies - // {1, .., N, 0} in decreasing order. - // For NPN, In the case of overlap, matching priority is driven by - // the order of the server's advertisement - with index 0 used when - // there is no match. - protocolArray.AppendElement(NS_LITERAL_CSTRING("http/1.1")); + nsCString npnToken = mConnInfo->GetNPNToken(); + if (npnToken.IsEmpty()) { + // The first protocol is used as the fallback if none of the + // protocols supported overlap with the server's list. + // When using ALPN the advertised preferences are protocolArray indicies + // {1, .., N, 0} in decreasing order. + // For NPN, In the case of overlap, matching priority is driven by + // the order of the server's advertisement - with index 0 used when + // there is no match. 
+ protocolArray.AppendElement(NS_LITERAL_CSTRING("http/1.1")); - if (gHttpHandler->IsSpdyEnabled() && - !(caps & NS_HTTP_DISALLOW_SPDY)) { - LOG(("nsHttpConnection::SetupSSL Allow SPDY NPN selection")); - const SpdyInformation *info = gHttpHandler->SpdyInfo(); - for (uint32_t index = SpdyInformation::kCount; index > 0; --index) { - if (info->ProtocolEnabled(index - 1) && - info->ALPNCallbacks[index - 1](ssl)) { - protocolArray.AppendElement(info->VersionString[index - 1]); + if (gHttpHandler->IsSpdyEnabled() && + !(caps & NS_HTTP_DISALLOW_SPDY)) { + LOG(("nsHttpConnection::SetupSSL Allow SPDY NPN selection")); + const SpdyInformation *info = gHttpHandler->SpdyInfo(); + for (uint32_t index = SpdyInformation::kCount; index > 0; --index) { + if (info->ProtocolEnabled(index - 1) && + info->ALPNCallbacks[index - 1](ssl)) { + protocolArray.AppendElement(info->VersionString[index - 1]); + } } } + } else { + LOG(("nsHttpConnection::SetupSSL limiting NPN selection to %s", + npnToken.get())); + protocolArray.AppendElement(npnToken); + } + + nsCString authHost = mConnInfo->GetAuthenticationHost(); + int32_t authPort = mConnInfo->GetAuthenticationPort(); + + if (!authHost.IsEmpty()) { + ssl->SetAuthenticationName(authHost); + ssl->SetAuthenticationPort(authPort); + } + + if (mConnInfo->GetRelaxed()) { // http:// over tls + if (authHost.IsEmpty() || authHost.Equals(mConnInfo->GetHost())) { + LOG(("nsHttpConnection::SetupSSL %p TLS-Relaxed " + "with Same Host Auth Bypass", this)); + ssl->SetBypassAuthentication(true); + } } nsresult rv = ssl->SetNPNList(protocolArray); @@ -531,6 +554,14 @@ nsHttpConnection::AddTransaction(nsAHttpTransaction *httpTransaction, LOG(("nsHttpConnection::AddTransaction for SPDY%s", needTunnel ? 
" over tunnel" : "")); + // do a runtime check here just for defense in depth + if (transCI->GetRelaxed() && + httpTransaction->RequestHead() && httpTransaction->RequestHead()->IsHTTPS()) { + LOG(("This Cannot happen - https on relaxed tls stream\n")); + MOZ_ASSERT(false, "https:// on tls relaxed"); + return NS_ERROR_FAILURE; + } + if (!mSpdySession->AddStream(httpTransaction, priority, needTunnel, mCallbacks)) { MOZ_ASSERT(false); // this cannot happen! @@ -1407,6 +1438,12 @@ nsHttpConnection::EndIdleMonitoring() } } +uint32_t +nsHttpConnection::Version() +{ + return mUsingSpdyVersion ? mUsingSpdyVersion : mLastHttpResponseVersion; +} + //----------------------------------------------------------------------------- // nsHttpConnection //----------------------------------------------------------------------------- diff --git a/netwerk/protocol/http/nsHttpConnection.h b/netwerk/protocol/http/nsHttpConnection.h index c2e52ab9a83..4d999d5d00d 100644 --- a/netwerk/protocol/http/nsHttpConnection.h +++ b/netwerk/protocol/http/nsHttpConnection.h @@ -211,6 +211,8 @@ public: return mTrafficStamp && (mTrafficCount == (mTotalBytesWritten + mTotalBytesRead)); } + // override of nsAHttpConnection + virtual uint32_t Version(); private: // Value (set in mTCPKeepaliveConfig) indicates which set of prefs to use. 
diff --git a/netwerk/protocol/http/nsHttpConnectionInfo.cpp b/netwerk/protocol/http/nsHttpConnectionInfo.cpp index d01ad60c987..0453a599d50 100644 --- a/netwerk/protocol/http/nsHttpConnectionInfo.cpp +++ b/netwerk/protocol/http/nsHttpConnectionInfo.cpp @@ -20,16 +20,49 @@ namespace mozilla { namespace net { -nsHttpConnectionInfo::nsHttpConnectionInfo(const nsACString &host, int32_t port, +nsHttpConnectionInfo::nsHttpConnectionInfo(const nsACString &physicalHost, + int32_t physicalPort, + const nsACString &npnToken, const nsACString &username, - nsProxyInfo* proxyInfo, + nsProxyInfo *proxyInfo, bool endToEndSSL) - : mUsername(username) - , mProxyInfo(proxyInfo) - , mEndToEndSSL(endToEndSSL) - , mUsingConnect(false) + : mAuthenticationPort(443) { - LOG(("Creating nsHttpConnectionInfo @%x\n", this)); + Init(physicalHost, physicalPort, npnToken, username, proxyInfo, endToEndSSL); +} + +nsHttpConnectionInfo::nsHttpConnectionInfo(const nsACString &physicalHost, + int32_t physicalPort, + const nsACString &npnToken, + const nsACString &username, + nsProxyInfo *proxyInfo, + const nsACString &logicalHost, + int32_t logicalPort) + +{ + mEndToEndSSL = true; // so DefaultPort() works + mAuthenticationPort = logicalPort == -1 ? 
DefaultPort() : logicalPort; + + if (!physicalHost.Equals(logicalHost) || (physicalPort != logicalPort)) { + mAuthenticationHost = logicalHost; + } + Init(physicalHost, physicalPort, npnToken, username, proxyInfo, true); +} + +void +nsHttpConnectionInfo::Init(const nsACString &host, int32_t port, + const nsACString &npnToken, + const nsACString &username, + nsProxyInfo* proxyInfo, + bool e2eSSL) +{ + LOG(("Init nsHttpConnectionInfo @%p\n", this)); + + mUsername = username; + mProxyInfo = proxyInfo; + mEndToEndSSL = e2eSSL; + mUsingConnect = false; + mNPNToken = npnToken; mUsingHttpsProxy = (proxyInfo && proxyInfo->IsHTTPS()); mUsingHttpProxy = mUsingHttpsProxy || (proxyInfo && proxyInfo->IsHTTP()); @@ -78,8 +111,9 @@ nsHttpConnectionInfo::SetOriginServer(const nsACString &host, int32_t port) // byte 1 is S/. S is for end to end ssl such as https:// uris // byte 2 is A/. A is for an anonymous channel (no cookies, etc..) // byte 3 is P/. P is for a private browising channel - mHashKey.AssignLiteral("...."); + // byte 4 is R/. 
R is for 'relaxed' unauthed TLS for http:// uris + mHashKey.AssignLiteral("....."); mHashKey.Append(keyHost); mHashKey.Append(':'); mHashKey.AppendInt(keyPort); @@ -118,20 +152,62 @@ nsHttpConnectionInfo::SetOriginServer(const nsACString &host, int32_t port) mHashKey.AppendInt(ProxyPort()); mHashKey.Append(')'); } + + if(!mAuthenticationHost.IsEmpty()) { + mHashKey.AppendLiteral(" <TLS-LOGIC "); + mHashKey.Append(mAuthenticationHost); + mHashKey.Append(':'); + mHashKey.AppendInt(mAuthenticationPort); + mHashKey.Append('>'); + } + + if (!mNPNToken.IsEmpty()) { + mHashKey.AppendLiteral(" {NPN-TOKEN "); + mHashKey.Append(mNPNToken); + mHashKey.AppendLiteral("}"); + } } nsHttpConnectionInfo* nsHttpConnectionInfo::Clone() const { - nsHttpConnectionInfo* clone = new nsHttpConnectionInfo(mHost, mPort, mUsername, mProxyInfo, mEndToEndSSL); + nsHttpConnectionInfo *clone; + if (mAuthenticationHost.IsEmpty()) { + clone = new nsHttpConnectionInfo(mHost, mPort, mNPNToken, mUsername, mProxyInfo, mEndToEndSSL); + } else { + MOZ_ASSERT(mEndToEndSSL); + clone = new nsHttpConnectionInfo(mHost, mPort, mNPNToken, mUsername, mProxyInfo, + mAuthenticationHost, + mAuthenticationPort); + } - // Make sure the anonymous and private flags are transferred!
+ // Make sure the anonymous, relaxed, and private flags are transferred clone->SetAnonymous(GetAnonymous()); clone->SetPrivate(GetPrivate()); + clone->SetRelaxed(GetRelaxed()); MOZ_ASSERT(clone->Equals(this)); + return clone; } +void +nsHttpConnectionInfo::CloneAsDirectRoute(nsHttpConnectionInfo **outCI) +{ + if (mAuthenticationHost.IsEmpty()) { + *outCI = Clone(); + return; + } + + nsRefPtr clone = + new nsHttpConnectionInfo(mAuthenticationHost, mAuthenticationPort, + EmptyCString(), mUsername, mProxyInfo, mEndToEndSSL); + // Make sure the anonymous, relaxed, and private flags are transferred + clone->SetAnonymous(GetAnonymous()); + clone->SetPrivate(GetPrivate()); + clone->SetRelaxed(GetRelaxed()); + clone.forget(outCI); +} + nsresult nsHttpConnectionInfo::CreateWildCard(nsHttpConnectionInfo **outParam) { @@ -145,7 +221,7 @@ nsHttpConnectionInfo::CreateWildCard(nsHttpConnectionInfo **outParam) nsRefPtr clone; clone = new nsHttpConnectionInfo(NS_LITERAL_CSTRING("*"), 0, - mUsername, mProxyInfo, true); + mNPNToken, mUsername, mProxyInfo, true); // Make sure the anonymous and private flags are transferred! 
clone->SetAnonymous(GetAnonymous()); clone->SetPrivate(GetPrivate()); diff --git a/netwerk/protocol/http/nsHttpConnectionInfo.h b/netwerk/protocol/http/nsHttpConnectionInfo.h index 6f3d0bdd4b9..af440a79a7a 100644 --- a/netwerk/protocol/http/nsHttpConnectionInfo.h +++ b/netwerk/protocol/http/nsHttpConnectionInfo.h @@ -32,11 +32,23 @@ namespace mozilla { namespace net { class nsHttpConnectionInfo { public: - nsHttpConnectionInfo(const nsACString &host, int32_t port, + nsHttpConnectionInfo(const nsACString &physicalHost, + int32_t physicalPort, + const nsACString &npnToken, const nsACString &username, - nsProxyInfo* proxyInfo, + nsProxyInfo *proxyInfo, bool endToEndSSL = false); + // this version must use TLS and you may supply the domain + // information to be validated + nsHttpConnectionInfo(const nsACString &physicalHost, + int32_t physicalPort, + const nsACString &npnToken, + const nsACString &username, + nsProxyInfo *proxyInfo, + const nsACString &logicalHost, + int32_t logicalPort); + private: virtual ~nsHttpConnectionInfo() { @@ -46,15 +58,12 @@ private: public: const nsAFlatCString &HashKey() const { return mHashKey; } - void SetOriginServer(const nsACString &host, int32_t port); - - void SetOriginServer(const char *host, int32_t port) - { - SetOriginServer(nsDependentCString(host), port); - } + const nsCString &GetAuthenticationHost() const { return mAuthenticationHost; } + int32_t GetAuthenticationPort() const { return mAuthenticationPort; } // OK to treat these as an infalible allocation nsHttpConnectionInfo* Clone() const; + void CloneAsDirectRoute(nsHttpConnectionInfo **outParam); nsresult CreateWildCard(nsHttpConnectionInfo **outParam); const char *ProxyHost() const { return mProxyInfo ? mProxyInfo->Host().get() : nullptr; } @@ -83,8 +92,12 @@ public: bool GetAnonymous() const { return mHashKey.CharAt(2) == 'A'; } void SetPrivate(bool priv) { mHashKey.SetCharAt(priv ? 
'P' : '.', 3); } bool GetPrivate() const { return mHashKey.CharAt(3) == 'P'; } + void SetRelaxed(bool relaxed) + { mHashKey.SetCharAt(relaxed ? 'R' : '.', 4); } + bool GetRelaxed() const { return mHashKey.CharAt(4) == 'R'; } const nsCString &GetHost() { return mHost; } + const nsCString &GetNPNToken() { return mNPNToken; } // Returns true for any kind of proxy (http, socks, https, etc..) bool UsingProxy(); @@ -108,15 +121,26 @@ public: bool HostIsLocalIPLiteral() const; private: + void Init(const nsACString &host, + int32_t port, + const nsACString &npnToken, + const nsACString &username, + nsProxyInfo* proxyInfo, + bool EndToEndSSL); + void SetOriginServer(const nsACString &host, int32_t port); + nsCString mHashKey; nsCString mHost; int32_t mPort; nsCString mUsername; + nsCString mAuthenticationHost; + int32_t mAuthenticationPort; nsCOMPtr mProxyInfo; bool mUsingHttpProxy; bool mUsingHttpsProxy; bool mEndToEndSSL; bool mUsingConnect; // if will use CONNECT with http proxy + nsCString mNPNToken; // for nsRefPtr NS_INLINE_DECL_THREADSAFE_REFCOUNTING(nsHttpConnectionInfo) diff --git a/netwerk/protocol/http/nsHttpConnectionMgr.cpp b/netwerk/protocol/http/nsHttpConnectionMgr.cpp index dd6f103dfdc..208d0a6e887 100644 --- a/netwerk/protocol/http/nsHttpConnectionMgr.cpp +++ b/netwerk/protocol/http/nsHttpConnectionMgr.cpp @@ -382,6 +382,7 @@ public: // intentional! 
bool mIgnoreIdle; bool mIgnorePossibleSpdyConnections; bool mIsFromPredictor; + bool mAllow1918; // As above, added manually so we can use nsRefPtr without inheriting from // nsISupports @@ -396,16 +397,25 @@ NS_IMPL_RELEASE(SpeculativeConnectArgs) nsresult nsHttpConnectionMgr::SpeculativeConnect(nsHttpConnectionInfo *ci, nsIInterfaceRequestor *callbacks, - uint32_t caps) + uint32_t caps, + NullHttpTransaction *nullTransaction) { MOZ_ASSERT(NS_IsMainThread(), "nsHttpConnectionMgr::SpeculativeConnect called off main thread!"); LOG(("nsHttpConnectionMgr::SpeculativeConnect [ci=%s]\n", ci->HashKey().get())); + nsCOMPtr overrider = + do_GetInterface(callbacks); + + bool allow1918 = false; + if (overrider) { + overrider->GetAllow1918(&allow1918); + } + // Hosts that are Local IP Literals should not be speculatively // connected - Bug 853423. - if (ci && ci->HostIsLocalIPLiteral()) { + if ((!allow1918) && ci && ci->HostIsLocalIPLiteral()) { LOG(("nsHttpConnectionMgr::SpeculativeConnect skipping RFC1918 " "address [%s]", ci->Host())); return NS_OK; @@ -419,10 +429,9 @@ nsHttpConnectionMgr::SpeculativeConnect(nsHttpConnectionInfo *ci, NS_NewInterfaceRequestorAggregation(callbacks, nullptr, getter_AddRefs(wrappedCallbacks)); caps |= ci->GetAnonymous() ? NS_HTTP_LOAD_ANONYMOUS : 0; - args->mTrans = new NullHttpTransaction(ci, wrappedCallbacks, caps); + args->mTrans = + nullTransaction ? 
nullTransaction : new NullHttpTransaction(ci, wrappedCallbacks, caps); - nsCOMPtr overrider = - do_GetInterface(callbacks); if (overrider) { args->mOverridesOK = true; overrider->GetParallelSpeculativeConnectLimit( @@ -431,6 +440,7 @@ nsHttpConnectionMgr::SpeculativeConnect(nsHttpConnectionInfo *ci, overrider->GetIgnorePossibleSpdyConnections( &args->mIgnorePossibleSpdyConnections); overrider->GetIsFromPredictor(&args->mIsFromPredictor); + overrider->GetAllow1918(&args->mAllow1918); } nsresult rv = @@ -1297,7 +1307,7 @@ nsHttpConnectionMgr::ReportFailedToProcess(nsIURI *uri) // report the event for all the permutations of anonymous and // private versions of this host nsRefPtr ci = - new nsHttpConnectionInfo(host, port, username, nullptr, usingSSL); + new nsHttpConnectionInfo(host, port, EmptyCString(), username, nullptr, usingSSL); ci->SetAnonymous(false); ci->SetPrivate(false); PipelineFeedbackInfo(ci, RedCorruptedContent, nullptr, 0); @@ -1518,7 +1528,7 @@ nsHttpConnectionMgr::MakeNewConnection(nsConnectionEntry *ent, if (AtActiveConnectionLimit(ent, trans->Caps())) return NS_ERROR_NOT_AVAILABLE; - nsresult rv = CreateTransport(ent, trans, trans->Caps(), false); + nsresult rv = CreateTransport(ent, trans, trans->Caps(), false, false, true); if (NS_FAILED(rv)) { /* hard failure */ LOG(("nsHttpConnectionMgr::MakeNewConnection [ci = %s trans = %p] " @@ -2145,13 +2155,15 @@ nsHttpConnectionMgr::CreateTransport(nsConnectionEntry *ent, nsAHttpTransaction *trans, uint32_t caps, bool speculative, - bool isFromPredictor) + bool isFromPredictor, + bool allow1918) { MOZ_ASSERT(PR_GetCurrentThread() == gSocketThread); nsRefPtr sock = new nsHalfOpenSocket(ent, trans, caps); if (speculative) { sock->SetSpeculative(true); + sock->SetAllow1918(allow1918); Telemetry::AutoCounter totalSpeculativeConn; ++totalSpeculativeConn; @@ -2936,20 +2948,23 @@ nsHttpConnectionMgr::OnMsgSpeculativeConnect(int32_t, void *param) bool ignorePossibleSpdyConnections = false; bool ignoreIdle = 
false; bool isFromPredictor = false; + bool allow1918 = false; if (args->mOverridesOK) { parallelSpeculativeConnectLimit = args->mParallelSpeculativeConnectLimit; ignorePossibleSpdyConnections = args->mIgnorePossibleSpdyConnections; ignoreIdle = args->mIgnoreIdle; isFromPredictor = args->mIsFromPredictor; + allow1918 = args->mAllow1918; } + bool keepAlive = args->mTrans->Caps() & NS_HTTP_ALLOW_KEEPALIVE; if (mNumHalfOpenConns < parallelSpeculativeConnectLimit && ((ignoreIdle && (ent->mIdleConns.Length() < parallelSpeculativeConnectLimit)) || !ent->mIdleConns.Length()) && - !RestrictConnections(ent, ignorePossibleSpdyConnections) && + !(keepAlive && RestrictConnections(ent, ignorePossibleSpdyConnections)) && !AtActiveConnectionLimit(ent, args->mTrans->Caps())) { - CreateTransport(ent, args->mTrans, args->mTrans->Caps(), true, isFromPredictor); + CreateTransport(ent, args->mTrans, args->mTrans->Caps(), true, isFromPredictor, allow1918); } else { LOG((" Transport not created due to existing connection count\n")); @@ -2983,7 +2998,6 @@ nsHttpConnectionMgr::nsConnectionHandle::PushBack(const char *buf, uint32_t bufL //////////////////////// nsHalfOpenSocket - NS_IMPL_ISUPPORTS(nsHttpConnectionMgr::nsHalfOpenSocket, nsIOutputStreamCallback, nsITransportEventSink, @@ -2999,6 +3013,7 @@ nsHalfOpenSocket::nsHalfOpenSocket(nsConnectionEntry *ent, , mCaps(caps) , mSpeculative(false) , mIsFromPredictor(false) + , mAllow1918(true) , mHasConnected(false) , mPrimaryConnectedOK(false) , mBackupConnectedOK(false) @@ -3074,7 +3089,7 @@ nsHalfOpenSocket::SetupStreams(nsISocketTransport **transport, tmpFlags |= nsISocketTransport::DISABLE_IPV6; } - if (IsSpeculative()) { + if (!Allow1918()) { tmpFlags |= nsISocketTransport::DISABLE_RFC1918; } @@ -3138,6 +3153,8 @@ nsHttpConnectionMgr::nsHalfOpenSocket::SetupPrimaryStreams() nsresult nsHttpConnectionMgr::nsHalfOpenSocket::SetupBackupStreams() { + MOZ_ASSERT(mTransaction && !mTransaction->IsNullTransaction()); + mBackupSynStarted = 
TimeStamp::Now(); nsresult rv = SetupStreams(getter_AddRefs(mBackupTransport), getter_AddRefs(mBackupStreamIn), @@ -3160,8 +3177,8 @@ nsHttpConnectionMgr::nsHalfOpenSocket::SetupBackupTimer() { uint16_t timeout = gHttpHandler->GetIdleSynTimeout(); MOZ_ASSERT(!mSynTimer, "timer already initd"); - - if (timeout && !mTransaction->IsDone()) { + if (timeout && !mTransaction->IsDone() && + !mTransaction->IsNullTransaction()) { // Setup the timer that will establish a backup socket // if we do not get a writable event on the main one. // We do this because a lost SYN takes a very long time @@ -3347,8 +3364,7 @@ nsHalfOpenSocket::OnOutputStreamReady(nsIAsyncOutputStream *out) mEnt->mPendingQ.RemoveElementAt(index); gHttpHandler->ConnMgr()->AddActiveConn(conn, mEnt); rv = gHttpHandler->ConnMgr()->DispatchTransaction(mEnt, temp, conn); - } - else { + } else { // this transaction was dispatched off the pending q before all the // sockets established themselves. @@ -3366,17 +3382,22 @@ nsHalfOpenSocket::OnOutputStreamReady(nsIAsyncOutputStream *out) !mEnt->mConnInfo->UsingConnect()) { LOG(("nsHalfOpenSocket::OnOutputStreamReady null transaction will " "be used to finish SSL handshake on conn %p\n", conn.get())); - nsRefPtr trans = - new NullHttpTransaction(mEnt->mConnInfo, - callbacks, - mCaps & ~NS_HTTP_ALLOW_PIPELINING); + nsRefPtr trans; + if (mTransaction->IsNullTransaction()) { + // null transactions cannot be put in the entry queue, so that + // explains why it is not present. 
+ trans = mTransaction; + } else { + trans = new NullHttpTransaction(mEnt->mConnInfo, + callbacks, + mCaps & ~NS_HTTP_ALLOW_PIPELINING); + } gHttpHandler->ConnMgr()->AddActiveConn(conn, mEnt); conn->Classify(nsAHttpTransaction::CLASS_SOLO); rv = gHttpHandler->ConnMgr()-> DispatchAbstractTransaction(mEnt, trans, mCaps, conn, 0); - } - else { + } else { // otherwise just put this in the persistent connection pool LOG(("nsHalfOpenSocket::OnOutputStreamReady no transaction match " "returning conn %p to pool\n", conn.get())); diff --git a/netwerk/protocol/http/nsHttpConnectionMgr.h b/netwerk/protocol/http/nsHttpConnectionMgr.h index 38f2cd04c50..1fd8bff14e2 100644 --- a/netwerk/protocol/http/nsHttpConnectionMgr.h +++ b/netwerk/protocol/http/nsHttpConnectionMgr.h @@ -16,6 +16,7 @@ #include "mozilla/ReentrantMonitor.h" #include "mozilla/TimeStamp.h" #include "mozilla/Attributes.h" +#include "AlternateServices.h" #include "nsIObserver.h" #include "nsITimer.h" @@ -25,11 +26,13 @@ class nsIHttpUpgradeListener; namespace mozilla { namespace net { class EventTokenBucket; +class NullHttpTransaction; struct HttpRetParams; //----------------------------------------------------------------------------- class nsHttpConnectionMgr : public nsIObserver + , public AltSvcCache { public: NS_DECL_THREADSAFE_ISUPPORTS @@ -115,7 +118,8 @@ public: // real transaction for this connectionInfo. nsresult SpeculativeConnect(nsHttpConnectionInfo *, nsIInterfaceRequestor *, - uint32_t caps = 0); + uint32_t caps = 0, + NullHttpTransaction * = nullptr); // called when a connection is done processing a transaction. 
if the // connection can be reused then it will be added to the idle list, else @@ -465,6 +469,9 @@ private: bool IsFromPredictor() { return mIsFromPredictor; } void SetIsFromPredictor(bool val) { mIsFromPredictor = val; } + bool Allow1918() { return mAllow1918; } + void SetAllow1918(bool val) { mAllow1918 = val; } + bool HasConnected() { return mHasConnected; } void PrintDiagnostics(nsCString &log); @@ -490,6 +497,8 @@ private: // connections from the predictor. bool mIsFromPredictor; + bool mAllow1918; + TimeStamp mPrimarySynStarted; TimeStamp mBackupSynStarted; @@ -562,7 +571,7 @@ private: void ClosePersistentConnections(nsConnectionEntry *ent); void ReportProxyTelemetry(nsConnectionEntry *ent); nsresult CreateTransport(nsConnectionEntry *, nsAHttpTransaction *, - uint32_t, bool, bool = false); + uint32_t, bool, bool, bool); void AddActiveConn(nsHttpConnection *, nsConnectionEntry *); void DecrementActiveConnCount(nsHttpConnection *); void StartedConnect(); diff --git a/netwerk/protocol/http/nsHttpHandler.cpp b/netwerk/protocol/http/nsHttpHandler.cpp index 1aa2321da03..a681284cf6e 100644 --- a/netwerk/protocol/http/nsHttpHandler.cpp +++ b/netwerk/protocol/http/nsHttpHandler.cpp @@ -190,6 +190,8 @@ nsHttpHandler::nsHttpHandler() , mCoalesceSpdy(true) , mSpdyPersistentSettings(false) , mAllowPush(true) + , mEnableAltSvc(true) + , mEnableAltSvcOE(true) , mSpdySendingChunkSize(ASpdySession::kSendingChunkSize) , mSpdySendBufferSize(ASpdySession::kTCPSendBufferSize) , mSpdyPushAllowance(32768) @@ -1230,6 +1232,21 @@ nsHttpHandler::PrefsChanged(nsIPrefBranch *prefs, const char *pref) mAllowPush = cVar; } + if (PREF_CHANGED(HTTP_PREF("altsvc.enabled"))) { + rv = prefs->GetBoolPref(HTTP_PREF("altsvc.enabled"), + &cVar); + if (NS_SUCCEEDED(rv)) + mEnableAltSvc = cVar; + } + + + if (PREF_CHANGED(HTTP_PREF("altsvc.oe"))) { + rv = prefs->GetBoolPref(HTTP_PREF("altsvc.oe"), + &cVar); + if (NS_SUCCEEDED(rv)) + mEnableAltSvcOE = cVar; + } + if
(PREF_CHANGED(HTTP_PREF("spdy.push-allowance"))) { rv = prefs->GetIntPref(HTTP_PREF("spdy.push-allowance"), &val); if (NS_SUCCEEDED(rv)) { @@ -1834,11 +1851,18 @@ nsHttpHandler::Observe(nsISupports *subject, } } else if (!strcmp(topic, "last-pb-context-exited")) { mPrivateAuthCache.ClearAll(); + if (mConnMgr) { + mConnMgr->ClearAltServiceMappings(); + } } else if (!strcmp(topic, "browser:purge-session-history")) { - if (mConnMgr && gSocketTransportService) { - nsCOMPtr event = NS_NewRunnableMethod(mConnMgr, - &nsHttpConnectionMgr::ClearConnectionHistory); - gSocketTransportService->Dispatch(event, NS_DISPATCH_NORMAL); + if (mConnMgr) { + if (gSocketTransportService) { + nsCOMPtr event = + NS_NewRunnableMethod(mConnMgr, + &nsHttpConnectionMgr::ClearConnectionHistory); + gSocketTransportService->Dispatch(event, NS_DISPATCH_NORMAL); + } + mConnMgr->ClearAltServiceMappings(); } } else if (!strcmp(topic, NS_NETWORK_LINK_TOPIC)) { nsAutoCString converted = NS_ConvertUTF16toUTF8(data); @@ -1917,7 +1941,7 @@ nsHttpHandler::SpeculativeConnect(nsIURI *aURI, aURI->GetUsername(username); nsHttpConnectionInfo *ci = - new nsHttpConnectionInfo(host, port, username, nullptr, usingSSL); + new nsHttpConnectionInfo(host, port, EmptyCString(), username, nullptr, usingSSL); return SpeculativeConnect(ci, aCallbacks); } diff --git a/netwerk/protocol/http/nsHttpHandler.h b/netwerk/protocol/http/nsHttpHandler.h index 6fb1affb65d..b124bef6c75 100644 --- a/netwerk/protocol/http/nsHttpHandler.h +++ b/netwerk/protocol/http/nsHttpHandler.h @@ -37,6 +37,7 @@ class Tickler; class nsHttpConnection; class nsHttpConnectionInfo; class nsHttpTransaction; +class AltSvcMapping; //----------------------------------------------------------------------------- // nsHttpHandler - protocol handler for HTTP and HTTPS @@ -108,6 +109,8 @@ public: PRIntervalTime SpdyPingThreshold() { return mSpdyPingThreshold; } PRIntervalTime SpdyPingTimeout() { return mSpdyPingTimeout; } bool AllowPush() { return mAllowPush; } 
+ bool AllowAltSvc() { return mEnableAltSvc; } + bool AllowAltSvcOE() { return mEnableAltSvcOE; } uint32_t ConnectTimeout() { return mConnectTimeout; } uint32_t ParallelSpeculativeConnectLimit() { return mParallelSpeculativeConnectLimit; } bool CriticalRequestPrioritization() { return mCriticalRequestPrioritization; } @@ -219,6 +222,22 @@ public: return mConnMgr->SpeculativeConnect(ci, callbacks, caps); } + // Alternate Services Maps are main thread only + void UpdateAltServiceMapping(AltSvcMapping *map, + nsProxyInfo *proxyInfo, + nsIInterfaceRequestor *callbacks, + uint32_t caps) + { + mConnMgr->UpdateAltServiceMapping(map, proxyInfo, callbacks, caps); + } + + AltSvcMapping *GetAltServiceMapping(const nsACString &scheme, + const nsACString &host, + int32_t port, bool pb) + { + return mConnMgr->GetAltServiceMapping(scheme, host, port, pb); + } + // // The HTTP handler caches pointers to specific XPCOM services, and // provides the following helper routines for accessing those services: @@ -454,6 +473,8 @@ private: uint32_t mCoalesceSpdy : 1; uint32_t mSpdyPersistentSettings : 1; uint32_t mAllowPush : 1; + uint32_t mEnableAltSvc : 1; + uint32_t mEnableAltSvcOE : 1; // Try to use SPDY features instead of HTTP/1.1 over SSL SpdyInformation mSpdyInfo; diff --git a/netwerk/protocol/http/nsHttpRequestHead.cpp b/netwerk/protocol/http/nsHttpRequestHead.cpp index 4ca578a4144..66835bffe90 100644 --- a/netwerk/protocol/http/nsHttpRequestHead.cpp +++ b/netwerk/protocol/http/nsHttpRequestHead.cpp @@ -51,6 +51,18 @@ nsHttpRequestHead::SetMethod(const nsACString &method) } } +void +nsHttpRequestHead::SetOrigin(const nsACString &scheme, const nsACString &host, int32_t port) +{ + mOrigin.Assign(scheme); + mOrigin.Append(NS_LITERAL_CSTRING("://")); + mOrigin.Append(host); + if (port >= 0) { + mOrigin.Append(NS_LITERAL_CSTRING(":")); + mOrigin.AppendInt(port); + } +} + bool nsHttpRequestHead::IsSafeMethod() const { diff --git a/netwerk/protocol/http/nsHttpRequestHead.h 
b/netwerk/protocol/http/nsHttpRequestHead.h index 2a0c010b70c..5d9c0ce60ee 100644 --- a/netwerk/protocol/http/nsHttpRequestHead.h +++ b/netwerk/protocol/http/nsHttpRequestHead.h @@ -36,6 +36,9 @@ public: void SetHTTPS(bool val) { mHTTPS = val; } bool IsHTTPS() const { return mHTTPS; } + void SetOrigin(const nsACString &scheme, const nsACString &host, int32_t port); + const nsCString &Origin() const { return mOrigin; } + const char *PeekHeader(nsHttpAtom h) const { return mHeaders.PeekHeader(h); @@ -97,6 +100,7 @@ private: nsCString mMethod; nsHttpVersion mVersion; nsCString mRequestURI; + nsCString mOrigin; ParsedMethodType mParsedMethod; bool mHTTPS; }; diff --git a/netwerk/protocol/http/nsHttpTransaction.cpp b/netwerk/protocol/http/nsHttpTransaction.cpp index 15c50476424..161f8fcb377 100644 --- a/netwerk/protocol/http/nsHttpTransaction.cpp +++ b/netwerk/protocol/http/nsHttpTransaction.cpp @@ -91,7 +91,6 @@ nsHttpTransaction::nsHttpTransaction() : mLock("transaction lock") , mRequestSize(0) , mConnection(nullptr) - , mConnInfo(nullptr) , mRequestHead(nullptr) , mResponseHead(nullptr) , mContentLength(-1) @@ -124,6 +123,7 @@ nsHttpTransaction::nsHttpTransaction() , mDispatchedAsBlocking(false) , mResponseTimeoutEnabled(true) , mDontRouteViaWildCard(false) + , mForceRestart(false) , mReportedStart(false) , mReportedResponseHeader(false) , mForTakeResponseHead(nullptr) @@ -848,6 +848,11 @@ nsHttpTransaction::Close(nsresult reason) // if (reason == NS_ERROR_NET_RESET || reason == NS_OK) { + if (mForceRestart && NS_SUCCEEDED(Restart())) { + LOG(("transaction force restarted\n")); + return; + } + // reallySentData is meant to separate the instances where data has // been sent by this transaction but buffered at a higher level while // a TLS session (perhaps via a tunnel) is setup. 
@@ -1110,6 +1115,17 @@ nsHttpTransaction::Restart() mCaps &= ~NS_HTTP_ALLOW_PIPELINING; SetPipelinePosition(0); + if (!mConnInfo->GetAuthenticationHost().IsEmpty()) { + MutexAutoLock lock(*nsHttp::GetLock()); + nsRefPtr ci; + mConnInfo->CloneAsDirectRoute(getter_AddRefs(ci)); + mConnInfo = ci; + if (mRequestHead) { + mRequestHead->SetHeader(nsHttp::Alternate_Service_Used, NS_LITERAL_CSTRING("0")); + } + } + mForceRestart = false; + return gHttpHandler->InitiateTransaction(this, mPriority); } @@ -1381,11 +1397,11 @@ nsHttpTransaction::ParseHead(char *buf, return NS_OK; } -// called on the socket thread nsresult nsHttpTransaction::HandleContentStart() { LOG(("nsHttpTransaction::HandleContentStart [this=%p]\n", this)); + MOZ_ASSERT(PR_GetCurrentThread() == gSocketThread); if (mResponseHead) { #if defined(PR_LOGGING) @@ -1429,6 +1445,14 @@ nsHttpTransaction::HandleContentStart() mNoContent = true; LOG(("this response should not contain a body.\n")); break; + case 421: + if (!mConnInfo->GetAuthenticationHost().IsEmpty()) { + LOG(("Not Authoritative.\n")); + gHttpHandler->ConnMgr()-> + ClearHostMapping(mConnInfo->GetHost(), mConnInfo->Port()); + mForceRestart = true; + } + break; } if (mResponseHead->Status() == 200 && diff --git a/netwerk/protocol/http/nsHttpTransaction.h b/netwerk/protocol/http/nsHttpTransaction.h index 791b66f89c0..6cae916d4b3 100644 --- a/netwerk/protocol/http/nsHttpTransaction.h +++ b/netwerk/protocol/http/nsHttpTransaction.h @@ -264,6 +264,7 @@ private: bool mDispatchedAsBlocking; bool mResponseTimeoutEnabled; bool mDontRouteViaWildCard; + bool mForceRestart; // mClosed := transaction has been explicitly closed // mTransactionDone := transaction ran to completion or was interrupted diff --git a/netwerk/test/unit/test_http2.js b/netwerk/test/unit/test_http2.js index 885e5ea9466..5bf9284c708 100644 --- a/netwerk/test/unit/test_http2.js +++ b/netwerk/test/unit/test_http2.js @@ -335,6 +335,53 @@ function test_http2_post_big() { do_post(posts[1], chan, 
listener); } +Cu.import("resource://testing-common/httpd.js"); +var httpserv = null; +var ios = Components.classes["@mozilla.org/network/io-service;1"] + .getService(Components.interfaces.nsIIOService); + +var altsvcClientListener = { + onStartRequest: function test_onStartR(request, ctx) { + do_check_eq(request.status, Components.results.NS_OK); + }, + + onDataAvailable: function test_ODA(request, cx, stream, offset, cnt) { + read_stream(stream, cnt); + }, + + onStopRequest: function test_onStopR(request, ctx, status) { + var isHttp2Connection = checkIsHttp2(request); + if (!isHttp2Connection) { + // not over tls yet - retry. It's all async and transparent to client + var chan = ios.newChannel("http://localhost:" + httpserv.identity.primaryPort + "/altsvc1", + null, null).QueryInterface(Components.interfaces.nsIHttpChannel); + chan.asyncOpen(altsvcClientListener, null); + } else { + do_check_true(isHttp2Connection); + httpserv.stop(do_test_finished); + run_next_test(); + } + } +}; + +function altsvcHttp1Server(metadata, response) { + response.setStatusLine(metadata.httpVersion, 200, "OK"); + response.setHeader("Content-Type", "text/plain", false); + response.setHeader("Alt-Svc", 'h2=":6944"; ma=3200, h2-14=":6944"', false); + var body = "this is where a cool kid would write something neat.\n"; + response.bodyOutputStream.write(body, body.length); +} + +function test_http2_altsvc() { + httpserv = new HttpServer(); + httpserv.registerPathHandler("/altsvc1", altsvcHttp1Server); + httpserv.start(-1); + + var chan = ios.newChannel("http://localhost:" + httpserv.identity.primaryPort + "/altsvc1", + null, null).QueryInterface(Components.interfaces.nsIHttpChannel); + chan.asyncOpen(altsvcClientListener, null); +} + // hack - the header test resets the multiplex object on the server, // so make sure header is always run before the multiplex test. 
// @@ -346,6 +393,7 @@ var tests = [ test_http2_post_big , test_http2_push2 , test_http2_push3 , test_http2_push4 + , test_http2_altsvc , test_http2_doubleheader , test_http2_xhr , test_http2_header @@ -432,6 +480,8 @@ function resetPrefs() { prefs.setBoolPref("network.http.spdy.allow-push", spdypush); prefs.setBoolPref("network.http.spdy.enabled.http2draft", http2pref); prefs.setBoolPref("network.http.spdy.enforce-tls-profile", tlspref); + prefs.setBoolPref("network.http.altsvc.enabled", altsvcpref1); + prefs.setBoolPref("network.http.altsvc.oe", altsvcpref2); } function run_test() { @@ -454,11 +504,16 @@ function run_test() { spdypush = prefs.getBoolPref("network.http.spdy.allow-push"); http2pref = prefs.getBoolPref("network.http.spdy.enabled.http2draft"); tlspref = prefs.getBoolPref("network.http.spdy.enforce-tls-profile"); + altsvcpref1 = prefs.getBoolPref("network.http.altsvc.enabled"); + altsvcpref2 = prefs.getBoolPref("network.http.altsvc.oe", true); + prefs.setBoolPref("network.http.spdy.enabled", true); prefs.setBoolPref("network.http.spdy.enabled.v3", true); prefs.setBoolPref("network.http.spdy.allow-push", true); prefs.setBoolPref("network.http.spdy.enabled.http2draft", true); prefs.setBoolPref("network.http.spdy.enforce-tls-profile", false); + prefs.setBoolPref("network.http.altsvc.enabled", true); + prefs.setBoolPref("network.http.altsvc.oe", true); loadGroup = Cc["@mozilla.org/network/load-group;1"].createInstance(Ci.nsILoadGroup); diff --git a/toolkit/components/telemetry/Histograms.json b/toolkit/components/telemetry/Histograms.json index af74dc30358..29477f05d1f 100644 --- a/toolkit/components/telemetry/Histograms.json +++ b/toolkit/components/telemetry/Histograms.json @@ -1193,6 +1193,16 @@ "kind": "boolean", "description": "Whether a HTTP base page load was over SSL or not." }, + "HTTP_TRANSACTION_USE_ALTSVC": { + "expires_in_version": "never", + "kind": "boolean", + "description": "Whether a HTTP transaction was routed via Alt-Svc or not." 
+ }, + "HTTP_TRANSACTION_USE_ALTSVC_OE": { + "expires_in_version": "never", + "kind": "boolean", + "description": "Whether a HTTP transaction routed via Alt-Svc was scheme=http" + }, "SSL_HANDSHAKE_VERSION": { "expires_in_version": "never", "kind": "enumerated", From 257fe5f2b23c223b249516075fde3fdfa17af838 Mon Sep 17 00:00:00 2001 From: Stephen Pohl Date: Thu, 2 Oct 2014 13:19:34 -0400 Subject: [PATCH 018/146] Bug 1075691: The GreD for XPCOM-using subprocesses on OSX needs to change due to the v2 signature changes. r=bsmedberg --- ipc/glue/ScopedXREEmbed.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ipc/glue/ScopedXREEmbed.cpp b/ipc/glue/ScopedXREEmbed.cpp index 544de73150f..342decf42b3 100644 --- a/ipc/glue/ScopedXREEmbed.cpp +++ b/ipc/glue/ScopedXREEmbed.cpp @@ -90,6 +90,11 @@ ScopedXREEmbed::Start() localFile = do_QueryInterface(parent); NS_ENSURE_TRUE_VOID(localFile); + + rv = localFile->SetNativeLeafName(NS_LITERAL_CSTRING("Resources")); + if (NS_FAILED(rv)) { + return; + } } #endif From 90874a8ac134e209b8ec3e8213e1013bf6e26158 Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Thu, 2 Oct 2014 19:31:27 +0200 Subject: [PATCH 019/146] Bug 1074378 - Blocklist driver Intel GMAX4500HD v 8,15,10,1749. 
r=Bas --- widget/windows/GfxInfo.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/widget/windows/GfxInfo.cpp b/widget/windows/GfxInfo.cpp index 95421de31c8..c0d42238241 100644 --- a/widget/windows/GfxInfo.cpp +++ b/widget/windows/GfxInfo.cpp @@ -975,6 +975,13 @@ GfxInfo::GetGfxDriverInfo() nsIGfxInfo::FEATURE_WEBGL_OPENGL, nsIGfxInfo::FEATURE_DISCOURAGED, DRIVER_LESS_THAN, GfxDriverInfo::allDriverVersions ); + // Bug 1074378 + APPEND_TO_DRIVER_BLOCKLIST( DRIVER_OS_ALL, + (nsAString&) GfxDriverInfo::GetDeviceVendor(VendorIntel), + (GfxDeviceFamily*) GfxDriverInfo::GetDeviceFamily(IntelGMAX4500HD), + nsIGfxInfo::FEATURE_DIRECT3D_11_LAYERS, nsIGfxInfo::FEATURE_BLOCKED_DRIVER_VERSION, + DRIVER_EQUAL, V(8,15,10,1749), "8.15.10.1749"); + /** * Disable acceleration on Intel HD 3000 for graphics drivers <= 8.15.10.2321. * See bug 1018278 and bug 1060736. From 564ea347f1f8f87e656ecd2adf0b1f2703a5b07b Mon Sep 17 00:00:00 2001 From: Ryan VanderMeulen Date: Thu, 2 Oct 2014 13:43:59 -0400 Subject: [PATCH 020/146] Bug 1025040 - Disable test_single_finger_desktop.py on Windows for frequent failures. --- testing/marionette/client/marionette/tests/unit/unit-tests.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/testing/marionette/client/marionette/tests/unit/unit-tests.ini b/testing/marionette/client/marionette/tests/unit/unit-tests.ini index d8d82593c29..597766067a1 100644 --- a/testing/marionette/client/marionette/tests/unit/unit-tests.ini +++ b/testing/marionette/client/marionette/tests/unit/unit-tests.ini @@ -75,6 +75,7 @@ b2g = true browser = false [test_single_finger_desktop.py] b2g = false +skip-if = os == "win" # Bug 1025040 [test_multi_finger.py] disabled = "Bug 1060060" From 743e1927233e89022a43f550d2c812ee0355e08d Mon Sep 17 00:00:00 2001 From: Camilo Viecco Date: Thu, 2 Oct 2014 10:49:56 -0700 Subject: [PATCH 021/146] Bug 1075081 - Enhance pinning test to ensure the neterror page the one found. 
r=keeler --- .../content/test/general/browser_blockHPKP.js | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/browser/base/content/test/general/browser_blockHPKP.js b/browser/base/content/test/general/browser_blockHPKP.js index a24bac12b52..57ff2edcc28 100644 --- a/browser/base/content/test/general/browser_blockHPKP.js +++ b/browser/base/content/test/general/browser_blockHPKP.js @@ -69,23 +69,17 @@ let successfulPinningPageListener = { // The browser should load about:neterror, when this happens, proceed // to load the pinning domain again, this time removing the pinning information let certErrorProgressListener = { - buttonClicked: false, onStateChange: function(aWebProgress, aRequest, aStateFlags, aStatus) { if (aStateFlags & Ci.nsIWebProgressListener.STATE_STOP) { - let self = this; - // Can't directly call button.click() in onStateChange - executeSoon(function() { - let button = content.document.getElementById("errorTryAgain"); - // If about:neterror hasn't fully loaded, the button won't be present. - // It will eventually be there, however. 
- if (button && !self.buttonClicked) { - gBrowser.removeProgressListener(self); - gBrowser.selectedBrowser.addEventListener("load", - successfulPinningRemovalPageListener, - true); - gBrowser.selectedBrowser.loadURI("https://" + kPinningDomain + kURLPath + "zeromaxagevalid"); - } - }); + let textElement = content.document.getElementById("errorShortDescText"); + let text = textElement.innerHTML; + ok(text.indexOf("mozilla_pkix_error_key_pinning_failure") > 0, + "Got a pinning error page"); + gBrowser.removeProgressListener(this); + gBrowser.selectedBrowser.addEventListener("load", + successfulPinningRemovalPageListener, + true); + gBrowser.selectedBrowser.loadURI("https://" + kPinningDomain + kURLPath + "zeromaxagevalid"); } } }; From 21ebf7ae94f256f144acda335c3afefd393d824c Mon Sep 17 00:00:00 2001 From: Ralph Giles Date: Thu, 2 Oct 2014 11:20:47 -0700 Subject: [PATCH 022/146] Bug 1063356 - Back out partial libvpx update. r=me It's taking a while to get the new version building on all platforms. Removing the partial commits so we can land cleanly. 
--- media/libvpx/mingw.patch | 30 + media/libvpx/moz.build | 7 - media/libvpx/stdint.patch | 39 +- media/libvpx/unified.patch | 82 + media/libvpx/update.py | 90 +- .../common/arm/neon/bilinearpredict_neon.c | 699 --- .../libvpx/vp8/common/arm/neon/copymem_neon.c | 59 - .../common/arm/neon/dc_only_idct_add_neon.c | 42 - .../vp8/common/arm/neon/dequant_idct_neon.c | 142 - .../vp8/common/arm/neon/dequantizeb_neon.c | 25 - .../common/arm/neon/idct_dequant_0_2x_neon.c | 62 - .../arm/neon/idct_dequant_full_2x_neon.c | 185 - .../libvpx/vp8/common/arm/neon/iwalsh_neon.c | 102 - .../vp8/common/arm/neon/loopfilter_neon.c | 549 --- .../loopfiltersimplehorizontaledge_neon.c | 111 - .../neon/loopfiltersimpleverticaledge_neon.c | 279 -- .../vp8/common/arm/neon/mbloopfilter_neon.c | 625 --- .../vp8/common/arm/neon/reconintra_neon.c | 210 - media/libvpx/vp8/common/arm/neon/sad_neon.c | 184 - .../common/arm/neon/shortidct4x4llm_neon.c | 123 - .../vp8/common/arm/neon/sixtappredict_neon.c | 1754 -------- .../vp8/common/arm/neon/variance_neon.c | 320 -- .../arm/neon/vp8_subpixelvariance_neon.c | 1024 ----- .../x86/loopfilter_block_sse2_x86_64.asm | 815 ---- media/libvpx/vp8/decoder/decodeframe.c | 1397 ------ .../vp8/encoder/arm/neon/denoising_neon.c | 478 -- .../vp8/encoder/arm/neon/shortfdct_neon.c | 269 -- .../vp8/encoder/arm/neon/subtract_neon.c | 154 - .../vp8/encoder/arm/neon/vp8_mse16x16_neon.c | 131 - .../encoder/arm/neon/vp8_shortwalsh4x4_neon.c | 118 - media/libvpx/vp8/encoder/x86/quantize_sse4.c | 128 - media/libvpx/vp8/encoder/x86/quantize_ssse3.c | 114 - .../vp8/encoder/x86/ssim_opt_x86_64.asm | 216 - .../arm/neon/vp9_idct16x16_1_add_neon.asm | 198 - .../arm/neon/vp9_idct16x16_add_neon.asm | 1179 ----- .../arm/neon/vp9_idct32x32_1_add_neon.asm | 144 - .../arm/neon/vp9_idct32x32_add_neon.asm | 1299 ------ .../arm/neon/vp9_idct4x4_1_add_neon.asm | 68 - .../common/arm/neon/vp9_idct4x4_add_neon.asm | 190 - .../arm/neon/vp9_idct8x8_1_add_neon.asm | 88 - 
.../common/arm/neon/vp9_idct8x8_add_neon.asm | 519 --- .../common/arm/neon/vp9_iht4x4_add_neon.asm | 237 - .../common/arm/neon/vp9_iht8x8_add_neon.asm | 698 --- .../arm/neon/vp9_loopfilter_16_neon.asm | 199 - .../common/arm/neon/vp9_loopfilter_16_neon.c | 53 - .../common/arm/neon/vp9_reconintra_neon.asm | 634 --- media/libvpx/vp9/common/vp9_blockd.c | 149 - media/libvpx/vp9/common/vp9_frame_buffers.c | 86 - media/libvpx/vp9/common/vp9_frame_buffers.h | 53 - media/libvpx/vp9/common/vp9_prob.c | 61 - media/libvpx/vp9/common/vp9_prob.h | 84 - media/libvpx/vp9/common/vp9_thread.c | 184 - media/libvpx/vp9/common/vp9_thread.h | 219 - .../common/x86/vp9_high_intrapred_sse2.asm | 476 -- .../x86/vp9_high_loopfilter_intrin_sse2.c | 1119 ----- .../common/x86/vp9_high_subpixel_8t_sse2.asm | 962 ---- .../x86/vp9_high_subpixel_bilinear_sse2.asm | 494 -- .../vp9/common/x86/vp9_idct_intrin_sse2.h | 175 - .../vp9/common/x86/vp9_idct_intrin_ssse3.c | 762 ---- .../vp9/common/x86/vp9_idct_ssse3_x86_64.asm | 300 -- .../common/x86/vp9_subpixel_8t_intrin_avx2.c | 544 --- .../common/x86/vp9_subpixel_8t_intrin_ssse3.c | 492 -- .../common/x86/vp9_subpixel_bilinear_sse2.asm | 448 -- .../x86/vp9_subpixel_bilinear_ssse3.asm | 422 -- media/libvpx/vp9/decoder/vp9_decodeframe.c | 1522 ------- media/libvpx/vp9/decoder/vp9_decodeframe.h | 38 - media/libvpx/vp9/decoder/vp9_decoder.c | 382 -- media/libvpx/vp9/decoder/vp9_decoder.h | 105 - media/libvpx/vp9/decoder/vp9_dthread.c | 269 -- media/libvpx/vp9/decoder/vp9_dthread.h | 58 - .../libvpx/vp9/decoder/vp9_read_bit_buffer.c | 41 - media/libvpx/vp9/decoder/vp9_reader.c | 106 - media/libvpx/vp9/decoder/vp9_reader.h | 119 - .../vp9/encoder/arm/neon/vp9_dct_neon.c | 223 - .../vp9/encoder/arm/neon/vp9_quantize_neon.c | 119 - .../vp9/encoder/arm/neon/vp9_sad_neon.c | 130 - .../vp9/encoder/arm/neon/vp9_subtract_neon.c | 81 - .../vp9/encoder/arm/neon/vp9_variance_neon.c | 227 - media/libvpx/vp9/encoder/vp9_aq_complexity.c | 151 - 
media/libvpx/vp9/encoder/vp9_aq_complexity.h | 34 - .../libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c | 324 -- .../libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h | 50 - media/libvpx/vp9/encoder/vp9_aq_variance.c | 149 - media/libvpx/vp9/encoder/vp9_aq_variance.h | 34 - media/libvpx/vp9/encoder/vp9_context_tree.c | 158 - media/libvpx/vp9/encoder/vp9_context_tree.h | 78 - media/libvpx/vp9/encoder/vp9_cost.c | 62 - media/libvpx/vp9/encoder/vp9_cost.h | 55 - media/libvpx/vp9/encoder/vp9_denoiser.c | 491 -- media/libvpx/vp9/encoder/vp9_denoiser.h | 64 - media/libvpx/vp9/encoder/vp9_encoder.c | 3984 ----------------- media/libvpx/vp9/encoder/vp9_encoder.h | 541 --- media/libvpx/vp9/encoder/vp9_extend.c | 131 - media/libvpx/vp9/encoder/vp9_extend.h | 33 - media/libvpx/vp9/encoder/vp9_pickmode.c | 764 ---- media/libvpx/vp9/encoder/vp9_pickmode.h | 32 - media/libvpx/vp9/encoder/vp9_rd.c | 600 --- media/libvpx/vp9/encoder/vp9_rd.h | 169 - media/libvpx/vp9/encoder/vp9_resize.c | 920 ---- media/libvpx/vp9/encoder/vp9_resize.h | 124 - media/libvpx/vp9/encoder/vp9_sad.c | 271 -- media/libvpx/vp9/encoder/vp9_speed_features.c | 438 -- media/libvpx/vp9/encoder/vp9_speed_features.h | 449 -- media/libvpx/vp9/encoder/vp9_ssim.h | 46 - .../libvpx/vp9/encoder/vp9_svc_layercontext.c | 345 -- .../libvpx/vp9/encoder/vp9_svc_layercontext.h | 108 - media/libvpx/vp9/encoder/vp9_variance.c | 641 --- .../libvpx/vp9/encoder/vp9_write_bit_buffer.c | 35 - media/libvpx/vp9/encoder/vp9_writer.c | 34 - media/libvpx/vp9/encoder/vp9_writer.h | 98 - .../vp9/encoder/x86/vp9_dct32x32_avx2.c | 2710 ----------- media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c | 26 - media/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm | 70 - .../vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm | 182 - .../vp9/encoder/x86/vp9_error_intrin_avx2.c | 72 - .../encoder/x86/vp9_quantize_ssse3_x86_64.asm | 402 -- .../vp9/encoder/x86/vp9_sad4d_intrin_avx2.c | 167 - .../vp9/encoder/x86/vp9_ssim_opt_x86_64.asm | 216 - .../vp9_subpel_variance_impl_intrin_avx2.c | 
539 --- .../vp9/encoder/x86/vp9_variance_avx2.c | 190 - .../x86/vp9_variance_impl_intrin_avx2.c | 213 - media/libvpx/vpx/internal/vpx_psnr.h | 34 - media/libvpx/vpx/src/vpx_psnr.c | 24 - media/libvpx/vpx/vpx_frame_buffer.h | 80 - 124 files changed, 149 insertions(+), 42708 deletions(-) create mode 100644 media/libvpx/mingw.patch create mode 100644 media/libvpx/unified.patch delete mode 100644 media/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/copymem_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/dequant_idct_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/dequantizeb_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/iwalsh_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/loopfilter_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/reconintra_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/sad_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/sixtappredict_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/variance_neon.c delete mode 100644 media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c delete mode 100644 media/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm delete mode 100644 media/libvpx/vp8/decoder/decodeframe.c delete mode 100644 media/libvpx/vp8/encoder/arm/neon/denoising_neon.c delete mode 100644 media/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c delete mode 
100644 media/libvpx/vp8/encoder/arm/neon/subtract_neon.c delete mode 100644 media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c delete mode 100644 media/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c delete mode 100644 media/libvpx/vp8/encoder/x86/quantize_sse4.c delete mode 100644 media/libvpx/vp8/encoder/x86/quantize_ssse3.c delete mode 100644 media/libvpx/vp8/encoder/x86/ssim_opt_x86_64.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_idct16x16_add_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_idct4x4_add_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_loopfilter_16_neon.c delete mode 100644 media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.asm delete mode 100644 media/libvpx/vp9/common/vp9_blockd.c delete mode 100644 media/libvpx/vp9/common/vp9_frame_buffers.c delete mode 100644 media/libvpx/vp9/common/vp9_frame_buffers.h delete mode 100644 media/libvpx/vp9/common/vp9_prob.c delete mode 100644 media/libvpx/vp9/common/vp9_prob.h delete mode 100644 media/libvpx/vp9/common/vp9_thread.c delete mode 100644 media/libvpx/vp9/common/vp9_thread.h delete mode 100644 media/libvpx/vp9/common/x86/vp9_high_intrapred_sse2.asm delete mode 100644 media/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c delete mode 100644 
media/libvpx/vp9/common/x86/vp9_high_subpixel_8t_sse2.asm delete mode 100644 media/libvpx/vp9/common/x86/vp9_high_subpixel_bilinear_sse2.asm delete mode 100644 media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h delete mode 100644 media/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c delete mode 100644 media/libvpx/vp9/common/x86/vp9_idct_ssse3_x86_64.asm delete mode 100644 media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c delete mode 100644 media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c delete mode 100644 media/libvpx/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm delete mode 100644 media/libvpx/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm delete mode 100644 media/libvpx/vp9/decoder/vp9_decodeframe.c delete mode 100644 media/libvpx/vp9/decoder/vp9_decodeframe.h delete mode 100644 media/libvpx/vp9/decoder/vp9_decoder.c delete mode 100644 media/libvpx/vp9/decoder/vp9_decoder.h delete mode 100644 media/libvpx/vp9/decoder/vp9_dthread.c delete mode 100644 media/libvpx/vp9/decoder/vp9_dthread.h delete mode 100644 media/libvpx/vp9/decoder/vp9_read_bit_buffer.c delete mode 100644 media/libvpx/vp9/decoder/vp9_reader.c delete mode 100644 media/libvpx/vp9/decoder/vp9_reader.h delete mode 100644 media/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c delete mode 100644 media/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c delete mode 100644 media/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c delete mode 100644 media/libvpx/vp9/encoder/arm/neon/vp9_subtract_neon.c delete mode 100644 media/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c delete mode 100644 media/libvpx/vp9/encoder/vp9_aq_complexity.c delete mode 100644 media/libvpx/vp9/encoder/vp9_aq_complexity.h delete mode 100644 media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c delete mode 100644 media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h delete mode 100644 media/libvpx/vp9/encoder/vp9_aq_variance.c delete mode 100644 media/libvpx/vp9/encoder/vp9_aq_variance.h delete mode 100644 
media/libvpx/vp9/encoder/vp9_context_tree.c delete mode 100644 media/libvpx/vp9/encoder/vp9_context_tree.h delete mode 100644 media/libvpx/vp9/encoder/vp9_cost.c delete mode 100644 media/libvpx/vp9/encoder/vp9_cost.h delete mode 100644 media/libvpx/vp9/encoder/vp9_denoiser.c delete mode 100644 media/libvpx/vp9/encoder/vp9_denoiser.h delete mode 100644 media/libvpx/vp9/encoder/vp9_encoder.c delete mode 100644 media/libvpx/vp9/encoder/vp9_encoder.h delete mode 100644 media/libvpx/vp9/encoder/vp9_extend.c delete mode 100644 media/libvpx/vp9/encoder/vp9_extend.h delete mode 100644 media/libvpx/vp9/encoder/vp9_pickmode.c delete mode 100644 media/libvpx/vp9/encoder/vp9_pickmode.h delete mode 100644 media/libvpx/vp9/encoder/vp9_rd.c delete mode 100644 media/libvpx/vp9/encoder/vp9_rd.h delete mode 100644 media/libvpx/vp9/encoder/vp9_resize.c delete mode 100644 media/libvpx/vp9/encoder/vp9_resize.h delete mode 100644 media/libvpx/vp9/encoder/vp9_sad.c delete mode 100644 media/libvpx/vp9/encoder/vp9_speed_features.c delete mode 100644 media/libvpx/vp9/encoder/vp9_speed_features.h delete mode 100644 media/libvpx/vp9/encoder/vp9_ssim.h delete mode 100644 media/libvpx/vp9/encoder/vp9_svc_layercontext.c delete mode 100644 media/libvpx/vp9/encoder/vp9_svc_layercontext.h delete mode 100644 media/libvpx/vp9/encoder/vp9_variance.c delete mode 100644 media/libvpx/vp9/encoder/vp9_write_bit_buffer.c delete mode 100644 media/libvpx/vp9/encoder/vp9_writer.c delete mode 100644 media/libvpx/vp9/encoder/vp9_writer.h delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm delete mode 100644 
media/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_variance_avx2.c delete mode 100644 media/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c delete mode 100644 media/libvpx/vpx/internal/vpx_psnr.h delete mode 100644 media/libvpx/vpx/src/vpx_psnr.c delete mode 100644 media/libvpx/vpx/vpx_frame_buffer.h diff --git a/media/libvpx/mingw.patch b/media/libvpx/mingw.patch new file mode 100644 index 00000000000..e0f407e7873 --- /dev/null +++ b/media/libvpx/mingw.patch @@ -0,0 +1,30 @@ +diff --git a/media/libvpx/vpx/src/svc_encodeframe.c b/media/libvpx/vpx/src/svc_encodeframe.c +index 57d21dc..2514ad3 100644 +--- a/media/libvpx/vpx/src/svc_encodeframe.c ++++ b/media/libvpx/vpx/src/svc_encodeframe.c +@@ -18,21 +18,23 @@ + #include + #include + #define VPX_DISABLE_CTRL_TYPECHECKS 1 + #define VPX_CODEC_DISABLE_COMPAT 1 + #include "vpx/svc_context.h" + #include "vpx/vp8cx.h" + #include "vpx/vpx_encoder.h" + +-#if defined(__MINGW32__) && !defined(MINGW_HAS_SECURE_API) ++#ifdef __MINGW32__ + #define strtok_r strtok_s ++#ifndef MINGW_HAS_SECURE_API + // proto from /usr/x86_64-w64-mingw32/include/sec_api/string_s.h + _CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context); +-#endif ++#endif /* MINGW_HAS_SECURE_API */ ++#endif /* __MINGW32__ */ + + #ifdef _MSC_VER + #define strdup _strdup + #define strtok_r strtok_s + #endif + + #define SVC_REFERENCE_FRAMES 8 + #define SUPERFRAME_SLOTS (8) diff --git a/media/libvpx/moz.build b/media/libvpx/moz.build index 97797e66273..03aa3edd5aa 100644 --- a/media/libvpx/moz.build +++ b/media/libvpx/moz.build @@ -45,13 +45,6 @@ if CONFIG['VPX_X86_ASM']: if CONFIG['OS_TARGET'] == 'Darwin': SOURCES += files['AVX2'] - # Expected support is hard-coded in the various vpx_config files but - # we need to propagate the 
config checks here to get the right flags. - if CONFIG['HAVE_TOOLCHAIN_SUPPORT_MSSSE3']: - CFLAGS += ['-mssse3'] - if CONFIG['HAVE_TOOLCHAIN_SUPPORT_MSSE4_1']: - CFLAGS += ['-msse4.1'] - #postproc is only enabled on x86 with asm SOURCES += files['VP8_POSTPROC'] diff --git a/media/libvpx/stdint.patch b/media/libvpx/stdint.patch index 61e2c78c08c..70fa7481f6b 100644 --- a/media/libvpx/stdint.patch +++ b/media/libvpx/stdint.patch @@ -1,20 +1,41 @@ ---- vpx/vpx_integer.h- 2014-09-17 15:49:58.000000000 -0700 -+++ vpx/vpx_integer.h 2014-09-17 15:52:59.000000000 -0700 -@@ -15,6 +15,8 @@ +diff --git a/media/libvpx/vpx/vpx_integer.h b/media/libvpx/vpx/vpx_integer.h +--- a/media/libvpx/vpx/vpx_integer.h ++++ b/media/libvpx/vpx/vpx_integer.h +@@ -10,16 +10,18 @@ + + + #ifndef VPX_INTEGER_H + #define VPX_INTEGER_H + /* get ptrdiff_t, size_t, wchar_t, NULL */ #include +#if !defined(VPX_DONT_DEFINE_STDINT_TYPES) + - #if defined(_MSC_VER) - #define VPX_FORCE_INLINE __forceinline - #define VPX_INLINE __inline -@@ -56,6 +58,8 @@ + #if (defined(_MSC_VER) && (_MSC_VER < 1600)) || defined(VPX_EMULATE_INTTYPES) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +@@ -47,16 +49,18 @@ typedef unsigned int uintptr_t; + + #if defined(__cplusplus) && !defined(__STDC_FORMAT_MACROS) + #define __STDC_FORMAT_MACROS + #endif + #include #endif -+#endif // VPX_DONT_DEFINE_STDINT_TYPES ++#endif + /* VS2010 defines stdint.h, but not inttypes.h */ - #if defined(_MSC_VER) && _MSC_VER < 1800 + #if defined(_MSC_VER) #define PRId64 "I64d" + #else + #include + #endif + + #endif diff --git a/media/libvpx/unified.patch b/media/libvpx/unified.patch new file mode 100644 index 00000000000..f4eddf66f50 --- /dev/null +++ b/media/libvpx/unified.patch @@ -0,0 +1,82 @@ +diff --git a/media/libvpx/vp8/common/setupintrarecon.h b/media/libvpx/vp8/common/setupintrarecon.h +index 
e515c3a..9317a6d 100644 +--- a/media/libvpx/vp8/common/setupintrarecon.h ++++ b/media/libvpx/vp8/common/setupintrarecon.h +@@ -3,16 +3,18 @@ + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + ++#ifndef SETUPINTRARECON_H ++#define SETUPINTRARECON_H + + #include "vpx_scale/yv12config.h" + extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); + extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf); + + static + void setup_intra_recon_left(unsigned char *y_buffer, + unsigned char *u_buffer, +@@ -26,8 +28,10 @@ void setup_intra_recon_left(unsigned char *y_buffer, + y_buffer[y_stride *i] = (unsigned char) 129; + + for (i = 0; i < 8; i++) + u_buffer[uv_stride *i] = (unsigned char) 129; + + for (i = 0; i < 8; i++) + v_buffer[uv_stride *i] = (unsigned char) 129; + } ++ ++#endif +diff --git a/media/libvpx/vpx_ports/vpx_once.h b/media/libvpx/vpx_ports/vpx_once.h +index 16a735c..0387a71 100644 +--- a/media/libvpx/vpx_ports/vpx_once.h ++++ b/media/libvpx/vpx_ports/vpx_once.h +@@ -2,16 +2,19 @@ + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ ++#ifndef VPX_ONCE_H ++#define VPX_ONCE_H ++ + #include "vpx_config.h" + + #if CONFIG_MULTITHREAD && defined(_WIN32) + #include + #include + static void once(void (*func)(void)) + { + static CRITICAL_SECTION *lock; +@@ -90,8 +93,10 @@ static void once(void (*func)(void)) + + if(!done) + { + func(); + done = 1; + } + } + #endif ++ ++#endif +diff --git a/media/libvpx/vp8/common/loopfilter.c b/media/libvpx/vp8/common/loopfilter.c +index 19857a7..3c0fa63 100644 +--- a/media/libvpx/vp8/common/loopfilter.c ++++ b/media/libvpx/vp8/common/loopfilter.c +@@ -15,8 +15,6 @@ + #include "onyxc_int.h" + #include "vpx_mem/vpx_mem.h" + +-typedef unsigned char uc; +- + static void lf_init_lut(loop_filter_info_n *lfi) + { + int filt_lvl; diff --git a/media/libvpx/update.py b/media/libvpx/update.py index 390b1158aef..8e0149edef7 100755 --- a/media/libvpx/update.py +++ b/media/libvpx/update.py @@ -120,20 +120,16 @@ MODULES = { 'VP8_COMMON_SRCS-$(HAVE_MEDIA)', 'VP8_COMMON_SRCS-$(HAVE_NEON)', 'VP9_COMMON_SRCS-$(HAVE_NEON)', - 'VP9_COMMON_SRCS-$(HAVE_NEON_ASM)', 'VP8_CX_SRCS-$(ARCH_ARM)', 'VP8_CX_SRCS-$(HAVE_EDSP)', 'VP8_CX_SRCS-$(HAVE_MEDIA)', 'VP8_CX_SRCS-$(HAVE_NEON)', - 'VP8_CX_SRCS-$(HAVE_NEON_ASM)', - 'VP9_CX_SRCS-$(HAVE_NEON)', ], 'ERROR_CONCEALMENT': [ 'VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT)', ], 'AVX2': [ 'VP9_COMMON_SRCS-$(HAVE_AVX2)', - 'VP9_CX_SRCS-$(HAVE_AVX2)', ], 'VP8_POSTPROC': [ 'VP8_COMMON_SRCS-$(CONFIG_POSTPROC)', @@ -144,14 +140,12 @@ MODULES = { } DISABLED_MODULES = [ - 'API_SRCS-$(CONFIG_SPATIAL_SVC)', 'MEM_SRCS-$(CONFIG_MEM_MANAGER)', 'MEM_SRCS-$(CONFIG_MEM_TRACKER)', 'VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER)', 'VP9_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER)', 'VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS)', 'VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS)', - 'VP9_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING)', # mips files are also ignored via ignored_folders 'SCALE_SRCS-$(HAVE_DSPR2)', @@ -211,95 +205,23 @@ files = { 'vpx/vpx_codec.h', 'vpx/vpx_decoder.h', 'vpx/vpx_encoder.h', 
- 'vpx/vpx_frame_buffer.h', 'vpx/vpx_image.h', 'vpx/vpx_integer.h', ], 'X86-64_ASM': [ 'third_party/x86inc/x86inc.asm', - 'vp8/common/x86/loopfilter_block_sse2_x86_64.asm', - 'vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm', + 'vp8/common/x86/loopfilter_block_sse2.asm', + 'vp9/encoder/x86/vp9_quantize_ssse3.asm', ], 'SOURCES': [ 'vp8/common/rtcd.c', 'vp8/common/sad_c.c', - 'vp8/encoder/bitstream.c', - 'vp8/encoder/onyx_if.c', 'vp8/vp8_dx_iface.c', - 'vp9/common/vp9_alloccommon.c', - 'vp9/common/vp9_blockd.c', - 'vp9/common/vp9_common_data.c', - 'vp9/common/vp9_convolve.c', - 'vp9/common/vp9_debugmodes.c', - 'vp9/common/vp9_entropy.c', - 'vp9/common/vp9_entropymode.c', 'vp9/common/vp9_entropymv.c', - 'vp9/common/vp9_filter.c', - 'vp9/common/vp9_frame_buffers.c', - 'vp9/common/vp9_idct.c', - 'vp9/common/vp9_loopfilter.c', - 'vp9/common/vp9_loopfilter_filters.c', - 'vp9/common/vp9_mvref_common.c', - 'vp9/common/vp9_pred_common.c', - 'vp9/common/vp9_prob.c', - 'vp9/common/vp9_quant_common.c', - 'vp9/common/vp9_reconinter.c', - 'vp9/common/vp9_reconintra.c', 'vp9/common/vp9_rtcd.c', - 'vp9/common/vp9_scale.c', - 'vp9/common/vp9_scan.c', - 'vp9/common/vp9_seg_common.c', - 'vp9/common/vp9_thread.c', - 'vp9/common/vp9_tile_common.c', - 'vp9/decoder/vp9_decodeframe.c', - 'vp9/decoder/vp9_decodemv.c', - 'vp9/decoder/vp9_decoder.c', - 'vp9/decoder/vp9_detokenize.c', - 'vp9/decoder/vp9_dsubexp.c', - 'vp9/decoder/vp9_dthread.c', - 'vp9/decoder/vp9_reader.c', 'vp9/encoder/vp9_bitstream.c', - 'vp9/encoder/vp9_aq_complexity.c', - 'vp9/encoder/vp9_aq_cyclicrefresh.c', - 'vp9/encoder/vp9_aq_variance.c', - 'vp9/encoder/vp9_context_tree.c', - 'vp9/encoder/vp9_cost.c', - 'vp9/encoder/vp9_dct.c', - 'vp9/encoder/vp9_encodeframe.c', - 'vp9/encoder/vp9_encodemb.c', - 'vp9/encoder/vp9_encodemv.c', - 'vp9/encoder/vp9_encoder.c', - 'vp9/encoder/vp9_extend.c', - 'vp9/encoder/vp9_firstpass.c', - 'vp9/encoder/vp9_lookahead.c', - 'vp9/encoder/vp9_mbgraph.c', - 'vp9/encoder/vp9_mcomp.c', - 
'vp9/encoder/vp9_picklpf.c', - 'vp9/encoder/vp9_pickmode.c', - 'vp9/encoder/vp9_quantize.c', - 'vp9/encoder/vp9_ratectrl.c', - 'vp9/encoder/vp9_rd.c', - 'vp9/encoder/vp9_rdopt.c', - 'vp9/encoder/vp9_resize.c', - 'vp9/encoder/vp9_sad.c', - 'vp9/encoder/vp9_segmentation.c', - 'vp9/encoder/vp9_speed_features.c', - 'vp9/encoder/vp9_subexp.c', - 'vp9/encoder/vp9_svc_layercontext.c', - 'vp9/encoder/vp9_temporal_filter.c', - 'vp9/encoder/vp9_tokenize.c', - 'vp9/encoder/vp9_treewriter.c', - 'vp9/encoder/vp9_variance.c', - 'vp9/encoder/vp9_write_bit_buffer.c', - 'vp9/encoder/vp9_writer.c', - 'vp9/vp9_cx_iface.c', - 'vp9/vp9_dx_iface.c', 'vpx/src/svc_encodeframe.c', - 'vpx/src/vpx_encoder.c', 'vpx_mem/vpx_mem.c', - 'vpx_scale/vpx_scale_rtcd.c', - 'vpx_scale/generic/yv12config.c', - 'vpx_scale/generic/yv12extend.c', ] } @@ -308,8 +230,8 @@ manual = [ 'vp8/encoder/boolhuff.c', # 64bit only - 'vp8/common/x86/loopfilter_block_sse2_x86_64.asm', - 'vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm', + 'vp8/common/x86/loopfilter_block_sse2.asm', + 'vp9/encoder/x86/vp9_quantize_ssse3.asm', # offsets are special cased in Makefile.in 'vp8/encoder/vp8_asm_enc_offsets.c', @@ -528,7 +450,9 @@ def update_and_remove_files(prefix, libvpx_files, files): def apply_patches(): # Patch to permit vpx users to specify their own types. - os.system("patch -p0 < stdint.patch") + os.system("patch -p3 < stdint.patch") + os.system("patch -p3 < unified.patch") + os.system("patch -p3 < mingw.patch") def update_readme(commit): with open('README_MOZILLA') as f: diff --git a/media/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c b/media/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c deleted file mode 100644 index 9824a319368..00000000000 --- a/media/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c +++ /dev/null @@ -1,699 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -static const uint8_t bifilter4_coeff[8][2] = { - {128, 0}, - {112, 16}, - { 96, 32}, - { 80, 48}, - { 64, 64}, - { 48, 80}, - { 32, 96}, - { 16, 112} -}; - -void vp8_bilinear_predict4x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8; - uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8; - uint8x16_t q1u8, q2u8; - uint16x8_t q1u16, q2u16; - uint16x8_t q7u16, q8u16, q9u16; - uint64x2_t q4u64, q5u64; - uint64x1_t d12u64; - uint32x2x2_t d0u32x2, d1u32x2, d2u32x2, d3u32x2; - - if (xoffset == 0) { // skip_1stpass_filter - uint32x2_t d28u32 = vdup_n_u32(0); - uint32x2_t d29u32 = vdup_n_u32(0); - uint32x2_t d30u32 = vdup_n_u32(0); - - d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 0); - src_ptr += src_pixels_per_line; - d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 1); - src_ptr += src_pixels_per_line; - d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 0); - src_ptr += src_pixels_per_line; - d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 1); - src_ptr += src_pixels_per_line; - d30u32 = vld1_lane_u32((const uint32_t *)src_ptr, d30u32, 0); - d28u8 = vreinterpret_u8_u32(d28u32); - d29u8 = vreinterpret_u8_u32(d29u32); - d30u8 = vreinterpret_u8_u32(d30u32); - } else { - d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d6u8 = vld1_u8(src_ptr); - - q1u8 = vcombine_u8(d2u8, 
d3u8); - q2u8 = vcombine_u8(d4u8, d5u8); - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8); - q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8); - d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)), - vreinterpret_u32_u8(vget_high_u8(q1u8))); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)), - vreinterpret_u32_u8(vget_high_u8(q2u8))); - d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)), - vreinterpret_u32_u64(vget_high_u64(q4u64))); - d3u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), - vreinterpret_u32_u64(vget_high_u64(q5u64))); - - q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d0u8); - q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d0u8); - q9u16 = vmull_u8(d6u8, d0u8); - - q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d2u32x2.val[0]), d1u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d3u32x2.val[0]), d1u8); - q9u16 = vmlal_u8(q9u16, vreinterpret_u8_u64(d12u64), d1u8); - - d28u8 = vqrshrn_n_u16(q7u16, 7); - d29u8 = vqrshrn_n_u16(q8u16, 7); - d30u8 = vqrshrn_n_u16(q9u16, 7); - } - - // secondpass_filter - if (yoffset == 0) { // skip_2ndpass_filter - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 1); - } else { - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q1u16 = vmull_u8(d28u8, d0u8); - q2u16 = vmull_u8(d29u8, d0u8); - - d26u8 = vext_u8(d28u8, d29u8, 4); - d27u8 = vext_u8(d29u8, d30u8, 4); - - q1u16 = vmlal_u8(q1u16, d26u8, d1u8); - q2u16 = vmlal_u8(q2u16, d27u8, d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); 
- d3u8 = vqrshrn_n_u16(q2u16, 7); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1); - } - return; -} - -void vp8_bilinear_predict8x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8; - uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8; - uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8; - uint16x8_t q1u16, q2u16, q3u16, q4u16; - uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16; - - if (xoffset == 0) { // skip_1stpass_filter - d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d26u8 = vld1_u8(src_ptr); - } else { - q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q5u8 = vld1q_u8(src_ptr); - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - - d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); - d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); - d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d11u8 = 
vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - - q6u16 = vmlal_u8(q6u16, d3u8, d1u8); - q7u16 = vmlal_u8(q7u16, d5u8, d1u8); - q8u16 = vmlal_u8(q8u16, d7u8, d1u8); - q9u16 = vmlal_u8(q9u16, d9u8, d1u8); - q10u16 = vmlal_u8(q10u16, d11u8, d1u8); - - d22u8 = vqrshrn_n_u16(q6u16, 7); - d23u8 = vqrshrn_n_u16(q7u16, 7); - d24u8 = vqrshrn_n_u16(q8u16, 7); - d25u8 = vqrshrn_n_u16(q9u16, 7); - d26u8 = vqrshrn_n_u16(q10u16, 7); - } - - // secondpass_filter - if (yoffset == 0) { // skip_2ndpass_filter - vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d25u8); - } else { - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q1u16 = vmull_u8(d22u8, d0u8); - q2u16 = vmull_u8(d23u8, d0u8); - q3u16 = vmull_u8(d24u8, d0u8); - q4u16 = vmull_u8(d25u8, d0u8); - - q1u16 = vmlal_u8(q1u16, d23u8, d1u8); - q2u16 = vmlal_u8(q2u16, d24u8, d1u8); - q3u16 = vmlal_u8(q3u16, d25u8, d1u8); - q4u16 = vmlal_u8(q4u16, d26u8, d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - - vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d5u8); - } - return; -} - -void vp8_bilinear_predict8x8_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8; - uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8; - uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8; - uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16; - uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16; - - if (xoffset == 0) { // 
skip_1stpass_filter - d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d30u8 = vld1_u8(src_ptr); - } else { - q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - - d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); - d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); - d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - - q6u16 = vmlal_u8(q6u16, d3u8, d1u8); - q7u16 = vmlal_u8(q7u16, d5u8, d1u8); - q8u16 = vmlal_u8(q8u16, d7u8, d1u8); - q9u16 = vmlal_u8(q9u16, d9u8, d1u8); - - d22u8 = vqrshrn_n_u16(q6u16, 7); - d23u8 = vqrshrn_n_u16(q7u16, 7); - d24u8 = vqrshrn_n_u16(q8u16, 7); - d25u8 = vqrshrn_n_u16(q9u16, 7); - - // first_pass filtering on the rest 5-line data - q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q5u8 = vld1q_u8(src_ptr); - - q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); - q7u16 = 
vmull_u8(vget_low_u8(q2u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - - d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); - d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); - d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - - q6u16 = vmlal_u8(q6u16, d3u8, d1u8); - q7u16 = vmlal_u8(q7u16, d5u8, d1u8); - q8u16 = vmlal_u8(q8u16, d7u8, d1u8); - q9u16 = vmlal_u8(q9u16, d9u8, d1u8); - q10u16 = vmlal_u8(q10u16, d11u8, d1u8); - - d26u8 = vqrshrn_n_u16(q6u16, 7); - d27u8 = vqrshrn_n_u16(q7u16, 7); - d28u8 = vqrshrn_n_u16(q8u16, 7); - d29u8 = vqrshrn_n_u16(q9u16, 7); - d30u8 = vqrshrn_n_u16(q10u16, 7); - } - - // secondpass_filter - if (yoffset == 0) { // skip_2ndpass_filter - vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d29u8); - } else { - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q1u16 = vmull_u8(d22u8, d0u8); - q2u16 = vmull_u8(d23u8, d0u8); - q3u16 = vmull_u8(d24u8, d0u8); - q4u16 = vmull_u8(d25u8, d0u8); - q5u16 = vmull_u8(d26u8, d0u8); - q6u16 = vmull_u8(d27u8, d0u8); - q7u16 = vmull_u8(d28u8, d0u8); - q8u16 = vmull_u8(d29u8, d0u8); - - q1u16 = vmlal_u8(q1u16, d23u8, d1u8); - q2u16 = vmlal_u8(q2u16, d24u8, d1u8); - q3u16 = vmlal_u8(q3u16, d25u8, d1u8); - q4u16 = vmlal_u8(q4u16, d26u8, d1u8); - q5u16 = vmlal_u8(q5u16, d27u8, d1u8); - q6u16 = vmlal_u8(q6u16, d28u8, 
d1u8); - q7u16 = vmlal_u8(q7u16, d29u8, d1u8); - q8u16 = vmlal_u8(q8u16, d30u8, d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d9u8); - } - return; -} - -void vp8_bilinear_predict16x16_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - int i; - unsigned char tmp[272]; - unsigned char *tmpp; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8; - uint8x8_t d19u8, d20u8, d21u8; - uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8; - uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8; - uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16; - uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16; - - if (xoffset == 0) { // secondpass_bfilter16x16_only - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q11u8 = vld1q_u8(src_ptr); - src_ptr += src_pixels_per_line; - for (i = 4; i > 0; i--) { - q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - - q1u16 = 
vmull_u8(vget_low_u8(q11u8), d0u8); - q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); - q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); - q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); - q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); - q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); - q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); - - q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); - q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); - q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); - q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); - q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); - q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); - q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); - q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - q1u8 = vcombine_u8(d2u8, d3u8); - q2u8 = vcombine_u8(d4u8, d5u8); - q3u8 = vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - - q11u8 = q15u8; - - vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; - } - return; - } - - if (yoffset == 0) { // firstpass_bfilter16x16_only - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - for (i = 4; i > 0 ; i--) { - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 
16); src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - q7u16 = vmull_u8(d2u8, d0u8); - q8u16 = vmull_u8(d3u8, d0u8); - q9u16 = vmull_u8(d5u8, d0u8); - q10u16 = vmull_u8(d6u8, d0u8); - q11u16 = vmull_u8(d8u8, d0u8); - q12u16 = vmull_u8(d9u8, d0u8); - q13u16 = vmull_u8(d11u8, d0u8); - q14u16 = vmull_u8(d12u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - d11u8 = vext_u8(d11u8, d12u8, 1); - - q7u16 = vmlal_u8(q7u16, d2u8, d1u8); - q9u16 = vmlal_u8(q9u16, d5u8, d1u8); - q11u16 = vmlal_u8(q11u16, d8u8, d1u8); - q13u16 = vmlal_u8(q13u16, d11u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - d12u8 = vext_u8(d12u8, d13u8, 1); - - q8u16 = vmlal_u8(q8u16, d3u8, d1u8); - q10u16 = vmlal_u8(q10u16, d6u8, d1u8); - q12u16 = vmlal_u8(q12u16, d9u8, d1u8); - q14u16 = vmlal_u8(q14u16, d12u8, d1u8); - - d14u8 = vqrshrn_n_u16(q7u16, 7); - d15u8 = vqrshrn_n_u16(q8u16, 7); - d16u8 = vqrshrn_n_u16(q9u16, 7); - d17u8 = vqrshrn_n_u16(q10u16, 7); - d18u8 = vqrshrn_n_u16(q11u16, 7); - d19u8 = vqrshrn_n_u16(q12u16, 7); - d20u8 = vqrshrn_n_u16(q13u16, 7); - d21u8 = vqrshrn_n_u16(q14u16, 7); - - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - q10u8 =vcombine_u8(d20u8, d21u8); - - vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch; - } - return; - } - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr 
+ 8); - d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - // First Pass: output_height lines x output_width columns (17x16) - tmpp = tmp; - for (i = 3; i > 0; i--) { - q7u16 = vmull_u8(d2u8, d0u8); - q8u16 = vmull_u8(d3u8, d0u8); - q9u16 = vmull_u8(d5u8, d0u8); - q10u16 = vmull_u8(d6u8, d0u8); - q11u16 = vmull_u8(d8u8, d0u8); - q12u16 = vmull_u8(d9u8, d0u8); - q13u16 = vmull_u8(d11u8, d0u8); - q14u16 = vmull_u8(d12u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - d11u8 = vext_u8(d11u8, d12u8, 1); - - q7u16 = vmlal_u8(q7u16, d2u8, d1u8); - q9u16 = vmlal_u8(q9u16, d5u8, d1u8); - q11u16 = vmlal_u8(q11u16, d8u8, d1u8); - q13u16 = vmlal_u8(q13u16, d11u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - d12u8 = vext_u8(d12u8, d13u8, 1); - - q8u16 = vmlal_u8(q8u16, d3u8, d1u8); - q10u16 = vmlal_u8(q10u16, d6u8, d1u8); - q12u16 = vmlal_u8(q12u16, d9u8, d1u8); - q14u16 = vmlal_u8(q14u16, d12u8, d1u8); - - d14u8 = vqrshrn_n_u16(q7u16, 7); - d15u8 = vqrshrn_n_u16(q8u16, 7); - d16u8 = vqrshrn_n_u16(q9u16, 7); - d17u8 = vqrshrn_n_u16(q10u16, 7); - d18u8 = vqrshrn_n_u16(q11u16, 7); - d19u8 = vqrshrn_n_u16(q12u16, 7); - d20u8 = vqrshrn_n_u16(q13u16, 7); - d21u8 = vqrshrn_n_u16(q14u16, 7); - - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = 
vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - q10u8 = vcombine_u8(d20u8, d21u8); - - vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16; - } - - // First-pass filtering for rest 5 lines - d14u8 = vld1_u8(src_ptr); - d15u8 = vld1_u8(src_ptr + 8); - d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - q9u16 = vmull_u8(d2u8, d0u8); - q10u16 = vmull_u8(d3u8, d0u8); - q11u16 = vmull_u8(d5u8, d0u8); - q12u16 = vmull_u8(d6u8, d0u8); - q13u16 = vmull_u8(d8u8, d0u8); - q14u16 = vmull_u8(d9u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - - q9u16 = vmlal_u8(q9u16, d2u8, d1u8); - q11u16 = vmlal_u8(q11u16, d5u8, d1u8); - q13u16 = vmlal_u8(q13u16, d8u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - - q10u16 = vmlal_u8(q10u16, d3u8, d1u8); - q12u16 = vmlal_u8(q12u16, d6u8, d1u8); - q14u16 = vmlal_u8(q14u16, d9u8, d1u8); - - q1u16 = vmull_u8(d11u8, d0u8); - q2u16 = vmull_u8(d12u8, d0u8); - q3u16 = vmull_u8(d14u8, d0u8); - q4u16 = vmull_u8(d15u8, d0u8); - - d11u8 = vext_u8(d11u8, d12u8, 1); - d14u8 = vext_u8(d14u8, d15u8, 1); - - q1u16 = vmlal_u8(q1u16, d11u8, d1u8); - q3u16 = vmlal_u8(q3u16, d14u8, d1u8); - - d12u8 = vext_u8(d12u8, d13u8, 1); - d15u8 = vext_u8(d15u8, d16u8, 1); - - q2u16 = vmlal_u8(q2u16, d12u8, d1u8); - q4u16 = vmlal_u8(q4u16, d15u8, d1u8); - - d10u8 = vqrshrn_n_u16(q9u16, 7); - d11u8 = vqrshrn_n_u16(q10u16, 7); - d12u8 = vqrshrn_n_u16(q11u16, 7); - d13u8 = vqrshrn_n_u16(q12u16, 7); - d14u8 = vqrshrn_n_u16(q13u16, 7); - d15u8 = vqrshrn_n_u16(q14u16, 7); - d16u8 = vqrshrn_n_u16(q1u16, 7); - d17u8 = vqrshrn_n_u16(q2u16, 7); - d18u8 = 
vqrshrn_n_u16(q3u16, 7); - d19u8 = vqrshrn_n_u16(q4u16, 7); - - q5u8 = vcombine_u8(d10u8, d11u8); - q6u8 = vcombine_u8(d12u8, d13u8); - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - - vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q9u8); - - // secondpass_filter - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - tmpp = tmp; - q11u8 = vld1q_u8(tmpp); - tmpp += 16; - for (i = 4; i > 0; i--) { - q12u8 = vld1q_u8(tmpp); tmpp += 16; - q13u8 = vld1q_u8(tmpp); tmpp += 16; - q14u8 = vld1q_u8(tmpp); tmpp += 16; - q15u8 = vld1q_u8(tmpp); tmpp += 16; - - q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); - q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); - q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); - q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); - q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); - q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); - q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); - - q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); - q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); - q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); - q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); - q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); - q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); - q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); - q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - q1u8 = vcombine_u8(d2u8, d3u8); - q2u8 = vcombine_u8(d4u8, d5u8); - q3u8 = 
vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - - q11u8 = q15u8; - - vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; - } - return; -} diff --git a/media/libvpx/vp8/common/arm/neon/copymem_neon.c b/media/libvpx/vp8/common/arm/neon/copymem_neon.c deleted file mode 100644 index deced115c14..00000000000 --- a/media/libvpx/vp8/common/arm/neon/copymem_neon.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -void vp8_copy_mem8x4_neon( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) { - uint8x8_t vtmp; - int r; - - for (r = 0; r < 4; r++) { - vtmp = vld1_u8(src); - vst1_u8(dst, vtmp); - src += src_stride; - dst += dst_stride; - } -} - -void vp8_copy_mem8x8_neon( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) { - uint8x8_t vtmp; - int r; - - for (r = 0; r < 8; r++) { - vtmp = vld1_u8(src); - vst1_u8(dst, vtmp); - src += src_stride; - dst += dst_stride; - } -} - -void vp8_copy_mem16x16_neon( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) { - int r; - uint8x16_t qtmp; - - for (r = 0; r < 16; r++) { - qtmp = vld1q_u8(src); - vst1q_u8(dst, qtmp); - src += src_stride; - dst += dst_stride; - } -} diff --git a/media/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c b/media/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c deleted file mode 100644 index ad5f41d7dee..00000000000 --- 
a/media/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -void vp8_dc_only_idct_add_neon( - int16_t input_dc, - unsigned char *pred_ptr, - int pred_stride, - unsigned char *dst_ptr, - int dst_stride) { - int i; - uint16_t a1 = ((input_dc + 4) >> 3); - uint32x2_t d2u32 = vdup_n_u32(0); - uint8x8_t d2u8; - uint16x8_t q1u16; - uint16x8_t qAdd; - - qAdd = vdupq_n_u16(a1); - - for (i = 0; i < 2; i++) { - d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0); - pred_ptr += pred_stride; - d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1); - pred_ptr += pred_stride; - - q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0); - dst_ptr += dst_stride; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1); - dst_ptr += dst_stride; - } -} diff --git a/media/libvpx/vp8/common/arm/neon/dequant_idct_neon.c b/media/libvpx/vp8/common/arm/neon/dequant_idct_neon.c deleted file mode 100644 index 58e11922c76..00000000000 --- a/media/libvpx/vp8/common/arm/neon/dequant_idct_neon.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 35468; - -void vp8_dequant_idct_add_neon( - int16_t *input, - int16_t *dq, - unsigned char *dst, - int stride) { - unsigned char *dst0; - int32x2_t d14, d15; - int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; - int16x8_t q1, q2, q3, q4, q5, q6; - int16x8_t qEmpty = vdupq_n_s16(0); - int32x2x2_t d2tmp0, d2tmp1; - int16x4x2_t d2tmp2, d2tmp3; - - d14 = d15 = vdup_n_s32(0); - - // load input - q3 = vld1q_s16(input); - vst1q_s16(input, qEmpty); - input += 8; - q4 = vld1q_s16(input); - vst1q_s16(input, qEmpty); - - // load dq - q5 = vld1q_s16(dq); - dq += 8; - q6 = vld1q_s16(dq); - - // load src from dst - dst0 = dst; - d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0); - dst0 += stride; - d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1); - dst0 += stride; - d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0); - dst0 += stride; - d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1); - - q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3), - vreinterpretq_u16_s16(q5))); - q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4), - vreinterpretq_u16_s16(q6))); - - d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2)); - d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2)); - - q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2)); - - q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); - q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); - - q3 = vshrq_n_s16(q3, 1); - q4 = vshrq_n_s16(q4, 1); - - q3 = vqaddq_s16(q3, q2); - q4 = vqaddq_s16(q4, q2); - - d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); - d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - d2tmp1 = 
vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), - vreinterpret_s16_s32(d2tmp1.val[0])); - d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), - vreinterpret_s16_s32(d2tmp1.val[1])); - - // loop 2 - q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]); - - q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); - q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); - - d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]); - d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]); - - q3 = vshrq_n_s16(q3, 1); - q4 = vshrq_n_s16(q4, 1); - - q3 = vqaddq_s16(q3, q2); - q4 = vqaddq_s16(q4, q2); - - d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); - d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - d2 = vrshr_n_s16(d2, 3); - d3 = vrshr_n_s16(d3, 3); - d4 = vrshr_n_s16(d4, 3); - d5 = vrshr_n_s16(d5, 3); - - d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), - vreinterpret_s16_s32(d2tmp1.val[0])); - d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), - vreinterpret_s16_s32(d2tmp1.val[1])); - - q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]); - q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]); - - q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1), - vreinterpret_u8_s32(d14))); - q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2), - vreinterpret_u8_s32(d15))); - - d14 = vreinterpret_s32_u8(vqmovun_s16(q1)); - d15 = vreinterpret_s32_u8(vqmovun_s16(q2)); - - dst0 = dst; - vst1_lane_s32((int32_t *)dst0, d14, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d14, 1); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d15, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d15, 1); - return; -} diff --git 
a/media/libvpx/vp8/common/arm/neon/dequantizeb_neon.c b/media/libvpx/vp8/common/arm/neon/dequantizeb_neon.c deleted file mode 100644 index 54e709dd3c3..00000000000 --- a/media/libvpx/vp8/common/arm/neon/dequantizeb_neon.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp8/common/blockd.h" - -void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) { - int16x8x2_t qQ, qDQC, qDQ; - - qQ = vld2q_s16(d->qcoeff); - qDQC = vld2q_s16(DQC); - - qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]); - qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]); - - vst2q_s16(d->dqcoeff, qDQ); -} diff --git a/media/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c b/media/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c deleted file mode 100644 index 967c322804f..00000000000 --- a/media/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include - -void idct_dequant_0_2x_neon( - int16_t *q, - int16_t dq, - unsigned char *dst, - int stride) { - unsigned char *dst0; - int i, a0, a1; - int16x8x2_t q2Add; - int32x2_t d2s32, d4s32; - uint8x8_t d2u8, d4u8; - uint16x8_t q1u16, q2u16; - - a0 = ((q[0] * dq) + 4) >> 3; - a1 = ((q[16] * dq) + 4) >> 3; - q[0] = q[16] = 0; - q2Add.val[0] = vdupq_n_s16((int16_t)a0); - q2Add.val[1] = vdupq_n_s16((int16_t)a1); - - for (i = 0; i < 2; i++, dst += 4) { - dst0 = dst; - d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0); - dst0 += stride; - d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1); - dst0 += stride; - d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0); - dst0 += stride; - d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1); - - q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), - vreinterpret_u8_s32(d2s32)); - q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), - vreinterpret_u8_s32(d4s32)); - - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); - - d2s32 = vreinterpret_s32_u8(d2u8); - d4s32 = vreinterpret_s32_u8(d4u8); - - dst0 = dst; - vst1_lane_s32((int32_t *)dst0, d2s32, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d2s32, 1); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d4s32, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d4s32, 1); - } - return; -} diff --git a/media/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c b/media/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c deleted file mode 100644 index a60ed46b764..00000000000 --- a/media/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 17734; -// because the lowest bit in 0x8a8c is 0, we can pre-shift this - -void idct_dequant_full_2x_neon( - int16_t *q, - int16_t *dq, - unsigned char *dst, - int stride) { - unsigned char *dst0, *dst1; - int32x2_t d28, d29, d30, d31; - int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; - int16x8_t qEmpty = vdupq_n_s16(0); - int32x4x2_t q2tmp0, q2tmp1; - int16x8x2_t q2tmp2, q2tmp3; - int16x4_t dLow0, dLow1, dHigh0, dHigh1; - - d28 = d29 = d30 = d31 = vdup_n_s32(0); - - // load dq - q0 = vld1q_s16(dq); - dq += 8; - q1 = vld1q_s16(dq); - - // load q - q2 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q3 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q4 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q5 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - - // load src from dst - dst0 = dst; - dst1 = dst + 4; - d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0); - dst0 += stride; - d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1); - dst1 += stride; - d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0); - dst0 += stride; - d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1); - dst1 += stride; - - d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0); - dst0 += stride; - d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1); - dst1 += stride; - d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0); - d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1); - - q2 = vmulq_s16(q2, q0); - q3 = vmulq_s16(q3, q1); - q4 = vmulq_s16(q4, q0); - q5 = vmulq_s16(q5, q1); - - // vswp - dLow0 = vget_low_s16(q2); - dHigh0 = vget_high_s16(q2); - dLow1 = vget_low_s16(q4); - dHigh1 = vget_high_s16(q4); - q2 = vcombine_s16(dLow0, dLow1); - q4 = vcombine_s16(dHigh0, dHigh1); - - dLow0 = vget_low_s16(q3); - dHigh0 = vget_high_s16(q3); - dLow1 = vget_low_s16(q5); - dHigh1 = 
vget_high_s16(q5); - q3 = vcombine_s16(dLow0, dLow1); - q5 = vcombine_s16(dHigh0, dHigh1); - - q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2); - q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2); - q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1); - q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1); - - q10 = vqaddq_s16(q2, q3); - q11 = vqsubq_s16(q2, q3); - - q8 = vshrq_n_s16(q8, 1); - q9 = vshrq_n_s16(q9, 1); - - q4 = vqaddq_s16(q4, q8); - q5 = vqaddq_s16(q5, q9); - - q2 = vqsubq_s16(q6, q5); - q3 = vqaddq_s16(q7, q4); - - q4 = vqaddq_s16(q10, q3); - q5 = vqaddq_s16(q11, q2); - q6 = vqsubq_s16(q11, q2); - q7 = vqsubq_s16(q10, q3); - - q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); - q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); - q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), - vreinterpretq_s16_s32(q2tmp1.val[0])); - q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), - vreinterpretq_s16_s32(q2tmp1.val[1])); - - // loop 2 - q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2); - q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2); - q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1); - q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1); - - q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]); - q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]); - - q10 = vshrq_n_s16(q10, 1); - q11 = vshrq_n_s16(q11, 1); - - q10 = vqaddq_s16(q2tmp2.val[1], q10); - q11 = vqaddq_s16(q2tmp3.val[1], q11); - - q8 = vqsubq_s16(q8, q11); - q9 = vqaddq_s16(q9, q10); - - q4 = vqaddq_s16(q2, q9); - q5 = vqaddq_s16(q3, q8); - q6 = vqsubq_s16(q3, q8); - q7 = vqsubq_s16(q2, q9); - - q4 = vrshrq_n_s16(q4, 3); - q5 = vrshrq_n_s16(q5, 3); - q6 = vrshrq_n_s16(q6, 3); - q7 = vrshrq_n_s16(q7, 3); - - q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); - q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); - q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), - vreinterpretq_s16_s32(q2tmp1.val[0])); - q2tmp3 = 
vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), - vreinterpretq_s16_s32(q2tmp1.val[1])); - - q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]), - vreinterpret_u8_s32(d28))); - q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]), - vreinterpret_u8_s32(d29))); - q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]), - vreinterpret_u8_s32(d30))); - q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]), - vreinterpret_u8_s32(d31))); - - d28 = vreinterpret_s32_u8(vqmovun_s16(q4)); - d29 = vreinterpret_s32_u8(vqmovun_s16(q5)); - d30 = vreinterpret_s32_u8(vqmovun_s16(q6)); - d31 = vreinterpret_s32_u8(vqmovun_s16(q7)); - - dst0 = dst; - dst1 = dst + 4; - vst1_lane_s32((int32_t *)dst0, d28, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d28, 1); - dst1 += stride; - vst1_lane_s32((int32_t *)dst0, d29, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d29, 1); - dst1 += stride; - - vst1_lane_s32((int32_t *)dst0, d30, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d30, 1); - dst1 += stride; - vst1_lane_s32((int32_t *)dst0, d31, 0); - vst1_lane_s32((int32_t *)dst1, d31, 1); - return; -} diff --git a/media/libvpx/vp8/common/arm/neon/iwalsh_neon.c b/media/libvpx/vp8/common/arm/neon/iwalsh_neon.c deleted file mode 100644 index 6ea9dd712aa..00000000000 --- a/media/libvpx/vp8/common/arm/neon/iwalsh_neon.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include - -void vp8_short_inv_walsh4x4_neon( - int16_t *input, - int16_t *mb_dqcoeff) { - int16x8_t q0s16, q1s16, q2s16, q3s16; - int16x4_t d4s16, d5s16, d6s16, d7s16; - int16x4x2_t v2tmp0, v2tmp1; - int32x2x2_t v2tmp2, v2tmp3; - int16x8_t qAdd3; - - q0s16 = vld1q_s16(input); - q1s16 = vld1q_s16(input + 8); - - // 1st for loop - d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); - d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); - d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); - d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); - - q2s16 = vcombine_s16(d4s16, d5s16); - q3s16 = vcombine_s16(d6s16, d7s16); - - q0s16 = vaddq_s16(q2s16, q3s16); - q1s16 = vsubq_s16(q2s16, q3s16); - - v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)), - vreinterpret_s32_s16(vget_low_s16(q1s16))); - v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)), - vreinterpret_s32_s16(vget_high_s16(q1s16))); - v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), - vreinterpret_s16_s32(v2tmp3.val[0])); - v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), - vreinterpret_s16_s32(v2tmp3.val[1])); - - // 2nd for loop - d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]); - d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]); - d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]); - d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]); - q2s16 = vcombine_s16(d4s16, d5s16); - q3s16 = vcombine_s16(d6s16, d7s16); - - qAdd3 = vdupq_n_s16(3); - - q0s16 = vaddq_s16(q2s16, q3s16); - q1s16 = vsubq_s16(q2s16, q3s16); - - q0s16 = vaddq_s16(q0s16, qAdd3); - q1s16 = vaddq_s16(q1s16, qAdd3); - - q0s16 = vshrq_n_s16(q0s16, 3); - q1s16 = vshrq_n_s16(q1s16, 3); - - // store - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0); - mb_dqcoeff += 16; 
- - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1); - mb_dqcoeff += 16; - - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2); - mb_dqcoeff += 16; - - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3); - mb_dqcoeff += 16; - return; -} diff --git a/media/libvpx/vp8/common/arm/neon/loopfilter_neon.c b/media/libvpx/vp8/common/arm/neon/loopfilter_neon.c deleted file mode 100644 index 0bec7fb0643..00000000000 --- a/media/libvpx/vp8/common/arm/neon/loopfilter_neon.c +++ /dev/null @@ -1,549 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include "./vpx_config.h" - -static INLINE void vp8_loop_filter_neon( - uint8x16_t qblimit, // flimit - uint8x16_t qlimit, // limit - uint8x16_t qthresh, // thresh - uint8x16_t q3, // p3 - uint8x16_t q4, // p2 - uint8x16_t q5, // p1 - uint8x16_t q6, // p0 - uint8x16_t q7, // q0 - uint8x16_t q8, // q1 - uint8x16_t q9, // q2 - uint8x16_t q10, // q3 - uint8x16_t *q5r, // p1 - uint8x16_t *q6r, // p0 - uint8x16_t *q7r, // q0 - uint8x16_t *q8r) { // q1 - uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8; - int16x8_t q2s16, q11s16; - uint16x8_t q4u16; - int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8; - int8x8_t d2s8, d3s8; - - q11u8 = vabdq_u8(q3, q4); - q12u8 = vabdq_u8(q4, q5); - q13u8 = vabdq_u8(q5, q6); - q14u8 = vabdq_u8(q8, q7); - q3 = vabdq_u8(q9, q8); - q4 = vabdq_u8(q10, q9); - - q11u8 = vmaxq_u8(q11u8, q12u8); - q12u8 = vmaxq_u8(q13u8, q14u8); - q3 = vmaxq_u8(q3, q4); - q15u8 = vmaxq_u8(q11u8, q12u8); - - q9 = vabdq_u8(q6, q7); - - // vp8_hevmask - q13u8 = vcgtq_u8(q13u8, qthresh); - q14u8 = vcgtq_u8(q14u8, qthresh); - q15u8 = vmaxq_u8(q15u8, q3); - - q2u8 = vabdq_u8(q5, q8); - q9 = vqaddq_u8(q9, q9); - - q15u8 = vcgeq_u8(qlimit, q15u8); - - // vp8_filter() function - // convert to signed - q10 = vdupq_n_u8(0x80); - q8 = veorq_u8(q8, q10); - q7 = veorq_u8(q7, q10); - q6 = veorq_u8(q6, q10); - q5 = veorq_u8(q5, q10); - - q2u8 = vshrq_n_u8(q2u8, 1); - q9 = vqaddq_u8(q9, q2u8); - - q10 = vdupq_n_u8(3); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), - vget_low_s8(vreinterpretq_s8_u8(q6))); - q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), - vget_high_s8(vreinterpretq_s8_u8(q6))); - - q9 = vcgeq_u8(qblimit, q9); - - q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), - vreinterpretq_s8_u8(q8)); - - q14u8 = vorrq_u8(q13u8, q14u8); - - q4u16 = vmovl_u8(vget_low_u8(q10)); - q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16)); - q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16)); - - q1u8 = 
vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8); - q15u8 = vandq_u8(q15u8, q9); - - q1s8 = vreinterpretq_s8_u8(q1u8); - q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); - q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8)); - - q9 = vdupq_n_u8(4); - // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) - d2s8 = vqmovn_s16(q2s16); - d3s8 = vqmovn_s16(q11s16); - q1s8 = vcombine_s8(d2s8, d3s8); - q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8); - q1s8 = vreinterpretq_s8_u8(q1u8); - - q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10)); - q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9)); - q2s8 = vshrq_n_s8(q2s8, 3); - q1s8 = vshrq_n_s8(q1s8, 3); - - q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8); - q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8); - - q1s8 = vrshrq_n_s8(q1s8, 1); - q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8); - q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8); - - q0u8 = vdupq_n_u8(0x80); - *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8); - *q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); - *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); - *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8); - return; -} - -void vp8_loop_filter_horizontal_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - src -= (pitch << 2); - - q3 = vld1q_u8(src); - src += pitch; - q4 = vld1q_u8(src); - src += pitch; - q5 = vld1q_u8(src); - src += pitch; - q6 = vld1q_u8(src); - src += pitch; - q7 = vld1q_u8(src); - src += pitch; - q8 = vld1q_u8(src); - src += pitch; - q9 = vld1q_u8(src); - src += pitch; - q10 = vld1q_u8(src); - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - src -= (pitch * 5); - vst1q_u8(src, q5); - src += 
pitch; - vst1q_u8(src, q6); - src += pitch; - vst1q_u8(src, q7); - src += pitch; - vst1q_u8(src, q8); - return; -} - -void vp8_loop_filter_horizontal_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - u -= (pitch << 2); - v -= (pitch << 2); - - d6 = vld1_u8(u); - u += pitch; - d7 = vld1_u8(v); - v += pitch; - d8 = vld1_u8(u); - u += pitch; - d9 = vld1_u8(v); - v += pitch; - d10 = vld1_u8(u); - u += pitch; - d11 = vld1_u8(v); - v += pitch; - d12 = vld1_u8(u); - u += pitch; - d13 = vld1_u8(v); - v += pitch; - d14 = vld1_u8(u); - u += pitch; - d15 = vld1_u8(v); - v += pitch; - d16 = vld1_u8(u); - u += pitch; - d17 = vld1_u8(v); - v += pitch; - d18 = vld1_u8(u); - u += pitch; - d19 = vld1_u8(v); - v += pitch; - d20 = vld1_u8(u); - d21 = vld1_u8(v); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - u -= (pitch * 5); - vst1_u8(u, vget_low_u8(q5)); - u += pitch; - vst1_u8(u, vget_low_u8(q6)); - u += pitch; - vst1_u8(u, vget_low_u8(q7)); - u += pitch; - vst1_u8(u, vget_low_u8(q8)); - - v -= (pitch * 5); - vst1_u8(v, vget_high_u8(q5)); - v += pitch; - vst1_u8(v, vget_high_u8(q6)); - v += pitch; - vst1_u8(v, vget_high_u8(q7)); - v += pitch; - vst1_u8(v, vget_high_u8(q8)); - return; -} - -static INLINE void write_4x8(unsigned char *dst, int pitch, - const uint8x8x4_t result) { -#if (__GNUC__ == 4 && 
(__GNUC_MINOR__ >= 7)) - vst4_lane_u8(dst, result, 0); - dst += pitch; - vst4_lane_u8(dst, result, 1); - dst += pitch; - vst4_lane_u8(dst, result, 2); - dst += pitch; - vst4_lane_u8(dst, result, 3); - dst += pitch; - vst4_lane_u8(dst, result, 4); - dst += pitch; - vst4_lane_u8(dst, result, 5); - dst += pitch; - vst4_lane_u8(dst, result, 6); - dst += pitch; - vst4_lane_u8(dst, result, 7); -#else - /* - * uint8x8x4_t result - 00 01 02 03 | 04 05 06 07 - 10 11 12 13 | 14 15 16 17 - 20 21 22 23 | 24 25 26 27 - 30 31 32 33 | 34 35 36 37 - --- - * after vtrn_u16 - 00 01 20 21 | 04 05 24 25 - 02 03 22 23 | 06 07 26 27 - 10 11 30 31 | 14 15 34 35 - 12 13 32 33 | 16 17 36 37 - --- - * after vtrn_u8 - 00 10 20 30 | 04 14 24 34 - 01 11 21 31 | 05 15 25 35 - 02 12 22 32 | 06 16 26 36 - 03 13 23 33 | 07 17 27 37 - */ - const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]), - vreinterpret_u16_u8(result.val[2])); - const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]), - vreinterpret_u16_u8(result.val[3])); - const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), - vreinterpret_u8_u16(r13_u16.val[0])); - const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), - vreinterpret_u8_u16(r13_u16.val[1])); - const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]); - const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]); - const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]); - const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]); - vst1_lane_u32((uint32_t *)dst, x_0_4, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_1_5, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_2_6, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_3_7, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_0_4, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_1_5, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_2_6, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_3_7, 1); -#endif 
-} - -void vp8_loop_filter_vertical_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - unsigned char *s, *d; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - uint8x8x4_t q4ResultH, q4ResultL; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - s = src - 4; - d6 = vld1_u8(s); - s += pitch; - d8 = vld1_u8(s); - s += pitch; - d10 = vld1_u8(s); - s += pitch; - d12 = vld1_u8(s); - s += pitch; - d14 = vld1_u8(s); - s += pitch; - d16 = vld1_u8(s); - s += pitch; - d18 = vld1_u8(s); - s += pitch; - d20 = vld1_u8(s); - s += pitch; - d7 = vld1_u8(s); - s += pitch; - d9 = vld1_u8(s); - s += pitch; - d11 = vld1_u8(s); - s += pitch; - d13 = vld1_u8(s); - s += pitch; - d15 = vld1_u8(s); - s += pitch; - d17 = vld1_u8(s); - s += pitch; - d19 = vld1_u8(s); - s += pitch; - d21 = vld1_u8(s); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - 
vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - q4ResultL.val[0] = vget_low_u8(q5); // d10 - q4ResultL.val[1] = vget_low_u8(q6); // d12 - q4ResultL.val[2] = vget_low_u8(q7); // d14 - q4ResultL.val[3] = vget_low_u8(q8); // d16 - q4ResultH.val[0] = vget_high_u8(q5); // d11 - q4ResultH.val[1] = vget_high_u8(q6); // d13 - q4ResultH.val[2] = vget_high_u8(q7); // d15 - q4ResultH.val[3] = vget_high_u8(q8); // d17 - - d = src - 2; - write_4x8(d, pitch, q4ResultL); - d += pitch * 8; - write_4x8(d, pitch, q4ResultH); -} - -void vp8_loop_filter_vertical_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - unsigned char *us, *ud; - unsigned char *vs, *vd; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - uint8x8x4_t q4ResultH, q4ResultL; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - us = u - 4; - 
d6 = vld1_u8(us); - us += pitch; - d8 = vld1_u8(us); - us += pitch; - d10 = vld1_u8(us); - us += pitch; - d12 = vld1_u8(us); - us += pitch; - d14 = vld1_u8(us); - us += pitch; - d16 = vld1_u8(us); - us += pitch; - d18 = vld1_u8(us); - us += pitch; - d20 = vld1_u8(us); - - vs = v - 4; - d7 = vld1_u8(vs); - vs += pitch; - d9 = vld1_u8(vs); - vs += pitch; - d11 = vld1_u8(vs); - vs += pitch; - d13 = vld1_u8(vs); - vs += pitch; - d15 = vld1_u8(vs); - vs += pitch; - d17 = vld1_u8(vs); - vs += pitch; - d19 = vld1_u8(vs); - vs += pitch; - d21 = vld1_u8(vs); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = 
q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - q4ResultL.val[0] = vget_low_u8(q5); // d10 - q4ResultL.val[1] = vget_low_u8(q6); // d12 - q4ResultL.val[2] = vget_low_u8(q7); // d14 - q4ResultL.val[3] = vget_low_u8(q8); // d16 - ud = u - 2; - write_4x8(ud, pitch, q4ResultL); - - q4ResultH.val[0] = vget_high_u8(q5); // d11 - q4ResultH.val[1] = vget_high_u8(q6); // d13 - q4ResultH.val[2] = vget_high_u8(q7); // d15 - q4ResultH.val[3] = vget_high_u8(q8); // d17 - vd = v - 2; - write_4x8(vd, pitch, q4ResultH); -} diff --git a/media/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c b/media/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c deleted file mode 100644 index b25686ffb88..00000000000 --- a/media/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include "./vpx_config.h" - -static INLINE void vp8_loop_filter_simple_horizontal_edge_neon( - unsigned char *s, - int p, - const unsigned char *blimit) { - uint8_t *sp; - uint8x16_t qblimit, q0u8; - uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8; - int16x8_t q2s16, q3s16, q13s16; - int8x8_t d8s8, d9s8; - int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8; - - qblimit = vdupq_n_u8(*blimit); - - sp = s - (p << 1); - q5u8 = vld1q_u8(sp); - sp += p; - q6u8 = vld1q_u8(sp); - sp += p; - q7u8 = vld1q_u8(sp); - sp += p; - q8u8 = vld1q_u8(sp); - - q15u8 = vabdq_u8(q6u8, q7u8); - q14u8 = vabdq_u8(q5u8, q8u8); - - q15u8 = vqaddq_u8(q15u8, q15u8); - q14u8 = vshrq_n_u8(q14u8, 1); - q0u8 = vdupq_n_u8(0x80); - q13s16 = vdupq_n_s16(3); - q15u8 = vqaddq_u8(q15u8, q14u8); - - q5u8 = veorq_u8(q5u8, q0u8); - q6u8 = veorq_u8(q6u8, q0u8); - q7u8 = veorq_u8(q7u8, q0u8); - q8u8 = veorq_u8(q8u8, q0u8); - - q15u8 = vcgeq_u8(qblimit, q15u8); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)), - vget_low_s8(vreinterpretq_s8_u8(q6u8))); - q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)), - vget_high_s8(vreinterpretq_s8_u8(q6u8))); - - q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8), - vreinterpretq_s8_u8(q8u8)); - - q2s16 = vmulq_s16(q2s16, q13s16); - q3s16 = vmulq_s16(q3s16, q13s16); - - q10u8 = vdupq_n_u8(3); - q9u8 = vdupq_n_u8(4); - - q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8)); - q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8)); - - d8s8 = vqmovn_s16(q2s16); - d9s8 = vqmovn_s16(q3s16); - q4s8 = vcombine_s8(d8s8, d9s8); - - q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8)); - - q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8)); - q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8)); - q2s8 = vshrq_n_s8(q2s8, 3); - q3s8 = vshrq_n_s8(q3s8, 3); - - q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8); - q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8); - - q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); - q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), 
q0u8); - - vst1q_u8(s, q7u8); - s -= p; - vst1q_u8(s, q6u8); - return; -} - -void vp8_loop_filter_bhs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - y_ptr += y_stride * 4; - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - y_ptr += y_stride * 4; - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - y_ptr += y_stride * 4; - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - return; -} - -void vp8_loop_filter_mbhs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - return; -} diff --git a/media/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c b/media/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c deleted file mode 100644 index d5178bbae14..00000000000 --- a/media/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include "./vpx_config.h" - -#if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) -static INLINE void write_2x8(unsigned char *dst, int pitch, - const uint8x8x2_t result, - const uint8x8x2_t result2) { - vst2_lane_u8(dst, result, 0); - dst += pitch; - vst2_lane_u8(dst, result, 1); - dst += pitch; - vst2_lane_u8(dst, result, 2); - dst += pitch; - vst2_lane_u8(dst, result, 3); - dst += pitch; - vst2_lane_u8(dst, result, 4); - dst += pitch; - vst2_lane_u8(dst, result, 5); - dst += pitch; - vst2_lane_u8(dst, result, 6); - dst += pitch; - vst2_lane_u8(dst, result, 7); - dst += pitch; - - vst2_lane_u8(dst, result2, 0); - dst += pitch; - vst2_lane_u8(dst, result2, 1); - dst += pitch; - vst2_lane_u8(dst, result2, 2); - dst += pitch; - vst2_lane_u8(dst, result2, 3); - dst += pitch; - vst2_lane_u8(dst, result2, 4); - dst += pitch; - vst2_lane_u8(dst, result2, 5); - dst += pitch; - vst2_lane_u8(dst, result2, 6); - dst += pitch; - vst2_lane_u8(dst, result2, 7); -} -#else -static INLINE void write_2x4(unsigned char *dst, int pitch, - const uint8x8x2_t result) { - /* - * uint8x8x2_t result - 00 01 02 03 | 04 05 06 07 - 10 11 12 13 | 14 15 16 17 - --- - * after vtrn_u8 - 00 10 02 12 | 04 14 06 16 - 01 11 03 13 | 05 15 07 17 - */ - const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0], - result.val[1]); - const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]); - const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]); - vst1_lane_u16((uint16_t *)dst, x_0_4, 0); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 0); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_0_4, 1); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 1); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_0_4, 2); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 2); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_0_4, 3); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 3); -} - -static INLINE void write_2x8(unsigned char *dst, int pitch, - const uint8x8x2_t result, - 
const uint8x8x2_t result2) { - write_2x4(dst, pitch, result); - dst += pitch * 8; - write_2x4(dst, pitch, result2); -} -#endif - - -#if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) -static INLINE -uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { - x = vld4_lane_u8(src, x, 0); - src += pitch; - x = vld4_lane_u8(src, x, 1); - src += pitch; - x = vld4_lane_u8(src, x, 2); - src += pitch; - x = vld4_lane_u8(src, x, 3); - src += pitch; - x = vld4_lane_u8(src, x, 4); - src += pitch; - x = vld4_lane_u8(src, x, 5); - src += pitch; - x = vld4_lane_u8(src, x, 6); - src += pitch; - x = vld4_lane_u8(src, x, 7); - return x; -} -#else -static INLINE -uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { - const uint8x8_t a = vld1_u8(src); - const uint8x8_t b = vld1_u8(src + pitch * 1); - const uint8x8_t c = vld1_u8(src + pitch * 2); - const uint8x8_t d = vld1_u8(src + pitch * 3); - const uint8x8_t e = vld1_u8(src + pitch * 4); - const uint8x8_t f = vld1_u8(src + pitch * 5); - const uint8x8_t g = vld1_u8(src + pitch * 6); - const uint8x8_t h = vld1_u8(src + pitch * 7); - const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a), - vreinterpret_u32_u8(e)); - const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b), - vreinterpret_u32_u8(f)); - const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c), - vreinterpret_u32_u8(g)); - const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d), - vreinterpret_u32_u8(h)); - const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]), - vreinterpret_u16_u32(r26_u32.val[0])); - const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]), - vreinterpret_u16_u32(r37_u32.val[0])); - const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), - vreinterpret_u8_u16(r13_u16.val[0])); - const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), - vreinterpret_u8_u16(r13_u16.val[1])); - /* - * after vtrn_u32 - 00 01 02 03 | 40 41 42 43 - 10 11 12 13 | 
50 51 52 53 - 20 21 22 23 | 60 61 62 63 - 30 31 32 33 | 70 71 72 73 - --- - * after vtrn_u16 - 00 01 20 21 | 40 41 60 61 - 02 03 22 23 | 42 43 62 63 - 10 11 30 31 | 50 51 70 71 - 12 13 32 33 | 52 52 72 73 - - 00 01 20 21 | 40 41 60 61 - 10 11 30 31 | 50 51 70 71 - 02 03 22 23 | 42 43 62 63 - 12 13 32 33 | 52 52 72 73 - --- - * after vtrn_u8 - 00 10 20 30 | 40 50 60 70 - 01 11 21 31 | 41 51 61 71 - 02 12 22 32 | 42 52 62 72 - 03 13 23 33 | 43 53 63 73 - */ - x.val[0] = r01_u8.val[0]; - x.val[1] = r01_u8.val[1]; - x.val[2] = r23_u8.val[0]; - x.val[3] = r23_u8.val[1]; - - return x; -} -#endif - -static INLINE void vp8_loop_filter_simple_vertical_edge_neon( - unsigned char *s, - int p, - const unsigned char *blimit) { - unsigned char *src1; - uint8x16_t qblimit, q0u8; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8; - int16x8_t q2s16, q13s16, q11s16; - int8x8_t d28s8, d29s8; - int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8; - uint8x8x4_t d0u8x4; // d6, d7, d8, d9 - uint8x8x4_t d1u8x4; // d10, d11, d12, d13 - uint8x8x2_t d2u8x2; // d12, d13 - uint8x8x2_t d3u8x2; // d14, d15 - - qblimit = vdupq_n_u8(*blimit); - - src1 = s - 2; - d0u8x4 = read_4x8(src1, p, d0u8x4); - src1 += p * 8; - d1u8x4 = read_4x8(src1, p, d1u8x4); - - q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10 - q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12 - q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11 - q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13 - - q15u8 = vabdq_u8(q5u8, q4u8); - q14u8 = vabdq_u8(q3u8, q6u8); - - q15u8 = vqaddq_u8(q15u8, q15u8); - q14u8 = vshrq_n_u8(q14u8, 1); - q0u8 = vdupq_n_u8(0x80); - q11s16 = vdupq_n_s16(3); - q15u8 = vqaddq_u8(q15u8, q14u8); - - q3u8 = veorq_u8(q3u8, q0u8); - q4u8 = veorq_u8(q4u8, q0u8); - q5u8 = veorq_u8(q5u8, q0u8); - q6u8 = veorq_u8(q6u8, q0u8); - - q15u8 = vcgeq_u8(qblimit, q15u8); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)), - vget_low_s8(vreinterpretq_s8_u8(q5u8))); - q13s16 
= vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)), - vget_high_s8(vreinterpretq_s8_u8(q5u8))); - - q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8), - vreinterpretq_s8_u8(q6u8)); - - q2s16 = vmulq_s16(q2s16, q11s16); - q13s16 = vmulq_s16(q13s16, q11s16); - - q11u8 = vdupq_n_u8(3); - q12u8 = vdupq_n_u8(4); - - q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8)); - q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8)); - - d28s8 = vqmovn_s16(q2s16); - d29s8 = vqmovn_s16(q13s16); - q14s8 = vcombine_s8(d28s8, d29s8); - - q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8)); - - q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8)); - q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8)); - q2s8 = vshrq_n_s8(q2s8, 3); - q14s8 = vshrq_n_s8(q3s8, 3); - - q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8); - q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8); - - q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); - q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); - - d2u8x2.val[0] = vget_low_u8(q6u8); // d12 - d2u8x2.val[1] = vget_low_u8(q7u8); // d14 - d3u8x2.val[0] = vget_high_u8(q6u8); // d13 - d3u8x2.val[1] = vget_high_u8(q7u8); // d15 - - src1 = s - 1; - write_2x8(src1, p, d2u8x2, d3u8x2); -} - -void vp8_loop_filter_bvs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - y_ptr += 4; - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - y_ptr += 4; - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - y_ptr += 4; - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - return; -} - -void vp8_loop_filter_mbvs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - return; -} diff --git a/media/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c b/media/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c deleted file mode 100644 index 5351f4be665..00000000000 --- a/media/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c +++ 
/dev/null @@ -1,625 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "./vpx_config.h" - -static INLINE void vp8_mbloop_filter_neon( - uint8x16_t qblimit, // mblimit - uint8x16_t qlimit, // limit - uint8x16_t qthresh, // thresh - uint8x16_t q3, // p2 - uint8x16_t q4, // p2 - uint8x16_t q5, // p1 - uint8x16_t q6, // p0 - uint8x16_t q7, // q0 - uint8x16_t q8, // q1 - uint8x16_t q9, // q2 - uint8x16_t q10, // q3 - uint8x16_t *q4r, // p1 - uint8x16_t *q5r, // p1 - uint8x16_t *q6r, // p0 - uint8x16_t *q7r, // q0 - uint8x16_t *q8r, // q1 - uint8x16_t *q9r) { // q1 - uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8; - int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8; - uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16; - int8x16_t q0s8, q12s8, q14s8, q15s8; - int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29; - - q11u8 = vabdq_u8(q3, q4); - q12u8 = vabdq_u8(q4, q5); - q13u8 = vabdq_u8(q5, q6); - q14u8 = vabdq_u8(q8, q7); - q1u8 = vabdq_u8(q9, q8); - q0u8 = vabdq_u8(q10, q9); - - q11u8 = vmaxq_u8(q11u8, q12u8); - q12u8 = vmaxq_u8(q13u8, q14u8); - q1u8 = vmaxq_u8(q1u8, q0u8); - q15u8 = vmaxq_u8(q11u8, q12u8); - - q12u8 = vabdq_u8(q6, q7); - - // vp8_hevmask - q13u8 = vcgtq_u8(q13u8, qthresh); - q14u8 = vcgtq_u8(q14u8, qthresh); - q15u8 = vmaxq_u8(q15u8, q1u8); - - q15u8 = vcgeq_u8(qlimit, q15u8); - - q1u8 = vabdq_u8(q5, q8); - q12u8 = vqaddq_u8(q12u8, q12u8); - - // vp8_filter() function - // convert to signed - q0u8 = vdupq_n_u8(0x80); - q9 = veorq_u8(q9, q0u8); - q8 = veorq_u8(q8, q0u8); - q7 = veorq_u8(q7, 
q0u8); - q6 = veorq_u8(q6, q0u8); - q5 = veorq_u8(q5, q0u8); - q4 = veorq_u8(q4, q0u8); - - q1u8 = vshrq_n_u8(q1u8, 1); - q12u8 = vqaddq_u8(q12u8, q1u8); - - q14u8 = vorrq_u8(q13u8, q14u8); - q12u8 = vcgeq_u8(qblimit, q12u8); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), - vget_low_s8(vreinterpretq_s8_u8(q6))); - q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), - vget_high_s8(vreinterpretq_s8_u8(q6))); - - q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), - vreinterpretq_s8_u8(q8)); - - q11s16 = vdupq_n_s16(3); - q2s16 = vmulq_s16(q2s16, q11s16); - q13s16 = vmulq_s16(q13s16, q11s16); - - q15u8 = vandq_u8(q15u8, q12u8); - - q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); - q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8)); - - q12u8 = vdupq_n_u8(3); - q11u8 = vdupq_n_u8(4); - // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) - d2 = vqmovn_s16(q2s16); - d3 = vqmovn_s16(q13s16); - q1s8 = vcombine_s8(d2, d3); - q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8)); - q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8)); - q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8)); - q2s8 = vshrq_n_s8(q2s8, 3); - q13s8 = vshrq_n_s8(q13s8, 3); - - q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8); - q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8); - - q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63); - d5 = vdup_n_s8(9); - d4 = vdup_n_s8(18); - - q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5); - q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5); - d5 = vdup_n_s8(27); - q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4); - q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4); - q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8), d5); - q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5); - - d0 = vqshrn_n_s16(q0s16 , 7); - d1 
= vqshrn_n_s16(q11s16, 7); - d24 = vqshrn_n_s16(q12s16, 7); - d25 = vqshrn_n_s16(q13s16, 7); - d28 = vqshrn_n_s16(q14s16, 7); - d29 = vqshrn_n_s16(q15s16, 7); - - q0s8 = vcombine_s8(d0, d1); - q12s8 = vcombine_s8(d24, d25); - q14s8 = vcombine_s8(d28, d29); - - q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8); - q0s8 = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8); - q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8); - q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8); - q15s8 = vqsubq_s8((q7s8), q14s8); - q14s8 = vqaddq_s8((q6s8), q14s8); - - q1u8 = vdupq_n_u8(0x80); - *q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8); - *q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8); - *q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8); - *q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8); - *q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8); - *q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8); - return; -} - -void vp8_mbloop_filter_horizontal_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - src -= (pitch << 2); - - q3 = vld1q_u8(src); - src += pitch; - q4 = vld1q_u8(src); - src += pitch; - q5 = vld1q_u8(src); - src += pitch; - q6 = vld1q_u8(src); - src += pitch; - q7 = vld1q_u8(src); - src += pitch; - q8 = vld1q_u8(src); - src += pitch; - q9 = vld1q_u8(src); - src += pitch; - q10 = vld1q_u8(src); - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - src -= (pitch * 6); - vst1q_u8(src, q4); - src += pitch; - vst1q_u8(src, q5); - src += pitch; - vst1q_u8(src, q6); - src += pitch; - vst1q_u8(src, q7); - src += pitch; - vst1q_u8(src, q8); - src += pitch; - vst1q_u8(src, q9); - return; -} - -void vp8_mbloop_filter_horizontal_edge_uv_neon( - unsigned char *u, - int 
pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - u -= (pitch << 2); - v -= (pitch << 2); - - d6 = vld1_u8(u); - u += pitch; - d7 = vld1_u8(v); - v += pitch; - d8 = vld1_u8(u); - u += pitch; - d9 = vld1_u8(v); - v += pitch; - d10 = vld1_u8(u); - u += pitch; - d11 = vld1_u8(v); - v += pitch; - d12 = vld1_u8(u); - u += pitch; - d13 = vld1_u8(v); - v += pitch; - d14 = vld1_u8(u); - u += pitch; - d15 = vld1_u8(v); - v += pitch; - d16 = vld1_u8(u); - u += pitch; - d17 = vld1_u8(v); - v += pitch; - d18 = vld1_u8(u); - u += pitch; - d19 = vld1_u8(v); - v += pitch; - d20 = vld1_u8(u); - d21 = vld1_u8(v); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - u -= (pitch * 6); - v -= (pitch * 6); - vst1_u8(u, vget_low_u8(q4)); - u += pitch; - vst1_u8(v, vget_high_u8(q4)); - v += pitch; - vst1_u8(u, vget_low_u8(q5)); - u += pitch; - vst1_u8(v, vget_high_u8(q5)); - v += pitch; - vst1_u8(u, vget_low_u8(q6)); - u += pitch; - vst1_u8(v, vget_high_u8(q6)); - v += pitch; - vst1_u8(u, vget_low_u8(q7)); - u += pitch; - vst1_u8(v, vget_high_u8(q7)); - v += pitch; - vst1_u8(u, vget_low_u8(q8)); - u += pitch; - vst1_u8(v, vget_high_u8(q8)); - v += pitch; - vst1_u8(u, vget_low_u8(q9)); - vst1_u8(v, vget_high_u8(q9)); - return; -} - -void vp8_mbloop_filter_vertical_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char 
limit, - unsigned char thresh) { - unsigned char *s1, *s2; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - s1 = src - 4; - s2 = s1 + 8 * pitch; - d6 = vld1_u8(s1); - s1 += pitch; - d7 = vld1_u8(s2); - s2 += pitch; - d8 = vld1_u8(s1); - s1 += pitch; - d9 = vld1_u8(s2); - s2 += pitch; - d10 = vld1_u8(s1); - s1 += pitch; - d11 = vld1_u8(s2); - s2 += pitch; - d12 = vld1_u8(s1); - s1 += pitch; - d13 = vld1_u8(s2); - s2 += pitch; - d14 = vld1_u8(s1); - s1 += pitch; - d15 = vld1_u8(s2); - s2 += pitch; - d16 = vld1_u8(s1); - s1 += pitch; - d17 = vld1_u8(s2); - s2 += pitch; - d18 = vld1_u8(s1); - s1 += pitch; - d19 = vld1_u8(s2); - s2 += pitch; - d20 = vld1_u8(s1); - d21 = vld1_u8(s2); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - 
vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = 
q2tmp11.val[1]; - - s1 -= 7 * pitch; - s2 -= 7 * pitch; - - vst1_u8(s1, vget_low_u8(q3)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q3)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q4)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q4)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q5)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q5)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q6)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q6)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q7)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q7)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q8)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q8)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q9)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q9)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q10)); - vst1_u8(s2, vget_high_u8(q10)); - return; -} - -void vp8_mbloop_filter_vertical_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - unsigned char *us, *ud; - unsigned char *vs, *vd; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - us = u - 4; - vs = v - 4; - d6 = vld1_u8(us); - us += pitch; - d7 = vld1_u8(vs); - vs += pitch; - d8 = vld1_u8(us); - us += pitch; - d9 = vld1_u8(vs); - vs += pitch; - d10 = vld1_u8(us); - us += pitch; - d11 = vld1_u8(vs); - vs += pitch; - d12 = vld1_u8(us); - us += pitch; - d13 = vld1_u8(vs); - vs += pitch; - d14 = vld1_u8(us); - us += pitch; - d15 = vld1_u8(vs); - vs += pitch; - d16 = vld1_u8(us); - us += pitch; - d17 = vld1_u8(vs); - vs += pitch; - d18 = vld1_u8(us); - us += pitch; - d19 = vld1_u8(vs); - vs += pitch; - d20 = 
vld1_u8(us); - d21 = vld1_u8(vs); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = 
vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - ud = u - 4; - vst1_u8(ud, vget_low_u8(q3)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q4)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q5)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q6)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q7)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q8)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q9)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q10)); - - vd = v - 4; - vst1_u8(vd, vget_high_u8(q3)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q4)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q5)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q6)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q7)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q8)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q9)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q10)); - return; -} diff --git a/media/libvpx/vp8/common/arm/neon/reconintra_neon.c b/media/libvpx/vp8/common/arm/neon/reconintra_neon.c deleted file mode 100644 index af52cd5ea51..00000000000 --- a/media/libvpx/vp8/common/arm/neon/reconintra_neon.c +++ /dev/null @@ -1,210 +0,0 
@@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp8/common/blockd.h" - -void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) { - const int mode = x->mode_info_context->mbmi.mode; - int i; - - switch (mode) { - case DC_PRED: - { - int shift = x->up_available + x->left_available; - uint8x16_t v_expected_dc = vdupq_n_u8(128); - - if (shift) { - unsigned int average = 0; - int expected_dc; - if (x->up_available) { - const uint8x16_t v_above = vld1q_u8(yabove_row); - const uint16x8_t a = vpaddlq_u8(v_above); - const uint32x4_t b = vpaddlq_u16(a); - const uint64x2_t c = vpaddlq_u32(b); - const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)), - vreinterpret_u32_u64(vget_high_u64(c))); - average = vget_lane_u32(d, 0); - } - if (x->left_available) { - for (i = 0; i < 16; ++i) { - average += yleft[0]; - yleft += left_stride; - } - } - shift += 3; - expected_dc = (average + (1 << (shift - 1))) >> shift; - v_expected_dc = vmovq_n_u8((uint8_t)expected_dc); - } - for (i = 0; i < 16; ++i) { - vst1q_u8(ypred_ptr, v_expected_dc); - ypred_ptr += y_stride; - } - } - break; - case V_PRED: - { - const uint8x16_t v_above = vld1q_u8(yabove_row); - for (i = 0; i < 16; ++i) { - vst1q_u8(ypred_ptr, v_above); - ypred_ptr += y_stride; - } - } - break; - case H_PRED: - { - for (i = 0; i < 16; ++i) { - const uint8x16_t v_yleft = vmovq_n_u8((uint8_t)yleft[0]); - yleft += left_stride; - vst1q_u8(ypred_ptr, v_yleft); - ypred_ptr += y_stride; - } - } - break; - case TM_PRED: - 
{ - const uint16x8_t v_ytop_left = vmovq_n_u16((int16_t)yabove_row[-1]); - const uint8x16_t v_above = vld1q_u8(yabove_row); - for (i = 0; i < 16; ++i) { - const uint8x8_t v_yleft = vmov_n_u8((int8_t)yleft[0]); - const uint16x8_t a_lo = vaddl_u8(vget_low_u8(v_above), v_yleft); - const uint16x8_t a_hi = vaddl_u8(vget_high_u8(v_above), v_yleft); - const int16x8_t b_lo = vsubq_s16(vreinterpretq_s16_u16(a_lo), - vreinterpretq_s16_u16(v_ytop_left)); - const int16x8_t b_hi = vsubq_s16(vreinterpretq_s16_u16(a_hi), - vreinterpretq_s16_u16(v_ytop_left)); - const uint8x8_t pred_lo = vqmovun_s16(b_lo); - const uint8x8_t pred_hi = vqmovun_s16(b_hi); - - vst1q_u8(ypred_ptr, vcombine_u8(pred_lo, pred_hi)); - ypred_ptr += y_stride; - yleft += left_stride; - } - } - break; - } -} - -void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x, - unsigned char * uabove_row, - unsigned char * vabove_row, - unsigned char * uleft, - unsigned char * vleft, - int left_stride, - unsigned char * upred_ptr, - unsigned char * vpred_ptr, - int pred_stride) { - const int mode = x->mode_info_context->mbmi.uv_mode; - int i; - - switch (mode) { - case DC_PRED: - { - int shift = x->up_available + x->left_available; - uint8x8_t v_expected_udc = vdup_n_u8(128); - uint8x8_t v_expected_vdc = vdup_n_u8(128); - - if (shift) { - unsigned int average_u = 0; - unsigned int average_v = 0; - int expected_udc; - int expected_vdc; - if (x->up_available) { - const uint8x8_t v_uabove = vld1_u8(uabove_row); - const uint8x8_t v_vabove = vld1_u8(vabove_row); - const uint16x8_t a = vpaddlq_u8(vcombine_u8(v_uabove, v_vabove)); - const uint32x4_t b = vpaddlq_u16(a); - const uint64x2_t c = vpaddlq_u32(b); - average_u = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 0); - average_v = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 2); - } - if (x->left_available) { - for (i = 0; i < 8; ++i) { - average_u += uleft[0]; - uleft += left_stride; - average_v += vleft[0]; - vleft += left_stride; - } - } - shift += 2; - expected_udc = 
(average_u + (1 << (shift - 1))) >> shift; - expected_vdc = (average_v + (1 << (shift - 1))) >> shift; - v_expected_udc = vmov_n_u8((uint8_t)expected_udc); - v_expected_vdc = vmov_n_u8((uint8_t)expected_vdc); - } - for (i = 0; i < 8; ++i) { - vst1_u8(upred_ptr, v_expected_udc); - upred_ptr += pred_stride; - vst1_u8(vpred_ptr, v_expected_vdc); - vpred_ptr += pred_stride; - } - } - break; - case V_PRED: - { - const uint8x8_t v_uabove = vld1_u8(uabove_row); - const uint8x8_t v_vabove = vld1_u8(vabove_row); - for (i = 0; i < 8; ++i) { - vst1_u8(upred_ptr, v_uabove); - upred_ptr += pred_stride; - vst1_u8(vpred_ptr, v_vabove); - vpred_ptr += pred_stride; - } - } - break; - case H_PRED: - { - for (i = 0; i < 8; ++i) { - const uint8x8_t v_uleft = vmov_n_u8((uint8_t)uleft[0]); - const uint8x8_t v_vleft = vmov_n_u8((uint8_t)vleft[0]); - uleft += left_stride; - vleft += left_stride; - vst1_u8(upred_ptr, v_uleft); - upred_ptr += pred_stride; - vst1_u8(vpred_ptr, v_vleft); - vpred_ptr += pred_stride; - } - } - break; - case TM_PRED: - { - const uint16x8_t v_utop_left = vmovq_n_u16((int16_t)uabove_row[-1]); - const uint16x8_t v_vtop_left = vmovq_n_u16((int16_t)vabove_row[-1]); - const uint8x8_t v_uabove = vld1_u8(uabove_row); - const uint8x8_t v_vabove = vld1_u8(vabove_row); - for (i = 0; i < 8; ++i) { - const uint8x8_t v_uleft = vmov_n_u8((int8_t)uleft[0]); - const uint8x8_t v_vleft = vmov_n_u8((int8_t)vleft[0]); - const uint16x8_t a_u = vaddl_u8(v_uabove, v_uleft); - const uint16x8_t a_v = vaddl_u8(v_vabove, v_vleft); - const int16x8_t b_u = vsubq_s16(vreinterpretq_s16_u16(a_u), - vreinterpretq_s16_u16(v_utop_left)); - const int16x8_t b_v = vsubq_s16(vreinterpretq_s16_u16(a_v), - vreinterpretq_s16_u16(v_vtop_left)); - const uint8x8_t pred_u = vqmovun_s16(b_u); - const uint8x8_t pred_v = vqmovun_s16(b_v); - - vst1_u8(upred_ptr, pred_u); - vst1_u8(vpred_ptr, pred_v); - upred_ptr += pred_stride; - vpred_ptr += pred_stride; - uleft += left_stride; - vleft += left_stride; - } - } - 
break; - } -} diff --git a/media/libvpx/vp8/common/arm/neon/sad_neon.c b/media/libvpx/vp8/common/arm/neon/sad_neon.c deleted file mode 100644 index 6595ac0519b..00000000000 --- a/media/libvpx/vp8/common/arm/neon/sad_neon.c +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -unsigned int vp8_sad8x8_neon( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride) { - uint8x8_t d0, d8; - uint16x8_t q12; - uint32x4_t q1; - uint64x2_t q3; - uint32x2_t d5; - int i; - - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabdl_u8(d0, d8); - - for (i = 0; i < 7; i++) { - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabal_u8(q12, d0, d8); - } - - q1 = vpaddlq_u16(q12); - q3 = vpaddlq_u32(q1); - d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), - vreinterpret_u32_u64(vget_high_u64(q3))); - - return vget_lane_u32(d5, 0); -} - -unsigned int vp8_sad8x16_neon( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride) { - uint8x8_t d0, d8; - uint16x8_t q12; - uint32x4_t q1; - uint64x2_t q3; - uint32x2_t d5; - int i; - - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabdl_u8(d0, d8); - - for (i = 0; i < 15; i++) { - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabal_u8(q12, d0, d8); - } - - q1 = vpaddlq_u16(q12); - q3 = vpaddlq_u32(q1); - d5 = 
vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), - vreinterpret_u32_u64(vget_high_u64(q3))); - - return vget_lane_u32(d5, 0); -} - -unsigned int vp8_sad4x4_neon( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride) { - uint8x8_t d0, d8; - uint16x8_t q12; - uint32x2_t d1; - uint64x1_t d3; - int i; - - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabdl_u8(d0, d8); - - for (i = 0; i < 3; i++) { - d0 = vld1_u8(src_ptr); - src_ptr += src_stride; - d8 = vld1_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabal_u8(q12, d0, d8); - } - - d1 = vpaddl_u16(vget_low_u16(q12)); - d3 = vpaddl_u32(d1); - - return vget_lane_u32(vreinterpret_u32_u64(d3), 0); -} - -unsigned int vp8_sad16x16_neon( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride) { - uint8x16_t q0, q4; - uint16x8_t q12, q13; - uint32x4_t q1; - uint64x2_t q3; - uint32x2_t d5; - int i; - - q0 = vld1q_u8(src_ptr); - src_ptr += src_stride; - q4 = vld1q_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4)); - q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4)); - - for (i = 0; i < 15; i++) { - q0 = vld1q_u8(src_ptr); - src_ptr += src_stride; - q4 = vld1q_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4)); - q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4)); - } - - q12 = vaddq_u16(q12, q13); - q1 = vpaddlq_u16(q12); - q3 = vpaddlq_u32(q1); - d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), - vreinterpret_u32_u64(vget_high_u64(q3))); - - return vget_lane_u32(d5, 0); -} - -unsigned int vp8_sad16x8_neon( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride) { - uint8x16_t q0, q4; - uint16x8_t q12, q13; - uint32x4_t q1; - uint64x2_t q3; - uint32x2_t d5; - int i; - - q0 = vld1q_u8(src_ptr); - src_ptr += src_stride; - q4 = vld1q_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = 
vabdl_u8(vget_low_u8(q0), vget_low_u8(q4)); - q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4)); - - for (i = 0; i < 7; i++) { - q0 = vld1q_u8(src_ptr); - src_ptr += src_stride; - q4 = vld1q_u8(ref_ptr); - ref_ptr += ref_stride; - q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4)); - q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4)); - } - - q12 = vaddq_u16(q12, q13); - q1 = vpaddlq_u16(q12); - q3 = vpaddlq_u32(q1); - d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), - vreinterpret_u32_u64(vget_high_u64(q3))); - - return vget_lane_u32(d5, 0); -} diff --git a/media/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c b/media/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c deleted file mode 100644 index 373afa6ed35..00000000000 --- a/media/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 35468; - -void vp8_short_idct4x4llm_neon( - int16_t *input, - unsigned char *pred_ptr, - int pred_stride, - unsigned char *dst_ptr, - int dst_stride) { - int i; - uint32x2_t d6u32 = vdup_n_u32(0); - uint8x8_t d1u8; - int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; - uint16x8_t q1u16; - int16x8_t q1s16, q2s16, q3s16, q4s16; - int32x2x2_t v2tmp0, v2tmp1; - int16x4x2_t v2tmp2, v2tmp3; - - d2 = vld1_s16(input); - d3 = vld1_s16(input + 4); - d4 = vld1_s16(input + 8); - d5 = vld1_s16(input + 12); - - // 1st for loop - q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here - q2s16 = vcombine_s16(d3, d5); - - q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2); - q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1); - - d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 - d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 - - q3s16 = vshrq_n_s16(q3s16, 1); - q4s16 = vshrq_n_s16(q4s16, 1); - - q3s16 = vqaddq_s16(q3s16, q2s16); - q4s16 = vqaddq_s16(q4s16, q2s16); - - d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 - d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]), - vreinterpret_s16_s32(v2tmp1.val[0])); - v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]), - vreinterpret_s16_s32(v2tmp1.val[1])); - - // 2nd for loop - q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]); - q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]); - - q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2); - q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1); - - d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 - d13 = 
vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 - - q3s16 = vshrq_n_s16(q3s16, 1); - q4s16 = vshrq_n_s16(q4s16, 1); - - q3s16 = vqaddq_s16(q3s16, q2s16); - q4s16 = vqaddq_s16(q4s16, q2s16); - - d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 - d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - d2 = vrshr_n_s16(d2, 3); - d3 = vrshr_n_s16(d3, 3); - d4 = vrshr_n_s16(d4, 3); - d5 = vrshr_n_s16(d5, 3); - - v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]), - vreinterpret_s16_s32(v2tmp1.val[0])); - v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]), - vreinterpret_s16_s32(v2tmp1.val[1])); - - q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]); - q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]); - - // dc_only_idct_add - for (i = 0; i < 2; i++, q1s16 = q2s16) { - d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0); - pred_ptr += pred_stride; - d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1); - pred_ptr += pred_stride; - - q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16), - vreinterpret_u8_u32(d6u32)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0); - dst_ptr += dst_stride; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1); - dst_ptr += dst_stride; - } - return; -} diff --git a/media/libvpx/vp8/common/arm/neon/sixtappredict_neon.c b/media/libvpx/vp8/common/arm/neon/sixtappredict_neon.c deleted file mode 100644 index 4c2efc92b13..00000000000 --- a/media/libvpx/vp8/common/arm/neon/sixtappredict_neon.c +++ /dev/null @@ -1,1754 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "vpx_ports/mem.h" - -static const int8_t vp8_sub_pel_filters[8][8] = { - {0, 0, 128, 0, 0, 0, 0, 0}, /* note that 1/8 pel positionyys are */ - {0, -6, 123, 12, -1, 0, 0, 0}, /* just as per alpha -0.5 bicubic */ - {2, -11, 108, 36, -8, 1, 0, 0}, /* New 1/4 pel 6 tap filter */ - {0, -9, 93, 50, -6, 0, 0, 0}, - {3, -16, 77, 77, -16, 3, 0, 0}, /* New 1/2 pel 6 tap filter */ - {0, -6, 50, 93, -9, 0, 0, 0}, - {1, -8, 36, 108, -11, 2, 0, 0}, /* New 1/4 pel 6 tap filter */ - {0, -1, 12, 123, -6, 0, 0, 0}, -}; - -void vp8_sixtap_predict4x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d18u8, d19u8, d20u8, d21u8; - uint8x8_t d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; - uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q11u8; - uint64x2_t q3u64, q4u64, q5u64, q6u64, q9u64, q10u64; - uint32x2x2_t d0u32x2, d1u32x2; - - if (xoffset == 0) { // secondpass_filter4x4_only - uint32x2_t d27u32 = vdup_n_u32(0); - uint32x2_t d28u32 = vdup_n_u32(0); - uint32x2_t d29u32 = vdup_n_u32(0); - uint32x2_t d30u32 = vdup_n_u32(0); - uint32x2_t d31u32 = vdup_n_u32(0); - - // load second_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = 
vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src = src_ptr - src_pixels_per_line * 2; - d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 0); - src += src_pixels_per_line; - d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 1); - src += src_pixels_per_line; - d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 0); - src += src_pixels_per_line; - d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 1); - src += src_pixels_per_line; - d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 0); - src += src_pixels_per_line; - d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 1); - src += src_pixels_per_line; - d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 0); - src += src_pixels_per_line; - d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 1); - src += src_pixels_per_line; - d31u32 = vld1_lane_u32((const uint32_t *)src, d31u32, 0); - - d27u8 = vreinterpret_u8_u32(d27u32); - d28u8 = vreinterpret_u8_u32(d28u32); - d29u8 = vreinterpret_u8_u32(d29u32); - d30u8 = vreinterpret_u8_u32(d30u32); - d31u8 = vreinterpret_u8_u32(d31u32); - - d23u8 = vext_u8(d27u8, d28u8, 4); - d24u8 = vext_u8(d28u8, d29u8, 4); - d25u8 = vext_u8(d29u8, d30u8, 4); - d26u8 = vext_u8(d30u8, d31u8, 4); - - q3u16 = vmull_u8(d27u8, d0u8); - q4u16 = vmull_u8(d28u8, d0u8); - q5u16 = vmull_u8(d25u8, d5u8); - q6u16 = vmull_u8(d26u8, d5u8); - - q3u16 = vmlsl_u8(q3u16, d29u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d30u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d23u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d24u8, d1u8); - - q3u16 = vmlal_u8(q3u16, d28u8, d2u8); - q4u16 = vmlal_u8(q4u16, d29u8, d2u8); - q5u16 = vmlal_u8(q5u16, d24u8, d3u8); - 
q6u16 = vmlal_u8(q6u16, d25u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - - q5s16 = vqaddq_s16(q5s16, q3s16); - q6s16 = vqaddq_s16(q6s16, q4s16); - - d3u8 = vqrshrun_n_s16(q5s16, 7); - d4u8 = vqrshrun_n_s16(q6s16, 7); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1); - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - - if (yoffset == 0) // firstpass_filter4x4_only - src = src_ptr - 2; - else - src = src_ptr - 2 - (src_pixels_per_line * 2); - - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - - d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - - // vswp here - q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8)); - q5u8 = 
vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8)); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19 - vreinterpret_u32_u8(d19u8)); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21 - vreinterpret_u32_u8(d21u8)); - q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8); - q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8); - - // keep original src data in q4 q6 - q4u64 = vreinterpretq_u64_u8(q3u8); - q6u64 = vreinterpretq_u64_u8(q5u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7 - vreinterpret_u32_u8(vget_high_u8(q3u8))); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11 - vreinterpret_u32_u8(vget_high_u8(q5u8))); - q9u64 = vshrq_n_u64(q4u64, 8); - q10u64 = vshrq_n_u64(q6u64, 8); - q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19 - vreinterpret_u32_u64(vget_high_u64(q9u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211 - vreinterpret_u32_u64(vget_high_u64(q10u64))); - q3u64 = vshrq_n_u64(q4u64, 32); - q5u64 = vshrq_n_u64(q6u64, 32); - q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8); - q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7 - vreinterpret_u32_u64(vget_high_u64(q3u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11 - vreinterpret_u32_u64(vget_high_u64(q5u64))); - q9u64 = vshrq_n_u64(q4u64, 16); - q10u64 = vshrq_n_u64(q6u64, 16); - q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8); - q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19 - vreinterpret_u32_u64(vget_high_u64(q9u64))); - d1u32x2 = 
vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211 - vreinterpret_u32_u64(vget_high_u64(q10u64))); - q3u64 = vshrq_n_u64(q4u64, 24); - q5u64 = vshrq_n_u64(q6u64, 24); - q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7 - vreinterpret_u32_u64(vget_high_u64(q3u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11 - vreinterpret_u32_u64(vget_high_u64(q5u64))); - q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8); - q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8); - - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q7s16 = vqaddq_s16(q7s16, q9s16); - q8s16 = vqaddq_s16(q8s16, q10s16); - - d27u8 = vqrshrun_n_s16(q7s16, 7); - d28u8 = vqrshrun_n_s16(q8s16, 7); - - if (yoffset == 0) { // firstpass_filter4x4_only - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1); - return; - } - - // First Pass on rest 5-line data - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - q11u8 = vld1q_u8(src); - - d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - - // vswp here - q3u8 = vcombine_u8(vget_low_u8(q3u8), 
vget_low_u8(q4u8)); - q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8)); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19 - vreinterpret_u32_u8(d19u8)); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21 - vreinterpret_u32_u8(d21u8)); - d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 5); - q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8); - q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8); - q12u16 = vmull_u8(d31u8, d5u8); - - q4u64 = vreinterpretq_u64_u8(q3u8); - q6u64 = vreinterpretq_u64_u8(q5u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7 - vreinterpret_u32_u8(vget_high_u8(q3u8))); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11 - vreinterpret_u32_u8(vget_high_u8(q5u8))); - q9u64 = vshrq_n_u64(q4u64, 8); - q10u64 = vshrq_n_u64(q6u64, 8); - q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8); - q12u16 = vmlal_u8(q12u16, vget_low_u8(q11u8), d0u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19 - vreinterpret_u32_u64(vget_high_u64(q9u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211 - vreinterpret_u32_u64(vget_high_u64(q10u64))); - q3u64 = vshrq_n_u64(q4u64, 32); - q5u64 = vshrq_n_u64(q6u64, 32); - d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 1); - q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8); - q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7 - vreinterpret_u32_u64(vget_high_u64(q3u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11 - vreinterpret_u32_u64(vget_high_u64(q5u64))); - q9u64 = vshrq_n_u64(q4u64, 16); - q10u64 = vshrq_n_u64(q6u64, 16); - d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 4); - 
q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8); - q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19 - vreinterpret_u32_u64(vget_high_u64(q9u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211 - vreinterpret_u32_u64(vget_high_u64(q10u64))); - q3u64 = vshrq_n_u64(q4u64, 24); - q5u64 = vshrq_n_u64(q6u64, 24); - d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 2); - q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8); - q12u16 = vmlal_u8(q12u16, d31u8, d2u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7 - vreinterpret_u32_u64(vget_high_u64(q3u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11 - vreinterpret_u32_u64(vget_high_u64(q5u64))); - d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 3); - q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8); - q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8); - q11u16 = vmull_u8(d31u8, d3u8); - - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - q7s16 = vqaddq_s16(q7s16, q9s16); - q8s16 = vqaddq_s16(q8s16, q10s16); - q12s16 = vqaddq_s16(q12s16, q11s16); - - d29u8 = vqrshrun_n_s16(q7s16, 7); - d30u8 = vqrshrun_n_s16(q8s16, 7); - d31u8 = vqrshrun_n_s16(q12s16, 7); - - // Second pass: 4x4 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = 
vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - d23u8 = vext_u8(d27u8, d28u8, 4); - d24u8 = vext_u8(d28u8, d29u8, 4); - d25u8 = vext_u8(d29u8, d30u8, 4); - d26u8 = vext_u8(d30u8, d31u8, 4); - - q3u16 = vmull_u8(d27u8, d0u8); - q4u16 = vmull_u8(d28u8, d0u8); - q5u16 = vmull_u8(d25u8, d5u8); - q6u16 = vmull_u8(d26u8, d5u8); - - q3u16 = vmlsl_u8(q3u16, d29u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d30u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d23u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d24u8, d1u8); - - q3u16 = vmlal_u8(q3u16, d28u8, d2u8); - q4u16 = vmlal_u8(q4u16, d29u8, d2u8); - q5u16 = vmlal_u8(q5u16, d24u8, d3u8); - q6u16 = vmlal_u8(q6u16, d25u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - - q5s16 = vqaddq_s16(q5s16, q3s16); - q6s16 = vqaddq_s16(q6s16, q4s16); - - d3u8 = vqrshrun_n_s16(q5s16, 7); - d4u8 = vqrshrun_n_s16(q6s16, 7); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1); - return; -} - -void vp8_sixtap_predict8x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8; - uint8x8_t d27u8, d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; - uint16x8_t 
q8u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8; - - if (xoffset == 0) { // secondpass_filter8x4_only - // load second_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src = src_ptr - src_pixels_per_line * 2; - d22u8 = vld1_u8(src); - src += src_pixels_per_line; - d23u8 = vld1_u8(src); - src += src_pixels_per_line; - d24u8 = vld1_u8(src); - src += src_pixels_per_line; - d25u8 = vld1_u8(src); - src += src_pixels_per_line; - d26u8 = vld1_u8(src); - src += src_pixels_per_line; - d27u8 = vld1_u8(src); - src += src_pixels_per_line; - d28u8 = vld1_u8(src); - src += src_pixels_per_line; - d29u8 = vld1_u8(src); - src += src_pixels_per_line; - d30u8 = vld1_u8(src); - - q3u16 = vmull_u8(d22u8, d0u8); - q4u16 = vmull_u8(d23u8, d0u8); - q5u16 = vmull_u8(d24u8, d0u8); - q6u16 = vmull_u8(d25u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d24u8, d2u8); - q4u16 = vmlal_u8(q4u16, d25u8, d2u8); - q5u16 = vmlal_u8(q5u16, d26u8, d2u8); - q6u16 = vmlal_u8(q6u16, d27u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d27u8, d5u8); - q4u16 = vmlal_u8(q4u16, d28u8, d5u8); 
- q5u16 = vmlal_u8(q5u16, d29u8, d5u8); - q6u16 = vmlal_u8(q6u16, d30u8, d5u8); - - q7u16 = vmull_u8(d25u8, d3u8); - q8u16 = vmull_u8(d26u8, d3u8); - q9u16 = vmull_u8(d27u8, d3u8); - q10u16 = vmull_u8(d28u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - if (yoffset == 0) // firstpass_filter4x4_only - src = src_ptr - 2; - else - src = src_ptr - 2 - (src_pixels_per_line * 2); - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - - q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - 
q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - - q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - - q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - - q7u16 = vmlal_u8(q7u16, d28u8, d2u8); - q8u16 = vmlal_u8(q8u16, d29u8, d2u8); - q9u16 = vmlal_u8(q9u16, d30u8, d2u8); - q10u16 = vmlal_u8(q10u16, d31u8, d2u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - - q7u16 = vmlal_u8(q7u16, d28u8, d5u8); - q8u16 = vmlal_u8(q8u16, d29u8, d5u8); - q9u16 = vmlal_u8(q9u16, d30u8, d5u8); - q10u16 = vmlal_u8(q10u16, d31u8, d5u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d31u8 = vext_u8(vget_low_u8(q6u8), 
vget_high_u8(q6u8), 3); - - q3u16 = vmull_u8(d28u8, d3u8); - q4u16 = vmull_u8(d29u8, d3u8); - q5u16 = vmull_u8(d30u8, d3u8); - q6u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d22u8 = vqrshrun_n_s16(q7s16, 7); - d23u8 = vqrshrun_n_s16(q8s16, 7); - d24u8 = vqrshrun_n_s16(q9s16, 7); - d25u8 = vqrshrun_n_s16(q10s16, 7); - - if (yoffset == 0) { // firstpass_filter8x4_only - vst1_u8(dst_ptr, d22u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d23u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d24u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d25u8); - return; - } - - // First Pass on rest 5-line data - src += src_pixels_per_line; - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - q7u8 = vld1q_u8(src); - - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); - - q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); - 
q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); - - q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); - - q8u16 = vmlal_u8(q8u16, d27u8, d2u8); - q9u16 = vmlal_u8(q9u16, d28u8, d2u8); - q10u16 = vmlal_u8(q10u16, d29u8, d2u8); - q11u16 = vmlal_u8(q11u16, d30u8, d2u8); - q12u16 = vmlal_u8(q12u16, d31u8, d2u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); - - q8u16 = vmlal_u8(q8u16, d27u8, d5u8); - q9u16 = vmlal_u8(q9u16, d28u8, d5u8); - q10u16 = vmlal_u8(q10u16, d29u8, d5u8); - q11u16 = vmlal_u8(q11u16, d30u8, d5u8); - q12u16 = vmlal_u8(q12u16, d31u8, d5u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); - - q3u16 = vmull_u8(d27u8, d3u8); - q4u16 = vmull_u8(d28u8, 
d3u8); - q5u16 = vmull_u8(d29u8, d3u8); - q6u16 = vmull_u8(d30u8, d3u8); - q7u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - - q8s16 = vqaddq_s16(q8s16, q3s16); - q9s16 = vqaddq_s16(q9s16, q4s16); - q10s16 = vqaddq_s16(q10s16, q5s16); - q11s16 = vqaddq_s16(q11s16, q6s16); - q12s16 = vqaddq_s16(q12s16, q7s16); - - d26u8 = vqrshrun_n_s16(q8s16, 7); - d27u8 = vqrshrun_n_s16(q9s16, 7); - d28u8 = vqrshrun_n_s16(q10s16, 7); - d29u8 = vqrshrun_n_s16(q11s16, 7); - d30u8 = vqrshrun_n_s16(q12s16, 7); - - // Second pass: 8x4 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - q3u16 = vmull_u8(d22u8, d0u8); - q4u16 = vmull_u8(d23u8, d0u8); - q5u16 = vmull_u8(d24u8, d0u8); - q6u16 = vmull_u8(d25u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d24u8, d2u8); - q4u16 = vmlal_u8(q4u16, d25u8, d2u8); - q5u16 = vmlal_u8(q5u16, 
d26u8, d2u8); - q6u16 = vmlal_u8(q6u16, d27u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d27u8, d5u8); - q4u16 = vmlal_u8(q4u16, d28u8, d5u8); - q5u16 = vmlal_u8(q5u16, d29u8, d5u8); - q6u16 = vmlal_u8(q6u16, d30u8, d5u8); - - q7u16 = vmull_u8(d25u8, d3u8); - q8u16 = vmull_u8(d26u8, d3u8); - q9u16 = vmull_u8(d27u8, d3u8); - q10u16 = vmull_u8(d28u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - return; -} - -void vp8_sixtap_predict8x8_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src, *tmpp; - unsigned char tmp[64]; - int i; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d18u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8, d25u8; - uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; - uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q9u8, q10u8, q11u8, q12u8; - - if (xoffset == 0) { // secondpass_filter8x8_only - // load second_pass filter - dtmps8 = 
vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src = src_ptr - src_pixels_per_line * 2; - d18u8 = vld1_u8(src); - src += src_pixels_per_line; - d19u8 = vld1_u8(src); - src += src_pixels_per_line; - d20u8 = vld1_u8(src); - src += src_pixels_per_line; - d21u8 = vld1_u8(src); - src += src_pixels_per_line; - d22u8 = vld1_u8(src); - src += src_pixels_per_line; - d23u8 = vld1_u8(src); - src += src_pixels_per_line; - d24u8 = vld1_u8(src); - src += src_pixels_per_line; - d25u8 = vld1_u8(src); - src += src_pixels_per_line; - d26u8 = vld1_u8(src); - src += src_pixels_per_line; - d27u8 = vld1_u8(src); - src += src_pixels_per_line; - d28u8 = vld1_u8(src); - src += src_pixels_per_line; - d29u8 = vld1_u8(src); - src += src_pixels_per_line; - d30u8 = vld1_u8(src); - - for (i = 2; i > 0; i--) { - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, 
d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = d25u8; - d22u8 = d26u8; - d23u8 = d27u8; - d24u8 = d28u8; - d25u8 = d29u8; - d26u8 = d30u8; - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - dst_ptr += dst_pitch; - } - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - if (yoffset == 0) // firstpass_filter4x4_only - src = src_ptr - 2; - else - src = src_ptr - 2 - (src_pixels_per_line * 2); - - tmpp = tmp; - for (i = 2; i > 0; i--) { - q3u8 = vld1q_u8(src); - src += 
src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - - __builtin_prefetch(src); - __builtin_prefetch(src + src_pixels_per_line); - __builtin_prefetch(src + src_pixels_per_line * 2); - - q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - - q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - - q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - - q7u16 = vmlal_u8(q7u16, d28u8, d2u8); - q8u16 = vmlal_u8(q8u16, d29u8, d2u8); - q9u16 = vmlal_u8(q9u16, d30u8, d2u8); - q10u16 = vmlal_u8(q10u16, d31u8, d2u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - - q7u16 = 
vmlal_u8(q7u16, d28u8, d5u8); - q8u16 = vmlal_u8(q8u16, d29u8, d5u8); - q9u16 = vmlal_u8(q9u16, d30u8, d5u8); - q10u16 = vmlal_u8(q10u16, d31u8, d5u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - - q3u16 = vmull_u8(d28u8, d3u8); - q4u16 = vmull_u8(d29u8, d3u8); - q5u16 = vmull_u8(d30u8, d3u8); - q6u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d22u8 = vqrshrun_n_s16(q7s16, 7); - d23u8 = vqrshrun_n_s16(q8s16, 7); - d24u8 = vqrshrun_n_s16(q9s16, 7); - d25u8 = vqrshrun_n_s16(q10s16, 7); - - if (yoffset == 0) { // firstpass_filter8x4_only - vst1_u8(dst_ptr, d22u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d23u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d24u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d25u8); - dst_ptr += dst_pitch; - } else { - vst1_u8(tmpp, d22u8); - tmpp += 8; - vst1_u8(tmpp, d23u8); - tmpp += 8; - vst1_u8(tmpp, d24u8); - tmpp += 8; - vst1_u8(tmpp, d25u8); - tmpp += 8; - } - } - if (yoffset == 0) - return; - - // First Pass on rest 5-line data - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - q7u8 = vld1q_u8(src); - - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = 
vmull_u8(vget_low_u8(q5u8), d0u8); - q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); - - q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); - - q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); - - q8u16 = vmlal_u8(q8u16, d27u8, d2u8); - q9u16 = vmlal_u8(q9u16, d28u8, d2u8); - q10u16 = vmlal_u8(q10u16, d29u8, d2u8); - q11u16 = vmlal_u8(q11u16, d30u8, d2u8); - q12u16 = vmlal_u8(q12u16, d31u8, d2u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); - - q8u16 = 
vmlal_u8(q8u16, d27u8, d5u8); - q9u16 = vmlal_u8(q9u16, d28u8, d5u8); - q10u16 = vmlal_u8(q10u16, d29u8, d5u8); - q11u16 = vmlal_u8(q11u16, d30u8, d5u8); - q12u16 = vmlal_u8(q12u16, d31u8, d5u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); - - q3u16 = vmull_u8(d27u8, d3u8); - q4u16 = vmull_u8(d28u8, d3u8); - q5u16 = vmull_u8(d29u8, d3u8); - q6u16 = vmull_u8(d30u8, d3u8); - q7u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - - q8s16 = vqaddq_s16(q8s16, q3s16); - q9s16 = vqaddq_s16(q9s16, q4s16); - q10s16 = vqaddq_s16(q10s16, q5s16); - q11s16 = vqaddq_s16(q11s16, q6s16); - q12s16 = vqaddq_s16(q12s16, q7s16); - - d26u8 = vqrshrun_n_s16(q8s16, 7); - d27u8 = vqrshrun_n_s16(q9s16, 7); - d28u8 = vqrshrun_n_s16(q10s16, 7); - d29u8 = vqrshrun_n_s16(q11s16, 7); - d30u8 = vqrshrun_n_s16(q12s16, 7); - - // Second pass: 8x8 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - 
tmpp = tmp; - q9u8 = vld1q_u8(tmpp); - tmpp += 16; - q10u8 = vld1q_u8(tmpp); - tmpp += 16; - q11u8 = vld1q_u8(tmpp); - tmpp += 16; - q12u8 = vld1q_u8(tmpp); - - d18u8 = vget_low_u8(q9u8); - d19u8 = vget_high_u8(q9u8); - d20u8 = vget_low_u8(q10u8); - d21u8 = vget_high_u8(q10u8); - d22u8 = vget_low_u8(q11u8); - d23u8 = vget_high_u8(q11u8); - d24u8 = vget_low_u8(q12u8); - d25u8 = vget_high_u8(q12u8); - - for (i = 2; i > 0; i--) { - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - 
d20u8 = d24u8; - d21u8 = d25u8; - d22u8 = d26u8; - d23u8 = d27u8; - d24u8 = d28u8; - d25u8 = d29u8; - d26u8 = d30u8; - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - dst_ptr += dst_pitch; - } - return; -} - -void vp8_sixtap_predict16x16_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src, *src_tmp, *dst, *tmpp; - unsigned char tmp[336]; - int i, j; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d18u8, d19u8; - uint8x8_t d20u8, d21u8, d22u8, d23u8, d24u8, d25u8, d26u8, d27u8; - uint8x8_t d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint8x16_t q3u8, q4u8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16, q8u16, q9u16, q10u16; - uint16x8_t q11u16, q12u16, q13u16, q15u16; - int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16, q8s16, q9s16, q10s16; - int16x8_t q11s16, q12s16, q13s16, q15s16; - - if (xoffset == 0) { // secondpass_filter8x8_only - // load second_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src_tmp = src_ptr - src_pixels_per_line * 2; - for (i = 0; i < 2; i++) { - src = src_tmp + i * 8; - dst = dst_ptr + i * 8; - d18u8 = vld1_u8(src); - src += src_pixels_per_line; - d19u8 = vld1_u8(src); - src += src_pixels_per_line; - 
d20u8 = vld1_u8(src); - src += src_pixels_per_line; - d21u8 = vld1_u8(src); - src += src_pixels_per_line; - d22u8 = vld1_u8(src); - src += src_pixels_per_line; - for (j = 0; j < 4; j++) { - d23u8 = vld1_u8(src); - src += src_pixels_per_line; - d24u8 = vld1_u8(src); - src += src_pixels_per_line; - d25u8 = vld1_u8(src); - src += src_pixels_per_line; - d26u8 = vld1_u8(src); - src += src_pixels_per_line; - - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = 
d25u8; - d22u8 = d26u8; - - vst1_u8(dst, d6u8); - dst += dst_pitch; - vst1_u8(dst, d7u8); - dst += dst_pitch; - vst1_u8(dst, d8u8); - dst += dst_pitch; - vst1_u8(dst, d9u8); - dst += dst_pitch; - } - } - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - if (yoffset == 0) { // firstpass_filter4x4_only - src = src_ptr - 2; - dst = dst_ptr; - for (i = 0; i < 8; i++) { - d6u8 = vld1_u8(src); - d7u8 = vld1_u8(src + 8); - d8u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - d9u8 = vld1_u8(src); - d10u8 = vld1_u8(src + 8); - d11u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - - __builtin_prefetch(src); - __builtin_prefetch(src + src_pixels_per_line); - - q6u16 = vmull_u8(d6u8, d0u8); - q7u16 = vmull_u8(d7u8, d0u8); - q8u16 = vmull_u8(d9u8, d0u8); - q9u16 = vmull_u8(d10u8, d0u8); - - d20u8 = vext_u8(d6u8, d7u8, 1); - d21u8 = vext_u8(d9u8, d10u8, 1); - d22u8 = vext_u8(d7u8, d8u8, 1); - d23u8 = vext_u8(d10u8, d11u8, 1); - d24u8 = vext_u8(d6u8, d7u8, 4); - d25u8 = vext_u8(d9u8, d10u8, 4); - d26u8 = vext_u8(d7u8, d8u8, 4); - d27u8 = vext_u8(d10u8, d11u8, 4); - d28u8 = vext_u8(d6u8, d7u8, 5); - d29u8 = vext_u8(d9u8, d10u8, 5); - - q6u16 = vmlsl_u8(q6u16, d20u8, d1u8); - q8u16 = vmlsl_u8(q8u16, d21u8, d1u8); - q7u16 = vmlsl_u8(q7u16, d22u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d23u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d24u8, d4u8); - q8u16 = vmlsl_u8(q8u16, d25u8, d4u8); - q7u16 = vmlsl_u8(q7u16, d26u8, 
d4u8); - q9u16 = vmlsl_u8(q9u16, d27u8, d4u8); - q6u16 = vmlal_u8(q6u16, d28u8, d5u8); - q8u16 = vmlal_u8(q8u16, d29u8, d5u8); - - d20u8 = vext_u8(d7u8, d8u8, 5); - d21u8 = vext_u8(d10u8, d11u8, 5); - d22u8 = vext_u8(d6u8, d7u8, 2); - d23u8 = vext_u8(d9u8, d10u8, 2); - d24u8 = vext_u8(d7u8, d8u8, 2); - d25u8 = vext_u8(d10u8, d11u8, 2); - d26u8 = vext_u8(d6u8, d7u8, 3); - d27u8 = vext_u8(d9u8, d10u8, 3); - d28u8 = vext_u8(d7u8, d8u8, 3); - d29u8 = vext_u8(d10u8, d11u8, 3); - - q7u16 = vmlal_u8(q7u16, d20u8, d5u8); - q9u16 = vmlal_u8(q9u16, d21u8, d5u8); - q6u16 = vmlal_u8(q6u16, d22u8, d2u8); - q8u16 = vmlal_u8(q8u16, d23u8, d2u8); - q7u16 = vmlal_u8(q7u16, d24u8, d2u8); - q9u16 = vmlal_u8(q9u16, d25u8, d2u8); - - q10u16 = vmull_u8(d26u8, d3u8); - q11u16 = vmull_u8(d27u8, d3u8); - q12u16 = vmull_u8(d28u8, d3u8); - q15u16 = vmull_u8(d29u8, d3u8); - - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - q15s16 = vreinterpretq_s16_u16(q15u16); - - q6s16 = vqaddq_s16(q6s16, q10s16); - q8s16 = vqaddq_s16(q8s16, q11s16); - q7s16 = vqaddq_s16(q7s16, q12s16); - q9s16 = vqaddq_s16(q9s16, q15s16); - - d6u8 = vqrshrun_n_s16(q6s16, 7); - d7u8 = vqrshrun_n_s16(q7s16, 7); - d8u8 = vqrshrun_n_s16(q8s16, 7); - d9u8 = vqrshrun_n_s16(q9s16, 7); - - q3u8 = vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - vst1q_u8(dst, q3u8); - dst += dst_pitch; - vst1q_u8(dst, q4u8); - dst += dst_pitch; - } - return; - } - - src = src_ptr - 2 - src_pixels_per_line * 2; - tmpp = tmp; - for (i = 0; i < 7; i++) { - d6u8 = vld1_u8(src); - d7u8 = vld1_u8(src + 8); - d8u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - d9u8 = vld1_u8(src); - d10u8 = vld1_u8(src + 8); - d11u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - d12u8 = vld1_u8(src); - d13u8 = 
vld1_u8(src + 8); - d14u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - - __builtin_prefetch(src); - __builtin_prefetch(src + src_pixels_per_line); - __builtin_prefetch(src + src_pixels_per_line * 2); - - q8u16 = vmull_u8(d6u8, d0u8); - q9u16 = vmull_u8(d7u8, d0u8); - q10u16 = vmull_u8(d9u8, d0u8); - q11u16 = vmull_u8(d10u8, d0u8); - q12u16 = vmull_u8(d12u8, d0u8); - q13u16 = vmull_u8(d13u8, d0u8); - - d28u8 = vext_u8(d6u8, d7u8, 1); - d29u8 = vext_u8(d9u8, d10u8, 1); - d30u8 = vext_u8(d12u8, d13u8, 1); - q8u16 = vmlsl_u8(q8u16, d28u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); - q12u16 = vmlsl_u8(q12u16, d30u8, d1u8); - d28u8 = vext_u8(d7u8, d8u8, 1); - d29u8 = vext_u8(d10u8, d11u8, 1); - d30u8 = vext_u8(d13u8, d14u8, 1); - q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); - q11u16 = vmlsl_u8(q11u16, d29u8, d1u8); - q13u16 = vmlsl_u8(q13u16, d30u8, d1u8); - - d28u8 = vext_u8(d6u8, d7u8, 4); - d29u8 = vext_u8(d9u8, d10u8, 4); - d30u8 = vext_u8(d12u8, d13u8, 4); - q8u16 = vmlsl_u8(q8u16, d28u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); - q12u16 = vmlsl_u8(q12u16, d30u8, d4u8); - d28u8 = vext_u8(d7u8, d8u8, 4); - d29u8 = vext_u8(d10u8, d11u8, 4); - d30u8 = vext_u8(d13u8, d14u8, 4); - q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); - q11u16 = vmlsl_u8(q11u16, d29u8, d4u8); - q13u16 = vmlsl_u8(q13u16, d30u8, d4u8); - - d28u8 = vext_u8(d6u8, d7u8, 5); - d29u8 = vext_u8(d9u8, d10u8, 5); - d30u8 = vext_u8(d12u8, d13u8, 5); - q8u16 = vmlal_u8(q8u16, d28u8, d5u8); - q10u16 = vmlal_u8(q10u16, d29u8, d5u8); - q12u16 = vmlal_u8(q12u16, d30u8, d5u8); - d28u8 = vext_u8(d7u8, d8u8, 5); - d29u8 = vext_u8(d10u8, d11u8, 5); - d30u8 = vext_u8(d13u8, d14u8, 5); - q9u16 = vmlal_u8(q9u16, d28u8, d5u8); - q11u16 = vmlal_u8(q11u16, d29u8, d5u8); - q13u16 = vmlal_u8(q13u16, d30u8, d5u8); - - d28u8 = vext_u8(d6u8, d7u8, 2); - d29u8 = vext_u8(d9u8, d10u8, 2); - d30u8 = vext_u8(d12u8, d13u8, 2); - q8u16 = vmlal_u8(q8u16, d28u8, d2u8); - q10u16 = vmlal_u8(q10u16, d29u8, d2u8); - q12u16 = 
vmlal_u8(q12u16, d30u8, d2u8); - d28u8 = vext_u8(d7u8, d8u8, 2); - d29u8 = vext_u8(d10u8, d11u8, 2); - d30u8 = vext_u8(d13u8, d14u8, 2); - q9u16 = vmlal_u8(q9u16, d28u8, d2u8); - q11u16 = vmlal_u8(q11u16, d29u8, d2u8); - q13u16 = vmlal_u8(q13u16, d30u8, d2u8); - - d28u8 = vext_u8(d6u8, d7u8, 3); - d29u8 = vext_u8(d9u8, d10u8, 3); - d30u8 = vext_u8(d12u8, d13u8, 3); - d15u8 = vext_u8(d7u8, d8u8, 3); - d31u8 = vext_u8(d10u8, d11u8, 3); - d6u8 = vext_u8(d13u8, d14u8, 3); - q4u16 = vmull_u8(d28u8, d3u8); - q5u16 = vmull_u8(d29u8, d3u8); - q6u16 = vmull_u8(d30u8, d3u8); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q10s16 = vqaddq_s16(q10s16, q5s16); - q12s16 = vqaddq_s16(q12s16, q6s16); - - q6u16 = vmull_u8(d15u8, d3u8); - q7u16 = vmull_u8(d31u8, d3u8); - q3u16 = vmull_u8(d6u8, d3u8); - q3s16 = vreinterpretq_s16_u16(q3u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q13s16 = vreinterpretq_s16_u16(q13u16); - q9s16 = vqaddq_s16(q9s16, q6s16); - q11s16 = vqaddq_s16(q11s16, q7s16); - q13s16 = vqaddq_s16(q13s16, q3s16); - - d6u8 = vqrshrun_n_s16(q8s16, 7); - d7u8 = vqrshrun_n_s16(q9s16, 7); - d8u8 = vqrshrun_n_s16(q10s16, 7); - d9u8 = vqrshrun_n_s16(q11s16, 7); - d10u8 = vqrshrun_n_s16(q12s16, 7); - d11u8 = vqrshrun_n_s16(q13s16, 7); - - vst1_u8(tmpp, d6u8); - tmpp += 8; - vst1_u8(tmpp, d7u8); - tmpp += 8; - vst1_u8(tmpp, d8u8); - tmpp += 8; - vst1_u8(tmpp, d9u8); - tmpp += 8; - vst1_u8(tmpp, d10u8); - tmpp += 8; - vst1_u8(tmpp, d11u8); - tmpp += 8; - } - - // Second pass: 16x16 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 
2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - for (i = 0; i < 2; i++) { - dst = dst_ptr + 8 * i; - tmpp = tmp + 8 * i; - d18u8 = vld1_u8(tmpp); - tmpp += 16; - d19u8 = vld1_u8(tmpp); - tmpp += 16; - d20u8 = vld1_u8(tmpp); - tmpp += 16; - d21u8 = vld1_u8(tmpp); - tmpp += 16; - d22u8 = vld1_u8(tmpp); - tmpp += 16; - for (j = 0; j < 4; j++) { - d23u8 = vld1_u8(tmpp); - tmpp += 16; - d24u8 = vld1_u8(tmpp); - tmpp += 16; - d25u8 = vld1_u8(tmpp); - tmpp += 16; - d26u8 = vld1_u8(tmpp); - tmpp += 16; - - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = 
vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = d25u8; - d22u8 = d26u8; - - vst1_u8(dst, d6u8); - dst += dst_pitch; - vst1_u8(dst, d7u8); - dst += dst_pitch; - vst1_u8(dst, d8u8); - dst += dst_pitch; - vst1_u8(dst, d9u8); - dst += dst_pitch; - } - } - return; -} diff --git a/media/libvpx/vp8/common/arm/neon/variance_neon.c b/media/libvpx/vp8/common/arm/neon/variance_neon.c deleted file mode 100644 index 1b1979073e5..00000000000 --- a/media/libvpx/vp8/common/arm/neon/variance_neon.c +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include "vpx_ports/mem.h" - -unsigned int vp8_variance16x16_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 8; i++) { - q0u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q1u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - __builtin_prefetch(src_ptr); - - q2u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - __builtin_prefetch(ref_ptr); - - q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); - q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); - q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); - q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = 
vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance16x8_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 4; i++) { // variance16x8_neon_loop - q0u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q1u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - __builtin_prefetch(src_ptr); - - q2u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - __builtin_prefetch(ref_ptr); - - q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); - q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); - q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); - q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = 
vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance8x16_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - uint8x8_t d0u8, d2u8, d4u8, d6u8; - int16x4_t d22s16, d23s16, d24s16, d25s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint16x8_t q11u16, q12u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 8; i++) { 
// variance8x16_neon_loop - d0u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - __builtin_prefetch(src_ptr); - - d4u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d6u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - __builtin_prefetch(ref_ptr); - - q11u16 = vsubl_u8(d0u8, d4u8); - q12u16 = vsubl_u8(d2u8, d6u8); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance8x8_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = 
vdupq_n_s32(0); - - for (i = 0; i < 2; i++) { // variance8x8_neon_loop - d0u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d1u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d3u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - - d4u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d5u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d6u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d7u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - - q11u16 = vsubl_u8(d0u8, d4u8); - q12u16 = vsubl_u8(d1u8, d5u8); - q13u16 = vsubl_u8(d2u8, d6u8); - q14u16 = vsubl_u8(d3u8, d7u8); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - 
vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} diff --git a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c deleted file mode 100644 index 8308d555b37..00000000000 --- a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c +++ /dev/null @@ -1,1024 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "vpx_ports/mem.h" -#include "vpx/vpx_integer.h" - -static const uint16_t bilinear_taps_coeff[8][2] = { - {128, 0}, - {112, 16}, - { 96, 32}, - { 80, 48}, - { 64, 64}, - { 48, 80}, - { 32, 96}, - { 16, 112} -}; - -unsigned int vp8_sub_pixel_variance16x16_neon_func( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int i; - DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 528); - unsigned char *tmpp; - unsigned char *tmpp2; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8; - uint8x8_t d19u8, d20u8, d21u8; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64, d2s64, d3s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8; - uint8x16_t q10u8, q11u8, q12u8, q13u8, q14u8, q15u8; - 
uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16; - uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - tmpp2 = tmp + 272; - tmpp = tmp; - if (xoffset == 0) { // secondpass_bfilter16x16_only - d0u8 = vdup_n_u8(bilinear_taps_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bilinear_taps_coeff[yoffset][1]); - - q11u8 = vld1q_u8(src_ptr); - src_ptr += src_pixels_per_line; - for (i = 4; i > 0; i--) { - q12u8 = vld1q_u8(src_ptr); - src_ptr += src_pixels_per_line; - q13u8 = vld1q_u8(src_ptr); - src_ptr += src_pixels_per_line; - q14u8 = vld1q_u8(src_ptr); - src_ptr += src_pixels_per_line; - q15u8 = vld1q_u8(src_ptr); - src_ptr += src_pixels_per_line; - - __builtin_prefetch(src_ptr); - __builtin_prefetch(src_ptr + src_pixels_per_line); - __builtin_prefetch(src_ptr + src_pixels_per_line * 2); - - q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); - q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); - q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); - q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); - q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); - q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); - q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); - - q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); - q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); - q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); - q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); - q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); - q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); - q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); - q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - q1u8 = vcombine_u8(d2u8, d3u8); - q2u8 
= vcombine_u8(d4u8, d5u8); - q3u8 = vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - - q11u8 = q15u8; - - vst1q_u8((uint8_t *)tmpp2, q1u8); - tmpp2 += 16; - vst1q_u8((uint8_t *)tmpp2, q2u8); - tmpp2 += 16; - vst1q_u8((uint8_t *)tmpp2, q3u8); - tmpp2 += 16; - vst1q_u8((uint8_t *)tmpp2, q4u8); - tmpp2 += 16; - } - } else if (yoffset == 0) { // firstpass_bfilter16x16_only - d0u8 = vdup_n_u8(bilinear_taps_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bilinear_taps_coeff[xoffset][1]); - - for (i = 4; i > 0 ; i--) { - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - - __builtin_prefetch(src_ptr); - __builtin_prefetch(src_ptr + src_pixels_per_line); - __builtin_prefetch(src_ptr + src_pixels_per_line * 2); - - q7u16 = vmull_u8(d2u8, d0u8); - q8u16 = vmull_u8(d3u8, d0u8); - q9u16 = vmull_u8(d5u8, d0u8); - q10u16 = vmull_u8(d6u8, d0u8); - q11u16 = vmull_u8(d8u8, d0u8); - q12u16 = vmull_u8(d9u8, d0u8); - q13u16 = vmull_u8(d11u8, d0u8); - q14u16 = vmull_u8(d12u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - d11u8 = vext_u8(d11u8, d12u8, 1); - - q7u16 = vmlal_u8(q7u16, d2u8, d1u8); - q9u16 = vmlal_u8(q9u16, d5u8, d1u8); - q11u16 = vmlal_u8(q11u16, d8u8, d1u8); - q13u16 = vmlal_u8(q13u16, d11u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - d12u8 = vext_u8(d12u8, d13u8, 1); - - q8u16 = vmlal_u8(q8u16, d3u8, d1u8); - q10u16 = vmlal_u8(q10u16, d6u8, d1u8); - q12u16 = vmlal_u8(q12u16, d9u8, d1u8); - q14u16 
= vmlal_u8(q14u16, d12u8, d1u8); - - d14u8 = vqrshrn_n_u16(q7u16, 7); - d15u8 = vqrshrn_n_u16(q8u16, 7); - d16u8 = vqrshrn_n_u16(q9u16, 7); - d17u8 = vqrshrn_n_u16(q10u16, 7); - d18u8 = vqrshrn_n_u16(q11u16, 7); - d19u8 = vqrshrn_n_u16(q12u16, 7); - d20u8 = vqrshrn_n_u16(q13u16, 7); - d21u8 = vqrshrn_n_u16(q14u16, 7); - - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - q10u8 = vcombine_u8(d20u8, d21u8); - - vst1q_u8((uint8_t *)tmpp2, q7u8); - tmpp2 += 16; - vst1q_u8((uint8_t *)tmpp2, q8u8); - tmpp2 += 16; - vst1q_u8((uint8_t *)tmpp2, q9u8); - tmpp2 += 16; - vst1q_u8((uint8_t *)tmpp2, q10u8); - tmpp2 += 16; - } - } else { - d0u8 = vdup_n_u8(bilinear_taps_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bilinear_taps_coeff[xoffset][1]); - - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - - // First Pass: output_height lines x output_width columns (17x16) - for (i = 3; i > 0; i--) { - q7u16 = vmull_u8(d2u8, d0u8); - q8u16 = vmull_u8(d3u8, d0u8); - q9u16 = vmull_u8(d5u8, d0u8); - q10u16 = vmull_u8(d6u8, d0u8); - q11u16 = vmull_u8(d8u8, d0u8); - q12u16 = vmull_u8(d9u8, d0u8); - q13u16 = vmull_u8(d11u8, d0u8); - q14u16 = vmull_u8(d12u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - d11u8 = vext_u8(d11u8, d12u8, 1); - - q7u16 = vmlal_u8(q7u16, d2u8, d1u8); - q9u16 = vmlal_u8(q9u16, d5u8, d1u8); - q11u16 = vmlal_u8(q11u16, d8u8, d1u8); - q13u16 = vmlal_u8(q13u16, d11u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 
= vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - d12u8 = vext_u8(d12u8, d13u8, 1); - - q8u16 = vmlal_u8(q8u16, d3u8, d1u8); - q10u16 = vmlal_u8(q10u16, d6u8, d1u8); - q12u16 = vmlal_u8(q12u16, d9u8, d1u8); - q14u16 = vmlal_u8(q14u16, d12u8, d1u8); - - d14u8 = vqrshrn_n_u16(q7u16, 7); - d15u8 = vqrshrn_n_u16(q8u16, 7); - d16u8 = vqrshrn_n_u16(q9u16, 7); - d17u8 = vqrshrn_n_u16(q10u16, 7); - d18u8 = vqrshrn_n_u16(q11u16, 7); - d19u8 = vqrshrn_n_u16(q12u16, 7); - d20u8 = vqrshrn_n_u16(q13u16, 7); - d21u8 = vqrshrn_n_u16(q14u16, 7); - - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - q10u8 = vcombine_u8(d20u8, d21u8); - - vst1q_u8((uint8_t *)tmpp, q7u8); - tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q8u8); - tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q9u8); - tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q10u8); - tmpp += 16; - } - - // First-pass filtering for rest 5 lines - d14u8 = vld1_u8(src_ptr); - d15u8 = vld1_u8(src_ptr + 8); - d16u8 = vld1_u8(src_ptr + 16); - src_ptr += src_pixels_per_line; - - q9u16 = vmull_u8(d2u8, d0u8); - q10u16 = vmull_u8(d3u8, d0u8); - q11u16 = vmull_u8(d5u8, d0u8); - q12u16 = vmull_u8(d6u8, d0u8); - q13u16 = vmull_u8(d8u8, d0u8); - q14u16 = vmull_u8(d9u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - - q9u16 = vmlal_u8(q9u16, d2u8, d1u8); - q11u16 = vmlal_u8(q11u16, d5u8, d1u8); - q13u16 = vmlal_u8(q13u16, d8u8, d1u8); - - 
d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - - q10u16 = vmlal_u8(q10u16, d3u8, d1u8); - q12u16 = vmlal_u8(q12u16, d6u8, d1u8); - q14u16 = vmlal_u8(q14u16, d9u8, d1u8); - - q1u16 = vmull_u8(d11u8, d0u8); - q2u16 = vmull_u8(d12u8, d0u8); - q3u16 = vmull_u8(d14u8, d0u8); - q4u16 = vmull_u8(d15u8, d0u8); - - d11u8 = vext_u8(d11u8, d12u8, 1); - d14u8 = vext_u8(d14u8, d15u8, 1); - - q1u16 = vmlal_u8(q1u16, d11u8, d1u8); - q3u16 = vmlal_u8(q3u16, d14u8, d1u8); - - d12u8 = vext_u8(d12u8, d13u8, 1); - d15u8 = vext_u8(d15u8, d16u8, 1); - - q2u16 = vmlal_u8(q2u16, d12u8, d1u8); - q4u16 = vmlal_u8(q4u16, d15u8, d1u8); - - d10u8 = vqrshrn_n_u16(q9u16, 7); - d11u8 = vqrshrn_n_u16(q10u16, 7); - d12u8 = vqrshrn_n_u16(q11u16, 7); - d13u8 = vqrshrn_n_u16(q12u16, 7); - d14u8 = vqrshrn_n_u16(q13u16, 7); - d15u8 = vqrshrn_n_u16(q14u16, 7); - d16u8 = vqrshrn_n_u16(q1u16, 7); - d17u8 = vqrshrn_n_u16(q2u16, 7); - d18u8 = vqrshrn_n_u16(q3u16, 7); - d19u8 = vqrshrn_n_u16(q4u16, 7); - - q5u8 = vcombine_u8(d10u8, d11u8); - q6u8 = vcombine_u8(d12u8, d13u8); - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - - vst1q_u8((uint8_t *)tmpp, q5u8); - tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q6u8); - tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q7u8); - tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q8u8); - tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q9u8); - - // secondpass_filter - d0u8 = vdup_n_u8(bilinear_taps_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bilinear_taps_coeff[yoffset][1]); - - tmpp = tmp; - tmpp2 = tmpp + 272; - q11u8 = vld1q_u8(tmpp); - tmpp += 16; - for (i = 4; i > 0; i--) { - q12u8 = vld1q_u8(tmpp); - tmpp += 16; - q13u8 = vld1q_u8(tmpp); - tmpp += 16; - q14u8 = vld1q_u8(tmpp); - tmpp += 16; - q15u8 = vld1q_u8(tmpp); - tmpp += 16; - - q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); - q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); - q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); - q4u16 = 
vmull_u8(vget_high_u8(q12u8), d0u8); - q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); - q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); - q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); - - q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); - q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); - q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); - q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); - q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); - q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); - q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); - q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - q1u8 = vcombine_u8(d2u8, d3u8); - q2u8 = vcombine_u8(d4u8, d5u8); - q3u8 = vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - - q11u8 = q15u8; - - vst1q_u8((uint8_t *)tmpp2, q1u8); - tmpp2 += 16; - vst1q_u8((uint8_t *)tmpp2, q2u8); - tmpp2 += 16; - vst1q_u8((uint8_t *)tmpp2, q3u8); - tmpp2 += 16; - vst1q_u8((uint8_t *)tmpp2, q4u8); - tmpp2 += 16; - } - } - - // sub_pixel_variance16x16_neon - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - tmpp = tmp + 272; - for (i = 0; i < 8; i++) { // sub_pixel_variance16x16_neon_loop - q0u8 = vld1q_u8(tmpp); - tmpp += 16; - q1u8 = vld1q_u8(tmpp); - tmpp += 16; - q2u8 = vld1q_u8(dst_ptr); - dst_ptr += dst_pixels_per_line; - q3u8 = vld1q_u8(dst_ptr); - dst_ptr += dst_pixels_per_line; - - d0u8 = vget_low_u8(q0u8); - d1u8 = vget_high_u8(q0u8); - d2u8 = vget_low_u8(q1u8); - d3u8 = vget_high_u8(q1u8); - - q11u16 = vsubl_u8(d0u8, vget_low_u8(q2u8)); - q12u16 = vsubl_u8(d1u8, vget_high_u8(q2u8)); - q13u16 = vsubl_u8(d2u8, vget_low_u8(q3u8)); - q14u16 = vsubl_u8(d3u8, 
vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vget_low_s64(q0s64); - d1s64 = vget_high_s64(q0s64); - d2s64 = vget_low_s64(q1s64); - d3s64 = vget_high_s64(q1s64); - d0s64 = vadd_s64(d0s64, d1s64); - d1s64 = vadd_s64(d2s64, d3s64); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance_halfpixvar16x16_h_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, 
d6s16, d7s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64, d2s64, d3s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8, q4u8, q5u8, q6u8; - uint8x16_t q7u8, q11u8, q12u8, q13u8, q14u8; - uint16x8_t q0u16, q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 4; i++) { // vp8_filt_fpo16x16s_4_0_loop_neon - q0u8 = vld1q_u8(src_ptr); - q1u8 = vld1q_u8(src_ptr + 16); - src_ptr += source_stride; - q2u8 = vld1q_u8(src_ptr); - q3u8 = vld1q_u8(src_ptr + 16); - src_ptr += source_stride; - q4u8 = vld1q_u8(src_ptr); - q5u8 = vld1q_u8(src_ptr + 16); - src_ptr += source_stride; - q6u8 = vld1q_u8(src_ptr); - q7u8 = vld1q_u8(src_ptr + 16); - src_ptr += source_stride; - - q11u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q12u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q13u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q14u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - - q1u8 = vextq_u8(q0u8, q1u8, 1); - q3u8 = vextq_u8(q2u8, q3u8, 1); - q5u8 = vextq_u8(q4u8, q5u8, 1); - q7u8 = vextq_u8(q6u8, q7u8, 1); - - q0u8 = vrhaddq_u8(q0u8, q1u8); - q1u8 = vrhaddq_u8(q2u8, q3u8); - q2u8 = vrhaddq_u8(q4u8, q5u8); - q3u8 = vrhaddq_u8(q6u8, q7u8); - - d0u8 = vget_low_u8(q0u8); - d1u8 = vget_high_u8(q0u8); - d2u8 = vget_low_u8(q1u8); - d3u8 = vget_high_u8(q1u8); - d4u8 = vget_low_u8(q2u8); - d5u8 = vget_high_u8(q2u8); - d6u8 = vget_low_u8(q3u8); - d7u8 = vget_high_u8(q3u8); - - q4u16 = vsubl_u8(d0u8, vget_low_u8(q11u8)); - q5u16 = vsubl_u8(d1u8, vget_high_u8(q11u8)); - q6u16 = vsubl_u8(d2u8, vget_low_u8(q12u8)); - q7u16 = vsubl_u8(d3u8, vget_high_u8(q12u8)); - q0u16 = vsubl_u8(d4u8, vget_low_u8(q13u8)); - q1u16 = vsubl_u8(d5u8, vget_high_u8(q13u8)); - q2u16 = vsubl_u8(d6u8, vget_low_u8(q14u8)); - q3u16 = vsubl_u8(d7u8, vget_high_u8(q14u8)); 
- - d8s16 = vreinterpret_s16_u16(vget_low_u16(q4u16)); - d9s16 = vreinterpret_s16_u16(vget_high_u16(q4u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q4u16)); - q9s32 = vmlal_s16(q9s32, d8s16, d8s16); - q10s32 = vmlal_s16(q10s32, d9s16, d9s16); - d10s16 = vreinterpret_s16_u16(vget_low_u16(q5u16)); - d11s16 = vreinterpret_s16_u16(vget_high_u16(q5u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q5u16)); - q9s32 = vmlal_s16(q9s32, d10s16, d10s16); - q10s32 = vmlal_s16(q10s32, d11s16, d11s16); - d12s16 = vreinterpret_s16_u16(vget_low_u16(q6u16)); - d13s16 = vreinterpret_s16_u16(vget_high_u16(q6u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q6u16)); - q9s32 = vmlal_s16(q9s32, d12s16, d12s16); - q10s32 = vmlal_s16(q10s32, d13s16, d13s16); - d14s16 = vreinterpret_s16_u16(vget_low_u16(q7u16)); - d15s16 = vreinterpret_s16_u16(vget_high_u16(q7u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q7u16)); - q9s32 = vmlal_s16(q9s32, d14s16, d14s16); - q10s32 = vmlal_s16(q10s32, d15s16, d15s16); - d0s16 = vreinterpret_s16_u16(vget_low_u16(q0u16)); - d1s16 = vreinterpret_s16_u16(vget_high_u16(q0u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q0u16)); - q9s32 = vmlal_s16(q9s32, d0s16, d0s16); - q10s32 = vmlal_s16(q10s32, d1s16, d1s16); - d2s16 = vreinterpret_s16_u16(vget_low_u16(q1u16)); - d3s16 = vreinterpret_s16_u16(vget_high_u16(q1u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q1u16)); - q9s32 = vmlal_s16(q9s32, d2s16, d2s16); - q10s32 = vmlal_s16(q10s32, d3s16, d3s16); - d4s16 = vreinterpret_s16_u16(vget_low_u16(q2u16)); - d5s16 = vreinterpret_s16_u16(vget_high_u16(q2u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q2u16)); - q9s32 = vmlal_s16(q9s32, d4s16, d4s16); - q10s32 = vmlal_s16(q10s32, d5s16, d5s16); - d6s16 = vreinterpret_s16_u16(vget_low_u16(q3u16)); - d7s16 = vreinterpret_s16_u16(vget_high_u16(q3u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q3u16)); - q9s32 = vmlal_s16(q9s32, d6s16, d6s16); 
- q10s32 = vmlal_s16(q10s32, d7s16, d7s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vget_low_s64(q0s64); - d1s64 = vget_high_s64(q0s64); - d2s64 = vget_low_s64(q1s64); - d3s64 = vget_high_s64(q1s64); - d0s64 = vadd_s64(d0s64, d1s64); - d1s64 = vadd_s64(d2s64, d3s64); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance_halfpixvar16x16_v_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - uint8x8_t d0u8, d1u8, d4u8, d5u8, d8u8, d9u8, d12u8, d13u8; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64, d2s64, d3s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q15u8; - uint16x8_t q0u16, q1u16, q2u16, q3u16, q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - q0u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - for (i = 0; i < 4; i++) { // vp8_filt_fpo16x16s_4_0_loop_neon - q2u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q4u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q6u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q15u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - - q1u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q5u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q7u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - - q0u8 = 
vrhaddq_u8(q0u8, q2u8); - q2u8 = vrhaddq_u8(q2u8, q4u8); - q4u8 = vrhaddq_u8(q4u8, q6u8); - q6u8 = vrhaddq_u8(q6u8, q15u8); - - d0u8 = vget_low_u8(q0u8); - d1u8 = vget_high_u8(q0u8); - d4u8 = vget_low_u8(q2u8); - d5u8 = vget_high_u8(q2u8); - d8u8 = vget_low_u8(q4u8); - d9u8 = vget_high_u8(q4u8); - d12u8 = vget_low_u8(q6u8); - d13u8 = vget_high_u8(q6u8); - - q11u16 = vsubl_u8(d0u8, vget_low_u8(q1u8)); - q12u16 = vsubl_u8(d1u8, vget_high_u8(q1u8)); - q13u16 = vsubl_u8(d4u8, vget_low_u8(q3u8)); - q14u16 = vsubl_u8(d5u8, vget_high_u8(q3u8)); - q0u16 = vsubl_u8(d8u8, vget_low_u8(q5u8)); - q1u16 = vsubl_u8(d9u8, vget_high_u8(q5u8)); - q2u16 = vsubl_u8(d12u8, vget_low_u8(q7u8)); - q3u16 = vsubl_u8(d13u8, vget_high_u8(q7u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - d0s16 = vreinterpret_s16_u16(vget_low_u16(q0u16)); - d1s16 = vreinterpret_s16_u16(vget_high_u16(q0u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q0u16)); - q9s32 = vmlal_s16(q9s32, d0s16, d0s16); - q10s32 = 
vmlal_s16(q10s32, d1s16, d1s16); - d2s16 = vreinterpret_s16_u16(vget_low_u16(q1u16)); - d3s16 = vreinterpret_s16_u16(vget_high_u16(q1u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q1u16)); - q9s32 = vmlal_s16(q9s32, d2s16, d2s16); - q10s32 = vmlal_s16(q10s32, d3s16, d3s16); - d4s16 = vreinterpret_s16_u16(vget_low_u16(q2u16)); - d5s16 = vreinterpret_s16_u16(vget_high_u16(q2u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q2u16)); - q9s32 = vmlal_s16(q9s32, d4s16, d4s16); - q10s32 = vmlal_s16(q10s32, d5s16, d5s16); - d6s16 = vreinterpret_s16_u16(vget_low_u16(q3u16)); - d7s16 = vreinterpret_s16_u16(vget_high_u16(q3u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q3u16)); - q9s32 = vmlal_s16(q9s32, d6s16, d6s16); - q10s32 = vmlal_s16(q10s32, d7s16, d7s16); - - q0u8 = q15u8; - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vget_low_s64(q0s64); - d1s64 = vget_high_s64(q0s64); - d2s64 = vget_low_s64(q1s64); - d3s64 = vget_high_s64(q1s64); - d0s64 = vadd_s64(d0s64, d1s64); - d1s64 = vadd_s64(d2s64, d3s64); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance_halfpixvar16x16_hv_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - int16x4_t d0s16, d1s16, d2s16, d3s16, d10s16, d11s16, d12s16, d13s16; - int16x4_t d18s16, d19s16, d20s16, d21s16, d22s16, d23s16, d24s16, d25s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64, d2s64, d3s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8; - uint16x8_t q0u16, q1u16, q5u16, 
q6u16, q9u16, q10u16, q11u16, q12u16; - int32x4_t q13s32, q14s32, q15s32; - int64x2_t q0s64, q1s64, q5s64; - - q13s32 = vdupq_n_s32(0); - q14s32 = vdupq_n_s32(0); - q15s32 = vdupq_n_s32(0); - - q0u8 = vld1q_u8(src_ptr); - q1u8 = vld1q_u8(src_ptr + 16); - src_ptr += source_stride; - q1u8 = vextq_u8(q0u8, q1u8, 1); - q0u8 = vrhaddq_u8(q0u8, q1u8); - for (i = 0; i < 4; i++) { // vp8_filt_fpo16x16s_4_0_loop_neon - q2u8 = vld1q_u8(src_ptr); - q3u8 = vld1q_u8(src_ptr + 16); - src_ptr += source_stride; - q4u8 = vld1q_u8(src_ptr); - q5u8 = vld1q_u8(src_ptr + 16); - src_ptr += source_stride; - q6u8 = vld1q_u8(src_ptr); - q7u8 = vld1q_u8(src_ptr + 16); - src_ptr += source_stride; - q8u8 = vld1q_u8(src_ptr); - q9u8 = vld1q_u8(src_ptr + 16); - src_ptr += source_stride; - - q3u8 = vextq_u8(q2u8, q3u8, 1); - q5u8 = vextq_u8(q4u8, q5u8, 1); - q7u8 = vextq_u8(q6u8, q7u8, 1); - q9u8 = vextq_u8(q8u8, q9u8, 1); - - q1u8 = vrhaddq_u8(q2u8, q3u8); - q2u8 = vrhaddq_u8(q4u8, q5u8); - q3u8 = vrhaddq_u8(q6u8, q7u8); - q4u8 = vrhaddq_u8(q8u8, q9u8); - q0u8 = vrhaddq_u8(q0u8, q1u8); - q1u8 = vrhaddq_u8(q1u8, q2u8); - q2u8 = vrhaddq_u8(q2u8, q3u8); - q3u8 = vrhaddq_u8(q3u8, q4u8); - - q5u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q6u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q7u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q8u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - - d0u8 = vget_low_u8(q0u8); - d1u8 = vget_high_u8(q0u8); - d2u8 = vget_low_u8(q1u8); - d3u8 = vget_high_u8(q1u8); - d4u8 = vget_low_u8(q2u8); - d5u8 = vget_high_u8(q2u8); - d6u8 = vget_low_u8(q3u8); - d7u8 = vget_high_u8(q3u8); - - q9u16 = vsubl_u8(d0u8, vget_low_u8(q5u8)); - q10u16 = vsubl_u8(d1u8, vget_high_u8(q5u8)); - q11u16 = vsubl_u8(d2u8, vget_low_u8(q6u8)); - q12u16 = vsubl_u8(d3u8, vget_high_u8(q6u8)); - q0u16 = vsubl_u8(d4u8, vget_low_u8(q7u8)); - q1u16 = vsubl_u8(d5u8, vget_high_u8(q7u8)); - q5u16 = vsubl_u8(d6u8, vget_low_u8(q8u8)); - q6u16 = vsubl_u8(d7u8, vget_high_u8(q8u8)); - - 
d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); - d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); - q13s32 = vpadalq_s16(q13s32, vreinterpretq_s16_u16(q9u16)); - q14s32 = vmlal_s16(q14s32, d18s16, d18s16); - q15s32 = vmlal_s16(q15s32, d19s16, d19s16); - - d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); - d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); - q13s32 = vpadalq_s16(q13s32, vreinterpretq_s16_u16(q10u16)); - q14s32 = vmlal_s16(q14s32, d20s16, d20s16); - q15s32 = vmlal_s16(q15s32, d21s16, d21s16); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q13s32 = vpadalq_s16(q13s32, vreinterpretq_s16_u16(q11u16)); - q14s32 = vmlal_s16(q14s32, d22s16, d22s16); - q15s32 = vmlal_s16(q15s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q13s32 = vpadalq_s16(q13s32, vreinterpretq_s16_u16(q12u16)); - q14s32 = vmlal_s16(q14s32, d24s16, d24s16); - q15s32 = vmlal_s16(q15s32, d25s16, d25s16); - - d0s16 = vreinterpret_s16_u16(vget_low_u16(q0u16)); - d1s16 = vreinterpret_s16_u16(vget_high_u16(q0u16)); - q13s32 = vpadalq_s16(q13s32, vreinterpretq_s16_u16(q0u16)); - q14s32 = vmlal_s16(q14s32, d0s16, d0s16); - q15s32 = vmlal_s16(q15s32, d1s16, d1s16); - - d2s16 = vreinterpret_s16_u16(vget_low_u16(q1u16)); - d3s16 = vreinterpret_s16_u16(vget_high_u16(q1u16)); - q13s32 = vpadalq_s16(q13s32, vreinterpretq_s16_u16(q1u16)); - q14s32 = vmlal_s16(q14s32, d2s16, d2s16); - q15s32 = vmlal_s16(q15s32, d3s16, d3s16); - - d10s16 = vreinterpret_s16_u16(vget_low_u16(q5u16)); - d11s16 = vreinterpret_s16_u16(vget_high_u16(q5u16)); - q13s32 = vpadalq_s16(q13s32, vreinterpretq_s16_u16(q5u16)); - q14s32 = vmlal_s16(q14s32, d10s16, d10s16); - q15s32 = vmlal_s16(q15s32, d11s16, d11s16); - - d12s16 = vreinterpret_s16_u16(vget_low_u16(q6u16)); - d13s16 = vreinterpret_s16_u16(vget_high_u16(q6u16)); - q13s32 = vpadalq_s16(q13s32, 
vreinterpretq_s16_u16(q6u16)); - q14s32 = vmlal_s16(q14s32, d12s16, d12s16); - q15s32 = vmlal_s16(q15s32, d13s16, d13s16); - - q0u8 = q4u8; - } - - q15s32 = vaddq_s32(q14s32, q15s32); - q0s64 = vpaddlq_s32(q13s32); - q1s64 = vpaddlq_s32(q15s32); - - d0s64 = vget_low_s64(q0s64); - d1s64 = vget_high_s64(q0s64); - d2s64 = vget_low_s64(q1s64); - d3s64 = vget_high_s64(q1s64); - d0s64 = vadd_s64(d0s64, d1s64); - d1s64 = vadd_s64(d2s64, d3s64); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -enum { kWidth8 = 8 }; -enum { kHeight8 = 8 }; -enum { kHeight8PlusOne = 9 }; -enum { kPixelStepOne = 1 }; -enum { kAlign16 = 16 }; - -#define FILTER_BITS 7 - -static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) { - const int32x4_t a = vpaddlq_s16(v_16x8); - const int64x2_t b = vpaddlq_s32(a); - const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), - vreinterpret_s32_s64(vget_high_s64(b))); - return vget_lane_s32(c, 0); -} - -static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) { - const int64x2_t b = vpaddlq_s32(v_32x4); - const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), - vreinterpret_s32_s64(vget_high_s64(b))); - return vget_lane_s32(c, 0); -} - -static void variance_neon_w8(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int w, int h, unsigned int *sse, int *sum) { - int i, j; - int16x8_t v_sum = vdupq_n_s16(0); - int32x4_t v_sse_lo = vdupq_n_s32(0); - int32x4_t v_sse_hi = vdupq_n_s32(0); - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 8) { - const uint8x8_t v_a = vld1_u8(&a[j]); - const uint8x8_t v_b = vld1_u8(&b[j]); - const uint16x8_t v_diff = vsubl_u8(v_a, v_b); - const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff); - v_sum = 
vaddq_s16(v_sum, sv_diff); - v_sse_lo = vmlal_s16(v_sse_lo, - vget_low_s16(sv_diff), - vget_low_s16(sv_diff)); - v_sse_hi = vmlal_s16(v_sse_hi, - vget_high_s16(sv_diff), - vget_high_s16(sv_diff)); - } - a += a_stride; - b += b_stride; - } - - *sum = horizontal_add_s16x8(v_sum); - *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi)); -} - -static unsigned int variance8x8_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum; - variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum); - return *sse - (((int64_t)sum * sum) / (kWidth8 * kHeight8)); -} - -static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, - uint8_t *output_ptr, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const uint16_t *vpx_filter) { - const uint8x8_t f0 = vmov_n_u8((uint8_t)vpx_filter[0]); - const uint8x8_t f1 = vmov_n_u8((uint8_t)vpx_filter[1]); - unsigned int i; - for (i = 0; i < output_height; ++i) { - const uint8x8_t src_0 = vld1_u8(&src_ptr[0]); - const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]); - const uint16x8_t a = vmull_u8(src_0, f0); - const uint16x8_t b = vmlal_u8(a, src_1, f1); - const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS); - vst1_u8(&output_ptr[0], out); - // Next row... 
- src_ptr += src_pixels_per_line; - output_ptr += output_width; - } -} - -unsigned int vp8_sub_pixel_variance8x8_neon( - const unsigned char *src, - int src_stride, - int xoffset, - int yoffset, - const unsigned char *dst, - int dst_stride, - unsigned int *sse) { - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight8 * kWidth8); - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight8PlusOne * kWidth8); - if (xoffset == 0) { - var_filter_block2d_bil_w8(src, temp2, src_stride, kWidth8, kHeight8, - kWidth8, bilinear_taps_coeff[yoffset]); - } else if (yoffset == 0) { - var_filter_block2d_bil_w8(src, temp2, src_stride, kPixelStepOne, - kHeight8PlusOne, kWidth8, - bilinear_taps_coeff[xoffset]); - } else { - var_filter_block2d_bil_w8(src, fdata3, src_stride, kPixelStepOne, - kHeight8PlusOne, kWidth8, - bilinear_taps_coeff[xoffset]); - var_filter_block2d_bil_w8(fdata3, temp2, kWidth8, kWidth8, kHeight8, - kWidth8, bilinear_taps_coeff[yoffset]); - } - return variance8x8_neon(temp2, kWidth8, dst, dst_stride, sse); -} - diff --git a/media/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm b/media/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm deleted file mode 100644 index 6d5aaa19db7..00000000000 --- a/media/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm +++ /dev/null @@ -1,815 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -%macro LF_ABS 2 - ; %1 value not preserved - ; %2 value preserved - ; output in %1 - movdqa scratch1, %2 ; v2 - - psubusb scratch1, %1 ; v2 - v1 - psubusb %1, %2 ; v1 - v2 - por %1, scratch1 ; abs(v2 - v1) -%endmacro - -%macro LF_FILTER_HEV_MASK 8-9 - - LF_ABS %1, %2 ; abs(p3 - p2) - LF_ABS %2, %3 ; abs(p2 - p1) - pmaxub %1, %2 ; accumulate mask -%if %0 == 8 - movdqa scratch2, %3 ; save p1 - LF_ABS scratch2, %4 ; abs(p1 - p0) -%endif - LF_ABS %4, %5 ; abs(p0 - q0) - LF_ABS %5, %6 ; abs(q0 - q1) -%if %0 == 8 - pmaxub %5, scratch2 ; accumulate hev -%else - pmaxub %5, %9 -%endif - pmaxub %1, %5 ; accumulate mask - - LF_ABS %3, %6 ; abs(p1 - q1) - LF_ABS %6, %7 ; abs(q1 - q2) - pmaxub %1, %6 ; accumulate mask - LF_ABS %7, %8 ; abs(q2 - q3) - pmaxub %1, %7 ; accumulate mask - - paddusb %4, %4 ; 2 * abs(p0 - q0) - pand %3, [GLOBAL(tfe)] - psrlw %3, 1 ; abs(p1 - q1) / 2 - paddusb %4, %3 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - - psubusb %1, [limit] - psubusb %4, [blimit] - por %1, %4 - pcmpeqb %1, zero ; mask - - psubusb %5, [thresh] - pcmpeqb %5, zero ; ~hev -%endmacro - -%macro LF_FILTER 6 - ; %1-%4: p1-q1 - ; %5: mask - ; %6: hev - - movdqa scratch2, %6 ; save hev - - pxor %1, [GLOBAL(t80)] ; ps1 - pxor %4, [GLOBAL(t80)] ; qs1 - movdqa scratch1, %1 - psubsb scratch1, %4 ; signed_char_clamp(ps1 - qs1) - pandn scratch2, scratch1 ; vp8_filter &= hev - - pxor %2, [GLOBAL(t80)] ; ps0 - pxor %3, [GLOBAL(t80)] ; qs0 - movdqa scratch1, %3 - psubsb scratch1, %2 ; qs0 - ps0 - paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) - paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) - paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) - pand %5, scratch2 ; &= mask - - movdqa scratch2, %5 - paddsb %5, [GLOBAL(t4)] ; Filter1 - paddsb scratch2, [GLOBAL(t3)] ; Filter2 - - ; Filter1 >> 3 - movdqa scratch1, zero - pcmpgtb scratch1, %5 - psrlw %5, 3 - pand scratch1, [GLOBAL(te0)] - pand %5, [GLOBAL(t1f)] - por %5, scratch1 - - 
psubsb %3, %5 ; qs0 - Filter1 - pxor %3, [GLOBAL(t80)] - - ; Filter2 >> 3 - movdqa scratch1, zero - pcmpgtb scratch1, scratch2 - psrlw scratch2, 3 - pand scratch1, [GLOBAL(te0)] - pand scratch2, [GLOBAL(t1f)] - por scratch2, scratch1 - - paddsb %2, scratch2 ; ps0 + Filter2 - pxor %2, [GLOBAL(t80)] - - ; outer tap adjustments - paddsb %5, [GLOBAL(t1)] - movdqa scratch1, zero - pcmpgtb scratch1, %5 - psrlw %5, 1 - pand scratch1, [GLOBAL(t80)] - pand %5, [GLOBAL(t7f)] - por %5, scratch1 - pand %5, %6 ; vp8_filter &= ~hev - - psubsb %4, %5 ; qs1 - vp8_filter - pxor %4, [GLOBAL(t80)] - - paddsb %1, %5 ; ps1 + vp8_filter - pxor %1, [GLOBAL(t80)] -%endmacro - -;void vp8_loop_filter_bh_y_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh -;) -global sym(vp8_loop_filter_bh_y_sse2) PRIVATE -sym(vp8_loop_filter_bh_y_sse2): - -%if LIBVPX_YASM_WIN64 - %define src rcx ; src_ptr - %define stride rdx ; src_pixel_step - %define blimit r8 - %define limit r9 - %define thresh r10 - - %define spp rax - %define stride3 r11 - %define stride5 r12 - %define stride7 r13 - - push rbp - mov rbp, rsp - SAVE_XMM 11 - push r12 - push r13 - mov thresh, arg(4) -%else - %define src rdi ; src_ptr - %define stride rsi ; src_pixel_step - %define blimit rdx - %define limit rcx - %define thresh r8 - - %define spp rax - %define stride3 r9 - %define stride5 r10 - %define stride7 r11 -%endif - - %define scratch1 xmm5 - %define scratch2 xmm6 - %define zero xmm7 - - %define i0 [src] - %define i1 [spp] - %define i2 [src + 2 * stride] - %define i3 [spp + 2 * stride] - %define i4 [src + 4 * stride] - %define i5 [spp + 4 * stride] - %define i6 [src + 2 * stride3] - %define i7 [spp + 2 * stride3] - %define i8 [src + 8 * stride] - %define i9 [spp + 8 * stride] - %define i10 [src + 2 * stride5] - %define i11 [spp + 2 * stride5] - %define i12 [src + 4 * stride3] - %define i13 [spp + 4 * stride3] - %define i14 [src + 2 * stride7] - %define i15 
[spp + 2 * stride7] - - ; prep work - lea spp, [src + stride] - lea stride3, [stride + 2 * stride] - lea stride5, [stride3 + 2 * stride] - lea stride7, [stride3 + 4 * stride] - pxor zero, zero - - ; load the first set into registers - movdqa xmm0, i0 - movdqa xmm1, i1 - movdqa xmm2, i2 - movdqa xmm3, i3 - movdqa xmm4, i4 - movdqa xmm8, i5 - movdqa xmm9, i6 ; q2, will contain abs(p1-p0) - movdqa xmm10, i7 -LF_FILTER_HEV_MASK xmm0, xmm1, xmm2, xmm3, xmm4, xmm8, xmm9, xmm10 - - movdqa xmm1, i2 - movdqa xmm2, i3 - movdqa xmm3, i4 - movdqa xmm8, i5 -LF_FILTER xmm1, xmm2, xmm3, xmm8, xmm0, xmm4 - movdqa i2, xmm1 - movdqa i3, xmm2 - -; second set - movdqa i4, xmm3 - movdqa i5, xmm8 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm2, i8 - movdqa xmm4, i9 - movdqa xmm10, i10 ; q2, will contain abs(p1-p0) - movdqa xmm11, i11 -LF_FILTER_HEV_MASK xmm3, xmm8, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm9 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm4, i8 - movdqa xmm8, i9 -LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2 - movdqa i6, xmm0 - movdqa i7, xmm1 - -; last set - movdqa i8, xmm4 - movdqa i9, xmm8 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm2, i12 - movdqa xmm3, i13 - movdqa xmm9, i14 ; q2, will contain abs(p1-p0) - movdqa xmm11, i15 -LF_FILTER_HEV_MASK xmm4, xmm8, xmm0, xmm1, xmm2, xmm3, xmm9, xmm11, xmm10 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm3, i12 - movdqa xmm8, i13 -LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 - movdqa i10, xmm0 - movdqa i11, xmm1 - movdqa i12, xmm3 - movdqa i13, xmm8 - -%if LIBVPX_YASM_WIN64 - pop r13 - pop r12 - RESTORE_XMM - pop rbp -%endif - - ret - - -;void vp8_loop_filter_bv_y_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh -;) - -global sym(vp8_loop_filter_bv_y_sse2) PRIVATE -sym(vp8_loop_filter_bv_y_sse2): - -%if LIBVPX_YASM_WIN64 - %define src rcx ; src_ptr - %define stride rdx ; src_pixel_step - %define blimit r8 - %define limit r9 - %define 
thresh r10 - - %define spp rax - %define stride3 r11 - %define stride5 r12 - %define stride7 r13 - - push rbp - mov rbp, rsp - SAVE_XMM 15 - push r12 - push r13 - mov thresh, arg(4) -%else - %define src rdi - %define stride rsi - %define blimit rdx - %define limit rcx - %define thresh r8 - - %define spp rax - %define stride3 r9 - %define stride5 r10 - %define stride7 r11 -%endif - - %define scratch1 xmm5 - %define scratch2 xmm6 - %define zero xmm7 - - %define s0 [src] - %define s1 [spp] - %define s2 [src + 2 * stride] - %define s3 [spp + 2 * stride] - %define s4 [src + 4 * stride] - %define s5 [spp + 4 * stride] - %define s6 [src + 2 * stride3] - %define s7 [spp + 2 * stride3] - %define s8 [src + 8 * stride] - %define s9 [spp + 8 * stride] - %define s10 [src + 2 * stride5] - %define s11 [spp + 2 * stride5] - %define s12 [src + 4 * stride3] - %define s13 [spp + 4 * stride3] - %define s14 [src + 2 * stride7] - %define s15 [spp + 2 * stride7] - - %define i0 [rsp] - %define i1 [rsp + 16] - %define i2 [rsp + 32] - %define i3 [rsp + 48] - %define i4 [rsp + 64] - %define i5 [rsp + 80] - %define i6 [rsp + 96] - %define i7 [rsp + 112] - %define i8 [rsp + 128] - %define i9 [rsp + 144] - %define i10 [rsp + 160] - %define i11 [rsp + 176] - %define i12 [rsp + 192] - %define i13 [rsp + 208] - %define i14 [rsp + 224] - %define i15 [rsp + 240] - - ALIGN_STACK 16, rax - - ; reserve stack space - %define temp_storage 0 ; size is 256 (16*16) - %define stack_size 256 - sub rsp, stack_size - - ; prep work - lea spp, [src + stride] - lea stride3, [stride + 2 * stride] - lea stride5, [stride3 + 2 * stride] - lea stride7, [stride3 + 4 * stride] - - ; 8-f - movdqa xmm0, s8 - movdqa xmm1, xmm0 - punpcklbw xmm0, s9 ; 80 90 - punpckhbw xmm1, s9 ; 88 98 - - movdqa xmm2, s10 - movdqa xmm3, xmm2 - punpcklbw xmm2, s11 ; a0 b0 - punpckhbw xmm3, s11 ; a8 b8 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 80 90 a0 b0 - punpckhwd xmm4, xmm2 ; 84 94 a4 b4 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 
88 98 a8 b8 - punpckhwd xmm2, xmm3 ; 8c 9c ac bc - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, s12 - movdqa xmm5, xmm3 - punpcklbw xmm3, s13 ; c0 d0 - punpckhbw xmm5, s13 ; c8 d8 - - movdqa xmm6, s14 - movdqa xmm7, xmm6 - punpcklbw xmm6, s15 ; e0 f0 - punpckhbw xmm7, s15 ; e8 f8 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; c0 d0 e0 f0 - punpckhwd xmm8, xmm6 ; c4 d4 e4 f4 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; c8 d8 e8 f8 - punpckhwd xmm6, xmm7 ; cc dc ec fc - - ; pull the third and fourth sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0 - punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4 - punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 88 88 a8 b8 c8 d8 e8 f8 - punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc - punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe - - ; save the calculations. we only have 15 registers ... 
- movdqa i0, xmm0 - movdqa i1, xmm7 - movdqa i2, xmm4 - movdqa i3, xmm3 - movdqa i4, xmm1 - movdqa i5, xmm8 - movdqa i6, xmm2 - movdqa i7, xmm5 - - ; 0-7 - movdqa xmm0, s0 - movdqa xmm1, xmm0 - punpcklbw xmm0, s1 ; 00 10 - punpckhbw xmm1, s1 ; 08 18 - - movdqa xmm2, s2 - movdqa xmm3, xmm2 - punpcklbw xmm2, s3 ; 20 30 - punpckhbw xmm3, s3 ; 28 38 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 00 10 20 30 - punpckhwd xmm4, xmm2 ; 04 14 24 34 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 08 18 28 38 - punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, s4 - movdqa xmm5, xmm3 - punpcklbw xmm3, s5 ; 40 50 - punpckhbw xmm5, s5 ; 48 58 - - movdqa xmm6, s6 - movdqa xmm7, xmm6 - punpcklbw xmm6, s7 ; 60 70 - punpckhbw xmm7, s7 ; 68 78 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; 40 50 60 70 - punpckhwd xmm8, xmm6 ; 44 54 64 74 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; 48 58 68 78 - punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c - - ; pull the first two sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70 - punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74 - punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78 - punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c - punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e - ; final combination - - movdqa xmm6, xmm0 - punpcklqdq xmm0, i0 - punpckhqdq xmm6, i0 - - movdqa xmm9, xmm7 - punpcklqdq xmm7, i1 - punpckhqdq xmm9, i1 - - movdqa xmm10, xmm4 - punpcklqdq xmm4, i2 - punpckhqdq xmm10, i2 - - movdqa xmm11, xmm3 - punpcklqdq xmm3, i3 - punpckhqdq xmm11, i3 - - movdqa xmm12, xmm1 - punpcklqdq xmm1, i4 - punpckhqdq xmm12, i4 - - movdqa xmm13, xmm8 - punpcklqdq xmm8, i5 - punpckhqdq xmm13, i5 - - movdqa xmm14, xmm2 - punpcklqdq xmm2, i6 - punpckhqdq xmm14, i6 - - movdqa 
xmm15, xmm5 - punpcklqdq xmm5, i7 - punpckhqdq xmm15, i7 - - movdqa i0, xmm0 - movdqa i1, xmm6 - movdqa i2, xmm7 - movdqa i3, xmm9 - movdqa i4, xmm4 - movdqa i5, xmm10 - movdqa i6, xmm3 - movdqa i7, xmm11 - movdqa i8, xmm1 - movdqa i9, xmm12 - movdqa i10, xmm8 - movdqa i11, xmm13 - movdqa i12, xmm2 - movdqa i13, xmm14 - movdqa i14, xmm5 - movdqa i15, xmm15 - -; TRANSPOSED DATA AVAILABLE ON THE STACK - - movdqa xmm12, xmm6 - movdqa xmm13, xmm7 - - pxor zero, zero - -LF_FILTER_HEV_MASK xmm0, xmm12, xmm13, xmm9, xmm4, xmm10, xmm3, xmm11 - - movdqa xmm1, i2 - movdqa xmm2, i3 - movdqa xmm8, i4 - movdqa xmm9, i5 -LF_FILTER xmm1, xmm2, xmm8, xmm9, xmm0, xmm4 - movdqa i2, xmm1 - movdqa i3, xmm2 - -; second set - movdqa i4, xmm8 - movdqa i5, xmm9 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm2, i8 - movdqa xmm4, i9 - movdqa xmm10, i10 ; q2, will contain abs(p1-p0) - movdqa xmm11, i11 -LF_FILTER_HEV_MASK xmm8, xmm9, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm3 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm3, i8 - movdqa xmm4, i9 -LF_FILTER xmm0, xmm1, xmm3, xmm4, xmm8, xmm2 - movdqa i6, xmm0 - movdqa i7, xmm1 - -; last set - movdqa i8, xmm3 - movdqa i9, xmm4 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm2, i12 - movdqa xmm8, i13 - movdqa xmm9, i14 ; q2, will contain abs(p1-p0) - movdqa xmm11, i15 -LF_FILTER_HEV_MASK xmm3, xmm4, xmm0, xmm1, xmm2, xmm8, xmm9, xmm11, xmm10 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm4, i12 - movdqa xmm8, i13 -LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2 - movdqa i10, xmm0 - movdqa i11, xmm1 - movdqa i12, xmm4 - movdqa i13, xmm8 - - -; RESHUFFLE AND WRITE OUT - ; 8-f - movdqa xmm0, i8 - movdqa xmm1, xmm0 - punpcklbw xmm0, i9 ; 80 90 - punpckhbw xmm1, i9 ; 88 98 - - movdqa xmm2, i10 - movdqa xmm3, xmm2 - punpcklbw xmm2, i11 ; a0 b0 - punpckhbw xmm3, i11 ; a8 b8 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 80 90 a0 b0 - punpckhwd xmm4, xmm2 ; 84 94 a4 b4 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 88 98 a8 b8 - punpckhwd 
xmm2, xmm3 ; 8c 9c ac bc - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, i12 - movdqa xmm5, xmm3 - punpcklbw xmm3, i13 ; c0 d0 - punpckhbw xmm5, i13 ; c8 d8 - - movdqa xmm6, i14 - movdqa xmm7, xmm6 - punpcklbw xmm6, i15 ; e0 f0 - punpckhbw xmm7, i15 ; e8 f8 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; c0 d0 e0 f0 - punpckhwd xmm8, xmm6 ; c4 d4 e4 f4 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; c8 d8 e8 f8 - punpckhwd xmm6, xmm7 ; cc dc ec fc - - ; pull the third and fourth sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0 - punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4 - punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 88 88 a8 b8 c8 d8 e8 f8 - punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc - punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe - - ; save the calculations. we only have 15 registers ... 
- movdqa i8, xmm0 - movdqa i9, xmm7 - movdqa i10, xmm4 - movdqa i11, xmm3 - movdqa i12, xmm1 - movdqa i13, xmm8 - movdqa i14, xmm2 - movdqa i15, xmm5 - - ; 0-7 - movdqa xmm0, i0 - movdqa xmm1, xmm0 - punpcklbw xmm0, i1 ; 00 10 - punpckhbw xmm1, i1 ; 08 18 - - movdqa xmm2, i2 - movdqa xmm3, xmm2 - punpcklbw xmm2, i3 ; 20 30 - punpckhbw xmm3, i3 ; 28 38 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 00 10 20 30 - punpckhwd xmm4, xmm2 ; 04 14 24 34 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 08 18 28 38 - punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, i4 - movdqa xmm5, xmm3 - punpcklbw xmm3, i5 ; 40 50 - punpckhbw xmm5, i5 ; 48 58 - - movdqa xmm6, i6 - movdqa xmm7, xmm6 - punpcklbw xmm6, i7 ; 60 70 - punpckhbw xmm7, i7 ; 68 78 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; 40 50 60 70 - punpckhwd xmm8, xmm6 ; 44 54 64 74 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; 48 58 68 78 - punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c - - ; pull the first two sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70 - punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74 - punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78 - punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c - punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e - ; final combination - - movdqa xmm6, xmm0 - punpcklqdq xmm0, i8 - punpckhqdq xmm6, i8 - - movdqa xmm9, xmm7 - punpcklqdq xmm7, i9 - punpckhqdq xmm9, i9 - - movdqa xmm10, xmm4 - punpcklqdq xmm4, i10 - punpckhqdq xmm10, i10 - - movdqa xmm11, xmm3 - punpcklqdq xmm3, i11 - punpckhqdq xmm11, i11 - - movdqa xmm12, xmm1 - punpcklqdq xmm1, i12 - punpckhqdq xmm12, i12 - - movdqa xmm13, xmm8 - punpcklqdq xmm8, i13 - punpckhqdq xmm13, i13 - - movdqa xmm14, xmm2 - punpcklqdq xmm2, i14 - punpckhqdq xmm14, 
i14 - - movdqa xmm15, xmm5 - punpcklqdq xmm5, i15 - punpckhqdq xmm15, i15 - - movdqa s0, xmm0 - movdqa s1, xmm6 - movdqa s2, xmm7 - movdqa s3, xmm9 - movdqa s4, xmm4 - movdqa s5, xmm10 - movdqa s6, xmm3 - movdqa s7, xmm11 - movdqa s8, xmm1 - movdqa s9, xmm12 - movdqa s10, xmm8 - movdqa s11, xmm13 - movdqa s12, xmm2 - movdqa s13, xmm14 - movdqa s14, xmm5 - movdqa s15, xmm15 - - ; free stack space - add rsp, stack_size - - ; un-ALIGN_STACK - pop rsp - -%if LIBVPX_YASM_WIN64 - pop r13 - pop r12 - RESTORE_XMM - pop rbp -%endif - - ret - -SECTION_RODATA -align 16 -te0: - times 16 db 0xe0 -align 16 -t7f: - times 16 db 0x7f -align 16 -tfe: - times 16 db 0xfe -align 16 -t1f: - times 16 db 0x1f -align 16 -t80: - times 16 db 0x80 -align 16 -t1: - times 16 db 0x01 -align 16 -t3: - times 16 db 0x03 -align 16 -t4: - times 16 db 0x04 diff --git a/media/libvpx/vp8/decoder/decodeframe.c b/media/libvpx/vp8/decoder/decodeframe.c deleted file mode 100644 index e7cf0d9b9c6..00000000000 --- a/media/libvpx/vp8/decoder/decodeframe.c +++ /dev/null @@ -1,1397 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "./vpx_scale_rtcd.h" -#include "onyxd_int.h" -#include "vp8/common/header.h" -#include "vp8/common/reconintra4x4.h" -#include "vp8/common/reconinter.h" -#include "detokenize.h" -#include "vp8/common/common.h" -#include "vp8/common/invtrans.h" -#include "vp8/common/alloccommon.h" -#include "vp8/common/entropymode.h" -#include "vp8/common/quant_common.h" -#include "vpx_scale/vpx_scale.h" -#include "vp8/common/setupintrarecon.h" - -#include "decodemv.h" -#include "vp8/common/extend.h" -#if CONFIG_ERROR_CONCEALMENT -#include "error_concealment.h" -#endif -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/threading.h" -#include "decoderthreading.h" -#include "dboolhuff.h" - -#include -#include - -void vp8cx_init_de_quantizer(VP8D_COMP *pbi) -{ - int Q; - VP8_COMMON *const pc = & pbi->common; - - for (Q = 0; Q < QINDEX_RANGE; Q++) - { - pc->Y1dequant[Q][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q); - pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q); - pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q); - - pc->Y1dequant[Q][1] = (short)vp8_ac_yquant(Q); - pc->Y2dequant[Q][1] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q); - pc->UVdequant[Q][1] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q); - } -} - -void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) -{ - int i; - int QIndex; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - VP8_COMMON *const pc = & pbi->common; - - /* Decide whether to use the default or alternate baseline Q value. */ - if (xd->segmentation_enabled) - { - /* Abs Value */ - if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) - QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; - - /* Delta Value */ - else - { - QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; - QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? 
QIndex : MAXQ) : 0; /* Clamp to valid range */ - } - } - else - QIndex = pc->base_qindex; - - /* Set up the macroblock dequant constants */ - xd->dequant_y1_dc[0] = 1; - xd->dequant_y1[0] = pc->Y1dequant[QIndex][0]; - xd->dequant_y2[0] = pc->Y2dequant[QIndex][0]; - xd->dequant_uv[0] = pc->UVdequant[QIndex][0]; - - for (i = 1; i < 16; i++) - { - xd->dequant_y1_dc[i] = - xd->dequant_y1[i] = pc->Y1dequant[QIndex][1]; - xd->dequant_y2[i] = pc->Y2dequant[QIndex][1]; - xd->dequant_uv[i] = pc->UVdequant[QIndex][1]; - } -} - -static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, - unsigned int mb_idx) -{ - MB_PREDICTION_MODE mode; - int i; -#if CONFIG_ERROR_CONCEALMENT - int corruption_detected = 0; -#endif - - if (xd->mode_info_context->mbmi.mb_skip_coeff) - { - vp8_reset_mb_tokens_context(xd); - } - else if (!vp8dx_bool_error(xd->current_bc)) - { - int eobtotal; - eobtotal = vp8_decode_mb_tokens(pbi, xd); - - /* Special case: Force the loopfilter to skip when eobtotal is zero */ - xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal==0); - } - - mode = xd->mode_info_context->mbmi.mode; - - if (xd->segmentation_enabled) - vp8_mb_init_dequantizer(pbi, xd); - - -#if CONFIG_ERROR_CONCEALMENT - - if(pbi->ec_active) - { - int throw_residual; - /* When we have independent partitions we can apply residual even - * though other partitions within the frame are corrupt. - */ - throw_residual = (!pbi->independent_partitions && - pbi->frame_corrupt_residual); - throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); - - if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) - { - /* MB with corrupt residuals or corrupt mode/motion vectors. - * Better to use the predictor as reconstruction. 
- */ - pbi->frame_corrupt_residual = 1; - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - vp8_conceal_corrupt_mb(xd); - - - corruption_detected = 1; - - /* force idct to be skipped for B_PRED and use the - * prediction only for reconstruction - * */ - vpx_memset(xd->eobs, 0, 25); - } - } -#endif - - /* do prediction */ - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) - { - vp8_build_intra_predictors_mbuv_s(xd, - xd->recon_above[1], - xd->recon_above[2], - xd->recon_left[1], - xd->recon_left[2], - xd->recon_left_stride[1], - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride); - - if (mode != B_PRED) - { - vp8_build_intra_predictors_mby_s(xd, - xd->recon_above[0], - xd->recon_left[0], - xd->recon_left_stride[0], - xd->dst.y_buffer, - xd->dst.y_stride); - } - else - { - short *DQC = xd->dequant_y1; - int dst_stride = xd->dst.y_stride; - - /* clear out residual eob info */ - if(xd->mode_info_context->mbmi.mb_skip_coeff) - vpx_memset(xd->eobs, 0, 25); - - intra_prediction_down_copy(xd, xd->recon_above[0] + 16); - - for (i = 0; i < 16; i++) - { - BLOCKD *b = &xd->block[i]; - unsigned char *dst = xd->dst.y_buffer + b->offset; - B_PREDICTION_MODE b_mode = - xd->mode_info_context->bmi[i].as_mode; - unsigned char *Above = dst - dst_stride; - unsigned char *yleft = dst - 1; - int left_stride = dst_stride; - unsigned char top_left = Above[-1]; - - vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, - dst, dst_stride, top_left); - - if (xd->eobs[i]) - { - if (xd->eobs[i] > 1) - { - vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); - } - else - { - vp8_dc_only_idct_add - (b->qcoeff[0] * DQC[0], - dst, dst_stride, - dst, dst_stride); - vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); - } - } - } - } - } - else - { - vp8_build_inter_predictors_mb(xd); - } - - -#if CONFIG_ERROR_CONCEALMENT - if (corruption_detected) - { - return; - } -#endif - - if(!xd->mode_info_context->mbmi.mb_skip_coeff) - { - /* dequantization and idct */ - if (mode != B_PRED) - { 
- short *DQC = xd->dequant_y1; - - if (mode != SPLITMV) - { - BLOCKD *b = &xd->block[24]; - - /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) - { - vp8_dequantize_b(b, xd->dequant_y2); - - vp8_short_inv_walsh4x4(&b->dqcoeff[0], - xd->qcoeff); - vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); - } - else - { - b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; - vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], - xd->qcoeff); - vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); - } - - /* override the dc dequant constant in order to preserve the - * dc components - */ - DQC = xd->dequant_y1_dc; - } - - vp8_dequant_idct_add_y_block - (xd->qcoeff, DQC, - xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); - } - - vp8_dequant_idct_add_uv_block - (xd->qcoeff+16*16, xd->dequant_uv, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs+16); - } -} - -static int get_delta_q(vp8_reader *bc, int prev, int *q_update) -{ - int ret_val = 0; - - if (vp8_read_bit(bc)) - { - ret_val = vp8_read_literal(bc, 4); - - if (vp8_read_bit(bc)) - ret_val = -ret_val; - } - - /* Trigger a quantizer update if the delta-q value has changed */ - if (ret_val != prev) - *q_update = 1; - - return ret_val; -} - -#ifdef PACKET_TESTING -#include -FILE *vpxlog = 0; -#endif - -static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1; - unsigned char *dest_ptr1; - - unsigned int Border; - int plane_stride; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - src_ptr1 = ybf->y_buffer - Border; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - - for (i = 0; i < (int)Border; i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } - - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - Border /= 2; - src_ptr1 = ybf->u_buffer - Border; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - - for (i = 0; i < (int)(Border); 
i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - src_ptr1 = ybf->v_buffer - Border; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - - for (i = 0; i < (int)(Border); i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } -} - -static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) - { - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } - - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - plane_height = ybf->uv_height; - Border /= 2; - - src_ptr1 = ybf->u_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - src_ptr1 = ybf->v_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } -} - -static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, - unsigned char *y_src, - unsigned char *u_src, - unsigned char *v_src) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int 
plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = 16; - plane_width = ybf->y_width; - - /* copy the left and right most columns out */ - src_ptr1 = y_src; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - plane_height = 8; - plane_width = ybf->uv_width; - Border /= 2; - - /* copy the left and right most columns out */ - src_ptr1 = u_src; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - /* copy the left and right most columns out */ - src_ptr1 = v_src; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } -} - -static void decode_mb_rows(VP8D_COMP *pbi) -{ - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; - - MODE_INFO *lf_mic = xd->mode_info_context; - - int ibc = 0; - int num_part = 1 << pc->multi_token_partition; - - int recon_yoffset, recon_uvoffset; - int mb_row, mb_col; - int mb_idx = 0; - - 
YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - - int recon_y_stride = yv12_fb_new->y_stride; - int recon_uv_stride = yv12_fb_new->uv_stride; - - unsigned char *ref_buffer[MAX_REF_FRAMES][3]; - unsigned char *dst_buffer[3]; - unsigned char *lf_dst[3]; - unsigned char *eb_dst[3]; - int i; - int ref_fb_corrupted[MAX_REF_FRAMES]; - - ref_fb_corrupted[INTRA_FRAME] = 0; - - for(i = 1; i < MAX_REF_FRAMES; i++) - { - YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; - - ref_buffer[i][0] = this_fb->y_buffer; - ref_buffer[i][1] = this_fb->u_buffer; - ref_buffer[i][2] = this_fb->v_buffer; - - ref_fb_corrupted[i] = this_fb->corrupted; - } - - /* Set up the buffer pointers */ - eb_dst[0] = lf_dst[0] = dst_buffer[0] = yv12_fb_new->y_buffer; - eb_dst[1] = lf_dst[1] = dst_buffer[1] = yv12_fb_new->u_buffer; - eb_dst[2] = lf_dst[2] = dst_buffer[2] = yv12_fb_new->v_buffer; - - xd->up_available = 0; - - /* Initialize the loop filter for this frame. */ - if(pc->filter_level) - vp8_loop_filter_frame_init(pc, xd, pc->filter_level); - - vp8_setup_intra_recon_top_line(yv12_fb_new); - - /* Decode the individual macro block */ - for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) - { - if (num_part > 1) - { - xd->current_bc = & pbi->mbc[ibc]; - ibc++; - - if (ibc == num_part) - ibc = 0; - } - - recon_yoffset = mb_row * recon_y_stride * 16; - recon_uvoffset = mb_row * recon_uv_stride * 8; - - /* reset contexts */ - xd->above_context = pc->above_context; - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - - xd->left_available = 0; - - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - - xd->recon_above[0] = dst_buffer[0] + recon_yoffset; - xd->recon_above[1] = dst_buffer[1] + recon_uvoffset; - xd->recon_above[2] = dst_buffer[2] + recon_uvoffset; - - xd->recon_left[0] = xd->recon_above[0] - 1; - xd->recon_left[1] = xd->recon_above[1] - 1; - xd->recon_left[2] = xd->recon_above[2] - 1; - - 
xd->recon_above[0] -= xd->dst.y_stride; - xd->recon_above[1] -= xd->dst.uv_stride; - xd->recon_above[2] -= xd->dst.uv_stride; - - /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = xd->dst.y_stride; - xd->recon_left_stride[1] = xd->dst.uv_stride; - - setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], - xd->recon_left[2], xd->dst.y_stride, - xd->dst.uv_stride); - - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) - { - /* Distance of Mb to the various image edges. - * These are specified to 8th pel as they are always compared to values - * that are in 1/8th pel units - */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - -#if CONFIG_ERROR_CONCEALMENT - { - int corrupt_residual = (!pbi->independent_partitions && - pbi->frame_corrupt_residual) || - vp8dx_bool_error(xd->current_bc); - if (pbi->ec_active && - xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME && - corrupt_residual) - { - /* We have an intra block with corrupt coefficients, better to - * conceal with an inter block. Interpolate MVs from neighboring - * MBs. - * - * Note that for the first mb with corrupt residual in a frame, - * we might not discover that before decoding the residual. That - * happens after this check, and therefore no inter concealment - * will be done. - */ - vp8_interpolate_motion(xd, - mb_row, mb_col, - pc->mb_rows, pc->mb_cols, - pc->mode_info_stride); - } - } -#endif - - xd->dst.y_buffer = dst_buffer[0] + recon_yoffset; - xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; - xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; - - if (xd->mode_info_context->mbmi.ref_frame >= LAST_FRAME) { - MV_REFERENCE_FRAME ref = xd->mode_info_context->mbmi.ref_frame; - xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset; - xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset; - xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset; - } else { - // ref_frame is INTRA_FRAME, pre buffer should not be used. 
- xd->pre.y_buffer = 0; - xd->pre.u_buffer = 0; - xd->pre.v_buffer = 0; - } - - /* propagate errors from reference frames */ - xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; - - decode_macroblock(pbi, xd, mb_idx); - - mb_idx++; - xd->left_available = 1; - - /* check if the boolean decoder has suffered an error */ - xd->corrupted |= vp8dx_bool_error(xd->current_bc); - - xd->recon_above[0] += 16; - xd->recon_above[1] += 8; - xd->recon_above[2] += 8; - xd->recon_left[0] += 16; - xd->recon_left[1] += 8; - xd->recon_left[2] += 8; - - recon_yoffset += 16; - recon_uvoffset += 8; - - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; - } - - /* adjust to the next row of mbs */ - vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - - ++xd->mode_info_context; /* skip prediction column */ - xd->up_available = 1; - - if(pc->filter_level) - { - if(mb_row > 0) - { - if (pc->filter_type == NORMAL_LOOPFILTER) - vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, - recon_y_stride, recon_uv_stride, - lf_dst[0], lf_dst[1], lf_dst[2]); - else - vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, - recon_y_stride, recon_uv_stride, - lf_dst[0], lf_dst[1], lf_dst[2]); - if(mb_row > 1) - { - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - - eb_dst[0] += recon_y_stride * 16; - eb_dst[1] += recon_uv_stride * 8; - eb_dst[2] += recon_uv_stride * 8; - } - - lf_dst[0] += recon_y_stride * 16; - lf_dst[1] += recon_uv_stride * 8; - lf_dst[2] += recon_uv_stride * 8; - lf_mic += pc->mb_cols; - lf_mic++; /* Skip border mb */ - } - } - else - { - if(mb_row > 0) - { - /**/ - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - eb_dst[0] += recon_y_stride * 16; - eb_dst[1] += recon_uv_stride * 8; - eb_dst[2] += recon_uv_stride * 8; - } - } - } - - if(pc->filter_level) - { - if (pc->filter_type == NORMAL_LOOPFILTER) - vp8_loop_filter_row_normal(pc, 
lf_mic, mb_row-1, recon_y_stride, - recon_uv_stride, lf_dst[0], lf_dst[1], - lf_dst[2]); - else - vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, recon_y_stride, - recon_uv_stride, lf_dst[0], lf_dst[1], - lf_dst[2]); - - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - eb_dst[0] += recon_y_stride * 16; - eb_dst[1] += recon_uv_stride * 8; - eb_dst[2] += recon_uv_stride * 8; - } - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - yv12_extend_frame_top_c(yv12_fb_new); - yv12_extend_frame_bottom_c(yv12_fb_new); - -} - -static unsigned int read_partition_size(VP8D_COMP *pbi, - const unsigned char *cx_size) -{ - unsigned char temp[3]; - if (pbi->decrypt_cb) - { - pbi->decrypt_cb(pbi->decrypt_state, cx_size, temp, 3); - cx_size = temp; - } - return cx_size[0] + (cx_size[1] << 8) + (cx_size[2] << 16); -} - -static int read_is_valid(const unsigned char *start, - size_t len, - const unsigned char *end) -{ - return (start + len > start && start + len <= end); -} - -static unsigned int read_available_partition_size( - VP8D_COMP *pbi, - const unsigned char *token_part_sizes, - const unsigned char *fragment_start, - const unsigned char *first_fragment_end, - const unsigned char *fragment_end, - int i, - int num_part) -{ - VP8_COMMON* pc = &pbi->common; - const unsigned char *partition_size_ptr = token_part_sizes + i * 3; - unsigned int partition_size = 0; - ptrdiff_t bytes_left = fragment_end - fragment_start; - /* Calculate the length of this partition. The last partition - * size is implicit. If the partition size can't be read, then - * either use the remaining data in the buffer (for EC mode) - * or throw an error. 
- */ - if (i < num_part - 1) - { - if (read_is_valid(partition_size_ptr, 3, first_fragment_end)) - partition_size = read_partition_size(pbi, partition_size_ptr); - else if (pbi->ec_active) - partition_size = (unsigned int)bytes_left; - else - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated partition size data"); - } - else - partition_size = (unsigned int)bytes_left; - - /* Validate the calculated partition length. If the buffer - * described by the partition can't be fully read, then restrict - * it to the portion that can be (for EC mode) or throw an error. - */ - if (!read_is_valid(fragment_start, partition_size, fragment_end)) - { - if (pbi->ec_active) - partition_size = (unsigned int)bytes_left; - else - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition " - "%d length", i + 1); - } - return partition_size; -} - - -static void setup_token_decoder(VP8D_COMP *pbi, - const unsigned char* token_part_sizes) -{ - vp8_reader *bool_decoder = &pbi->mbc[0]; - unsigned int partition_idx; - unsigned int fragment_idx; - unsigned int num_token_partitions; - const unsigned char *first_fragment_end = pbi->fragments.ptrs[0] + - pbi->fragments.sizes[0]; - - TOKEN_PARTITION multi_token_partition = - (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2); - if (!vp8dx_bool_error(&pbi->mbc[8])) - pbi->common.multi_token_partition = multi_token_partition; - num_token_partitions = 1 << pbi->common.multi_token_partition; - - /* Check for partitions within the fragments and unpack the fragments - * so that each fragment pointer points to its corresponding partition. */ - for (fragment_idx = 0; fragment_idx < pbi->fragments.count; ++fragment_idx) - { - unsigned int fragment_size = pbi->fragments.sizes[fragment_idx]; - const unsigned char *fragment_end = pbi->fragments.ptrs[fragment_idx] + - fragment_size; - /* Special case for handling the first partition since we have already - * read its size. 
*/ - if (fragment_idx == 0) - { - /* Size of first partition + token partition sizes element */ - ptrdiff_t ext_first_part_size = token_part_sizes - - pbi->fragments.ptrs[0] + 3 * (num_token_partitions - 1); - fragment_size -= (unsigned int)ext_first_part_size; - if (fragment_size > 0) - { - pbi->fragments.sizes[0] = (unsigned int)ext_first_part_size; - /* The fragment contains an additional partition. Move to - * next. */ - fragment_idx++; - pbi->fragments.ptrs[fragment_idx] = pbi->fragments.ptrs[0] + - pbi->fragments.sizes[0]; - } - } - /* Split the chunk into partitions read from the bitstream */ - while (fragment_size > 0) - { - ptrdiff_t partition_size = read_available_partition_size( - pbi, - token_part_sizes, - pbi->fragments.ptrs[fragment_idx], - first_fragment_end, - fragment_end, - fragment_idx - 1, - num_token_partitions); - pbi->fragments.sizes[fragment_idx] = (unsigned int)partition_size; - fragment_size -= (unsigned int)partition_size; - assert(fragment_idx <= num_token_partitions); - if (fragment_size > 0) - { - /* The fragment contains an additional partition. - * Move to next. 
*/ - fragment_idx++; - pbi->fragments.ptrs[fragment_idx] = - pbi->fragments.ptrs[fragment_idx - 1] + partition_size; - } - } - } - - pbi->fragments.count = num_token_partitions + 1; - - for (partition_idx = 1; partition_idx < pbi->fragments.count; ++partition_idx) - { - if (vp8dx_start_decode(bool_decoder, - pbi->fragments.ptrs[partition_idx], - pbi->fragments.sizes[partition_idx], - pbi->decrypt_cb, pbi->decrypt_state)) - vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder %d", - partition_idx); - - bool_decoder++; - } - -#if CONFIG_MULTITHREAD - /* Clamp number of decoder threads */ - if (pbi->decoding_thread_count > num_token_partitions - 1) - pbi->decoding_thread_count = num_token_partitions - 1; -#endif -} - - -static void init_frame(VP8D_COMP *pbi) -{ - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; - - if (pc->frame_type == KEY_FRAME) - { - /* Various keyframe initializations */ - vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); - - vp8_init_mbmode_probs(pc); - - vp8_default_coef_probs(pc); - - /* reset the segment feature data to 0 with delta coding (Default state). */ - vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); - xd->mb_segement_abs_delta = SEGMENT_DELTADATA; - - /* reset the mode ref deltasa for loop filter */ - vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); - vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); - - /* All buffers are implicitly updated on key frames. 
*/ - pc->refresh_golden_frame = 1; - pc->refresh_alt_ref_frame = 1; - pc->copy_buffer_to_gf = 0; - pc->copy_buffer_to_arf = 0; - - /* Note that Golden and Altref modes cannot be used on a key frame so - * ref_frame_sign_bias[] is undefined and meaningless - */ - pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0; - pc->ref_frame_sign_bias[ALTREF_FRAME] = 0; - } - else - { - /* To enable choice of different interploation filters */ - if (!pc->use_bilinear_mc_filter) - { - xd->subpixel_predict = vp8_sixtap_predict4x4; - xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; - xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; - xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; - } - else - { - xd->subpixel_predict = vp8_bilinear_predict4x4; - xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; - xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; - xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; - } - - if (pbi->decoded_key_frame && pbi->ec_enabled && !pbi->ec_active) - pbi->ec_active = 1; - } - - xd->left_context = &pc->left_context; - xd->mode_info_context = pc->mi; - xd->frame_type = pc->frame_type; - xd->mode_info_context->mbmi.mode = DC_PRED; - xd->mode_info_stride = pc->mode_info_stride; - xd->corrupted = 0; /* init without corruption */ - - xd->fullpixel_mask = 0xffffffff; - if(pc->full_pixel) - xd->fullpixel_mask = 0xfffffff8; - -} - -int vp8_decode_frame(VP8D_COMP *pbi) -{ - vp8_reader *const bc = &pbi->mbc[8]; - VP8_COMMON *const pc = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - const unsigned char *data = pbi->fragments.ptrs[0]; - const unsigned char *data_end = data + pbi->fragments.sizes[0]; - ptrdiff_t first_partition_length_in_bytes; - - int i, j, k, l; - const int *const mb_feature_data_bits = vp8_mb_feature_data_bits; - int corrupt_tokens = 0; - int prev_independent_partitions = pbi->independent_partitions; - - YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - - /* start with no corruption of current frame */ - xd->corrupted = 0; - 
yv12_fb_new->corrupted = 0; - - if (data_end - data < 3) - { - if (!pbi->ec_active) - { - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet"); - } - - /* Declare the missing frame as an inter frame since it will - be handled as an inter frame when we have estimated its - motion vectors. */ - pc->frame_type = INTER_FRAME; - pc->version = 0; - pc->show_frame = 1; - first_partition_length_in_bytes = 0; - } - else - { - unsigned char clear_buffer[10]; - const unsigned char *clear = data; - if (pbi->decrypt_cb) - { - int n = (int)MIN(sizeof(clear_buffer), data_end - data); - pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n); - clear = clear_buffer; - } - - pc->frame_type = (FRAME_TYPE)(clear[0] & 1); - pc->version = (clear[0] >> 1) & 7; - pc->show_frame = (clear[0] >> 4) & 1; - first_partition_length_in_bytes = - (clear[0] | (clear[1] << 8) | (clear[2] << 16)) >> 5; - - if (!pbi->ec_active && - (data + first_partition_length_in_bytes > data_end - || data + first_partition_length_in_bytes < data)) - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition 0 length"); - - data += 3; - clear += 3; - - vp8_setup_version(pc); - - - if (pc->frame_type == KEY_FRAME) - { - /* vet via sync code */ - /* When error concealment is enabled we should only check the sync - * code if we have enough bits available - */ - if (!pbi->ec_active || data + 3 < data_end) - { - if (clear[0] != 0x9d || clear[1] != 0x01 || clear[2] != 0x2a) - vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame sync code"); - } - - /* If error concealment is enabled we should only parse the new size - * if we have enough data. Otherwise we will end up with the wrong - * size. 
- */ - if (!pbi->ec_active || data + 6 < data_end) - { - pc->Width = (clear[3] | (clear[4] << 8)) & 0x3fff; - pc->horiz_scale = clear[4] >> 6; - pc->Height = (clear[5] | (clear[6] << 8)) & 0x3fff; - pc->vert_scale = clear[6] >> 6; - } - data += 7; - clear += 7; - } - else - { - vpx_memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); - vpx_memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); - } - } - if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME)) - { - return -1; - } - - init_frame(pbi); - - if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data), - pbi->decrypt_cb, pbi->decrypt_state)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder 0"); - if (pc->frame_type == KEY_FRAME) { - (void)vp8_read_bit(bc); // colorspace - pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc); - } - - /* Is segmentation enabled */ - xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc); - - if (xd->segmentation_enabled) - { - /* Signal whether or not the segmentation map is being explicitly updated this frame. 
*/ - xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc); - xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc); - - if (xd->update_mb_segmentation_data) - { - xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc); - - vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); - - /* For each segmentation feature (Quant and loop filter level) */ - for (i = 0; i < MB_LVL_MAX; i++) - { - for (j = 0; j < MAX_MB_SEGMENTS; j++) - { - /* Frame level data */ - if (vp8_read_bit(bc)) - { - xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]); - - if (vp8_read_bit(bc)) - xd->segment_feature_data[i][j] = -xd->segment_feature_data[i][j]; - } - else - xd->segment_feature_data[i][j] = 0; - } - } - } - - if (xd->update_mb_segmentation_map) - { - /* Which macro block level features are enabled */ - vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); - - /* Read the probs used to decode the segment id for each macro block. */ - for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) - { - /* If not explicitly set value is defaulted to 255 by memset above */ - if (vp8_read_bit(bc)) - xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8); - } - } - } - else - { - /* No segmentation updates on this frame */ - xd->update_mb_segmentation_map = 0; - xd->update_mb_segmentation_data = 0; - } - - /* Read the loop filter level and type */ - pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc); - pc->filter_level = vp8_read_literal(bc, 6); - pc->sharpness_level = vp8_read_literal(bc, 3); - - /* Read in loop filter deltas applied at the MB level based on mode or ref frame. 
*/ - xd->mode_ref_lf_delta_update = 0; - xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc); - - if (xd->mode_ref_lf_delta_enabled) - { - /* Do the deltas need to be updated */ - xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc); - - if (xd->mode_ref_lf_delta_update) - { - /* Send update */ - for (i = 0; i < MAX_REF_LF_DELTAS; i++) - { - if (vp8_read_bit(bc)) - { - /*sign = vp8_read_bit( bc );*/ - xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); - - if (vp8_read_bit(bc)) /* Apply sign */ - xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1; - } - } - - /* Send update */ - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) - { - if (vp8_read_bit(bc)) - { - /*sign = vp8_read_bit( bc );*/ - xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); - - if (vp8_read_bit(bc)) /* Apply sign */ - xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1; - } - } - } - } - - setup_token_decoder(pbi, data + first_partition_length_in_bytes); - - xd->current_bc = &pbi->mbc[0]; - - /* Read the default quantizers. */ - { - int Q, q_update; - - Q = vp8_read_literal(bc, 7); /* AC 1st order Q = default */ - pc->base_qindex = Q; - q_update = 0; - pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update); - pc->y2dc_delta_q = get_delta_q(bc, pc->y2dc_delta_q, &q_update); - pc->y2ac_delta_q = get_delta_q(bc, pc->y2ac_delta_q, &q_update); - pc->uvdc_delta_q = get_delta_q(bc, pc->uvdc_delta_q, &q_update); - pc->uvac_delta_q = get_delta_q(bc, pc->uvac_delta_q, &q_update); - - if (q_update) - vp8cx_init_de_quantizer(pbi); - - /* MB level dequantizer setup */ - vp8_mb_init_dequantizer(pbi, &pbi->mb); - } - - /* Determine if the golden frame or ARF buffer should be updated and how. - * For all non key frames the GF and ARF refresh flags and sign bias - * flags must be set explicitly. 
- */ - if (pc->frame_type != KEY_FRAME) - { - /* Should the GF or ARF be updated from the current frame */ - pc->refresh_golden_frame = vp8_read_bit(bc); -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't refresh golden if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_golden_frame = 0; -#endif - - pc->refresh_alt_ref_frame = vp8_read_bit(bc); -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't refresh altref if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_alt_ref_frame = 0; -#endif - - /* Buffer to buffer copy flags. */ - pc->copy_buffer_to_gf = 0; - - if (!pc->refresh_golden_frame) - pc->copy_buffer_to_gf = vp8_read_literal(bc, 2); - -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't copy to the golden if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->copy_buffer_to_gf = 0; -#endif - - pc->copy_buffer_to_arf = 0; - - if (!pc->refresh_alt_ref_frame) - pc->copy_buffer_to_arf = vp8_read_literal(bc, 2); - -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't copy to the alt-ref if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->copy_buffer_to_arf = 0; -#endif - - - pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp8_read_bit(bc); - pc->ref_frame_sign_bias[ALTREF_FRAME] = vp8_read_bit(bc); - } - - pc->refresh_entropy_probs = vp8_read_bit(bc); -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't refresh the probabilities if the bit is - * missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_entropy_probs = 0; -#endif - if (pc->refresh_entropy_probs == 0) - { - vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc)); - } - - pc->refresh_last_frame = pc->frame_type == KEY_FRAME || vp8_read_bit(bc); - -#if CONFIG_ERROR_CONCEALMENT - /* Assume we should refresh the last 
frame if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_last_frame = 1; -#endif - - if (0) - { - FILE *z = fopen("decodestats.stt", "a"); - fprintf(z, "%6d F:%d,G:%d,A:%d,L:%d,Q:%d\n", - pc->current_video_frame, - pc->frame_type, - pc->refresh_golden_frame, - pc->refresh_alt_ref_frame, - pc->refresh_last_frame, - pc->base_qindex); - fclose(z); - } - - { - pbi->independent_partitions = 1; - - /* read coef probability tree */ - for (i = 0; i < BLOCK_TYPES; i++) - for (j = 0; j < COEF_BANDS; j++) - for (k = 0; k < PREV_COEF_CONTEXTS; k++) - for (l = 0; l < ENTROPY_NODES; l++) - { - - vp8_prob *const p = pc->fc.coef_probs [i][j][k] + l; - - if (vp8_read(bc, vp8_coef_update_probs [i][j][k][l])) - { - *p = (vp8_prob)vp8_read_literal(bc, 8); - - } - if (k > 0 && *p != pc->fc.coef_probs[i][j][k-1][l]) - pbi->independent_partitions = 0; - - } - } - - /* clear out the coeff buffer */ - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - - vp8_decode_mode_mvs(pbi); - -#if CONFIG_ERROR_CONCEALMENT - if (pbi->ec_active && - pbi->mvs_corrupt_from_mb < (unsigned int)pc->mb_cols * pc->mb_rows) - { - /* Motion vectors are missing in this frame. We will try to estimate - * them and then continue decoding the frame as usual */ - vp8_estimate_missing_mvs(pbi); - } -#endif - - vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); - pbi->frame_corrupt_residual = 0; - -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) - { - unsigned int thread; - vp8mt_decode_mb_rows(pbi, xd); - vp8_yv12_extend_frame_borders(yv12_fb_new); - for (thread = 0; thread < pbi->decoding_thread_count; ++thread) - corrupt_tokens |= pbi->mb_row_di[thread].mbd.corrupted; - } - else -#endif - { - decode_mb_rows(pbi); - corrupt_tokens |= xd->corrupted; - } - - /* Collect information about decoder corruption. */ - /* 1. Check first boolean decoder for errors. 
*/ - yv12_fb_new->corrupted = vp8dx_bool_error(bc); - /* 2. Check the macroblock information */ - yv12_fb_new->corrupted |= corrupt_tokens; - - if (!pbi->decoded_key_frame) - { - if (pc->frame_type == KEY_FRAME && - !yv12_fb_new->corrupted) - pbi->decoded_key_frame = 1; - else - vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, - "A stream must start with a complete key frame"); - } - - /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */ - - if (pc->refresh_entropy_probs == 0) - { - vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); - pbi->independent_partitions = prev_independent_partitions; - } - -#ifdef PACKET_TESTING - { - FILE *f = fopen("decompressor.VP8", "ab"); - unsigned int size = pbi->bc2.pos + pbi->bc.pos + 8; - fwrite((void *) &size, 4, 1, f); - fwrite((void *) pbi->Source, size, 1, f); - fclose(f); - } -#endif - - return 0; -} diff --git a/media/libvpx/vp8/encoder/arm/neon/denoising_neon.c b/media/libvpx/vp8/encoder/arm/neon/denoising_neon.c deleted file mode 100644 index 08be76e4338..00000000000 --- a/media/libvpx/vp8/encoder/arm/neon/denoising_neon.c +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp8/encoder/denoising.h" -#include "vpx_mem/vpx_mem.h" -#include "./vp8_rtcd.h" - -/* - * The filter function was modified to reduce the computational complexity. - * - * Step 1: - * Instead of applying tap coefficients for each pixel, we calculated the - * pixel adjustments vs. pixel diff value ahead of time. 
- * adjustment = filtered_value - current_raw - * = (filter_coefficient * diff + 128) >> 8 - * where - * filter_coefficient = (255 << 8) / (256 + ((abs_diff * 330) >> 3)); - * filter_coefficient += filter_coefficient / - * (3 + motion_magnitude_adjustment); - * filter_coefficient is clamped to 0 ~ 255. - * - * Step 2: - * The adjustment vs. diff curve becomes flat very quick when diff increases. - * This allowed us to use only several levels to approximate the curve without - * changing the filtering algorithm too much. - * The adjustments were further corrected by checking the motion magnitude. - * The levels used are: - * diff level adjustment w/o adjustment w/ - * motion correction motion correction - * [-255, -16] 3 -6 -7 - * [-15, -8] 2 -4 -5 - * [-7, -4] 1 -3 -4 - * [-3, 3] 0 diff diff - * [4, 7] 1 3 4 - * [8, 15] 2 4 5 - * [16, 255] 3 6 7 - */ - -int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, - int mc_running_avg_y_stride, - unsigned char *running_avg_y, - int running_avg_y_stride, - unsigned char *sig, int sig_stride, - unsigned int motion_magnitude, - int increase_denoising) { - /* If motion_magnitude is small, making the denoiser more aggressive by - * increasing the adjustment for each level, level1 adjustment is - * increased, the deltas stay the same. - */ - int shift_inc = (increase_denoising && - motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0; - const uint8x16_t v_level1_adjustment = vmovq_n_u8( - (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 + shift_inc : 3); - const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); - const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); - const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc); - const uint8x16_t v_level2_threshold = vdupq_n_u8(8); - const uint8x16_t v_level3_threshold = vdupq_n_u8(16); - int64x2_t v_sum_diff_total = vdupq_n_s64(0); - - /* Go over lines. */ - int r; - for (r = 0; r < 16; ++r) { - /* Load inputs. 
*/ - const uint8x16_t v_sig = vld1q_u8(sig); - const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); - - /* Calculate absolute difference and sign masks. */ - const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); - const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); - const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); - - /* Figure out which level that put us in. */ - const uint8x16_t v_level1_mask = vcleq_u8(v_level1_threshold, - v_abs_diff); - const uint8x16_t v_level2_mask = vcleq_u8(v_level2_threshold, - v_abs_diff); - const uint8x16_t v_level3_mask = vcleq_u8(v_level3_threshold, - v_abs_diff); - - /* Calculate absolute adjustments for level 1, 2 and 3. */ - const uint8x16_t v_level2_adjustment = vandq_u8(v_level2_mask, - v_delta_level_1_and_2); - const uint8x16_t v_level3_adjustment = vandq_u8(v_level3_mask, - v_delta_level_2_and_3); - const uint8x16_t v_level1and2_adjustment = vaddq_u8(v_level1_adjustment, - v_level2_adjustment); - const uint8x16_t v_level1and2and3_adjustment = vaddq_u8( - v_level1and2_adjustment, v_level3_adjustment); - - /* Figure adjustment absolute value by selecting between the absolute - * difference if in level0 or the value for level 1, 2 and 3. - */ - const uint8x16_t v_abs_adjustment = vbslq_u8(v_level1_mask, - v_level1and2and3_adjustment, v_abs_diff); - - /* Calculate positive and negative adjustments. Apply them to the signal - * and accumulate them. Adjustments are less than eight and the maximum - * sum of them (7 * 16) can fit in a signed char. - */ - const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, - v_abs_adjustment); - const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, - v_abs_adjustment); - - uint8x16_t v_running_avg_y = vqaddq_u8(v_sig, v_pos_adjustment); - v_running_avg_y = vqsubq_u8(v_running_avg_y, v_neg_adjustment); - - /* Store results. 
*/ - vst1q_u8(running_avg_y, v_running_avg_y); - - /* Sum all the accumulators to have the sum of all pixel differences - * for this macroblock. - */ - { - const int8x16_t v_sum_diff = - vqsubq_s8(vreinterpretq_s8_u8(v_pos_adjustment), - vreinterpretq_s8_u8(v_neg_adjustment)); - - const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); - - const int32x4_t fedc_ba98_7654_3210 = - vpaddlq_s16(fe_dc_ba_98_76_54_32_10); - - const int64x2_t fedcba98_76543210 = - vpaddlq_s32(fedc_ba98_7654_3210); - - v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); - } - - /* Update pointers for next iteration. */ - sig += sig_stride; - mc_running_avg_y += mc_running_avg_y_stride; - running_avg_y += running_avg_y_stride; - } - - /* Too much adjustments => copy block. */ - { - int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), - vget_low_s64(v_sum_diff_total)); - int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); - int sum_diff_thresh = SUM_DIFF_THRESHOLD; - - if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH; - if (sum_diff > sum_diff_thresh) { - // Before returning to copy the block (i.e., apply no denoising), - // checK if we can still apply some (weaker) temporal filtering to - // this block, that would otherwise not be denoised at all. Simplest - // is to apply an additional adjustment to running_avg_y to bring it - // closer to sig. The adjustment is capped by a maximum delta, and - // chosen such that in most cases the resulting sum_diff will be - // within the accceptable range given by sum_diff_thresh. - - // The delta is set by the excess of absolute pixel diff over the - // threshold. - int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1; - // Only apply the adjustment for max delta up to 3. 
- if (delta < 4) { - const uint8x16_t k_delta = vmovq_n_u8(delta); - sig -= sig_stride * 16; - mc_running_avg_y -= mc_running_avg_y_stride * 16; - running_avg_y -= running_avg_y_stride * 16; - for (r = 0; r < 16; ++r) { - uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y); - const uint8x16_t v_sig = vld1q_u8(sig); - const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); - - /* Calculate absolute difference and sign masks. */ - const uint8x16_t v_abs_diff = vabdq_u8(v_sig, - v_mc_running_avg_y); - const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, - v_mc_running_avg_y); - const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, - v_mc_running_avg_y); - // Clamp absolute difference to delta to get the adjustment. - const uint8x16_t v_abs_adjustment = - vminq_u8(v_abs_diff, (k_delta)); - - const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, - v_abs_adjustment); - const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, - v_abs_adjustment); - - v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment); - v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment); - - /* Store results. */ - vst1q_u8(running_avg_y, v_running_avg_y); - - { - const int8x16_t v_sum_diff = - vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), - vreinterpretq_s8_u8(v_pos_adjustment)); - - const int16x8_t fe_dc_ba_98_76_54_32_10 = - vpaddlq_s8(v_sum_diff); - const int32x4_t fedc_ba98_7654_3210 = - vpaddlq_s16(fe_dc_ba_98_76_54_32_10); - const int64x2_t fedcba98_76543210 = - vpaddlq_s32(fedc_ba98_7654_3210); - - v_sum_diff_total = vqaddq_s64(v_sum_diff_total, - fedcba98_76543210); - } - /* Update pointers for next iteration. */ - sig += sig_stride; - mc_running_avg_y += mc_running_avg_y_stride; - running_avg_y += running_avg_y_stride; - } - { - // Update the sum of all pixel differences of this MB. 
- x = vqadd_s64(vget_high_s64(v_sum_diff_total), - vget_low_s64(v_sum_diff_total)); - sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); - - if (sum_diff > sum_diff_thresh) { - return COPY_BLOCK; - } - } - } else { - return COPY_BLOCK; - } - } - } - - /* Tell above level that block was filtered. */ - running_avg_y -= running_avg_y_stride * 16; - sig -= sig_stride * 16; - - vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride); - - return FILTER_BLOCK; -} - -int vp8_denoiser_filter_uv_neon(unsigned char *mc_running_avg, - int mc_running_avg_stride, - unsigned char *running_avg, - int running_avg_stride, - unsigned char *sig, int sig_stride, - unsigned int motion_magnitude, - int increase_denoising) { - /* If motion_magnitude is small, making the denoiser more aggressive by - * increasing the adjustment for each level, level1 adjustment is - * increased, the deltas stay the same. - */ - int shift_inc = (increase_denoising && - motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 1 : 0; - const uint8x16_t v_level1_adjustment = vmovq_n_u8( - (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 4 + shift_inc : 3); - - const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); - const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); - const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc); - const uint8x16_t v_level2_threshold = vdupq_n_u8(8); - const uint8x16_t v_level3_threshold = vdupq_n_u8(16); - int64x2_t v_sum_diff_total = vdupq_n_s64(0); - int r; - - { - uint16x4_t v_sum_block = vdup_n_u16(0); - - // Avoid denoising color signal if its close to average level. 
- for (r = 0; r < 8; ++r) { - const uint8x8_t v_sig = vld1_u8(sig); - const uint16x4_t _76_54_32_10 = vpaddl_u8(v_sig); - v_sum_block = vqadd_u16(v_sum_block, _76_54_32_10); - sig += sig_stride; - } - sig -= sig_stride * 8; - { - const uint32x2_t _7654_3210 = vpaddl_u16(v_sum_block); - const uint64x1_t _76543210 = vpaddl_u32(_7654_3210); - const int sum_block = - vget_lane_s32(vreinterpret_s32_u64(_76543210), 0); - if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) { - return COPY_BLOCK; - } - } - } - - /* Go over lines. */ - for (r = 0; r < 4; ++r) { - /* Load inputs. */ - const uint8x8_t v_sig_lo = vld1_u8(sig); - const uint8x8_t v_sig_hi = vld1_u8(&sig[sig_stride]); - const uint8x16_t v_sig = vcombine_u8(v_sig_lo, v_sig_hi); - const uint8x8_t v_mc_running_avg_lo = vld1_u8(mc_running_avg); - const uint8x8_t v_mc_running_avg_hi = - vld1_u8(&mc_running_avg[mc_running_avg_stride]); - const uint8x16_t v_mc_running_avg = - vcombine_u8(v_mc_running_avg_lo, v_mc_running_avg_hi); - /* Calculate absolute difference and sign masks. */ - const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg); - const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg); - const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg); - - /* Figure out which level that put us in. */ - const uint8x16_t v_level1_mask = vcleq_u8(v_level1_threshold, - v_abs_diff); - const uint8x16_t v_level2_mask = vcleq_u8(v_level2_threshold, - v_abs_diff); - const uint8x16_t v_level3_mask = vcleq_u8(v_level3_threshold, - v_abs_diff); - - /* Calculate absolute adjustments for level 1, 2 and 3. 
*/ - const uint8x16_t v_level2_adjustment = vandq_u8(v_level2_mask, - v_delta_level_1_and_2); - const uint8x16_t v_level3_adjustment = vandq_u8(v_level3_mask, - v_delta_level_2_and_3); - const uint8x16_t v_level1and2_adjustment = vaddq_u8(v_level1_adjustment, - v_level2_adjustment); - const uint8x16_t v_level1and2and3_adjustment = vaddq_u8( - v_level1and2_adjustment, v_level3_adjustment); - - /* Figure adjustment absolute value by selecting between the absolute - * difference if in level0 or the value for level 1, 2 and 3. - */ - const uint8x16_t v_abs_adjustment = vbslq_u8(v_level1_mask, - v_level1and2and3_adjustment, v_abs_diff); - - /* Calculate positive and negative adjustments. Apply them to the signal - * and accumulate them. Adjustments are less than eight and the maximum - * sum of them (7 * 16) can fit in a signed char. - */ - const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, - v_abs_adjustment); - const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, - v_abs_adjustment); - - uint8x16_t v_running_avg = vqaddq_u8(v_sig, v_pos_adjustment); - v_running_avg = vqsubq_u8(v_running_avg, v_neg_adjustment); - - /* Store results. */ - vst1_u8(running_avg, vget_low_u8(v_running_avg)); - vst1_u8(&running_avg[running_avg_stride], vget_high_u8(v_running_avg)); - - /* Sum all the accumulators to have the sum of all pixel differences - * for this macroblock. - */ - { - const int8x16_t v_sum_diff = - vqsubq_s8(vreinterpretq_s8_u8(v_pos_adjustment), - vreinterpretq_s8_u8(v_neg_adjustment)); - - const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); - - const int32x4_t fedc_ba98_7654_3210 = - vpaddlq_s16(fe_dc_ba_98_76_54_32_10); - - const int64x2_t fedcba98_76543210 = - vpaddlq_s32(fedc_ba98_7654_3210); - - v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); - } - - /* Update pointers for next iteration. 
*/ - sig += sig_stride * 2; - mc_running_avg += mc_running_avg_stride * 2; - running_avg += running_avg_stride * 2; - } - - - /* Too much adjustments => copy block. */ - { - int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), - vget_low_s64(v_sum_diff_total)); - int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); - int sum_diff_thresh = SUM_DIFF_THRESHOLD_UV; - if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV; - if (sum_diff > sum_diff_thresh) { - // Before returning to copy the block (i.e., apply no denoising), - // checK if we can still apply some (weaker) temporal filtering to - // this block, that would otherwise not be denoised at all. Simplest - // is to apply an additional adjustment to running_avg_y to bring it - // closer to sig. The adjustment is capped by a maximum delta, and - // chosen such that in most cases the resulting sum_diff will be - // within the accceptable range given by sum_diff_thresh. - - // The delta is set by the excess of absolute pixel diff over the - // threshold. - int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1; - // Only apply the adjustment for max delta up to 3. - if (delta < 4) { - const uint8x16_t k_delta = vmovq_n_u8(delta); - sig -= sig_stride * 8; - mc_running_avg -= mc_running_avg_stride * 8; - running_avg -= running_avg_stride * 8; - for (r = 0; r < 4; ++r) { - const uint8x8_t v_sig_lo = vld1_u8(sig); - const uint8x8_t v_sig_hi = vld1_u8(&sig[sig_stride]); - const uint8x16_t v_sig = vcombine_u8(v_sig_lo, v_sig_hi); - const uint8x8_t v_mc_running_avg_lo = vld1_u8(mc_running_avg); - const uint8x8_t v_mc_running_avg_hi = - vld1_u8(&mc_running_avg[mc_running_avg_stride]); - const uint8x16_t v_mc_running_avg = - vcombine_u8(v_mc_running_avg_lo, v_mc_running_avg_hi); - /* Calculate absolute difference and sign masks. 
*/ - const uint8x16_t v_abs_diff = vabdq_u8(v_sig, - v_mc_running_avg); - const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, - v_mc_running_avg); - const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, - v_mc_running_avg); - // Clamp absolute difference to delta to get the adjustment. - const uint8x16_t v_abs_adjustment = - vminq_u8(v_abs_diff, (k_delta)); - - const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, - v_abs_adjustment); - const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, - v_abs_adjustment); - const uint8x8_t v_running_avg_lo = vld1_u8(running_avg); - const uint8x8_t v_running_avg_hi = - vld1_u8(&running_avg[running_avg_stride]); - uint8x16_t v_running_avg = - vcombine_u8(v_running_avg_lo, v_running_avg_hi); - - v_running_avg = vqsubq_u8(v_running_avg, v_pos_adjustment); - v_running_avg = vqaddq_u8(v_running_avg, v_neg_adjustment); - - /* Store results. */ - vst1_u8(running_avg, vget_low_u8(v_running_avg)); - vst1_u8(&running_avg[running_avg_stride], - vget_high_u8(v_running_avg)); - - { - const int8x16_t v_sum_diff = - vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), - vreinterpretq_s8_u8(v_pos_adjustment)); - - const int16x8_t fe_dc_ba_98_76_54_32_10 = - vpaddlq_s8(v_sum_diff); - const int32x4_t fedc_ba98_7654_3210 = - vpaddlq_s16(fe_dc_ba_98_76_54_32_10); - const int64x2_t fedcba98_76543210 = - vpaddlq_s32(fedc_ba98_7654_3210); - - v_sum_diff_total = vqaddq_s64(v_sum_diff_total, - fedcba98_76543210); - } - /* Update pointers for next iteration. */ - sig += sig_stride * 2; - mc_running_avg += mc_running_avg_stride * 2; - running_avg += running_avg_stride * 2; - } - { - // Update the sum of all pixel differences of this MB. - x = vqadd_s64(vget_high_s64(v_sum_diff_total), - vget_low_s64(v_sum_diff_total)); - sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); - - if (sum_diff > sum_diff_thresh) { - return COPY_BLOCK; - } - } - } else { - return COPY_BLOCK; - } - } - } - - /* Tell above level that block was filtered. 
*/ - running_avg -= running_avg_stride * 8; - sig -= sig_stride * 8; - - vp8_copy_mem8x8(running_avg, running_avg_stride, sig, sig_stride); - - return FILTER_BLOCK; -} diff --git a/media/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c b/media/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c deleted file mode 100644 index 391e5f99070..00000000000 --- a/media/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -void vp8_short_fdct4x4_neon( - int16_t *input, - int16_t *output, - int pitch) { - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int16x4_t d16s16, d17s16, d26s16, dEmptys16; - uint16x4_t d4u16; - int16x8_t q0s16, q1s16; - int32x4_t q9s32, q10s32, q11s32, q12s32; - int16x4x2_t v2tmp0, v2tmp1; - int32x2x2_t v2tmp2, v2tmp3; - - d16s16 = vdup_n_s16(5352); - d17s16 = vdup_n_s16(2217); - q9s32 = vdupq_n_s32(14500); - q10s32 = vdupq_n_s32(7500); - q11s32 = vdupq_n_s32(12000); - q12s32 = vdupq_n_s32(51000); - - // Part one - pitch >>= 1; - d0s16 = vld1_s16(input); - input += pitch; - d1s16 = vld1_s16(input); - input += pitch; - d2s16 = vld1_s16(input); - input += pitch; - d3s16 = vld1_s16(input); - - v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d0s16), - vreinterpret_s32_s16(d2s16)); - v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d1s16), - vreinterpret_s32_s16(d3s16)); - v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), // d0 - vreinterpret_s16_s32(v2tmp3.val[0])); // d1 - v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), // d2 - vreinterpret_s16_s32(v2tmp3.val[1])); // d3 - - d4s16 = vadd_s16(v2tmp0.val[0], 
v2tmp1.val[1]); - d5s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]); - d6s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]); - d7s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]); - - d4s16 = vshl_n_s16(d4s16, 3); - d5s16 = vshl_n_s16(d5s16, 3); - d6s16 = vshl_n_s16(d6s16, 3); - d7s16 = vshl_n_s16(d7s16, 3); - - d0s16 = vadd_s16(d4s16, d5s16); - d2s16 = vsub_s16(d4s16, d5s16); - - q9s32 = vmlal_s16(q9s32, d7s16, d16s16); - q10s32 = vmlal_s16(q10s32, d7s16, d17s16); - q9s32 = vmlal_s16(q9s32, d6s16, d17s16); - q10s32 = vmlsl_s16(q10s32, d6s16, d16s16); - - d1s16 = vshrn_n_s32(q9s32, 12); - d3s16 = vshrn_n_s32(q10s32, 12); - - // Part two - v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d0s16), - vreinterpret_s32_s16(d2s16)); - v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d1s16), - vreinterpret_s32_s16(d3s16)); - v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), // d0 - vreinterpret_s16_s32(v2tmp3.val[0])); // d1 - v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), // d2 - vreinterpret_s16_s32(v2tmp3.val[1])); // d3 - - d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]); - d5s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]); - d6s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]); - d7s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]); - - d26s16 = vdup_n_s16(7); - d4s16 = vadd_s16(d4s16, d26s16); - - d0s16 = vadd_s16(d4s16, d5s16); - d2s16 = vsub_s16(d4s16, d5s16); - - q11s32 = vmlal_s16(q11s32, d7s16, d16s16); - q12s32 = vmlal_s16(q12s32, d7s16, d17s16); - - dEmptys16 = vdup_n_s16(0); - d4u16 = vceq_s16(d7s16, dEmptys16); - - d0s16 = vshr_n_s16(d0s16, 4); - d2s16 = vshr_n_s16(d2s16, 4); - - q11s32 = vmlal_s16(q11s32, d6s16, d17s16); - q12s32 = vmlsl_s16(q12s32, d6s16, d16s16); - - d4u16 = vmvn_u16(d4u16); - d1s16 = vshrn_n_s32(q11s32, 16); - d1s16 = vsub_s16(d1s16, vreinterpret_s16_u16(d4u16)); - d3s16 = vshrn_n_s32(q12s32, 16); - - q0s16 = vcombine_s16(d0s16, d1s16); - q1s16 = vcombine_s16(d2s16, d3s16); - - vst1q_s16(output, q0s16); - vst1q_s16(output + 8, q1s16); - return; -} - -void 
vp8_short_fdct8x4_neon( - int16_t *input, - int16_t *output, - int pitch) { - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int16x4_t d16s16, d17s16, d26s16, d27s16, d28s16, d29s16; - uint16x4_t d28u16, d29u16; - uint16x8_t q14u16; - int16x8_t q0s16, q1s16, q2s16, q3s16; - int16x8_t q11s16, q12s16, q13s16, q14s16, q15s16, qEmptys16; - int32x4_t q9s32, q10s32, q11s32, q12s32; - int16x8x2_t v2tmp0, v2tmp1; - int32x4x2_t v2tmp2, v2tmp3; - - d16s16 = vdup_n_s16(5352); - d17s16 = vdup_n_s16(2217); - q9s32 = vdupq_n_s32(14500); - q10s32 = vdupq_n_s32(7500); - - // Part one - pitch >>= 1; - q0s16 = vld1q_s16(input); - input += pitch; - q1s16 = vld1q_s16(input); - input += pitch; - q2s16 = vld1q_s16(input); - input += pitch; - q3s16 = vld1q_s16(input); - - v2tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q0s16), - vreinterpretq_s32_s16(q2s16)); - v2tmp3 = vtrnq_s32(vreinterpretq_s32_s16(q1s16), - vreinterpretq_s32_s16(q3s16)); - v2tmp0 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[0]), // q0 - vreinterpretq_s16_s32(v2tmp3.val[0])); // q1 - v2tmp1 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[1]), // q2 - vreinterpretq_s16_s32(v2tmp3.val[1])); // q3 - - q11s16 = vaddq_s16(v2tmp0.val[0], v2tmp1.val[1]); - q12s16 = vaddq_s16(v2tmp0.val[1], v2tmp1.val[0]); - q13s16 = vsubq_s16(v2tmp0.val[1], v2tmp1.val[0]); - q14s16 = vsubq_s16(v2tmp0.val[0], v2tmp1.val[1]); - - q11s16 = vshlq_n_s16(q11s16, 3); - q12s16 = vshlq_n_s16(q12s16, 3); - q13s16 = vshlq_n_s16(q13s16, 3); - q14s16 = vshlq_n_s16(q14s16, 3); - - q0s16 = vaddq_s16(q11s16, q12s16); - q2s16 = vsubq_s16(q11s16, q12s16); - - q11s32 = q9s32; - q12s32 = q10s32; - - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - - q9s32 = vmlal_s16(q9s32, d28s16, d16s16); - q10s32 = vmlal_s16(q10s32, d28s16, d17s16); - q11s32 = vmlal_s16(q11s32, d29s16, d16s16); - q12s32 = vmlal_s16(q12s32, d29s16, d17s16); - - q9s32 = vmlal_s16(q9s32, d26s16, 
d17s16); - q10s32 = vmlsl_s16(q10s32, d26s16, d16s16); - q11s32 = vmlal_s16(q11s32, d27s16, d17s16); - q12s32 = vmlsl_s16(q12s32, d27s16, d16s16); - - d2s16 = vshrn_n_s32(q9s32, 12); - d6s16 = vshrn_n_s32(q10s32, 12); - d3s16 = vshrn_n_s32(q11s32, 12); - d7s16 = vshrn_n_s32(q12s32, 12); - q1s16 = vcombine_s16(d2s16, d3s16); - q3s16 = vcombine_s16(d6s16, d7s16); - - // Part two - q9s32 = vdupq_n_s32(12000); - q10s32 = vdupq_n_s32(51000); - - v2tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q0s16), - vreinterpretq_s32_s16(q2s16)); - v2tmp3 = vtrnq_s32(vreinterpretq_s32_s16(q1s16), - vreinterpretq_s32_s16(q3s16)); - v2tmp0 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[0]), // q0 - vreinterpretq_s16_s32(v2tmp3.val[0])); // q1 - v2tmp1 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[1]), // q2 - vreinterpretq_s16_s32(v2tmp3.val[1])); // q3 - - q11s16 = vaddq_s16(v2tmp0.val[0], v2tmp1.val[1]); - q12s16 = vaddq_s16(v2tmp0.val[1], v2tmp1.val[0]); - q13s16 = vsubq_s16(v2tmp0.val[1], v2tmp1.val[0]); - q14s16 = vsubq_s16(v2tmp0.val[0], v2tmp1.val[1]); - - q15s16 = vdupq_n_s16(7); - q11s16 = vaddq_s16(q11s16, q15s16); - q0s16 = vaddq_s16(q11s16, q12s16); - q1s16 = vsubq_s16(q11s16, q12s16); - - q11s32 = q9s32; - q12s32 = q10s32; - - d0s16 = vget_low_s16(q0s16); - d1s16 = vget_high_s16(q0s16); - d2s16 = vget_low_s16(q1s16); - d3s16 = vget_high_s16(q1s16); - - d0s16 = vshr_n_s16(d0s16, 4); - d4s16 = vshr_n_s16(d1s16, 4); - d2s16 = vshr_n_s16(d2s16, 4); - d6s16 = vshr_n_s16(d3s16, 4); - - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - - q9s32 = vmlal_s16(q9s32, d28s16, d16s16); - q10s32 = vmlal_s16(q10s32, d28s16, d17s16); - q11s32 = vmlal_s16(q11s32, d29s16, d16s16); - q12s32 = vmlal_s16(q12s32, d29s16, d17s16); - - q9s32 = vmlal_s16(q9s32, d26s16, d17s16); - q10s32 = vmlsl_s16(q10s32, d26s16, d16s16); - q11s32 = vmlal_s16(q11s32, d27s16, d17s16); - q12s32 = vmlsl_s16(q12s32, d27s16, d16s16); - - d1s16 = 
vshrn_n_s32(q9s32, 16); - d3s16 = vshrn_n_s32(q10s32, 16); - d5s16 = vshrn_n_s32(q11s32, 16); - d7s16 = vshrn_n_s32(q12s32, 16); - - qEmptys16 = vdupq_n_s16(0); - q14u16 = vceqq_s16(q14s16, qEmptys16); - q14u16 = vmvnq_u16(q14u16); - - d28u16 = vget_low_u16(q14u16); - d29u16 = vget_high_u16(q14u16); - d1s16 = vsub_s16(d1s16, vreinterpret_s16_u16(d28u16)); - d5s16 = vsub_s16(d5s16, vreinterpret_s16_u16(d29u16)); - - q0s16 = vcombine_s16(d0s16, d1s16); - q1s16 = vcombine_s16(d2s16, d3s16); - q2s16 = vcombine_s16(d4s16, d5s16); - q3s16 = vcombine_s16(d6s16, d7s16); - - vst1q_s16(output, q0s16); - vst1q_s16(output + 8, q1s16); - vst1q_s16(output + 16, q2s16); - vst1q_s16(output + 24, q3s16); - return; -} diff --git a/media/libvpx/vp8/encoder/arm/neon/subtract_neon.c b/media/libvpx/vp8/encoder/arm/neon/subtract_neon.c deleted file mode 100644 index d3ab7b16553..00000000000 --- a/media/libvpx/vp8/encoder/arm/neon/subtract_neon.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include "vp8/encoder/block.h" - -void vp8_subtract_b_neon( - BLOCK *be, - BLOCKD *bd, - int pitch) { - unsigned char *src_ptr, *predictor; - int src_stride; - int16_t *src_diff; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - uint16x8_t q10u16, q11u16, q12u16, q13u16; - - src_ptr = *be->base_src + be->src; - src_stride = be->src_stride; - predictor = bd->predictor; - - d0u8 = vld1_u8(src_ptr); - src_ptr += src_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += src_stride; - d4u8 = vld1_u8(src_ptr); - src_ptr += src_stride; - d6u8 = vld1_u8(src_ptr); - - d1u8 = vld1_u8(predictor); - predictor += pitch; - d3u8 = vld1_u8(predictor); - predictor += pitch; - d5u8 = vld1_u8(predictor); - predictor += pitch; - d7u8 = vld1_u8(predictor); - - q10u16 = vsubl_u8(d0u8, d1u8); - q11u16 = vsubl_u8(d2u8, d3u8); - q12u16 = vsubl_u8(d4u8, d5u8); - q13u16 = vsubl_u8(d6u8, d7u8); - - src_diff = be->src_diff; - vst1_u16((uint16_t *)src_diff, vget_low_u16(q10u16)); - src_diff += pitch; - vst1_u16((uint16_t *)src_diff, vget_low_u16(q11u16)); - src_diff += pitch; - vst1_u16((uint16_t *)src_diff, vget_low_u16(q12u16)); - src_diff += pitch; - vst1_u16((uint16_t *)src_diff, vget_low_u16(q13u16)); - return; -} - -void vp8_subtract_mby_neon( - int16_t *diff, - unsigned char *src, - int src_stride, - unsigned char *pred, - int pred_stride) { - int i; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - uint16x8_t q8u16, q9u16, q10u16, q11u16; - - for (i = 0; i < 8; i++) { // subtract_mby_loop - q0u8 = vld1q_u8(src); - src += src_stride; - q2u8 = vld1q_u8(src); - src += src_stride; - q1u8 = vld1q_u8(pred); - pred += pred_stride; - q3u8 = vld1q_u8(pred); - pred += pred_stride; - - q8u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q1u8)); - q9u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q1u8)); - q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q3u8)); - q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q3u8)); - - vst1q_u16((uint16_t *)diff, q8u16); - diff += 8; - 
vst1q_u16((uint16_t *)diff, q9u16); - diff += 8; - vst1q_u16((uint16_t *)diff, q10u16); - diff += 8; - vst1q_u16((uint16_t *)diff, q11u16); - diff += 8; - } - return; -} - -void vp8_subtract_mbuv_neon( - int16_t *diff, - unsigned char *usrc, - unsigned char *vsrc, - int src_stride, - unsigned char *upred, - unsigned char *vpred, - int pred_stride) { - int i, j; - unsigned char *src_ptr, *pred_ptr; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - uint16x8_t q8u16, q9u16, q10u16, q11u16; - - diff += 256; - for (i = 0; i < 2; i++) { - if (i == 0) { - src_ptr = usrc; - pred_ptr = upred; - } else if (i == 1) { - src_ptr = vsrc; - pred_ptr = vpred; - } - - for (j = 0; j < 2; j++) { - d0u8 = vld1_u8(src_ptr); - src_ptr += src_stride; - d1u8 = vld1_u8(pred_ptr); - pred_ptr += pred_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += src_stride; - d3u8 = vld1_u8(pred_ptr); - pred_ptr += pred_stride; - d4u8 = vld1_u8(src_ptr); - src_ptr += src_stride; - d5u8 = vld1_u8(pred_ptr); - pred_ptr += pred_stride; - d6u8 = vld1_u8(src_ptr); - src_ptr += src_stride; - d7u8 = vld1_u8(pred_ptr); - pred_ptr += pred_stride; - - q8u16 = vsubl_u8(d0u8, d1u8); - q9u16 = vsubl_u8(d2u8, d3u8); - q10u16 = vsubl_u8(d4u8, d5u8); - q11u16 = vsubl_u8(d6u8, d7u8); - - vst1q_u16((uint16_t *)diff, q8u16); - diff += 8; - vst1q_u16((uint16_t *)diff, q9u16); - diff += 8; - vst1q_u16((uint16_t *)diff, q10u16); - diff += 8; - vst1q_u16((uint16_t *)diff, q11u16); - diff += 8; - } - } - return; -} diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c b/media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c deleted file mode 100644 index f806809df5b..00000000000 --- a/media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -unsigned int vp8_mse16x16_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - int64x1_t d0s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - int32x4_t q7s32, q8s32, q9s32, q10s32; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int64x2_t q1s64; - - q7s32 = vdupq_n_s32(0); - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 8; i++) { // mse16x16_neon_loop - q0u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q1u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q2u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - - q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); - q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); - q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); - q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q7s32 = vmlal_s16(q7s32, d22s16, d22s16); - q8s32 = vmlal_s16(q8s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q7s32 = vmlal_s16(q7s32, d26s16, d26s16); - q8s32 = vmlal_s16(q8s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); 
- q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q7s32 = vaddq_s32(q7s32, q8s32); - q9s32 = vaddq_s32(q9s32, q10s32); - q10s32 = vaddq_s32(q7s32, q9s32); - - q1s64 = vpaddlq_s32(q10s32); - d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0); - return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); -} - -unsigned int vp8_get4x4sse_cs_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride) { - int16x4_t d22s16, d24s16, d26s16, d28s16; - int64x1_t d0s64; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - int32x4_t q7s32, q8s32, q9s32, q10s32; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int64x2_t q1s64; - - d0u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d4u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d1u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d5u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d6u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d3u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d7u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - - q11u16 = vsubl_u8(d0u8, d4u8); - q12u16 = vsubl_u8(d1u8, d5u8); - q13u16 = vsubl_u8(d2u8, d6u8); - q14u16 = vsubl_u8(d3u8, d7u8); - - d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16)); - d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16)); - d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16)); - d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16)); - - q7s32 = vmull_s16(d22s16, d22s16); - q8s32 = vmull_s16(d24s16, d24s16); - q9s32 = vmull_s16(d26s16, d26s16); - q10s32 = vmull_s16(d28s16, d28s16); - - q7s32 = vaddq_s32(q7s32, q8s32); - q9s32 = vaddq_s32(q9s32, q10s32); - q9s32 = vaddq_s32(q7s32, q9s32); - - q1s64 = vpaddlq_s32(q9s32); - d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); -} diff --git 
a/media/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c b/media/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c deleted file mode 100644 index d6b67f89503..00000000000 --- a/media/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -void vp8_short_walsh4x4_neon( - int16_t *input, - int16_t *output, - int pitch) { - uint16x4_t d16u16; - int16x8_t q0s16, q1s16; - int16x4_t dEmptys16, d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int32x4_t qEmptys32, q0s32, q1s32, q2s32, q3s32, q8s32; - int32x4_t q9s32, q10s32, q11s32, q15s32; - uint32x4_t q8u32, q9u32, q10u32, q11u32; - int16x4x2_t v2tmp0, v2tmp1; - int32x2x2_t v2tmp2, v2tmp3; - - dEmptys16 = vdup_n_s16(0); - qEmptys32 = vdupq_n_s32(0); - q15s32 = vdupq_n_s32(3); - - d0s16 = vld1_s16(input); - input += pitch/2; - d1s16 = vld1_s16(input); - input += pitch/2; - d2s16 = vld1_s16(input); - input += pitch/2; - d3s16 = vld1_s16(input); - - v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d0s16), - vreinterpret_s32_s16(d2s16)); - v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d1s16), - vreinterpret_s32_s16(d3s16)); - v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), // d0 - vreinterpret_s16_s32(v2tmp3.val[0])); // d1 - v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), // d2 - vreinterpret_s16_s32(v2tmp3.val[1])); // d3 - - d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[0]); - d5s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[1]); - d6s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[1]); - d7s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[0]); - - d4s16 = vshl_n_s16(d4s16, 2); - d5s16 
= vshl_n_s16(d5s16, 2); - d6s16 = vshl_n_s16(d6s16, 2); - d7s16 = vshl_n_s16(d7s16, 2); - - d16u16 = vceq_s16(d4s16, dEmptys16); - d16u16 = vmvn_u16(d16u16); - - d0s16 = vadd_s16(d4s16, d5s16); - d3s16 = vsub_s16(d4s16, d5s16); - d1s16 = vadd_s16(d7s16, d6s16); - d2s16 = vsub_s16(d7s16, d6s16); - - d0s16 = vsub_s16(d0s16, vreinterpret_s16_u16(d16u16)); - - // Second for-loop - v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d1s16), - vreinterpret_s32_s16(d3s16)); - v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d0s16), - vreinterpret_s32_s16(d2s16)); - v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp3.val[1]), // d2 - vreinterpret_s16_s32(v2tmp2.val[1])); // d3 - v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp3.val[0]), // d0 - vreinterpret_s16_s32(v2tmp2.val[0])); // d1 - - q8s32 = vaddl_s16(v2tmp1.val[0], v2tmp0.val[0]); - q9s32 = vaddl_s16(v2tmp1.val[1], v2tmp0.val[1]); - q10s32 = vsubl_s16(v2tmp1.val[1], v2tmp0.val[1]); - q11s32 = vsubl_s16(v2tmp1.val[0], v2tmp0.val[0]); - - q0s32 = vaddq_s32(q8s32, q9s32); - q1s32 = vaddq_s32(q11s32, q10s32); - q2s32 = vsubq_s32(q11s32, q10s32); - q3s32 = vsubq_s32(q8s32, q9s32); - - q8u32 = vcltq_s32(q0s32, qEmptys32); - q9u32 = vcltq_s32(q1s32, qEmptys32); - q10u32 = vcltq_s32(q2s32, qEmptys32); - q11u32 = vcltq_s32(q3s32, qEmptys32); - - q8s32 = vreinterpretq_s32_u32(q8u32); - q9s32 = vreinterpretq_s32_u32(q9u32); - q10s32 = vreinterpretq_s32_u32(q10u32); - q11s32 = vreinterpretq_s32_u32(q11u32); - - q0s32 = vsubq_s32(q0s32, q8s32); - q1s32 = vsubq_s32(q1s32, q9s32); - q2s32 = vsubq_s32(q2s32, q10s32); - q3s32 = vsubq_s32(q3s32, q11s32); - - q8s32 = vaddq_s32(q0s32, q15s32); - q9s32 = vaddq_s32(q1s32, q15s32); - q10s32 = vaddq_s32(q2s32, q15s32); - q11s32 = vaddq_s32(q3s32, q15s32); - - d0s16 = vshrn_n_s32(q8s32, 3); - d1s16 = vshrn_n_s32(q9s32, 3); - d2s16 = vshrn_n_s32(q10s32, 3); - d3s16 = vshrn_n_s32(q11s32, 3); - - q0s16 = vcombine_s16(d0s16, d1s16); - q1s16 = vcombine_s16(d2s16, d3s16); - - vst1q_s16(output, q0s16); - vst1q_s16(output + 8, 
q1s16); - return; -} diff --git a/media/libvpx/vp8/encoder/x86/quantize_sse4.c b/media/libvpx/vp8/encoder/x86/quantize_sse4.c deleted file mode 100644 index 601dd23a2f7..00000000000 --- a/media/libvpx/vp8/encoder/x86/quantize_sse4.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include /* SSE4.1 */ - -#include "./vp8_rtcd.h" -#include "vp8/encoder/block.h" -#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */ - -#define SELECT_EOB(i, z, x, y, q) \ - do { \ - short boost = *zbin_boost_ptr; \ - short x_z = _mm_extract_epi16(x, z); \ - short y_z = _mm_extract_epi16(y, z); \ - int cmp = (x_z < boost) | (y_z == 0); \ - zbin_boost_ptr++; \ - if (cmp) \ - break; \ - q = _mm_insert_epi16(q, y_z, z); \ - eob = i; \ - zbin_boost_ptr = b->zrun_zbin_boost; \ - } while (0) - -void vp8_regular_quantize_b_sse4_1(BLOCK *b, BLOCKD *d) { - char eob = 0; - short *zbin_boost_ptr = b->zrun_zbin_boost; - - __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1, - dqcoeff0, dqcoeff1; - __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); - __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); - __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); - __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8)); - __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra); - __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); - __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); - __m128i round0 = _mm_load_si128((__m128i *)(b->round)); - __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); - __m128i quant0 = _mm_load_si128((__m128i 
*)(b->quant)); - __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); - __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); - __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); - __m128i qcoeff0 = _mm_setzero_si128(); - __m128i qcoeff1 = _mm_setzero_si128(); - - /* Duplicate to all lanes. */ - zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); - zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra); - - /* Sign of z: z >> 15 */ - sz0 = _mm_srai_epi16(z0, 15); - sz1 = _mm_srai_epi16(z1, 15); - - /* x = abs(z): (z ^ sz) - sz */ - x0 = _mm_xor_si128(z0, sz0); - x1 = _mm_xor_si128(z1, sz1); - x0 = _mm_sub_epi16(x0, sz0); - x1 = _mm_sub_epi16(x1, sz1); - - /* zbin[] + zbin_extra */ - zbin0 = _mm_add_epi16(zbin0, zbin_extra); - zbin1 = _mm_add_epi16(zbin1, zbin_extra); - - /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance - * the equation because boost is the only value which can change: - * x - (zbin[] + extra) >= boost */ - x_minus_zbin0 = _mm_sub_epi16(x0, zbin0); - x_minus_zbin1 = _mm_sub_epi16(x1, zbin1); - - /* All the remaining calculations are valid whether they are done now with - * simd or later inside the loop one at a time. */ - x0 = _mm_add_epi16(x0, round0); - x1 = _mm_add_epi16(x1, round1); - - y0 = _mm_mulhi_epi16(x0, quant0); - y1 = _mm_mulhi_epi16(x1, quant1); - - y0 = _mm_add_epi16(y0, x0); - y1 = _mm_add_epi16(y1, x1); - - /* Instead of shifting each value independently we convert the scaling - * factor with 1 << (16 - shift) so we can use multiply/return high half. */ - y0 = _mm_mulhi_epi16(y0, quant_shift0); - y1 = _mm_mulhi_epi16(y1, quant_shift1); - - /* Return the sign: (y ^ sz) - sz */ - y0 = _mm_xor_si128(y0, sz0); - y1 = _mm_xor_si128(y1, sz1); - y0 = _mm_sub_epi16(y0, sz0); - y1 = _mm_sub_epi16(y1, sz1); - - /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. 
*/ - SELECT_EOB(1, 0, x_minus_zbin0, y0, qcoeff0); - SELECT_EOB(2, 1, x_minus_zbin0, y0, qcoeff0); - SELECT_EOB(3, 4, x_minus_zbin0, y0, qcoeff0); - SELECT_EOB(4, 0, x_minus_zbin1, y1, qcoeff1); - SELECT_EOB(5, 5, x_minus_zbin0, y0, qcoeff0); - SELECT_EOB(6, 2, x_minus_zbin0, y0, qcoeff0); - SELECT_EOB(7, 3, x_minus_zbin0, y0, qcoeff0); - SELECT_EOB(8, 6, x_minus_zbin0, y0, qcoeff0); - SELECT_EOB(9, 1, x_minus_zbin1, y1, qcoeff1); - SELECT_EOB(10, 4, x_minus_zbin1, y1, qcoeff1); - SELECT_EOB(11, 5, x_minus_zbin1, y1, qcoeff1); - SELECT_EOB(12, 2, x_minus_zbin1, y1, qcoeff1); - SELECT_EOB(13, 7, x_minus_zbin0, y0, qcoeff0); - SELECT_EOB(14, 3, x_minus_zbin1, y1, qcoeff1); - SELECT_EOB(15, 6, x_minus_zbin1, y1, qcoeff1); - SELECT_EOB(16, 7, x_minus_zbin1, y1, qcoeff1); - - _mm_store_si128((__m128i *)(d->qcoeff), qcoeff0); - _mm_store_si128((__m128i *)(d->qcoeff + 8), qcoeff1); - - dqcoeff0 = _mm_mullo_epi16(qcoeff0, dequant0); - dqcoeff1 = _mm_mullo_epi16(qcoeff1, dequant1); - - _mm_store_si128((__m128i *)(d->dqcoeff), dqcoeff0); - _mm_store_si128((__m128i *)(d->dqcoeff + 8), dqcoeff1); - - *d->eob = eob; -} diff --git a/media/libvpx/vp8/encoder/x86/quantize_ssse3.c b/media/libvpx/vp8/encoder/x86/quantize_ssse3.c deleted file mode 100644 index 448217ff412..00000000000 --- a/media/libvpx/vp8/encoder/x86/quantize_ssse3.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include /* SSSE3 */ - -#include "vp8/encoder/block.h" - -/* bitscan reverse (bsr) */ -#if defined(_MSC_VER) -#include -#pragma intrinsic(_BitScanReverse) -static int bsr(int mask) { - int eob; - _BitScanReverse(&eob, mask); - eob++; - if (mask == 0) - eob = 0; - return eob; -} -#else -static int bsr(int mask) { - int eob; -#if defined(__GNUC__) && __GNUC__ - __asm__ __volatile__("bsr %1, %0" : "=r" (eob) : "r" (mask) : "flags"); -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) - asm volatile("bsr %1, %0" : "=r" (eob) : "r" (mask) : "flags"); -#endif - eob++; - if (mask == 0) - eob = 0; - return eob; -} -#endif - -void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) { - int eob, mask; - - __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); - __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8)); - __m128i round0 = _mm_load_si128((__m128i *)(b->round)); - __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); - __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast)); - __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8)); - __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); - __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); - - __m128i sz0, sz1, x, x0, x1, y0, y1, zeros, abs0, abs1; - - DECLARE_ALIGNED(16, const uint8_t, pshufb_zig_zag_mask[16]) = - { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; - __m128i zig_zag = _mm_load_si128((const __m128i *)pshufb_zig_zag_mask); - - /* sign of z: z >> 15 */ - sz0 = _mm_srai_epi16(z0, 15); - sz1 = _mm_srai_epi16(z1, 15); - - /* x = abs(z) */ - x0 = _mm_abs_epi16(z0); - x1 = _mm_abs_epi16(z1); - - /* x += round */ - x0 = _mm_add_epi16(x0, round0); - x1 = _mm_add_epi16(x1, round1); - - /* y = (x * quant) >> 16 */ - y0 = _mm_mulhi_epi16(x0, quant_fast0); - y1 = _mm_mulhi_epi16(x1, quant_fast1); - - /* ASM saves Y for EOB */ - /* I think we can ignore that because adding the sign doesn't change anything - * and multiplying 0 by dequant is OK as well */ - 
abs0 = y0; - abs1 = y1; - - /* Restore the sign bit. */ - y0 = _mm_xor_si128(y0, sz0); - y1 = _mm_xor_si128(y1, sz1); - x0 = _mm_sub_epi16(y0, sz0); - x1 = _mm_sub_epi16(y1, sz1); - - /* qcoeff = x */ - _mm_store_si128((__m128i *)(d->qcoeff), x0); - _mm_store_si128((__m128i *)(d->qcoeff + 8), x1); - - /* x * dequant */ - x0 = _mm_mullo_epi16(x0, dequant0); - x1 = _mm_mullo_epi16(x1, dequant1); - - /* dqcoeff = x * dequant */ - _mm_store_si128((__m128i *)(d->dqcoeff), x0); - _mm_store_si128((__m128i *)(d->dqcoeff + 8), x1); - - zeros = _mm_setzero_si128(); - - x0 = _mm_cmpgt_epi16(abs0, zeros); - x1 = _mm_cmpgt_epi16(abs1, zeros); - - x = _mm_packs_epi16(x0, x1); - - x = _mm_shuffle_epi8(x, zig_zag); - - mask = _mm_movemask_epi8(x); - - eob = bsr(mask); - - *d->eob = 0xFF & eob; -} diff --git a/media/libvpx/vp8/encoder/x86/ssim_opt_x86_64.asm b/media/libvpx/vp8/encoder/x86/ssim_opt_x86_64.asm deleted file mode 100644 index 5964a85f2cf..00000000000 --- a/media/libvpx/vp8/encoder/x86/ssim_opt_x86_64.asm +++ /dev/null @@ -1,216 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - -%include "vpx_ports/x86_abi_support.asm" - -; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr -%macro TABULATE_SSIM 0 - paddusw xmm15, xmm3 ; sum_s - paddusw xmm14, xmm4 ; sum_r - movdqa xmm1, xmm3 - pmaddwd xmm1, xmm1 - paddd xmm13, xmm1 ; sum_sq_s - movdqa xmm2, xmm4 - pmaddwd xmm2, xmm2 - paddd xmm12, xmm2 ; sum_sq_r - pmaddwd xmm3, xmm4 - paddd xmm11, xmm3 ; sum_sxr -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_Q 1 - movdqa xmm2,%1 - punpckldq %1,xmm0 - punpckhdq xmm2,xmm0 - paddq %1,xmm2 - movdqa xmm2,%1 - punpcklqdq %1,xmm0 - punpckhqdq xmm2,xmm0 - paddq %1,xmm2 -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_W 1 - movdqa xmm1, %1 - punpcklwd %1,xmm0 - punpckhwd xmm1,xmm0 - paddd %1, xmm1 - SUM_ACROSS_Q %1 -%endmacro -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. 
-global sym(vp8_ssim_parms_16x16_sse2) PRIVATE -sym(vp8_ssim_parms_16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 16 ;row counter -.NextRow: - - ;grab source and reference pixels - movdqu xmm5, [rsi] - movdqu xmm6, [rdi] - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpckhbw xmm3, xmm0 ; high_s - punpckhbw xmm4, xmm0 ; high_r - - TABULATE_SSIM - - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. 
-global sym(vp8_ssim_parms_8x8_sse2) PRIVATE -sym(vp8_ssim_parms_8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 8 ;row counter -.NextRow: - - ;grab source and reference pixels - movq xmm3, [rsi] - movq xmm4, [rdi] - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.asm deleted file mode 100644 index b1fd21bb61f..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.asm +++ /dev/null @@ -1,198 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - - EXPORT |vp9_idct16x16_1_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;void vp9_idct16x16_1_add_neon(int16_t *input, uint8_t *dest, -; int dest_stride) -; -; r0 int16_t input -; r1 uint8_t *dest -; r2 int dest_stride) - -|vp9_idct16x16_1_add_neon| PROC - ldrsh r0, [r0] - - ; generate cospi_16_64 = 11585 - mov r12, #0x2d00 - add r12, #0x41 - - ; out = dct_const_round_shift(input[0] * cospi_16_64) - mul r0, r0, r12 ; input[0] * cospi_16_64 - add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) - asr r0, r0, #14 ; >> DCT_CONST_BITS - - ; out = dct_const_round_shift(out * cospi_16_64) - mul r0, r0, r12 ; out * cospi_16_64 - mov r12, r1 ; save dest - add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) - asr r0, r0, #14 ; >> DCT_CONST_BITS - - ; a1 = ROUND_POWER_OF_TWO(out, 6) - add r0, r0, #32 ; + (1 <<((6) - 1)) - asr r0, r0, #6 ; >> 6 - - vdup.s16 q0, r0 ; duplicate a1 - mov r0, #8 - sub r2, #8 - - ; load destination data row0 - row3 - vld1.64 {d2}, [r1], r0 - vld1.64 {d3}, [r1], r2 - vld1.64 {d4}, [r1], r0 - vld1.64 {d5}, [r1], r2 - vld1.64 {d6}, [r1], r0 - vld1.64 {d7}, [r1], r2 - vld1.64 {d16}, [r1], r0 - vld1.64 {d17}, [r1], r2 - - vaddw.u8 q9, q0, d2 ; dest[x] + a1 - vaddw.u8 q10, q0, d3 ; dest[x] + a1 - vaddw.u8 q11, q0, d4 ; dest[x] + a1 - vaddw.u8 q12, q0, d5 ; dest[x] + a1 - vqmovun.s16 d2, q9 ; clip_pixel - vqmovun.s16 d3, q10 ; clip_pixel - vqmovun.s16 d30, q11 ; clip_pixel - vqmovun.s16 d31, q12 ; clip_pixel - vst1.64 {d2}, [r12], r0 - vst1.64 {d3}, [r12], r2 - vst1.64 {d30}, [r12], r0 - vst1.64 {d31}, [r12], r2 - - vaddw.u8 q9, q0, d6 ; dest[x] + a1 - vaddw.u8 q10, q0, d7 ; dest[x] + a1 - vaddw.u8 q11, q0, d16 ; dest[x] + a1 - vaddw.u8 q12, q0, d17 ; dest[x] + a1 - vqmovun.s16 d2, q9 ; clip_pixel - vqmovun.s16 d3, q10 ; clip_pixel - vqmovun.s16 d30, q11 ; clip_pixel - vqmovun.s16 d31, q12 ; clip_pixel - vst1.64 {d2}, [r12], r0 - vst1.64 {d3}, [r12], r2 - vst1.64 {d30}, [r12], r0 - vst1.64 {d31}, [r12], r2 
- - ; load destination data row4 - row7 - vld1.64 {d2}, [r1], r0 - vld1.64 {d3}, [r1], r2 - vld1.64 {d4}, [r1], r0 - vld1.64 {d5}, [r1], r2 - vld1.64 {d6}, [r1], r0 - vld1.64 {d7}, [r1], r2 - vld1.64 {d16}, [r1], r0 - vld1.64 {d17}, [r1], r2 - - vaddw.u8 q9, q0, d2 ; dest[x] + a1 - vaddw.u8 q10, q0, d3 ; dest[x] + a1 - vaddw.u8 q11, q0, d4 ; dest[x] + a1 - vaddw.u8 q12, q0, d5 ; dest[x] + a1 - vqmovun.s16 d2, q9 ; clip_pixel - vqmovun.s16 d3, q10 ; clip_pixel - vqmovun.s16 d30, q11 ; clip_pixel - vqmovun.s16 d31, q12 ; clip_pixel - vst1.64 {d2}, [r12], r0 - vst1.64 {d3}, [r12], r2 - vst1.64 {d30}, [r12], r0 - vst1.64 {d31}, [r12], r2 - - vaddw.u8 q9, q0, d6 ; dest[x] + a1 - vaddw.u8 q10, q0, d7 ; dest[x] + a1 - vaddw.u8 q11, q0, d16 ; dest[x] + a1 - vaddw.u8 q12, q0, d17 ; dest[x] + a1 - vqmovun.s16 d2, q9 ; clip_pixel - vqmovun.s16 d3, q10 ; clip_pixel - vqmovun.s16 d30, q11 ; clip_pixel - vqmovun.s16 d31, q12 ; clip_pixel - vst1.64 {d2}, [r12], r0 - vst1.64 {d3}, [r12], r2 - vst1.64 {d30}, [r12], r0 - vst1.64 {d31}, [r12], r2 - - ; load destination data row8 - row11 - vld1.64 {d2}, [r1], r0 - vld1.64 {d3}, [r1], r2 - vld1.64 {d4}, [r1], r0 - vld1.64 {d5}, [r1], r2 - vld1.64 {d6}, [r1], r0 - vld1.64 {d7}, [r1], r2 - vld1.64 {d16}, [r1], r0 - vld1.64 {d17}, [r1], r2 - - vaddw.u8 q9, q0, d2 ; dest[x] + a1 - vaddw.u8 q10, q0, d3 ; dest[x] + a1 - vaddw.u8 q11, q0, d4 ; dest[x] + a1 - vaddw.u8 q12, q0, d5 ; dest[x] + a1 - vqmovun.s16 d2, q9 ; clip_pixel - vqmovun.s16 d3, q10 ; clip_pixel - vqmovun.s16 d30, q11 ; clip_pixel - vqmovun.s16 d31, q12 ; clip_pixel - vst1.64 {d2}, [r12], r0 - vst1.64 {d3}, [r12], r2 - vst1.64 {d30}, [r12], r0 - vst1.64 {d31}, [r12], r2 - - vaddw.u8 q9, q0, d6 ; dest[x] + a1 - vaddw.u8 q10, q0, d7 ; dest[x] + a1 - vaddw.u8 q11, q0, d16 ; dest[x] + a1 - vaddw.u8 q12, q0, d17 ; dest[x] + a1 - vqmovun.s16 d2, q9 ; clip_pixel - vqmovun.s16 d3, q10 ; clip_pixel - vqmovun.s16 d30, q11 ; clip_pixel - vqmovun.s16 d31, q12 ; clip_pixel - vst1.64 {d2}, 
[r12], r0 - vst1.64 {d3}, [r12], r2 - vst1.64 {d30}, [r12], r0 - vst1.64 {d31}, [r12], r2 - - ; load destination data row12 - row15 - vld1.64 {d2}, [r1], r0 - vld1.64 {d3}, [r1], r2 - vld1.64 {d4}, [r1], r0 - vld1.64 {d5}, [r1], r2 - vld1.64 {d6}, [r1], r0 - vld1.64 {d7}, [r1], r2 - vld1.64 {d16}, [r1], r0 - vld1.64 {d17}, [r1], r2 - - vaddw.u8 q9, q0, d2 ; dest[x] + a1 - vaddw.u8 q10, q0, d3 ; dest[x] + a1 - vaddw.u8 q11, q0, d4 ; dest[x] + a1 - vaddw.u8 q12, q0, d5 ; dest[x] + a1 - vqmovun.s16 d2, q9 ; clip_pixel - vqmovun.s16 d3, q10 ; clip_pixel - vqmovun.s16 d30, q11 ; clip_pixel - vqmovun.s16 d31, q12 ; clip_pixel - vst1.64 {d2}, [r12], r0 - vst1.64 {d3}, [r12], r2 - vst1.64 {d30}, [r12], r0 - vst1.64 {d31}, [r12], r2 - - vaddw.u8 q9, q0, d6 ; dest[x] + a1 - vaddw.u8 q10, q0, d7 ; dest[x] + a1 - vaddw.u8 q11, q0, d16 ; dest[x] + a1 - vaddw.u8 q12, q0, d17 ; dest[x] + a1 - vqmovun.s16 d2, q9 ; clip_pixel - vqmovun.s16 d3, q10 ; clip_pixel - vqmovun.s16 d30, q11 ; clip_pixel - vqmovun.s16 d31, q12 ; clip_pixel - vst1.64 {d2}, [r12], r0 - vst1.64 {d3}, [r12], r2 - vst1.64 {d30}, [r12], r0 - vst1.64 {d31}, [r12], r2 - - bx lr - ENDP ; |vp9_idct16x16_1_add_neon| - - END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct16x16_add_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_idct16x16_add_neon.asm deleted file mode 100644 index a13c0d04b83..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct16x16_add_neon.asm +++ /dev/null @@ -1,1179 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - EXPORT |vp9_idct16x16_256_add_neon_pass1| - EXPORT |vp9_idct16x16_256_add_neon_pass2| - EXPORT |vp9_idct16x16_10_add_neon_pass1| - EXPORT |vp9_idct16x16_10_add_neon_pass2| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - ; Transpose a 8x8 16bit data matrix. Datas are loaded in q8-q15. - MACRO - TRANSPOSE8X8 - vswp d17, d24 - vswp d23, d30 - vswp d21, d28 - vswp d19, d26 - vtrn.32 q8, q10 - vtrn.32 q9, q11 - vtrn.32 q12, q14 - vtrn.32 q13, q15 - vtrn.16 q8, q9 - vtrn.16 q10, q11 - vtrn.16 q12, q13 - vtrn.16 q14, q15 - MEND - - AREA Block, CODE, READONLY ; name this block of code -;void |vp9_idct16x16_256_add_neon_pass1|(int16_t *input, -; int16_t *output, int output_stride) -; -; r0 int16_t input -; r1 int16_t *output -; r2 int output_stride) - -; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output -; will be stored back into q8-q15 registers. This function will touch q0-q7 -; registers and use them as buffer during calculation. -|vp9_idct16x16_256_add_neon_pass1| PROC - - ; TODO(hkuang): Find a better way to load the elements. - ; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15 - vld2.s16 {q8,q9}, [r0]! - vld2.s16 {q9,q10}, [r0]! - vld2.s16 {q10,q11}, [r0]! - vld2.s16 {q11,q12}, [r0]! - vld2.s16 {q12,q13}, [r0]! - vld2.s16 {q13,q14}, [r0]! - vld2.s16 {q14,q15}, [r0]! - vld2.s16 {q1,q2}, [r0]! 
- vmov.s16 q15, q1 - - ; generate cospi_28_64 = 3196 - mov r3, #0xc00 - add r3, #0x7c - - ; generate cospi_4_64 = 16069 - mov r12, #0x3e00 - add r12, #0xc5 - - ; transpose the input data - TRANSPOSE8X8 - - ; stage 3 - vdup.16 d0, r3 ; duplicate cospi_28_64 - vdup.16 d1, r12 ; duplicate cospi_4_64 - - ; preloading to avoid stall - ; generate cospi_12_64 = 13623 - mov r3, #0x3500 - add r3, #0x37 - - ; generate cospi_20_64 = 9102 - mov r12, #0x2300 - add r12, #0x8e - - ; step2[4] * cospi_28_64 - vmull.s16 q2, d18, d0 - vmull.s16 q3, d19, d0 - - ; step2[4] * cospi_4_64 - vmull.s16 q5, d18, d1 - vmull.s16 q6, d19, d1 - - ; temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64 - vmlsl.s16 q2, d30, d1 - vmlsl.s16 q3, d31, d1 - - ; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64 - vmlal.s16 q5, d30, d0 - vmlal.s16 q6, d31, d0 - - vdup.16 d2, r3 ; duplicate cospi_12_64 - vdup.16 d3, r12 ; duplicate cospi_20_64 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d8, q2, #14 ; >> 14 - vqrshrn.s32 d9, q3, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d14, q5, #14 ; >> 14 - vqrshrn.s32 d15, q6, #14 ; >> 14 - - ; preloading to avoid stall - ; generate cospi_16_64 = 11585 - mov r3, #0x2d00 - add r3, #0x41 - - ; generate cospi_24_64 = 6270 - mov r12, #0x1800 - add r12, #0x7e - - ; step2[5] * cospi_12_64 - vmull.s16 q2, d26, d2 - vmull.s16 q3, d27, d2 - - ; step2[5] * cospi_20_64 - vmull.s16 q9, d26, d3 - vmull.s16 q15, d27, d3 - - ; temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64 - vmlsl.s16 q2, d22, d3 - vmlsl.s16 q3, d23, d3 - - ; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64 - vmlal.s16 q9, d22, d2 - vmlal.s16 q15, d23, d2 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d10, q2, #14 ; >> 14 - vqrshrn.s32 d11, q3, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d12, q9, #14 ; >> 14 - vqrshrn.s32 d13, q15, #14 ; >> 14 - - ; stage 4 - vdup.16 d30, r3 ; cospi_16_64 - - ; step1[0] * cospi_16_64 - vmull.s16 q2, d16, d30 - vmull.s16 
q11, d17, d30 - - ; step1[1] * cospi_16_64 - vmull.s16 q0, d24, d30 - vmull.s16 q1, d25, d30 - - ; generate cospi_8_64 = 15137 - mov r3, #0x3b00 - add r3, #0x21 - - vdup.16 d30, r12 ; duplicate cospi_24_64 - vdup.16 d31, r3 ; duplicate cospi_8_64 - - ; temp1 = (step1[0] + step1[1]) * cospi_16_64 - vadd.s32 q3, q2, q0 - vadd.s32 q12, q11, q1 - - ; temp2 = (step1[0] - step1[1]) * cospi_16_64 - vsub.s32 q13, q2, q0 - vsub.s32 q1, q11, q1 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d16, q3, #14 ; >> 14 - vqrshrn.s32 d17, q12, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d18, q13, #14 ; >> 14 - vqrshrn.s32 d19, q1, #14 ; >> 14 - - ; step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - ; step1[2] * cospi_8_64 - vmull.s16 q0, d20, d31 - vmull.s16 q1, d21, d31 - - ; step1[2] * cospi_24_64 - vmull.s16 q12, d20, d30 - vmull.s16 q13, d21, d30 - - ; temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64 - vmlal.s16 q0, d28, d30 - vmlal.s16 q1, d29, d30 - - ; temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64 - vmlsl.s16 q12, d28, d31 - vmlsl.s16 q13, d29, d31 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d22, q0, #14 ; >> 14 - vqrshrn.s32 d23, q1, #14 ; >> 14 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d20, q12, #14 ; >> 14 - vqrshrn.s32 d21, q13, #14 ; >> 14 - - vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]; - vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]; - vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]; - vadd.s16 q15, q6, q7 ; step2[7] = step1[6] + step1[7]; - - ; generate cospi_16_64 = 11585 - mov r3, #0x2d00 - add r3, #0x41 - - ; stage 5 - vadd.s16 q0, q8, q11 ; step1[0] = step2[0] + step2[3]; - vadd.s16 q1, q9, q10 ; step1[1] = step2[1] + step2[2]; - vsub.s16 q2, q9, q10 ; step1[2] = step2[1] - step2[2]; - vsub.s16 q3, q8, q11 ; step1[3] = step2[0] - step2[3]; - - vdup.16 d16, r3; ; duplicate cospi_16_64 - - ; step2[5] * cospi_16_64 - vmull.s16 q11, d26, d16 - vmull.s16 q12, d27, d16 - - ; step2[6] * cospi_16_64 - 
vmull.s16 q9, d28, d16 - vmull.s16 q10, d29, d16 - - ; temp1 = (step2[6] - step2[5]) * cospi_16_64 - vsub.s32 q6, q9, q11 - vsub.s32 q13, q10, q12 - - ; temp2 = (step2[5] + step2[6]) * cospi_16_64 - vadd.s32 q9, q9, q11 - vadd.s32 q10, q10, q12 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d10, q6, #14 ; >> 14 - vqrshrn.s32 d11, q13, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d12, q9, #14 ; >> 14 - vqrshrn.s32 d13, q10, #14 ; >> 14 - - ; stage 6 - vadd.s16 q8, q0, q15 ; step2[0] = step1[0] + step1[7]; - vadd.s16 q9, q1, q6 ; step2[1] = step1[1] + step1[6]; - vadd.s16 q10, q2, q5 ; step2[2] = step1[2] + step1[5]; - vadd.s16 q11, q3, q4 ; step2[3] = step1[3] + step1[4]; - vsub.s16 q12, q3, q4 ; step2[4] = step1[3] - step1[4]; - vsub.s16 q13, q2, q5 ; step2[5] = step1[2] - step1[5]; - vsub.s16 q14, q1, q6 ; step2[6] = step1[1] - step1[6]; - vsub.s16 q15, q0, q15 ; step2[7] = step1[0] - step1[7]; - - ; store the data - vst1.64 {d16}, [r1], r2 - vst1.64 {d17}, [r1], r2 - vst1.64 {d18}, [r1], r2 - vst1.64 {d19}, [r1], r2 - vst1.64 {d20}, [r1], r2 - vst1.64 {d21}, [r1], r2 - vst1.64 {d22}, [r1], r2 - vst1.64 {d23}, [r1], r2 - vst1.64 {d24}, [r1], r2 - vst1.64 {d25}, [r1], r2 - vst1.64 {d26}, [r1], r2 - vst1.64 {d27}, [r1], r2 - vst1.64 {d28}, [r1], r2 - vst1.64 {d29}, [r1], r2 - vst1.64 {d30}, [r1], r2 - vst1.64 {d31}, [r1], r2 - - bx lr - ENDP ; |vp9_idct16x16_256_add_neon_pass1| - -;void vp9_idct16x16_256_add_neon_pass2(int16_t *src, -; int16_t *output, -; int16_t *pass1Output, -; int16_t skip_adding, -; uint8_t *dest, -; int dest_stride) -; -; r0 int16_t *src -; r1 int16_t *output, -; r2 int16_t *pass1Output, -; r3 int16_t skip_adding, -; r4 uint8_t *dest, -; r5 int dest_stride) - -; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output -; will be stored back into q8-q15 registers. This function will touch q0-q7 -; registers and use them as buffer during calculation. 
-|vp9_idct16x16_256_add_neon_pass2| PROC - push {r3-r9} - - ; TODO(hkuang): Find a better way to load the elements. - ; load elements of 1, 3, 5, 7, 9, 11, 13, 15 into q8 - q15 - vld2.s16 {q8,q9}, [r0]! - vld2.s16 {q9,q10}, [r0]! - vld2.s16 {q10,q11}, [r0]! - vld2.s16 {q11,q12}, [r0]! - vld2.s16 {q12,q13}, [r0]! - vld2.s16 {q13,q14}, [r0]! - vld2.s16 {q14,q15}, [r0]! - vld2.s16 {q0,q1}, [r0]! - vmov.s16 q15, q0; - - ; generate cospi_30_64 = 1606 - mov r3, #0x0600 - add r3, #0x46 - - ; generate cospi_2_64 = 16305 - mov r12, #0x3f00 - add r12, #0xb1 - - ; transpose the input data - TRANSPOSE8X8 - - ; stage 3 - vdup.16 d12, r3 ; duplicate cospi_30_64 - vdup.16 d13, r12 ; duplicate cospi_2_64 - - ; preloading to avoid stall - ; generate cospi_14_64 = 12665 - mov r3, #0x3100 - add r3, #0x79 - - ; generate cospi_18_64 = 10394 - mov r12, #0x2800 - add r12, #0x9a - - ; step1[8] * cospi_30_64 - vmull.s16 q2, d16, d12 - vmull.s16 q3, d17, d12 - - ; step1[8] * cospi_2_64 - vmull.s16 q1, d16, d13 - vmull.s16 q4, d17, d13 - - ; temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64 - vmlsl.s16 q2, d30, d13 - vmlsl.s16 q3, d31, d13 - - ; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64 - vmlal.s16 q1, d30, d12 - vmlal.s16 q4, d31, d12 - - vdup.16 d30, r3 ; duplicate cospi_14_64 - vdup.16 d31, r12 ; duplicate cospi_18_64 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d0, q2, #14 ; >> 14 - vqrshrn.s32 d1, q3, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d14, q1, #14 ; >> 14 - vqrshrn.s32 d15, q4, #14 ; >> 14 - - ; preloading to avoid stall - ; generate cospi_22_64 = 7723 - mov r3, #0x1e00 - add r3, #0x2b - - ; generate cospi_10_64 = 14449 - mov r12, #0x3800 - add r12, #0x71 - - ; step1[9] * cospi_14_64 - vmull.s16 q2, d24, d30 - vmull.s16 q3, d25, d30 - - ; step1[9] * cospi_18_64 - vmull.s16 q4, d24, d31 - vmull.s16 q5, d25, d31 - - ; temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64 - vmlsl.s16 q2, d22, d31 - vmlsl.s16 q3, d23, d31 - - ; temp2 = 
step1[9] * cospi_18_64 + step1[14] * cospi_14_64 - vmlal.s16 q4, d22, d30 - vmlal.s16 q5, d23, d30 - - vdup.16 d30, r3 ; duplicate cospi_22_64 - vdup.16 d31, r12 ; duplicate cospi_10_64 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d2, q2, #14 ; >> 14 - vqrshrn.s32 d3, q3, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d12, q4, #14 ; >> 14 - vqrshrn.s32 d13, q5, #14 ; >> 14 - - ; step1[10] * cospi_22_64 - vmull.s16 q11, d20, d30 - vmull.s16 q12, d21, d30 - - ; step1[10] * cospi_10_64 - vmull.s16 q4, d20, d31 - vmull.s16 q5, d21, d31 - - ; temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64 - vmlsl.s16 q11, d26, d31 - vmlsl.s16 q12, d27, d31 - - ; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64 - vmlal.s16 q4, d26, d30 - vmlal.s16 q5, d27, d30 - - ; preloading to avoid stall - ; generate cospi_6_64 = 15679 - mov r3, #0x3d00 - add r3, #0x3f - - ; generate cospi_26_64 = 4756 - mov r12, #0x1200 - add r12, #0x94 - - vdup.16 d30, r3 ; duplicate cospi_6_64 - vdup.16 d31, r12 ; duplicate cospi_26_64 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d4, q11, #14 ; >> 14 - vqrshrn.s32 d5, q12, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d11, q5, #14 ; >> 14 - vqrshrn.s32 d10, q4, #14 ; >> 14 - - ; step1[11] * cospi_6_64 - vmull.s16 q10, d28, d30 - vmull.s16 q11, d29, d30 - - ; step1[11] * cospi_26_64 - vmull.s16 q12, d28, d31 - vmull.s16 q13, d29, d31 - - ; temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64 - vmlsl.s16 q10, d18, d31 - vmlsl.s16 q11, d19, d31 - - ; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64 - vmlal.s16 q12, d18, d30 - vmlal.s16 q13, d19, d30 - - vsub.s16 q9, q0, q1 ; step1[9]=step2[8]-step2[9] - vadd.s16 q0, q0, q1 ; step1[8]=step2[8]+step2[9] - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d6, q10, #14 ; >> 14 - vqrshrn.s32 d7, q11, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d8, q12, #14 ; >> 14 - vqrshrn.s32 d9, q13, #14 ; >> 14 - - ; stage 3 - vsub.s16 q10, q3, q2 ; 
step1[10]=-step2[10]+step2[11] - vadd.s16 q11, q2, q3 ; step1[11]=step2[10]+step2[11] - vadd.s16 q12, q4, q5 ; step1[12]=step2[12]+step2[13] - vsub.s16 q13, q4, q5 ; step1[13]=step2[12]-step2[13] - vsub.s16 q14, q7, q6 ; step1[14]=-step2[14]+tep2[15] - vadd.s16 q7, q6, q7 ; step1[15]=step2[14]+step2[15] - - ; stage 4 - ; generate cospi_24_64 = 6270 - mov r3, #0x1800 - add r3, #0x7e - - ; generate cospi_8_64 = 15137 - mov r12, #0x3b00 - add r12, #0x21 - - ; -step1[9] * cospi_8_64 + step1[14] * cospi_24_64 - vdup.16 d30, r12 ; duplicate cospi_8_64 - vdup.16 d31, r3 ; duplicate cospi_24_64 - - ; step1[9] * cospi_24_64 - vmull.s16 q2, d18, d31 - vmull.s16 q3, d19, d31 - - ; step1[14] * cospi_24_64 - vmull.s16 q4, d28, d31 - vmull.s16 q5, d29, d31 - - ; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64 - vmlal.s16 q2, d28, d30 - vmlal.s16 q3, d29, d30 - - ; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64 - vmlsl.s16 q4, d18, d30 - vmlsl.s16 q5, d19, d30 - - rsb r12, #0 - vdup.16 d30, r12 ; duplicate -cospi_8_64 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d12, q2, #14 ; >> 14 - vqrshrn.s32 d13, q3, #14 ; >> 14 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d2, q4, #14 ; >> 14 - vqrshrn.s32 d3, q5, #14 ; >> 14 - - vmov.s16 q3, q11 - vmov.s16 q4, q12 - - ; - step1[13] * cospi_8_64 - vmull.s16 q11, d26, d30 - vmull.s16 q12, d27, d30 - - ; -step1[10] * cospi_8_64 - vmull.s16 q8, d20, d30 - vmull.s16 q9, d21, d30 - - ; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64 - vmlsl.s16 q11, d20, d31 - vmlsl.s16 q12, d21, d31 - - ; temp1 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64 - vmlal.s16 q8, d26, d31 - vmlal.s16 q9, d27, d31 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d4, q11, #14 ; >> 14 - vqrshrn.s32 d5, q12, #14 ; >> 14 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d10, q8, #14 ; >> 14 - vqrshrn.s32 d11, q9, #14 ; >> 14 - - ; stage 5 - vadd.s16 q8, q0, q3 ; step1[8] = step2[8]+step2[11]; - vadd.s16 q9, q1, q2 ; step1[9] = 
step2[9]+step2[10]; - vsub.s16 q10, q1, q2 ; step1[10] = step2[9]-step2[10]; - vsub.s16 q11, q0, q3 ; step1[11] = step2[8]-step2[11]; - vsub.s16 q12, q7, q4 ; step1[12] =-step2[12]+step2[15]; - vsub.s16 q13, q6, q5 ; step1[13] =-step2[13]+step2[14]; - vadd.s16 q14, q6, q5 ; step1[14] =step2[13]+step2[14]; - vadd.s16 q15, q7, q4 ; step1[15] =step2[12]+step2[15]; - - ; stage 6. - ; generate cospi_16_64 = 11585 - mov r12, #0x2d00 - add r12, #0x41 - - vdup.16 d14, r12 ; duplicate cospi_16_64 - - ; step1[13] * cospi_16_64 - vmull.s16 q3, d26, d14 - vmull.s16 q4, d27, d14 - - ; step1[10] * cospi_16_64 - vmull.s16 q0, d20, d14 - vmull.s16 q1, d21, d14 - - ; temp1 = (-step1[10] + step1[13]) * cospi_16_64 - vsub.s32 q5, q3, q0 - vsub.s32 q6, q4, q1 - - ; temp2 = (step1[10] + step1[13]) * cospi_16_64 - vadd.s32 q10, q3, q0 - vadd.s32 q4, q4, q1 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d4, q5, #14 ; >> 14 - vqrshrn.s32 d5, q6, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d10, q10, #14 ; >> 14 - vqrshrn.s32 d11, q4, #14 ; >> 14 - - ; step1[11] * cospi_16_64 - vmull.s16 q0, d22, d14 - vmull.s16 q1, d23, d14 - - ; step1[12] * cospi_16_64 - vmull.s16 q13, d24, d14 - vmull.s16 q6, d25, d14 - - ; temp1 = (-step1[11] + step1[12]) * cospi_16_64 - vsub.s32 q10, q13, q0 - vsub.s32 q4, q6, q1 - - ; temp2 = (step1[11] + step1[12]) * cospi_16_64 - vadd.s32 q13, q13, q0 - vadd.s32 q6, q6, q1 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d6, q10, #14 ; >> 14 - vqrshrn.s32 d7, q4, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d8, q13, #14 ; >> 14 - vqrshrn.s32 d9, q6, #14 ; >> 14 - - mov r4, #16 ; pass1Output stride - ldr r3, [sp] ; load skip_adding - cmp r3, #0 ; check if need adding dest data - beq skip_adding_dest - - ldr r7, [sp, #28] ; dest used to save element 0-7 - mov r9, r7 ; save dest pointer for later use - ldr r8, [sp, #32] ; load dest_stride - - ; stage 7 - ; load the data in pass1 - vld1.s16 {q0}, [r2], r4 ; load data step2[0] - 
vld1.s16 {q1}, [r2], r4 ; load data step2[1] - vld1.s16 {q10}, [r2], r4 ; load data step2[2] - vld1.s16 {q11}, [r2], r4 ; load data step2[3] - vld1.64 {d12}, [r7], r8 ; load destinatoin data - vld1.64 {d13}, [r7], r8 ; load destinatoin data - vadd.s16 q12, q0, q15 ; step2[0] + step2[15] - vadd.s16 q13, q1, q14 ; step2[1] + step2[14] - vrshr.s16 q12, q12, #6 ; ROUND_POWER_OF_TWO - vrshr.s16 q13, q13, #6 ; ROUND_POWER_OF_TWO - vaddw.u8 q12, q12, d12 ; + dest[j * dest_stride + i] - vaddw.u8 q13, q13, d13 ; + dest[j * dest_stride + i] - vqmovun.s16 d12, q12 ; clip pixel - vqmovun.s16 d13, q13 ; clip pixel - vst1.64 {d12}, [r9], r8 ; store the data - vst1.64 {d13}, [r9], r8 ; store the data - vsub.s16 q14, q1, q14 ; step2[1] - step2[14] - vsub.s16 q15, q0, q15 ; step2[0] - step2[15] - vld1.64 {d12}, [r7], r8 ; load destinatoin data - vld1.64 {d13}, [r7], r8 ; load destinatoin data - vadd.s16 q12, q10, q5 ; step2[2] + step2[13] - vadd.s16 q13, q11, q4 ; step2[3] + step2[12] - vrshr.s16 q12, q12, #6 ; ROUND_POWER_OF_TWO - vrshr.s16 q13, q13, #6 ; ROUND_POWER_OF_TWO - vaddw.u8 q12, q12, d12 ; + dest[j * dest_stride + i] - vaddw.u8 q13, q13, d13 ; + dest[j * dest_stride + i] - vqmovun.s16 d12, q12 ; clip pixel - vqmovun.s16 d13, q13 ; clip pixel - vst1.64 {d12}, [r9], r8 ; store the data - vst1.64 {d13}, [r9], r8 ; store the data - vsub.s16 q4, q11, q4 ; step2[3] - step2[12] - vsub.s16 q5, q10, q5 ; step2[2] - step2[13] - vld1.s16 {q0}, [r2], r4 ; load data step2[4] - vld1.s16 {q1}, [r2], r4 ; load data step2[5] - vld1.s16 {q10}, [r2], r4 ; load data step2[6] - vld1.s16 {q11}, [r2], r4 ; load data step2[7] - vld1.64 {d12}, [r7], r8 ; load destinatoin data - vld1.64 {d13}, [r7], r8 ; load destinatoin data - vadd.s16 q12, q0, q3 ; step2[4] + step2[11] - vadd.s16 q13, q1, q2 ; step2[5] + step2[10] - vrshr.s16 q12, q12, #6 ; ROUND_POWER_OF_TWO - vrshr.s16 q13, q13, #6 ; ROUND_POWER_OF_TWO - vaddw.u8 q12, q12, d12 ; + dest[j * dest_stride + i] - vaddw.u8 q13, q13, d13 ; + dest[j 
* dest_stride + i] - vqmovun.s16 d12, q12 ; clip pixel - vqmovun.s16 d13, q13 ; clip pixel - vst1.64 {d12}, [r9], r8 ; store the data - vst1.64 {d13}, [r9], r8 ; store the data - vsub.s16 q2, q1, q2 ; step2[5] - step2[10] - vsub.s16 q3, q0, q3 ; step2[4] - step2[11] - vld1.64 {d12}, [r7], r8 ; load destinatoin data - vld1.64 {d13}, [r7], r8 ; load destinatoin data - vadd.s16 q12, q10, q9 ; step2[6] + step2[9] - vadd.s16 q13, q11, q8 ; step2[7] + step2[8] - vrshr.s16 q12, q12, #6 ; ROUND_POWER_OF_TWO - vrshr.s16 q13, q13, #6 ; ROUND_POWER_OF_TWO - vaddw.u8 q12, q12, d12 ; + dest[j * dest_stride + i] - vaddw.u8 q13, q13, d13 ; + dest[j * dest_stride + i] - vqmovun.s16 d12, q12 ; clip pixel - vqmovun.s16 d13, q13 ; clip pixel - vst1.64 {d12}, [r9], r8 ; store the data - vst1.64 {d13}, [r9], r8 ; store the data - vld1.64 {d12}, [r7], r8 ; load destinatoin data - vld1.64 {d13}, [r7], r8 ; load destinatoin data - vsub.s16 q8, q11, q8 ; step2[7] - step2[8] - vsub.s16 q9, q10, q9 ; step2[6] - step2[9] - - ; store the data output 8,9,10,11,12,13,14,15 - vrshr.s16 q8, q8, #6 ; ROUND_POWER_OF_TWO - vaddw.u8 q8, q8, d12 ; + dest[j * dest_stride + i] - vqmovun.s16 d12, q8 ; clip pixel - vst1.64 {d12}, [r9], r8 ; store the data - vld1.64 {d12}, [r7], r8 ; load destinatoin data - vrshr.s16 q9, q9, #6 - vaddw.u8 q9, q9, d13 ; + dest[j * dest_stride + i] - vqmovun.s16 d13, q9 ; clip pixel - vst1.64 {d13}, [r9], r8 ; store the data - vld1.64 {d13}, [r7], r8 ; load destinatoin data - vrshr.s16 q2, q2, #6 - vaddw.u8 q2, q2, d12 ; + dest[j * dest_stride + i] - vqmovun.s16 d12, q2 ; clip pixel - vst1.64 {d12}, [r9], r8 ; store the data - vld1.64 {d12}, [r7], r8 ; load destinatoin data - vrshr.s16 q3, q3, #6 - vaddw.u8 q3, q3, d13 ; + dest[j * dest_stride + i] - vqmovun.s16 d13, q3 ; clip pixel - vst1.64 {d13}, [r9], r8 ; store the data - vld1.64 {d13}, [r7], r8 ; load destinatoin data - vrshr.s16 q4, q4, #6 - vaddw.u8 q4, q4, d12 ; + dest[j * dest_stride + i] - vqmovun.s16 d12, q4 ; 
clip pixel - vst1.64 {d12}, [r9], r8 ; store the data - vld1.64 {d12}, [r7], r8 ; load destinatoin data - vrshr.s16 q5, q5, #6 - vaddw.u8 q5, q5, d13 ; + dest[j * dest_stride + i] - vqmovun.s16 d13, q5 ; clip pixel - vst1.64 {d13}, [r9], r8 ; store the data - vld1.64 {d13}, [r7], r8 ; load destinatoin data - vrshr.s16 q14, q14, #6 - vaddw.u8 q14, q14, d12 ; + dest[j * dest_stride + i] - vqmovun.s16 d12, q14 ; clip pixel - vst1.64 {d12}, [r9], r8 ; store the data - vld1.64 {d12}, [r7], r8 ; load destinatoin data - vrshr.s16 q15, q15, #6 - vaddw.u8 q15, q15, d13 ; + dest[j * dest_stride + i] - vqmovun.s16 d13, q15 ; clip pixel - vst1.64 {d13}, [r9], r8 ; store the data - b end_idct16x16_pass2 - -skip_adding_dest - ; stage 7 - ; load the data in pass1 - mov r5, #24 - mov r3, #8 - - vld1.s16 {q0}, [r2], r4 ; load data step2[0] - vld1.s16 {q1}, [r2], r4 ; load data step2[1] - vadd.s16 q12, q0, q15 ; step2[0] + step2[15] - vadd.s16 q13, q1, q14 ; step2[1] + step2[14] - vld1.s16 {q10}, [r2], r4 ; load data step2[2] - vld1.s16 {q11}, [r2], r4 ; load data step2[3] - vst1.64 {d24}, [r1], r3 ; store output[0] - vst1.64 {d25}, [r1], r5 - vst1.64 {d26}, [r1], r3 ; store output[1] - vst1.64 {d27}, [r1], r5 - vadd.s16 q12, q10, q5 ; step2[2] + step2[13] - vadd.s16 q13, q11, q4 ; step2[3] + step2[12] - vsub.s16 q14, q1, q14 ; step2[1] - step2[14] - vsub.s16 q15, q0, q15 ; step2[0] - step2[15] - vst1.64 {d24}, [r1], r3 ; store output[2] - vst1.64 {d25}, [r1], r5 - vst1.64 {d26}, [r1], r3 ; store output[3] - vst1.64 {d27}, [r1], r5 - vsub.s16 q4, q11, q4 ; step2[3] - step2[12] - vsub.s16 q5, q10, q5 ; step2[2] - step2[13] - vld1.s16 {q0}, [r2], r4 ; load data step2[4] - vld1.s16 {q1}, [r2], r4 ; load data step2[5] - vadd.s16 q12, q0, q3 ; step2[4] + step2[11] - vadd.s16 q13, q1, q2 ; step2[5] + step2[10] - vld1.s16 {q10}, [r2], r4 ; load data step2[6] - vld1.s16 {q11}, [r2], r4 ; load data step2[7] - vst1.64 {d24}, [r1], r3 ; store output[4] - vst1.64 {d25}, [r1], r5 - vst1.64 
{d26}, [r1], r3 ; store output[5] - vst1.64 {d27}, [r1], r5 - vadd.s16 q12, q10, q9 ; step2[6] + step2[9] - vadd.s16 q13, q11, q8 ; step2[7] + step2[8] - vsub.s16 q2, q1, q2 ; step2[5] - step2[10] - vsub.s16 q3, q0, q3 ; step2[4] - step2[11] - vsub.s16 q8, q11, q8 ; step2[7] - step2[8] - vsub.s16 q9, q10, q9 ; step2[6] - step2[9] - vst1.64 {d24}, [r1], r3 ; store output[6] - vst1.64 {d25}, [r1], r5 - vst1.64 {d26}, [r1], r3 ; store output[7] - vst1.64 {d27}, [r1], r5 - - ; store the data output 8,9,10,11,12,13,14,15 - vst1.64 {d16}, [r1], r3 - vst1.64 {d17}, [r1], r5 - vst1.64 {d18}, [r1], r3 - vst1.64 {d19}, [r1], r5 - vst1.64 {d4}, [r1], r3 - vst1.64 {d5}, [r1], r5 - vst1.64 {d6}, [r1], r3 - vst1.64 {d7}, [r1], r5 - vst1.64 {d8}, [r1], r3 - vst1.64 {d9}, [r1], r5 - vst1.64 {d10}, [r1], r3 - vst1.64 {d11}, [r1], r5 - vst1.64 {d28}, [r1], r3 - vst1.64 {d29}, [r1], r5 - vst1.64 {d30}, [r1], r3 - vst1.64 {d31}, [r1], r5 -end_idct16x16_pass2 - pop {r3-r9} - bx lr - ENDP ; |vp9_idct16x16_256_add_neon_pass2| - -;void |vp9_idct16x16_10_add_neon_pass1|(int16_t *input, -; int16_t *output, int output_stride) -; -; r0 int16_t input -; r1 int16_t *output -; r2 int output_stride) - -; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output -; will be stored back into q8-q15 registers. This function will touch q0-q7 -; registers and use them as buffer during calculation. -|vp9_idct16x16_10_add_neon_pass1| PROC - - ; TODO(hkuang): Find a better way to load the elements. - ; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15 - vld2.s16 {q8,q9}, [r0]! - vld2.s16 {q9,q10}, [r0]! - vld2.s16 {q10,q11}, [r0]! - vld2.s16 {q11,q12}, [r0]! - vld2.s16 {q12,q13}, [r0]! - vld2.s16 {q13,q14}, [r0]! - vld2.s16 {q14,q15}, [r0]! - vld2.s16 {q1,q2}, [r0]! 
- vmov.s16 q15, q1 - - ; generate cospi_28_64*2 = 6392 - mov r3, #0x1800 - add r3, #0xf8 - - ; generate cospi_4_64*2 = 32138 - mov r12, #0x7d00 - add r12, #0x8a - - ; transpose the input data - TRANSPOSE8X8 - - ; stage 3 - vdup.16 q0, r3 ; duplicate cospi_28_64*2 - vdup.16 q1, r12 ; duplicate cospi_4_64*2 - - ; The following instructions use vqrdmulh to do the - ; dct_const_round_shift(step2[4] * cospi_28_64). vvqrdmulh will multiply, - ; double, and return the high 16 bits, effectively giving >> 15. Doubling - ; the constant will change this to >> 14. - ; dct_const_round_shift(step2[4] * cospi_28_64); - vqrdmulh.s16 q4, q9, q0 - - ; preloading to avoid stall - ; generate cospi_16_64*2 = 23170 - mov r3, #0x5a00 - add r3, #0x82 - - ; dct_const_round_shift(step2[4] * cospi_4_64); - vqrdmulh.s16 q7, q9, q1 - - ; stage 4 - vdup.16 q1, r3 ; cospi_16_64*2 - - ; generate cospi_16_64 = 11585 - mov r3, #0x2d00 - add r3, #0x41 - - vdup.16 d4, r3; ; duplicate cospi_16_64 - - ; dct_const_round_shift(step1[0] * cospi_16_64) - vqrdmulh.s16 q8, q8, q1 - - ; step2[6] * cospi_16_64 - vmull.s16 q9, d14, d4 - vmull.s16 q10, d15, d4 - - ; step2[5] * cospi_16_64 - vmull.s16 q12, d9, d4 - vmull.s16 q11, d8, d4 - - ; temp1 = (step2[6] - step2[5]) * cospi_16_64 - vsub.s32 q15, q10, q12 - vsub.s32 q6, q9, q11 - - ; temp2 = (step2[5] + step2[6]) * cospi_16_64 - vadd.s32 q9, q9, q11 - vadd.s32 q10, q10, q12 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d11, q15, #14 ; >> 14 - vqrshrn.s32 d10, q6, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d12, q9, #14 ; >> 14 - vqrshrn.s32 d13, q10, #14 ; >> 14 - - ; stage 6 - vadd.s16 q2, q8, q7 ; step2[0] = step1[0] + step1[7]; - vadd.s16 q10, q8, q5 ; step2[2] = step1[2] + step1[5]; - vadd.s16 q11, q8, q4 ; step2[3] = step1[3] + step1[4]; - vadd.s16 q9, q8, q6 ; step2[1] = step1[1] + step1[6]; - vsub.s16 q12, q8, q4 ; step2[4] = step1[3] - step1[4]; - vsub.s16 q13, q8, q5 ; step2[5] = step1[2] - step1[5]; - vsub.s16 q14, q8, q6 ; 
step2[6] = step1[1] - step1[6]; - vsub.s16 q15, q8, q7 ; step2[7] = step1[0] - step1[7]; - - ; store the data - vst1.64 {d4}, [r1], r2 - vst1.64 {d5}, [r1], r2 - vst1.64 {d18}, [r1], r2 - vst1.64 {d19}, [r1], r2 - vst1.64 {d20}, [r1], r2 - vst1.64 {d21}, [r1], r2 - vst1.64 {d22}, [r1], r2 - vst1.64 {d23}, [r1], r2 - vst1.64 {d24}, [r1], r2 - vst1.64 {d25}, [r1], r2 - vst1.64 {d26}, [r1], r2 - vst1.64 {d27}, [r1], r2 - vst1.64 {d28}, [r1], r2 - vst1.64 {d29}, [r1], r2 - vst1.64 {d30}, [r1], r2 - vst1.64 {d31}, [r1], r2 - - bx lr - ENDP ; |vp9_idct16x16_10_add_neon_pass1| - -;void vp9_idct16x16_10_add_neon_pass2(int16_t *src, -; int16_t *output, -; int16_t *pass1Output, -; int16_t skip_adding, -; uint8_t *dest, -; int dest_stride) -; -; r0 int16_t *src -; r1 int16_t *output, -; r2 int16_t *pass1Output, -; r3 int16_t skip_adding, -; r4 uint8_t *dest, -; r5 int dest_stride) - -; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output -; will be stored back into q8-q15 registers. This function will touch q0-q7 -; registers and use them as buffer during calculation. -|vp9_idct16x16_10_add_neon_pass2| PROC - push {r3-r9} - - ; TODO(hkuang): Find a better way to load the elements. - ; load elements of 1, 3, 5, 7, 9, 11, 13, 15 into q8 - q15 - vld2.s16 {q8,q9}, [r0]! - vld2.s16 {q9,q10}, [r0]! - vld2.s16 {q10,q11}, [r0]! - vld2.s16 {q11,q12}, [r0]! - vld2.s16 {q12,q13}, [r0]! - vld2.s16 {q13,q14}, [r0]! - vld2.s16 {q14,q15}, [r0]! - vld2.s16 {q0,q1}, [r0]! 
- vmov.s16 q15, q0; - - ; generate 2*cospi_30_64 = 3212 - mov r3, #0xc00 - add r3, #0x8c - - ; generate 2*cospi_2_64 = 32610 - mov r12, #0x7f00 - add r12, #0x62 - - ; transpose the input data - TRANSPOSE8X8 - - ; stage 3 - vdup.16 q6, r3 ; duplicate 2*cospi_30_64 - - ; dct_const_round_shift(step1[8] * cospi_30_64) - vqrdmulh.s16 q0, q8, q6 - - vdup.16 q6, r12 ; duplicate 2*cospi_2_64 - - ; dct_const_round_shift(step1[8] * cospi_2_64) - vqrdmulh.s16 q7, q8, q6 - - ; preloading to avoid stall - ; generate 2*cospi_26_64 = 9512 - mov r12, #0x2500 - add r12, #0x28 - rsb r12, #0 - vdup.16 q15, r12 ; duplicate -2*cospi_26_64 - - ; generate 2*cospi_6_64 = 31358 - mov r3, #0x7a00 - add r3, #0x7e - vdup.16 q14, r3 ; duplicate 2*cospi_6_64 - - ; dct_const_round_shift(- step1[12] * cospi_26_64) - vqrdmulh.s16 q3, q9, q15 - - ; dct_const_round_shift(step1[12] * cospi_6_64) - vqrdmulh.s16 q4, q9, q14 - - ; stage 4 - ; generate cospi_24_64 = 6270 - mov r3, #0x1800 - add r3, #0x7e - vdup.16 d31, r3 ; duplicate cospi_24_64 - - ; generate cospi_8_64 = 15137 - mov r12, #0x3b00 - add r12, #0x21 - vdup.16 d30, r12 ; duplicate cospi_8_64 - - ; step1[14] * cospi_24_64 - vmull.s16 q12, d14, d31 - vmull.s16 q5, d15, d31 - - ; step1[9] * cospi_24_64 - vmull.s16 q2, d0, d31 - vmull.s16 q11, d1, d31 - - ; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64 - vmlsl.s16 q12, d0, d30 - vmlsl.s16 q5, d1, d30 - - ; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64 - vmlal.s16 q2, d14, d30 - vmlal.s16 q11, d15, d30 - - rsb r12, #0 - vdup.16 d30, r12 ; duplicate -cospi_8_64 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d2, q12, #14 ; >> 14 - vqrshrn.s32 d3, q5, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d12, q2, #14 ; >> 14 - vqrshrn.s32 d13, q11, #14 ; >> 14 - - ; - step1[13] * cospi_8_64 - vmull.s16 q10, d8, d30 - vmull.s16 q13, d9, d30 - - ; -step1[10] * cospi_8_64 - vmull.s16 q8, d6, d30 - vmull.s16 q9, d7, d30 - - ; temp1 = -step1[10] * cospi_24_64 - step1[13] * 
cospi_8_64 - vmlsl.s16 q10, d6, d31 - vmlsl.s16 q13, d7, d31 - - ; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64 - vmlal.s16 q8, d8, d31 - vmlal.s16 q9, d9, d31 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d4, q10, #14 ; >> 14 - vqrshrn.s32 d5, q13, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d10, q8, #14 ; >> 14 - vqrshrn.s32 d11, q9, #14 ; >> 14 - - ; stage 5 - vadd.s16 q8, q0, q3 ; step1[8] = step2[8]+step2[11]; - vadd.s16 q9, q1, q2 ; step1[9] = step2[9]+step2[10]; - vsub.s16 q10, q1, q2 ; step1[10] = step2[9]-step2[10]; - vsub.s16 q11, q0, q3 ; step1[11] = step2[8]-step2[11]; - vsub.s16 q12, q7, q4 ; step1[12] =-step2[12]+step2[15]; - vsub.s16 q13, q6, q5 ; step1[13] =-step2[13]+step2[14]; - vadd.s16 q14, q6, q5 ; step1[14] =step2[13]+step2[14]; - vadd.s16 q15, q7, q4 ; step1[15] =step2[12]+step2[15]; - - ; stage 6. - ; generate cospi_16_64 = 11585 - mov r12, #0x2d00 - add r12, #0x41 - - vdup.16 d14, r12 ; duplicate cospi_16_64 - - ; step1[13] * cospi_16_64 - vmull.s16 q3, d26, d14 - vmull.s16 q4, d27, d14 - - ; step1[10] * cospi_16_64 - vmull.s16 q0, d20, d14 - vmull.s16 q1, d21, d14 - - ; temp1 = (-step1[10] + step1[13]) * cospi_16_64 - vsub.s32 q5, q3, q0 - vsub.s32 q6, q4, q1 - - ; temp2 = (step1[10] + step1[13]) * cospi_16_64 - vadd.s32 q0, q3, q0 - vadd.s32 q1, q4, q1 - - ; dct_const_round_shift(temp1) - vqrshrn.s32 d4, q5, #14 ; >> 14 - vqrshrn.s32 d5, q6, #14 ; >> 14 - - ; dct_const_round_shift(temp2) - vqrshrn.s32 d10, q0, #14 ; >> 14 - vqrshrn.s32 d11, q1, #14 ; >> 14 - - ; step1[11] * cospi_16_64 - vmull.s16 q0, d22, d14 - vmull.s16 q1, d23, d14 - - ; step1[12] * cospi_16_64 - vmull.s16 q13, d24, d14 - vmull.s16 q6, d25, d14 - - ; temp1 = (-step1[11] + step1[12]) * cospi_16_64 - vsub.s32 q10, q13, q0 - vsub.s32 q4, q6, q1 - - ; temp2 = (step1[11] + step1[12]) * cospi_16_64 - vadd.s32 q13, q13, q0 - vadd.s32 q6, q6, q1 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d6, q10, #14 ; >> 14 - vqrshrn.s32 
d7, q4, #14 ; >> 14 - - ; dct_const_round_shift((step1[11] + step1[12]) * cospi_16_64); - vqrshrn.s32 d8, q13, #14 ; >> 14 - vqrshrn.s32 d9, q6, #14 ; >> 14 - - mov r4, #16 ; pass1Output stride - ldr r3, [sp] ; load skip_adding - - ; stage 7 - ; load the data in pass1 - mov r5, #24 - mov r3, #8 - - vld1.s16 {q0}, [r2], r4 ; load data step2[0] - vld1.s16 {q1}, [r2], r4 ; load data step2[1] - vadd.s16 q12, q0, q15 ; step2[0] + step2[15] - vadd.s16 q13, q1, q14 ; step2[1] + step2[14] - vld1.s16 {q10}, [r2], r4 ; load data step2[2] - vld1.s16 {q11}, [r2], r4 ; load data step2[3] - vst1.64 {d24}, [r1], r3 ; store output[0] - vst1.64 {d25}, [r1], r5 - vst1.64 {d26}, [r1], r3 ; store output[1] - vst1.64 {d27}, [r1], r5 - vadd.s16 q12, q10, q5 ; step2[2] + step2[13] - vadd.s16 q13, q11, q4 ; step2[3] + step2[12] - vsub.s16 q14, q1, q14 ; step2[1] - step2[14] - vsub.s16 q15, q0, q15 ; step2[0] - step2[15] - vst1.64 {d24}, [r1], r3 ; store output[2] - vst1.64 {d25}, [r1], r5 - vst1.64 {d26}, [r1], r3 ; store output[3] - vst1.64 {d27}, [r1], r5 - vsub.s16 q4, q11, q4 ; step2[3] - step2[12] - vsub.s16 q5, q10, q5 ; step2[2] - step2[13] - vld1.s16 {q0}, [r2], r4 ; load data step2[4] - vld1.s16 {q1}, [r2], r4 ; load data step2[5] - vadd.s16 q12, q0, q3 ; step2[4] + step2[11] - vadd.s16 q13, q1, q2 ; step2[5] + step2[10] - vld1.s16 {q10}, [r2], r4 ; load data step2[6] - vld1.s16 {q11}, [r2], r4 ; load data step2[7] - vst1.64 {d24}, [r1], r3 ; store output[4] - vst1.64 {d25}, [r1], r5 - vst1.64 {d26}, [r1], r3 ; store output[5] - vst1.64 {d27}, [r1], r5 - vadd.s16 q12, q10, q9 ; step2[6] + step2[9] - vadd.s16 q13, q11, q8 ; step2[7] + step2[8] - vsub.s16 q2, q1, q2 ; step2[5] - step2[10] - vsub.s16 q3, q0, q3 ; step2[4] - step2[11] - vsub.s16 q8, q11, q8 ; step2[7] - step2[8] - vsub.s16 q9, q10, q9 ; step2[6] - step2[9] - vst1.64 {d24}, [r1], r3 ; store output[6] - vst1.64 {d25}, [r1], r5 - vst1.64 {d26}, [r1], r3 ; store output[7] - vst1.64 {d27}, [r1], r5 - - ; store the data 
output 8,9,10,11,12,13,14,15 - vst1.64 {d16}, [r1], r3 - vst1.64 {d17}, [r1], r5 - vst1.64 {d18}, [r1], r3 - vst1.64 {d19}, [r1], r5 - vst1.64 {d4}, [r1], r3 - vst1.64 {d5}, [r1], r5 - vst1.64 {d6}, [r1], r3 - vst1.64 {d7}, [r1], r5 - vst1.64 {d8}, [r1], r3 - vst1.64 {d9}, [r1], r5 - vst1.64 {d10}, [r1], r3 - vst1.64 {d11}, [r1], r5 - vst1.64 {d28}, [r1], r3 - vst1.64 {d29}, [r1], r5 - vst1.64 {d30}, [r1], r3 - vst1.64 {d31}, [r1], r5 -end_idct10_16x16_pass2 - pop {r3-r9} - bx lr - ENDP ; |vp9_idct16x16_10_add_neon_pass2| - END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.asm deleted file mode 100644 index d290d07531c..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.asm +++ /dev/null @@ -1,144 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - EXPORT |vp9_idct32x32_1_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - ;TODO(hkuang): put the following macros in a seperate - ;file so other idct function could also use them. 
- MACRO - LD_16x8 $src, $stride - vld1.8 {q8}, [$src], $stride - vld1.8 {q9}, [$src], $stride - vld1.8 {q10}, [$src], $stride - vld1.8 {q11}, [$src], $stride - vld1.8 {q12}, [$src], $stride - vld1.8 {q13}, [$src], $stride - vld1.8 {q14}, [$src], $stride - vld1.8 {q15}, [$src], $stride - MEND - - MACRO - ADD_DIFF_16x8 $diff - vqadd.u8 q8, q8, $diff - vqadd.u8 q9, q9, $diff - vqadd.u8 q10, q10, $diff - vqadd.u8 q11, q11, $diff - vqadd.u8 q12, q12, $diff - vqadd.u8 q13, q13, $diff - vqadd.u8 q14, q14, $diff - vqadd.u8 q15, q15, $diff - MEND - - MACRO - SUB_DIFF_16x8 $diff - vqsub.u8 q8, q8, $diff - vqsub.u8 q9, q9, $diff - vqsub.u8 q10, q10, $diff - vqsub.u8 q11, q11, $diff - vqsub.u8 q12, q12, $diff - vqsub.u8 q13, q13, $diff - vqsub.u8 q14, q14, $diff - vqsub.u8 q15, q15, $diff - MEND - - MACRO - ST_16x8 $dst, $stride - vst1.8 {q8}, [$dst], $stride - vst1.8 {q9}, [$dst], $stride - vst1.8 {q10},[$dst], $stride - vst1.8 {q11},[$dst], $stride - vst1.8 {q12},[$dst], $stride - vst1.8 {q13},[$dst], $stride - vst1.8 {q14},[$dst], $stride - vst1.8 {q15},[$dst], $stride - MEND - -;void vp9_idct32x32_1_add_neon(int16_t *input, uint8_t *dest, -; int dest_stride) -; -; r0 int16_t input -; r1 uint8_t *dest -; r2 int dest_stride - -|vp9_idct32x32_1_add_neon| PROC - push {lr} - pld [r1] - add r3, r1, #16 ; r3 dest + 16 for second loop - ldrsh r0, [r0] - - ; generate cospi_16_64 = 11585 - mov r12, #0x2d00 - add r12, #0x41 - - ; out = dct_const_round_shift(input[0] * cospi_16_64) - mul r0, r0, r12 ; input[0] * cospi_16_64 - add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) - asr r0, r0, #14 ; >> DCT_CONST_BITS - - ; out = dct_const_round_shift(out * cospi_16_64) - mul r0, r0, r12 ; out * cospi_16_64 - mov r12, r1 ; save dest - add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) - asr r0, r0, #14 ; >> DCT_CONST_BITS - - ; a1 = ROUND_POWER_OF_TWO(out, 6) - add r0, r0, #32 ; + (1 <<((6) - 1)) - asrs r0, r0, #6 ; >> 6 - bge diff_positive_32_32 - -diff_negative_32_32 - neg r0, r0 - 
usat r0, #8, r0 - vdup.u8 q0, r0 - mov r0, #4 - -diff_negative_32_32_loop - sub r0, #1 - LD_16x8 r1, r2 - SUB_DIFF_16x8 q0 - ST_16x8 r12, r2 - - LD_16x8 r1, r2 - SUB_DIFF_16x8 q0 - ST_16x8 r12, r2 - cmp r0, #2 - moveq r1, r3 - moveq r12, r3 - cmp r0, #0 - bne diff_negative_32_32_loop - pop {pc} - -diff_positive_32_32 - usat r0, #8, r0 - vdup.u8 q0, r0 - mov r0, #4 - -diff_positive_32_32_loop - sub r0, #1 - LD_16x8 r1, r2 - ADD_DIFF_16x8 q0 - ST_16x8 r12, r2 - - LD_16x8 r1, r2 - ADD_DIFF_16x8 q0 - ST_16x8 r12, r2 - cmp r0, #2 - moveq r1, r3 - moveq r12, r3 - cmp r0, #0 - bne diff_positive_32_32_loop - pop {pc} - - ENDP ; |vp9_idct32x32_1_add_neon| - END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm deleted file mode 100644 index 72e933eee96..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm +++ /dev/null @@ -1,1299 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -;TODO(cd): adjust these constant to be able to use vqdmulh for faster -; dct_const_round_shift(a * b) within butterfly calculations. 
-cospi_1_64 EQU 16364 -cospi_2_64 EQU 16305 -cospi_3_64 EQU 16207 -cospi_4_64 EQU 16069 -cospi_5_64 EQU 15893 -cospi_6_64 EQU 15679 -cospi_7_64 EQU 15426 -cospi_8_64 EQU 15137 -cospi_9_64 EQU 14811 -cospi_10_64 EQU 14449 -cospi_11_64 EQU 14053 -cospi_12_64 EQU 13623 -cospi_13_64 EQU 13160 -cospi_14_64 EQU 12665 -cospi_15_64 EQU 12140 -cospi_16_64 EQU 11585 -cospi_17_64 EQU 11003 -cospi_18_64 EQU 10394 -cospi_19_64 EQU 9760 -cospi_20_64 EQU 9102 -cospi_21_64 EQU 8423 -cospi_22_64 EQU 7723 -cospi_23_64 EQU 7005 -cospi_24_64 EQU 6270 -cospi_25_64 EQU 5520 -cospi_26_64 EQU 4756 -cospi_27_64 EQU 3981 -cospi_28_64 EQU 3196 -cospi_29_64 EQU 2404 -cospi_30_64 EQU 1606 -cospi_31_64 EQU 804 - - - EXPORT |vp9_idct32x32_1024_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - AREA Block, CODE, READONLY - - ; -------------------------------------------------------------------------- - ; Load from transposed_buffer - ; q13 = transposed_buffer[first_offset] - ; q14 = transposed_buffer[second_offset] - ; for proper address calculation, the last offset used when manipulating - ; transposed_buffer must be passed in. use 0 for first use. - MACRO - LOAD_FROM_TRANSPOSED $prev_offset, $first_offset, $second_offset - ; address calculation with proper stride and loading - add r0, #($first_offset - $prev_offset )*8*2 - vld1.s16 {q14}, [r0] - add r0, #($second_offset - $first_offset)*8*2 - vld1.s16 {q13}, [r0] - ; (used) two registers (q14, q13) - MEND - ; -------------------------------------------------------------------------- - ; Load from output (used as temporary storage) - ; reg1 = output[first_offset] - ; reg2 = output[second_offset] - ; for proper address calculation, the last offset used when manipulating - ; output, whether reading or storing) must be passed in. use 0 for first - ; use. 
- MACRO - LOAD_FROM_OUTPUT $prev_offset, $first_offset, $second_offset, $reg1, $reg2 - ; address calculation with proper stride and loading - add r1, #($first_offset - $prev_offset )*32*2 - vld1.s16 {$reg1}, [r1] - add r1, #($second_offset - $first_offset)*32*2 - vld1.s16 {$reg2}, [r1] - ; (used) two registers ($reg1, $reg2) - MEND - ; -------------------------------------------------------------------------- - ; Store into output (sometimes as as temporary storage) - ; output[first_offset] = reg1 - ; output[second_offset] = reg2 - ; for proper address calculation, the last offset used when manipulating - ; output, whether reading or storing) must be passed in. use 0 for first - ; use. - MACRO - STORE_IN_OUTPUT $prev_offset, $first_offset, $second_offset, $reg1, $reg2 - ; address calculation with proper stride and storing - add r1, #($first_offset - $prev_offset )*32*2 - vst1.16 {$reg1}, [r1] - add r1, #($second_offset - $first_offset)*32*2 - vst1.16 {$reg2}, [r1] - MEND - ; -------------------------------------------------------------------------- - ; Combine-add results with current destination content - ; q6-q9 contain the results (out[j * 32 + 0-31]) - MACRO - STORE_COMBINE_CENTER_RESULTS - ; load dest[j * dest_stride + 0-31] - vld1.s16 {d8}, [r10], r2 - vld1.s16 {d11}, [r9], r11 - vld1.s16 {d9}, [r10] - vld1.s16 {d10}, [r9] - ; ROUND_POWER_OF_TWO - vrshr.s16 q7, q7, #6 - vrshr.s16 q8, q8, #6 - vrshr.s16 q9, q9, #6 - vrshr.s16 q6, q6, #6 - ; add to dest[j * dest_stride + 0-31] - vaddw.u8 q7, q7, d9 - vaddw.u8 q8, q8, d10 - vaddw.u8 q9, q9, d11 - vaddw.u8 q6, q6, d8 - ; clip pixel - vqmovun.s16 d9, q7 - vqmovun.s16 d10, q8 - vqmovun.s16 d11, q9 - vqmovun.s16 d8, q6 - ; store back into dest[j * dest_stride + 0-31] - vst1.16 {d9}, [r10], r11 - vst1.16 {d10}, [r9], r2 - vst1.16 {d8}, [r10] - vst1.16 {d11}, [r9] - ; update pointers (by dest_stride * 2) - sub r9, r9, r2, lsl #1 - add r10, r10, r2, lsl #1 - MEND - ; 
-------------------------------------------------------------------------- - ; Combine-add results with current destination content - ; q6-q9 contain the results (out[j * 32 + 0-31]) - MACRO - STORE_COMBINE_CENTER_RESULTS_LAST - ; load dest[j * dest_stride + 0-31] - vld1.s16 {d8}, [r10], r2 - vld1.s16 {d11}, [r9], r11 - vld1.s16 {d9}, [r10] - vld1.s16 {d10}, [r9] - ; ROUND_POWER_OF_TWO - vrshr.s16 q7, q7, #6 - vrshr.s16 q8, q8, #6 - vrshr.s16 q9, q9, #6 - vrshr.s16 q6, q6, #6 - ; add to dest[j * dest_stride + 0-31] - vaddw.u8 q7, q7, d9 - vaddw.u8 q8, q8, d10 - vaddw.u8 q9, q9, d11 - vaddw.u8 q6, q6, d8 - ; clip pixel - vqmovun.s16 d9, q7 - vqmovun.s16 d10, q8 - vqmovun.s16 d11, q9 - vqmovun.s16 d8, q6 - ; store back into dest[j * dest_stride + 0-31] - vst1.16 {d9}, [r10], r11 - vst1.16 {d10}, [r9], r2 - vst1.16 {d8}, [r10]! - vst1.16 {d11}, [r9]! - ; update pointers (by dest_stride * 2) - sub r9, r9, r2, lsl #1 - add r10, r10, r2, lsl #1 - MEND - ; -------------------------------------------------------------------------- - ; Combine-add results with current destination content - ; q4-q7 contain the results (out[j * 32 + 0-31]) - MACRO - STORE_COMBINE_EXTREME_RESULTS - ; load dest[j * dest_stride + 0-31] - vld1.s16 {d4}, [r7], r2 - vld1.s16 {d7}, [r6], r11 - vld1.s16 {d5}, [r7] - vld1.s16 {d6}, [r6] - ; ROUND_POWER_OF_TWO - vrshr.s16 q5, q5, #6 - vrshr.s16 q6, q6, #6 - vrshr.s16 q7, q7, #6 - vrshr.s16 q4, q4, #6 - ; add to dest[j * dest_stride + 0-31] - vaddw.u8 q5, q5, d5 - vaddw.u8 q6, q6, d6 - vaddw.u8 q7, q7, d7 - vaddw.u8 q4, q4, d4 - ; clip pixel - vqmovun.s16 d5, q5 - vqmovun.s16 d6, q6 - vqmovun.s16 d7, q7 - vqmovun.s16 d4, q4 - ; store back into dest[j * dest_stride + 0-31] - vst1.16 {d5}, [r7], r11 - vst1.16 {d6}, [r6], r2 - vst1.16 {d7}, [r6] - vst1.16 {d4}, [r7] - ; update pointers (by dest_stride * 2) - sub r6, r6, r2, lsl #1 - add r7, r7, r2, lsl #1 - MEND - ; -------------------------------------------------------------------------- - ; Combine-add 
results with current destination content - ; q4-q7 contain the results (out[j * 32 + 0-31]) - MACRO - STORE_COMBINE_EXTREME_RESULTS_LAST - ; load dest[j * dest_stride + 0-31] - vld1.s16 {d4}, [r7], r2 - vld1.s16 {d7}, [r6], r11 - vld1.s16 {d5}, [r7] - vld1.s16 {d6}, [r6] - ; ROUND_POWER_OF_TWO - vrshr.s16 q5, q5, #6 - vrshr.s16 q6, q6, #6 - vrshr.s16 q7, q7, #6 - vrshr.s16 q4, q4, #6 - ; add to dest[j * dest_stride + 0-31] - vaddw.u8 q5, q5, d5 - vaddw.u8 q6, q6, d6 - vaddw.u8 q7, q7, d7 - vaddw.u8 q4, q4, d4 - ; clip pixel - vqmovun.s16 d5, q5 - vqmovun.s16 d6, q6 - vqmovun.s16 d7, q7 - vqmovun.s16 d4, q4 - ; store back into dest[j * dest_stride + 0-31] - vst1.16 {d5}, [r7], r11 - vst1.16 {d6}, [r6], r2 - vst1.16 {d7}, [r6]! - vst1.16 {d4}, [r7]! - ; update pointers (by dest_stride * 2) - sub r6, r6, r2, lsl #1 - add r7, r7, r2, lsl #1 - MEND - ; -------------------------------------------------------------------------- - ; Touches q8-q12, q15 (q13-q14 are preserved) - ; valid output registers are anything but q8-q11 - MACRO - DO_BUTTERFLY $regC, $regD, $regA, $regB, $first_constant, $second_constant, $reg1, $reg2, $reg3, $reg4 - ; TODO(cd): have special case to re-use constants when they are similar for - ; consecutive butterflies - ; TODO(cd): have special case when both constants are the same, do the - ; additions/subtractions before the multiplies. 
- ; generate the constants - ; generate scalar constants - mov r8, #$first_constant & 0xFF00 - mov r12, #$second_constant & 0xFF00 - add r8, #$first_constant & 0x00FF - add r12, #$second_constant & 0x00FF - ; generate vector constants - vdup.16 d30, r8 - vdup.16 d31, r12 - ; (used) two for inputs (regA-regD), one for constants (q15) - ; do some multiplications (ordered for maximum latency hiding) - vmull.s16 q8, $regC, d30 - vmull.s16 q10, $regA, d31 - vmull.s16 q9, $regD, d30 - vmull.s16 q11, $regB, d31 - vmull.s16 q12, $regC, d31 - ; (used) five for intermediate (q8-q12), one for constants (q15) - ; do some addition/subtractions (to get back two register) - vsub.s32 q8, q8, q10 - vsub.s32 q9, q9, q11 - ; do more multiplications (ordered for maximum latency hiding) - vmull.s16 q10, $regD, d31 - vmull.s16 q11, $regA, d30 - vmull.s16 q15, $regB, d30 - ; (used) six for intermediate (q8-q12, q15) - ; do more addition/subtractions - vadd.s32 q11, q12, q11 - vadd.s32 q10, q10, q15 - ; (used) four for intermediate (q8-q11) - ; dct_const_round_shift - vqrshrn.s32 $reg1, q8, #14 - vqrshrn.s32 $reg2, q9, #14 - vqrshrn.s32 $reg3, q11, #14 - vqrshrn.s32 $reg4, q10, #14 - ; (used) two for results, well four d registers - MEND - ; -------------------------------------------------------------------------- - ; Touches q8-q12, q15 (q13-q14 are preserved) - ; valid output registers are anything but q8-q11 - MACRO - DO_BUTTERFLY_STD $first_constant, $second_constant, $reg1, $reg2, $reg3, $reg4 - DO_BUTTERFLY d28, d29, d26, d27, $first_constant, $second_constant, $reg1, $reg2, $reg3, $reg4 - MEND - ; -------------------------------------------------------------------------- - -;void vp9_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride); -; -; r0 int16_t *input, -; r1 uint8_t *dest, -; r2 int dest_stride) -; loop counters -; r4 bands loop counter -; r5 pass loop counter -; r8 transpose loop counter -; combine-add pointers -; r6 dest + 31 * dest_stride, descending 
(30, 29, 28, ...) -; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...) -; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...) -; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...) - -|vp9_idct32x32_1024_add_neon| PROC - ; This function does one pass of idct32x32 transform. - ; - ; This is done by transposing the input and then doing a 1d transform on - ; columns. In the first pass, the transposed columns are the original - ; rows. In the second pass, after the transposition, the colums are the - ; original columns. - ; The 1d transform is done by looping over bands of eight columns (the - ; idct32_bands loop). For each band, the transform input transposition - ; is done on demand, one band of four 8x8 matrices at a time. The four - ; matrices are transposed by pairs (the idct32_transpose_pair loop). - push {r4-r11} - vpush {d8-d15} - ; stack operation - ; internal buffer used to transpose 8 lines into before transforming them - ; int16_t transpose_buffer[32 * 8]; - ; at sp + [4096, 4607] - ; results of the first pass (transpose and transform rows) - ; int16_t pass1[32 * 32]; - ; at sp + [0, 2047] - ; results of the second pass (transpose and transform columns) - ; int16_t pass2[32 * 32]; - ; at sp + [2048, 4095] - sub sp, sp, #512+2048+2048 - - ; r6 = dest + 31 * dest_stride - ; r7 = dest + 0 * dest_stride - ; r9 = dest + 15 * dest_stride - ; r10 = dest + 16 * dest_stride - rsb r6, r2, r2, lsl #5 - rsb r9, r2, r2, lsl #4 - add r10, r1, r2, lsl #4 - mov r7, r1 - add r6, r6, r1 - add r9, r9, r1 - ; r11 = -dest_stride - neg r11, r2 - ; r3 = input - mov r3, r0 - ; parameters for first pass - ; r0 = transpose_buffer[32 * 8] - add r0, sp, #4096 - ; r1 = pass1[32 * 32] - mov r1, sp - - mov r5, #0 ; initialize pass loop counter -idct32_pass_loop - mov r4, #4 ; initialize bands loop counter -idct32_bands_loop - mov r8, #2 ; initialize transpose loop counter -idct32_transpose_pair_loop - ; Load two horizontally consecutive 8x8 16bit data matrices. 
The first one - ; into q0-q7 and the second one into q8-q15. There is a stride of 64, - ; adjusted to 32 because of the two post-increments. - vld1.s16 {q8}, [r3]! - vld1.s16 {q0}, [r3]! - add r3, #32 - vld1.s16 {q9}, [r3]! - vld1.s16 {q1}, [r3]! - add r3, #32 - vld1.s16 {q10}, [r3]! - vld1.s16 {q2}, [r3]! - add r3, #32 - vld1.s16 {q11}, [r3]! - vld1.s16 {q3}, [r3]! - add r3, #32 - vld1.s16 {q12}, [r3]! - vld1.s16 {q4}, [r3]! - add r3, #32 - vld1.s16 {q13}, [r3]! - vld1.s16 {q5}, [r3]! - add r3, #32 - vld1.s16 {q14}, [r3]! - vld1.s16 {q6}, [r3]! - add r3, #32 - vld1.s16 {q15}, [r3]! - vld1.s16 {q7}, [r3]! - - ; Transpose the two 8x8 16bit data matrices. - vswp d17, d24 - vswp d23, d30 - vswp d21, d28 - vswp d19, d26 - vswp d1, d8 - vswp d7, d14 - vswp d5, d12 - vswp d3, d10 - vtrn.32 q8, q10 - vtrn.32 q9, q11 - vtrn.32 q12, q14 - vtrn.32 q13, q15 - vtrn.32 q0, q2 - vtrn.32 q1, q3 - vtrn.32 q4, q6 - vtrn.32 q5, q7 - vtrn.16 q8, q9 - vtrn.16 q10, q11 - vtrn.16 q12, q13 - vtrn.16 q14, q15 - vtrn.16 q0, q1 - vtrn.16 q2, q3 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - - ; Store both matrices after each other. There is a stride of 32, which - ; adjusts to nothing because of the post-increments. - vst1.16 {q8}, [r0]! - vst1.16 {q9}, [r0]! - vst1.16 {q10}, [r0]! - vst1.16 {q11}, [r0]! - vst1.16 {q12}, [r0]! - vst1.16 {q13}, [r0]! - vst1.16 {q14}, [r0]! - vst1.16 {q15}, [r0]! - vst1.16 {q0}, [r0]! - vst1.16 {q1}, [r0]! - vst1.16 {q2}, [r0]! - vst1.16 {q3}, [r0]! - vst1.16 {q4}, [r0]! - vst1.16 {q5}, [r0]! - vst1.16 {q6}, [r0]! - vst1.16 {q7}, [r0]! 
- - ; increment pointers by adjusted stride (not necessary for r0/out) - ; go back by 7*32 for the seven lines moved fully by read and add - ; go back by 32 for the eigth line only read - ; advance by 16*2 to go the next pair - sub r3, r3, #7*32*2 + 32 - 16*2 - ; transpose pair loop processing - subs r8, r8, #1 - bne idct32_transpose_pair_loop - - ; restore r0/input to its original value - sub r0, r0, #32*8*2 - - ; Instead of doing the transforms stage by stage, it is done by loading - ; some input values and doing as many stages as possible to minimize the - ; storing/loading of intermediate results. To fit within registers, the - ; final coefficients are cut into four blocks: - ; BLOCK A: 16-19,28-31 - ; BLOCK B: 20-23,24-27 - ; BLOCK C: 8-10,11-15 - ; BLOCK D: 0-3,4-7 - ; Blocks A and C are straight calculation through the various stages. In - ; block B, further calculations are performed using the results from - ; block A. In block D, further calculations are performed using the results - ; from block C and then the final calculations are done using results from - ; block A and B which have been combined at the end of block B. 
- - ; -------------------------------------------------------------------------- - ; BLOCK A: 16-19,28-31 - ; -------------------------------------------------------------------------- - ; generate 16,17,30,31 - ; -------------------------------------------------------------------------- - ; part of stage 1 - ;temp1 = input[1 * 32] * cospi_31_64 - input[31 * 32] * cospi_1_64; - ;temp2 = input[1 * 32] * cospi_1_64 + input[31 * 32] * cospi_31_64; - ;step1b[16][i] = dct_const_round_shift(temp1); - ;step1b[31][i] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 0, 1, 31 - DO_BUTTERFLY_STD cospi_31_64, cospi_1_64, d0, d1, d4, d5 - ; -------------------------------------------------------------------------- - ; part of stage 1 - ;temp1 = input[17 * 32] * cospi_15_64 - input[15 * 32] * cospi_17_64; - ;temp2 = input[17 * 32] * cospi_17_64 + input[15 * 32] * cospi_15_64; - ;step1b[17][i] = dct_const_round_shift(temp1); - ;step1b[30][i] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 31, 17, 15 - DO_BUTTERFLY_STD cospi_15_64, cospi_17_64, d2, d3, d6, d7 - ; -------------------------------------------------------------------------- - ; part of stage 2 - ;step2[16] = step1b[16][i] + step1b[17][i]; - ;step2[17] = step1b[16][i] - step1b[17][i]; - ;step2[30] = -step1b[30][i] + step1b[31][i]; - ;step2[31] = step1b[30][i] + step1b[31][i]; - vadd.s16 q4, q0, q1 - vsub.s16 q13, q0, q1 - vadd.s16 q6, q2, q3 - vsub.s16 q14, q2, q3 - ; -------------------------------------------------------------------------- - ; part of stage 3 - ;temp1 = step1b[30][i] * cospi_28_64 - step1b[17][i] * cospi_4_64; - ;temp2 = step1b[30][i] * cospi_4_64 - step1b[17][i] * cospi_28_64; - ;step3[17] = dct_const_round_shift(temp1); - ;step3[30] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD cospi_28_64, cospi_4_64, d10, d11, d14, d15 - ; -------------------------------------------------------------------------- - ; generate 18,19,28,29 - ; 
-------------------------------------------------------------------------- - ; part of stage 1 - ;temp1 = input[9 * 32] * cospi_23_64 - input[23 * 32] * cospi_9_64; - ;temp2 = input[9 * 32] * cospi_9_64 + input[23 * 32] * cospi_23_64; - ;step1b[18][i] = dct_const_round_shift(temp1); - ;step1b[29][i] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 15, 9, 23 - DO_BUTTERFLY_STD cospi_23_64, cospi_9_64, d0, d1, d4, d5 - ; -------------------------------------------------------------------------- - ; part of stage 1 - ;temp1 = input[25 * 32] * cospi_7_64 - input[7 * 32] * cospi_25_64; - ;temp2 = input[25 * 32] * cospi_25_64 + input[7 * 32] * cospi_7_64; - ;step1b[19][i] = dct_const_round_shift(temp1); - ;step1b[28][i] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 23, 25, 7 - DO_BUTTERFLY_STD cospi_7_64, cospi_25_64, d2, d3, d6, d7 - ; -------------------------------------------------------------------------- - ; part of stage 2 - ;step2[18] = -step1b[18][i] + step1b[19][i]; - ;step2[19] = step1b[18][i] + step1b[19][i]; - ;step2[28] = step1b[28][i] + step1b[29][i]; - ;step2[29] = step1b[28][i] - step1b[29][i]; - vsub.s16 q13, q3, q2 - vadd.s16 q3, q3, q2 - vsub.s16 q14, q1, q0 - vadd.s16 q2, q1, q0 - ; -------------------------------------------------------------------------- - ; part of stage 3 - ;temp1 = step1b[18][i] * (-cospi_4_64) - step1b[29][i] * (-cospi_28_64); - ;temp2 = step1b[18][i] * (-cospi_28_64) + step1b[29][i] * (-cospi_4_64); - ;step3[29] = dct_const_round_shift(temp1); - ;step3[18] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD (-cospi_4_64), (-cospi_28_64), d2, d3, d0, d1 - ; -------------------------------------------------------------------------- - ; combine 16-19,28-31 - ; -------------------------------------------------------------------------- - ; part of stage 4 - ;step1[16] = step1b[16][i] + step1b[19][i]; - ;step1[17] = step1b[17][i] + step1b[18][i]; - ;step1[18] = step1b[17][i] - step1b[18][i]; - ;step1[29] = 
step1b[30][i] - step1b[29][i]; - ;step1[30] = step1b[30][i] + step1b[29][i]; - ;step1[31] = step1b[31][i] + step1b[28][i]; - vadd.s16 q8, q4, q2 - vadd.s16 q9, q5, q0 - vadd.s16 q10, q7, q1 - vadd.s16 q15, q6, q3 - vsub.s16 q13, q5, q0 - vsub.s16 q14, q7, q1 - STORE_IN_OUTPUT 0, 16, 31, q8, q15 - STORE_IN_OUTPUT 31, 17, 30, q9, q10 - ; -------------------------------------------------------------------------- - ; part of stage 5 - ;temp1 = step1b[29][i] * cospi_24_64 - step1b[18][i] * cospi_8_64; - ;temp2 = step1b[29][i] * cospi_8_64 + step1b[18][i] * cospi_24_64; - ;step2[18] = dct_const_round_shift(temp1); - ;step2[29] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD cospi_24_64, cospi_8_64, d0, d1, d2, d3 - STORE_IN_OUTPUT 30, 29, 18, q1, q0 - ; -------------------------------------------------------------------------- - ; part of stage 4 - ;step1[19] = step1b[16][i] - step1b[19][i]; - ;step1[28] = step1b[31][i] - step1b[28][i]; - vsub.s16 q13, q4, q2 - vsub.s16 q14, q6, q3 - ; -------------------------------------------------------------------------- - ; part of stage 5 - ;temp1 = step1b[28][i] * cospi_24_64 - step1b[19][i] * cospi_8_64; - ;temp2 = step1b[28][i] * cospi_8_64 + step1b[19][i] * cospi_24_64; - ;step2[19] = dct_const_round_shift(temp1); - ;step2[28] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD cospi_24_64, cospi_8_64, d8, d9, d12, d13 - STORE_IN_OUTPUT 18, 19, 28, q4, q6 - ; -------------------------------------------------------------------------- - - - ; -------------------------------------------------------------------------- - ; BLOCK B: 20-23,24-27 - ; -------------------------------------------------------------------------- - ; generate 20,21,26,27 - ; -------------------------------------------------------------------------- - ; part of stage 1 - ;temp1 = input[5 * 32] * cospi_27_64 - input[27 * 32] * cospi_5_64; - ;temp2 = input[5 * 32] * cospi_5_64 + input[27 * 32] * cospi_27_64; - ;step1b[20][i] = dct_const_round_shift(temp1); - 
;step1b[27][i] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 7, 5, 27 - DO_BUTTERFLY_STD cospi_27_64, cospi_5_64, d0, d1, d4, d5 - ; -------------------------------------------------------------------------- - ; part of stage 1 - ;temp1 = input[21 * 32] * cospi_11_64 - input[11 * 32] * cospi_21_64; - ;temp2 = input[21 * 32] * cospi_21_64 + input[11 * 32] * cospi_11_64; - ;step1b[21][i] = dct_const_round_shift(temp1); - ;step1b[26][i] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 27, 21, 11 - DO_BUTTERFLY_STD cospi_11_64, cospi_21_64, d2, d3, d6, d7 - ; -------------------------------------------------------------------------- - ; part of stage 2 - ;step2[20] = step1b[20][i] + step1b[21][i]; - ;step2[21] = step1b[20][i] - step1b[21][i]; - ;step2[26] = -step1b[26][i] + step1b[27][i]; - ;step2[27] = step1b[26][i] + step1b[27][i]; - vsub.s16 q13, q0, q1 - vadd.s16 q0, q0, q1 - vsub.s16 q14, q2, q3 - vadd.s16 q2, q2, q3 - ; -------------------------------------------------------------------------- - ; part of stage 3 - ;temp1 = step1b[26][i] * cospi_12_64 - step1b[21][i] * cospi_20_64; - ;temp2 = step1b[26][i] * cospi_20_64 + step1b[21][i] * cospi_12_64; - ;step3[21] = dct_const_round_shift(temp1); - ;step3[26] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD cospi_12_64, cospi_20_64, d2, d3, d6, d7 - ; -------------------------------------------------------------------------- - ; generate 22,23,24,25 - ; -------------------------------------------------------------------------- - ; part of stage 1 - ;temp1 = input[13 * 32] * cospi_19_64 - input[19 * 32] * cospi_13_64; - ;temp2 = input[13 * 32] * cospi_13_64 + input[19 * 32] * cospi_19_64; - ;step1b[22][i] = dct_const_round_shift(temp1); - ;step1b[25][i] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 11, 13, 19 - DO_BUTTERFLY_STD cospi_19_64, cospi_13_64, d10, d11, d14, d15 - ; -------------------------------------------------------------------------- - ; part of stage 1 - ;temp1 = input[29 * 
32] * cospi_3_64 - input[3 * 32] * cospi_29_64; - ;temp2 = input[29 * 32] * cospi_29_64 + input[3 * 32] * cospi_3_64; - ;step1b[23][i] = dct_const_round_shift(temp1); - ;step1b[24][i] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 19, 29, 3 - DO_BUTTERFLY_STD cospi_3_64, cospi_29_64, d8, d9, d12, d13 - ; -------------------------------------------------------------------------- - ; part of stage 2 - ;step2[22] = -step1b[22][i] + step1b[23][i]; - ;step2[23] = step1b[22][i] + step1b[23][i]; - ;step2[24] = step1b[24][i] + step1b[25][i]; - ;step2[25] = step1b[24][i] - step1b[25][i]; - vsub.s16 q14, q4, q5 - vadd.s16 q5, q4, q5 - vsub.s16 q13, q6, q7 - vadd.s16 q6, q6, q7 - ; -------------------------------------------------------------------------- - ; part of stage 3 - ;temp1 = step1b[22][i] * (-cospi_20_64) - step1b[25][i] * (-cospi_12_64); - ;temp2 = step1b[22][i] * (-cospi_12_64) + step1b[25][i] * (-cospi_20_64); - ;step3[25] = dct_const_round_shift(temp1); - ;step3[22] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD (-cospi_20_64), (-cospi_12_64), d8, d9, d14, d15 - ; -------------------------------------------------------------------------- - ; combine 20-23,24-27 - ; -------------------------------------------------------------------------- - ; part of stage 4 - ;step1[22] = step1b[22][i] + step1b[21][i]; - ;step1[23] = step1b[23][i] + step1b[20][i]; - vadd.s16 q10, q7, q1 - vadd.s16 q11, q5, q0 - ;step1[24] = step1b[24][i] + step1b[27][i]; - ;step1[25] = step1b[25][i] + step1b[26][i]; - vadd.s16 q12, q6, q2 - vadd.s16 q15, q4, q3 - ; -------------------------------------------------------------------------- - ; part of stage 6 - ;step3[16] = step1b[16][i] + step1b[23][i]; - ;step3[17] = step1b[17][i] + step1b[22][i]; - ;step3[22] = step1b[17][i] - step1b[22][i]; - ;step3[23] = step1b[16][i] - step1b[23][i]; - LOAD_FROM_OUTPUT 28, 16, 17, q14, q13 - vadd.s16 q8, q14, q11 - vadd.s16 q9, q13, q10 - vsub.s16 q13, q13, q10 - vsub.s16 q11, q14, q11 - 
STORE_IN_OUTPUT 17, 17, 16, q9, q8 - ; -------------------------------------------------------------------------- - ; part of stage 6 - ;step3[24] = step1b[31][i] - step1b[24][i]; - ;step3[25] = step1b[30][i] - step1b[25][i]; - ;step3[30] = step1b[30][i] + step1b[25][i]; - ;step3[31] = step1b[31][i] + step1b[24][i]; - LOAD_FROM_OUTPUT 16, 30, 31, q14, q9 - vsub.s16 q8, q9, q12 - vadd.s16 q10, q14, q15 - vsub.s16 q14, q14, q15 - vadd.s16 q12, q9, q12 - STORE_IN_OUTPUT 31, 30, 31, q10, q12 - ; -------------------------------------------------------------------------- - ; TODO(cd) do some register allocation change to remove these push/pop - vpush {q8} ; [24] - vpush {q11} ; [23] - ; -------------------------------------------------------------------------- - ; part of stage 7 - ;temp1 = (step1b[25][i] - step1b[22][i]) * cospi_16_64; - ;temp2 = (step1b[25][i] + step1b[22][i]) * cospi_16_64; - ;step1[22] = dct_const_round_shift(temp1); - ;step1[25] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d26, d27, d28, d29 - STORE_IN_OUTPUT 31, 25, 22, q14, q13 - ; -------------------------------------------------------------------------- - ; part of stage 7 - ;temp1 = (step1b[24][i] - step1b[23][i]) * cospi_16_64; - ;temp2 = (step1b[24][i] + step1b[23][i]) * cospi_16_64; - ;step1[23] = dct_const_round_shift(temp1); - ;step1[24] = dct_const_round_shift(temp2); - ; TODO(cd) do some register allocation change to remove these push/pop - vpop {q13} ; [23] - vpop {q14} ; [24] - DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d26, d27, d28, d29 - STORE_IN_OUTPUT 22, 24, 23, q14, q13 - ; -------------------------------------------------------------------------- - ; part of stage 4 - ;step1[20] = step1b[23][i] - step1b[20][i]; - ;step1[27] = step1b[24][i] - step1b[27][i]; - vsub.s16 q14, q5, q0 - vsub.s16 q13, q6, q2 - ; -------------------------------------------------------------------------- - ; part of stage 5 - ;temp1 = step1b[20][i] * (-cospi_8_64) - 
step1b[27][i] * (-cospi_24_64); - ;temp2 = step1b[20][i] * (-cospi_24_64) + step1b[27][i] * (-cospi_8_64); - ;step2[27] = dct_const_round_shift(temp1); - ;step2[20] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD (-cospi_8_64), (-cospi_24_64), d10, d11, d12, d13 - ; -------------------------------------------------------------------------- - ; part of stage 4 - ;step1[21] = step1b[22][i] - step1b[21][i]; - ;step1[26] = step1b[25][i] - step1b[26][i]; - vsub.s16 q14, q7, q1 - vsub.s16 q13, q4, q3 - ; -------------------------------------------------------------------------- - ; part of stage 5 - ;temp1 = step1b[21][i] * (-cospi_8_64) - step1b[26][i] * (-cospi_24_64); - ;temp2 = step1b[21][i] * (-cospi_24_64) + step1b[26][i] * (-cospi_8_64); - ;step2[26] = dct_const_round_shift(temp1); - ;step2[21] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD (-cospi_8_64), (-cospi_24_64), d0, d1, d2, d3 - ; -------------------------------------------------------------------------- - ; part of stage 6 - ;step3[18] = step1b[18][i] + step1b[21][i]; - ;step3[19] = step1b[19][i] + step1b[20][i]; - ;step3[20] = step1b[19][i] - step1b[20][i]; - ;step3[21] = step1b[18][i] - step1b[21][i]; - LOAD_FROM_OUTPUT 23, 18, 19, q14, q13 - vadd.s16 q8, q14, q1 - vadd.s16 q9, q13, q6 - vsub.s16 q13, q13, q6 - vsub.s16 q1, q14, q1 - STORE_IN_OUTPUT 19, 18, 19, q8, q9 - ; -------------------------------------------------------------------------- - ; part of stage 6 - ;step3[27] = step1b[28][i] - step1b[27][i]; - ;step3[28] = step1b[28][i] + step1b[27][i]; - ;step3[29] = step1b[29][i] + step1b[26][i]; - ;step3[26] = step1b[29][i] - step1b[26][i]; - LOAD_FROM_OUTPUT 19, 28, 29, q8, q9 - vsub.s16 q14, q8, q5 - vadd.s16 q10, q8, q5 - vadd.s16 q11, q9, q0 - vsub.s16 q0, q9, q0 - STORE_IN_OUTPUT 29, 28, 29, q10, q11 - ; -------------------------------------------------------------------------- - ; part of stage 7 - ;temp1 = (step1b[27][i] - step1b[20][i]) * cospi_16_64; - ;temp2 = (step1b[27][i] + 
step1b[20][i]) * cospi_16_64; - ;step1[20] = dct_const_round_shift(temp1); - ;step1[27] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d26, d27, d28, d29 - STORE_IN_OUTPUT 29, 20, 27, q13, q14 - ; -------------------------------------------------------------------------- - ; part of stage 7 - ;temp1 = (step1b[26][i] - step1b[21][i]) * cospi_16_64; - ;temp2 = (step1b[26][i] + step1b[21][i]) * cospi_16_64; - ;step1[21] = dct_const_round_shift(temp1); - ;step1[26] = dct_const_round_shift(temp2); - DO_BUTTERFLY d0, d1, d2, d3, cospi_16_64, cospi_16_64, d2, d3, d0, d1 - STORE_IN_OUTPUT 27, 21, 26, q1, q0 - ; -------------------------------------------------------------------------- - - - ; -------------------------------------------------------------------------- - ; BLOCK C: 8-10,11-15 - ; -------------------------------------------------------------------------- - ; generate 8,9,14,15 - ; -------------------------------------------------------------------------- - ; part of stage 2 - ;temp1 = input[2 * 32] * cospi_30_64 - input[30 * 32] * cospi_2_64; - ;temp2 = input[2 * 32] * cospi_2_64 + input[30 * 32] * cospi_30_64; - ;step2[8] = dct_const_round_shift(temp1); - ;step2[15] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 3, 2, 30 - DO_BUTTERFLY_STD cospi_30_64, cospi_2_64, d0, d1, d4, d5 - ; -------------------------------------------------------------------------- - ; part of stage 2 - ;temp1 = input[18 * 32] * cospi_14_64 - input[14 * 32] * cospi_18_64; - ;temp2 = input[18 * 32] * cospi_18_64 + input[14 * 32] * cospi_14_64; - ;step2[9] = dct_const_round_shift(temp1); - ;step2[14] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 30, 18, 14 - DO_BUTTERFLY_STD cospi_14_64, cospi_18_64, d2, d3, d6, d7 - ; -------------------------------------------------------------------------- - ; part of stage 3 - ;step3[8] = step1b[8][i] + step1b[9][i]; - ;step3[9] = step1b[8][i] - step1b[9][i]; - ;step3[14] = step1b[15][i] - 
step1b[14][i]; - ;step3[15] = step1b[15][i] + step1b[14][i]; - vsub.s16 q13, q0, q1 - vadd.s16 q0, q0, q1 - vsub.s16 q14, q2, q3 - vadd.s16 q2, q2, q3 - ; -------------------------------------------------------------------------- - ; part of stage 4 - ;temp1 = step1b[14][i] * cospi_24_64 - step1b[9][i] * cospi_8_64; - ;temp2 = step1b[14][i] * cospi_8_64 + step1b[9][i] * cospi_24_64; - ;step1[9] = dct_const_round_shift(temp1); - ;step1[14] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD cospi_24_64, cospi_8_64, d2, d3, d6, d7 - ; -------------------------------------------------------------------------- - ; generate 10,11,12,13 - ; -------------------------------------------------------------------------- - ; part of stage 2 - ;temp1 = input[10 * 32] * cospi_22_64 - input[22 * 32] * cospi_10_64; - ;temp2 = input[10 * 32] * cospi_10_64 + input[22 * 32] * cospi_22_64; - ;step2[10] = dct_const_round_shift(temp1); - ;step2[13] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 14, 10, 22 - DO_BUTTERFLY_STD cospi_22_64, cospi_10_64, d10, d11, d14, d15 - ; -------------------------------------------------------------------------- - ; part of stage 2 - ;temp1 = input[26 * 32] * cospi_6_64 - input[6 * 32] * cospi_26_64; - ;temp2 = input[26 * 32] * cospi_26_64 + input[6 * 32] * cospi_6_64; - ;step2[11] = dct_const_round_shift(temp1); - ;step2[12] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 22, 26, 6 - DO_BUTTERFLY_STD cospi_6_64, cospi_26_64, d8, d9, d12, d13 - ; -------------------------------------------------------------------------- - ; part of stage 3 - ;step3[10] = step1b[11][i] - step1b[10][i]; - ;step3[11] = step1b[11][i] + step1b[10][i]; - ;step3[12] = step1b[12][i] + step1b[13][i]; - ;step3[13] = step1b[12][i] - step1b[13][i]; - vsub.s16 q14, q4, q5 - vadd.s16 q5, q4, q5 - vsub.s16 q13, q6, q7 - vadd.s16 q6, q6, q7 - ; -------------------------------------------------------------------------- - ; part of stage 4 - ;temp1 = step1b[10][i] * 
(-cospi_8_64) - step1b[13][i] * (-cospi_24_64); - ;temp2 = step1b[10][i] * (-cospi_24_64) + step1b[13][i] * (-cospi_8_64); - ;step1[13] = dct_const_round_shift(temp1); - ;step1[10] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD (-cospi_8_64), (-cospi_24_64), d8, d9, d14, d15 - ; -------------------------------------------------------------------------- - ; combine 8-10,11-15 - ; -------------------------------------------------------------------------- - ; part of stage 5 - ;step2[8] = step1b[8][i] + step1b[11][i]; - ;step2[9] = step1b[9][i] + step1b[10][i]; - ;step2[10] = step1b[9][i] - step1b[10][i]; - vadd.s16 q8, q0, q5 - vadd.s16 q9, q1, q7 - vsub.s16 q13, q1, q7 - ;step2[13] = step1b[14][i] - step1b[13][i]; - ;step2[14] = step1b[14][i] + step1b[13][i]; - ;step2[15] = step1b[15][i] + step1b[12][i]; - vsub.s16 q14, q3, q4 - vadd.s16 q10, q3, q4 - vadd.s16 q15, q2, q6 - STORE_IN_OUTPUT 26, 8, 15, q8, q15 - STORE_IN_OUTPUT 15, 9, 14, q9, q10 - ; -------------------------------------------------------------------------- - ; part of stage 6 - ;temp1 = (step1b[13][i] - step1b[10][i]) * cospi_16_64; - ;temp2 = (step1b[13][i] + step1b[10][i]) * cospi_16_64; - ;step3[10] = dct_const_round_shift(temp1); - ;step3[13] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d2, d3, d6, d7 - STORE_IN_OUTPUT 14, 13, 10, q3, q1 - ; -------------------------------------------------------------------------- - ; part of stage 5 - ;step2[11] = step1b[8][i] - step1b[11][i]; - ;step2[12] = step1b[15][i] - step1b[12][i]; - vsub.s16 q13, q0, q5 - vsub.s16 q14, q2, q6 - ; -------------------------------------------------------------------------- - ; part of stage 6 - ;temp1 = (step1b[12][i] - step1b[11][i]) * cospi_16_64; - ;temp2 = (step1b[12][i] + step1b[11][i]) * cospi_16_64; - ;step3[11] = dct_const_round_shift(temp1); - ;step3[12] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d2, d3, d6, d7 - STORE_IN_OUTPUT 10, 11, 12, q1, 
q3 - ; -------------------------------------------------------------------------- - - - ; -------------------------------------------------------------------------- - ; BLOCK D: 0-3,4-7 - ; -------------------------------------------------------------------------- - ; generate 4,5,6,7 - ; -------------------------------------------------------------------------- - ; part of stage 3 - ;temp1 = input[4 * 32] * cospi_28_64 - input[28 * 32] * cospi_4_64; - ;temp2 = input[4 * 32] * cospi_4_64 + input[28 * 32] * cospi_28_64; - ;step3[4] = dct_const_round_shift(temp1); - ;step3[7] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 6, 4, 28 - DO_BUTTERFLY_STD cospi_28_64, cospi_4_64, d0, d1, d4, d5 - ; -------------------------------------------------------------------------- - ; part of stage 3 - ;temp1 = input[20 * 32] * cospi_12_64 - input[12 * 32] * cospi_20_64; - ;temp2 = input[20 * 32] * cospi_20_64 + input[12 * 32] * cospi_12_64; - ;step3[5] = dct_const_round_shift(temp1); - ;step3[6] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 28, 20, 12 - DO_BUTTERFLY_STD cospi_12_64, cospi_20_64, d2, d3, d6, d7 - ; -------------------------------------------------------------------------- - ; part of stage 4 - ;step1[4] = step1b[4][i] + step1b[5][i]; - ;step1[5] = step1b[4][i] - step1b[5][i]; - ;step1[6] = step1b[7][i] - step1b[6][i]; - ;step1[7] = step1b[7][i] + step1b[6][i]; - vsub.s16 q13, q0, q1 - vadd.s16 q0, q0, q1 - vsub.s16 q14, q2, q3 - vadd.s16 q2, q2, q3 - ; -------------------------------------------------------------------------- - ; part of stage 5 - ;temp1 = (step1b[6][i] - step1b[5][i]) * cospi_16_64; - ;temp2 = (step1b[5][i] + step1b[6][i]) * cospi_16_64; - ;step2[5] = dct_const_round_shift(temp1); - ;step2[6] = dct_const_round_shift(temp2); - DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d2, d3, d6, d7 - ; -------------------------------------------------------------------------- - ; generate 0,1,2,3 - ; 
-------------------------------------------------------------------------- - ; part of stage 4 - ;temp1 = (input[0 * 32] - input[16 * 32]) * cospi_16_64; - ;temp2 = (input[0 * 32] + input[16 * 32]) * cospi_16_64; - ;step1[1] = dct_const_round_shift(temp1); - ;step1[0] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 12, 0, 16 - DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d10, d11, d14, d15 - ; -------------------------------------------------------------------------- - ; part of stage 4 - ;temp1 = input[8 * 32] * cospi_24_64 - input[24 * 32] * cospi_8_64; - ;temp2 = input[8 * 32] * cospi_8_64 + input[24 * 32] * cospi_24_64; - ;step1[2] = dct_const_round_shift(temp1); - ;step1[3] = dct_const_round_shift(temp2); - LOAD_FROM_TRANSPOSED 16, 8, 24 - DO_BUTTERFLY_STD cospi_24_64, cospi_8_64, d28, d29, d12, d13 - ; -------------------------------------------------------------------------- - ; part of stage 5 - ;step2[0] = step1b[0][i] + step1b[3][i]; - ;step2[1] = step1b[1][i] + step1b[2][i]; - ;step2[2] = step1b[1][i] - step1b[2][i]; - ;step2[3] = step1b[0][i] - step1b[3][i]; - vadd.s16 q4, q7, q6 - vsub.s16 q7, q7, q6 - vsub.s16 q6, q5, q14 - vadd.s16 q5, q5, q14 - ; -------------------------------------------------------------------------- - ; combine 0-3,4-7 - ; -------------------------------------------------------------------------- - ; part of stage 6 - ;step3[0] = step1b[0][i] + step1b[7][i]; - ;step3[1] = step1b[1][i] + step1b[6][i]; - ;step3[2] = step1b[2][i] + step1b[5][i]; - ;step3[3] = step1b[3][i] + step1b[4][i]; - vadd.s16 q8, q4, q2 - vadd.s16 q9, q5, q3 - vadd.s16 q10, q6, q1 - vadd.s16 q11, q7, q0 - ;step3[4] = step1b[3][i] - step1b[4][i]; - ;step3[5] = step1b[2][i] - step1b[5][i]; - ;step3[6] = step1b[1][i] - step1b[6][i]; - ;step3[7] = step1b[0][i] - step1b[7][i]; - vsub.s16 q12, q7, q0 - vsub.s16 q13, q6, q1 - vsub.s16 q14, q5, q3 - vsub.s16 q15, q4, q2 - ; -------------------------------------------------------------------------- - ; part of 
stage 7 - ;step1[0] = step1b[0][i] + step1b[15][i]; - ;step1[1] = step1b[1][i] + step1b[14][i]; - ;step1[14] = step1b[1][i] - step1b[14][i]; - ;step1[15] = step1b[0][i] - step1b[15][i]; - LOAD_FROM_OUTPUT 12, 14, 15, q0, q1 - vadd.s16 q2, q8, q1 - vadd.s16 q3, q9, q0 - vsub.s16 q4, q9, q0 - vsub.s16 q5, q8, q1 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[14 * 32] = step1b[14][i] + step1b[17][i]; - ;output[15 * 32] = step1b[15][i] + step1b[16][i]; - ;output[16 * 32] = step1b[15][i] - step1b[16][i]; - ;output[17 * 32] = step1b[14][i] - step1b[17][i]; - LOAD_FROM_OUTPUT 15, 16, 17, q0, q1 - vadd.s16 q8, q4, q1 - vadd.s16 q9, q5, q0 - vsub.s16 q6, q5, q0 - vsub.s16 q7, q4, q1 - - cmp r5, #0 - bgt idct32_bands_end_2nd_pass - -idct32_bands_end_1st_pass - STORE_IN_OUTPUT 17, 16, 17, q6, q7 - STORE_IN_OUTPUT 17, 14, 15, q8, q9 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[ 0 * 32] = step1b[0][i] + step1b[31][i]; - ;output[ 1 * 32] = step1b[1][i] + step1b[30][i]; - ;output[30 * 32] = step1b[1][i] - step1b[30][i]; - ;output[31 * 32] = step1b[0][i] - step1b[31][i]; - LOAD_FROM_OUTPUT 15, 30, 31, q0, q1 - vadd.s16 q4, q2, q1 - vadd.s16 q5, q3, q0 - vsub.s16 q6, q3, q0 - vsub.s16 q7, q2, q1 - STORE_IN_OUTPUT 31, 30, 31, q6, q7 - STORE_IN_OUTPUT 31, 0, 1, q4, q5 - ; -------------------------------------------------------------------------- - ; part of stage 7 - ;step1[2] = step1b[2][i] + step1b[13][i]; - ;step1[3] = step1b[3][i] + step1b[12][i]; - ;step1[12] = step1b[3][i] - step1b[12][i]; - ;step1[13] = step1b[2][i] - step1b[13][i]; - LOAD_FROM_OUTPUT 1, 12, 13, q0, q1 - vadd.s16 q2, q10, q1 - vadd.s16 q3, q11, q0 - vsub.s16 q4, q11, q0 - vsub.s16 q5, q10, q1 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[12 * 32] = step1b[12][i] + step1b[19][i]; - ;output[13 * 32] = step1b[13][i] 
+ step1b[18][i]; - ;output[18 * 32] = step1b[13][i] - step1b[18][i]; - ;output[19 * 32] = step1b[12][i] - step1b[19][i]; - LOAD_FROM_OUTPUT 13, 18, 19, q0, q1 - vadd.s16 q8, q4, q1 - vadd.s16 q9, q5, q0 - vsub.s16 q6, q5, q0 - vsub.s16 q7, q4, q1 - STORE_IN_OUTPUT 19, 18, 19, q6, q7 - STORE_IN_OUTPUT 19, 12, 13, q8, q9 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[ 2 * 32] = step1b[2][i] + step1b[29][i]; - ;output[ 3 * 32] = step1b[3][i] + step1b[28][i]; - ;output[28 * 32] = step1b[3][i] - step1b[28][i]; - ;output[29 * 32] = step1b[2][i] - step1b[29][i]; - LOAD_FROM_OUTPUT 13, 28, 29, q0, q1 - vadd.s16 q4, q2, q1 - vadd.s16 q5, q3, q0 - vsub.s16 q6, q3, q0 - vsub.s16 q7, q2, q1 - STORE_IN_OUTPUT 29, 28, 29, q6, q7 - STORE_IN_OUTPUT 29, 2, 3, q4, q5 - ; -------------------------------------------------------------------------- - ; part of stage 7 - ;step1[4] = step1b[4][i] + step1b[11][i]; - ;step1[5] = step1b[5][i] + step1b[10][i]; - ;step1[10] = step1b[5][i] - step1b[10][i]; - ;step1[11] = step1b[4][i] - step1b[11][i]; - LOAD_FROM_OUTPUT 3, 10, 11, q0, q1 - vadd.s16 q2, q12, q1 - vadd.s16 q3, q13, q0 - vsub.s16 q4, q13, q0 - vsub.s16 q5, q12, q1 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[10 * 32] = step1b[10][i] + step1b[21][i]; - ;output[11 * 32] = step1b[11][i] + step1b[20][i]; - ;output[20 * 32] = step1b[11][i] - step1b[20][i]; - ;output[21 * 32] = step1b[10][i] - step1b[21][i]; - LOAD_FROM_OUTPUT 11, 20, 21, q0, q1 - vadd.s16 q8, q4, q1 - vadd.s16 q9, q5, q0 - vsub.s16 q6, q5, q0 - vsub.s16 q7, q4, q1 - STORE_IN_OUTPUT 21, 20, 21, q6, q7 - STORE_IN_OUTPUT 21, 10, 11, q8, q9 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[ 4 * 32] = step1b[4][i] + step1b[27][i]; - ;output[ 5 * 32] = step1b[5][i] + step1b[26][i]; - ;output[26 * 32] = step1b[5][i] - step1b[26][i]; 
- ;output[27 * 32] = step1b[4][i] - step1b[27][i]; - LOAD_FROM_OUTPUT 11, 26, 27, q0, q1 - vadd.s16 q4, q2, q1 - vadd.s16 q5, q3, q0 - vsub.s16 q6, q3, q0 - vsub.s16 q7, q2, q1 - STORE_IN_OUTPUT 27, 26, 27, q6, q7 - STORE_IN_OUTPUT 27, 4, 5, q4, q5 - ; -------------------------------------------------------------------------- - ; part of stage 7 - ;step1[6] = step1b[6][i] + step1b[9][i]; - ;step1[7] = step1b[7][i] + step1b[8][i]; - ;step1[8] = step1b[7][i] - step1b[8][i]; - ;step1[9] = step1b[6][i] - step1b[9][i]; - LOAD_FROM_OUTPUT 5, 8, 9, q0, q1 - vadd.s16 q2, q14, q1 - vadd.s16 q3, q15, q0 - vsub.s16 q4, q15, q0 - vsub.s16 q5, q14, q1 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[ 8 * 32] = step1b[8][i] + step1b[23][i]; - ;output[ 9 * 32] = step1b[9][i] + step1b[22][i]; - ;output[22 * 32] = step1b[9][i] - step1b[22][i]; - ;output[23 * 32] = step1b[8][i] - step1b[23][i]; - LOAD_FROM_OUTPUT 9, 22, 23, q0, q1 - vadd.s16 q8, q4, q1 - vadd.s16 q9, q5, q0 - vsub.s16 q6, q5, q0 - vsub.s16 q7, q4, q1 - STORE_IN_OUTPUT 23, 22, 23, q6, q7 - STORE_IN_OUTPUT 23, 8, 9, q8, q9 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[ 6 * 32] = step1b[6][i] + step1b[25][i]; - ;output[ 7 * 32] = step1b[7][i] + step1b[24][i]; - ;output[24 * 32] = step1b[7][i] - step1b[24][i]; - ;output[25 * 32] = step1b[6][i] - step1b[25][i]; - LOAD_FROM_OUTPUT 9, 24, 25, q0, q1 - vadd.s16 q4, q2, q1 - vadd.s16 q5, q3, q0 - vsub.s16 q6, q3, q0 - vsub.s16 q7, q2, q1 - STORE_IN_OUTPUT 25, 24, 25, q6, q7 - STORE_IN_OUTPUT 25, 6, 7, q4, q5 - - ; restore r0 by removing the last offset from the last - ; operation (LOAD_FROM_TRANSPOSED 16, 8, 24) => 24*8*2 - sub r0, r0, #24*8*2 - ; restore r1 by removing the last offset from the last - ; operation (STORE_IN_OUTPUT 24, 6, 7) => 7*32*2 - ; advance by 8 columns => 8*2 - sub r1, r1, #7*32*2 - 8*2 - ; advance by 8 lines (8*32*2) - ; go 
back by the two pairs from the loop (32*2) - add r3, r3, #8*32*2 - 32*2 - - ; bands loop processing - subs r4, r4, #1 - bne idct32_bands_loop - - ; parameters for second pass - ; the input of pass2 is the result of pass1. we have to remove the offset - ; of 32 columns induced by the above idct32_bands_loop - sub r3, r1, #32*2 - ; r1 = pass2[32 * 32] - add r1, sp, #2048 - - ; pass loop processing - add r5, r5, #1 - b idct32_pass_loop - -idct32_bands_end_2nd_pass - STORE_COMBINE_CENTER_RESULTS - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[ 0 * 32] = step1b[0][i] + step1b[31][i]; - ;output[ 1 * 32] = step1b[1][i] + step1b[30][i]; - ;output[30 * 32] = step1b[1][i] - step1b[30][i]; - ;output[31 * 32] = step1b[0][i] - step1b[31][i]; - LOAD_FROM_OUTPUT 17, 30, 31, q0, q1 - vadd.s16 q4, q2, q1 - vadd.s16 q5, q3, q0 - vsub.s16 q6, q3, q0 - vsub.s16 q7, q2, q1 - STORE_COMBINE_EXTREME_RESULTS - ; -------------------------------------------------------------------------- - ; part of stage 7 - ;step1[2] = step1b[2][i] + step1b[13][i]; - ;step1[3] = step1b[3][i] + step1b[12][i]; - ;step1[12] = step1b[3][i] - step1b[12][i]; - ;step1[13] = step1b[2][i] - step1b[13][i]; - LOAD_FROM_OUTPUT 31, 12, 13, q0, q1 - vadd.s16 q2, q10, q1 - vadd.s16 q3, q11, q0 - vsub.s16 q4, q11, q0 - vsub.s16 q5, q10, q1 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[12 * 32] = step1b[12][i] + step1b[19][i]; - ;output[13 * 32] = step1b[13][i] + step1b[18][i]; - ;output[18 * 32] = step1b[13][i] - step1b[18][i]; - ;output[19 * 32] = step1b[12][i] - step1b[19][i]; - LOAD_FROM_OUTPUT 13, 18, 19, q0, q1 - vadd.s16 q8, q4, q1 - vadd.s16 q9, q5, q0 - vsub.s16 q6, q5, q0 - vsub.s16 q7, q4, q1 - STORE_COMBINE_CENTER_RESULTS - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[ 2 * 32] = step1b[2][i] + step1b[29][i]; - 
;output[ 3 * 32] = step1b[3][i] + step1b[28][i]; - ;output[28 * 32] = step1b[3][i] - step1b[28][i]; - ;output[29 * 32] = step1b[2][i] - step1b[29][i]; - LOAD_FROM_OUTPUT 19, 28, 29, q0, q1 - vadd.s16 q4, q2, q1 - vadd.s16 q5, q3, q0 - vsub.s16 q6, q3, q0 - vsub.s16 q7, q2, q1 - STORE_COMBINE_EXTREME_RESULTS - ; -------------------------------------------------------------------------- - ; part of stage 7 - ;step1[4] = step1b[4][i] + step1b[11][i]; - ;step1[5] = step1b[5][i] + step1b[10][i]; - ;step1[10] = step1b[5][i] - step1b[10][i]; - ;step1[11] = step1b[4][i] - step1b[11][i]; - LOAD_FROM_OUTPUT 29, 10, 11, q0, q1 - vadd.s16 q2, q12, q1 - vadd.s16 q3, q13, q0 - vsub.s16 q4, q13, q0 - vsub.s16 q5, q12, q1 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[10 * 32] = step1b[10][i] + step1b[21][i]; - ;output[11 * 32] = step1b[11][i] + step1b[20][i]; - ;output[20 * 32] = step1b[11][i] - step1b[20][i]; - ;output[21 * 32] = step1b[10][i] - step1b[21][i]; - LOAD_FROM_OUTPUT 11, 20, 21, q0, q1 - vadd.s16 q8, q4, q1 - vadd.s16 q9, q5, q0 - vsub.s16 q6, q5, q0 - vsub.s16 q7, q4, q1 - STORE_COMBINE_CENTER_RESULTS - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[ 4 * 32] = step1b[4][i] + step1b[27][i]; - ;output[ 5 * 32] = step1b[5][i] + step1b[26][i]; - ;output[26 * 32] = step1b[5][i] - step1b[26][i]; - ;output[27 * 32] = step1b[4][i] - step1b[27][i]; - LOAD_FROM_OUTPUT 21, 26, 27, q0, q1 - vadd.s16 q4, q2, q1 - vadd.s16 q5, q3, q0 - vsub.s16 q6, q3, q0 - vsub.s16 q7, q2, q1 - STORE_COMBINE_EXTREME_RESULTS - ; -------------------------------------------------------------------------- - ; part of stage 7 - ;step1[6] = step1b[6][i] + step1b[9][i]; - ;step1[7] = step1b[7][i] + step1b[8][i]; - ;step1[8] = step1b[7][i] - step1b[8][i]; - ;step1[9] = step1b[6][i] - step1b[9][i]; - LOAD_FROM_OUTPUT 27, 8, 9, q0, q1 - vadd.s16 q2, q14, q1 - vadd.s16 q3, q15, 
q0 - vsub.s16 q4, q15, q0 - vsub.s16 q5, q14, q1 - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[ 8 * 32] = step1b[8][i] + step1b[23][i]; - ;output[ 9 * 32] = step1b[9][i] + step1b[22][i]; - ;output[22 * 32] = step1b[9][i] - step1b[22][i]; - ;output[23 * 32] = step1b[8][i] - step1b[23][i]; - LOAD_FROM_OUTPUT 9, 22, 23, q0, q1 - vadd.s16 q8, q4, q1 - vadd.s16 q9, q5, q0 - vsub.s16 q6, q5, q0 - vsub.s16 q7, q4, q1 - STORE_COMBINE_CENTER_RESULTS_LAST - ; -------------------------------------------------------------------------- - ; part of final stage - ;output[ 6 * 32] = step1b[6][i] + step1b[25][i]; - ;output[ 7 * 32] = step1b[7][i] + step1b[24][i]; - ;output[24 * 32] = step1b[7][i] - step1b[24][i]; - ;output[25 * 32] = step1b[6][i] - step1b[25][i]; - LOAD_FROM_OUTPUT 23, 24, 25, q0, q1 - vadd.s16 q4, q2, q1 - vadd.s16 q5, q3, q0 - vsub.s16 q6, q3, q0 - vsub.s16 q7, q2, q1 - STORE_COMBINE_EXTREME_RESULTS_LAST - ; -------------------------------------------------------------------------- - ; restore pointers to their initial indices for next band pass by - ; removing/adding dest_stride * 8. The actual increment by eight - ; is taken care of within the _LAST macros. 
- add r6, r6, r2, lsl #3 - add r9, r9, r2, lsl #3 - sub r7, r7, r2, lsl #3 - sub r10, r10, r2, lsl #3 - - ; restore r0 by removing the last offset from the last - ; operation (LOAD_FROM_TRANSPOSED 16, 8, 24) => 24*8*2 - sub r0, r0, #24*8*2 - ; restore r1 by removing the last offset from the last - ; operation (LOAD_FROM_OUTPUT 23, 24, 25) => 25*32*2 - ; advance by 8 columns => 8*2 - sub r1, r1, #25*32*2 - 8*2 - ; advance by 8 lines (8*32*2) - ; go back by the two pairs from the loop (32*2) - add r3, r3, #8*32*2 - 32*2 - - ; bands loop processing - subs r4, r4, #1 - bne idct32_bands_loop - - ; stack operation - add sp, sp, #512+2048+2048 - vpop {d8-d15} - pop {r4-r11} - bx lr - ENDP ; |vp9_idct32x32_1024_add_neon| - END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.asm deleted file mode 100644 index 0d4a721c4d3..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.asm +++ /dev/null @@ -1,68 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - - EXPORT |vp9_idct4x4_1_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;void vp9_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, -; int dest_stride) -; -; r0 int16_t input -; r1 uint8_t *dest -; r2 int dest_stride) - -|vp9_idct4x4_1_add_neon| PROC - ldrsh r0, [r0] - - ; generate cospi_16_64 = 11585 - mov r12, #0x2d00 - add r12, #0x41 - - ; out = dct_const_round_shift(input[0] * cospi_16_64) - mul r0, r0, r12 ; input[0] * cospi_16_64 - add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) - asr r0, r0, #14 ; >> DCT_CONST_BITS - - ; out = dct_const_round_shift(out * cospi_16_64) - mul r0, r0, r12 ; out * cospi_16_64 - mov r12, r1 ; save dest - add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) - asr r0, r0, #14 ; >> DCT_CONST_BITS - - ; a1 = ROUND_POWER_OF_TWO(out, 4) - add r0, r0, #8 ; + (1 <<((4) - 1)) - asr r0, r0, #4 ; >> 4 - - vdup.s16 q0, r0 ; duplicate a1 - - vld1.32 {d2[0]}, [r1], r2 - vld1.32 {d2[1]}, [r1], r2 - vld1.32 {d4[0]}, [r1], r2 - vld1.32 {d4[1]}, [r1] - - vaddw.u8 q8, q0, d2 ; dest[x] + a1 - vaddw.u8 q9, q0, d4 - - vqmovun.s16 d6, q8 ; clip_pixel - vqmovun.s16 d7, q9 - - vst1.32 {d6[0]}, [r12], r2 - vst1.32 {d6[1]}, [r12], r2 - vst1.32 {d7[0]}, [r12], r2 - vst1.32 {d7[1]}, [r12] - - bx lr - ENDP ; |vp9_idct4x4_1_add_neon| - - END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct4x4_add_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_idct4x4_add_neon.asm deleted file mode 100644 index 00283fc8d78..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct4x4_add_neon.asm +++ /dev/null @@ -1,190 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - EXPORT |vp9_idct4x4_16_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - AREA Block, CODE, READONLY ; name this block of code -;void vp9_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) -; -; r0 int16_t input -; r1 uint8_t *dest -; r2 int dest_stride) - -|vp9_idct4x4_16_add_neon| PROC - - ; The 2D transform is done with two passes which are actually pretty - ; similar. We first transform the rows. This is done by transposing - ; the inputs, doing an SIMD column transform (the columns are the - ; transposed rows) and then transpose the results (so that it goes back - ; in normal/row positions). Then, we transform the columns by doing - ; another SIMD column transform. - ; So, two passes of a transpose followed by a column transform. - - ; load the inputs into q8-q9, d16-d19 - vld1.s16 {q8,q9}, [r0]! - - ; generate scalar constants - ; cospi_8_64 = 15137 = 0x3b21 - mov r0, #0x3b00 - add r0, #0x21 - ; cospi_16_64 = 11585 = 0x2d41 - mov r3, #0x2d00 - add r3, #0x41 - ; cospi_24_64 = 6270 = 0x 187e - mov r12, #0x1800 - add r12, #0x7e - - ; transpose the input data - ; 00 01 02 03 d16 - ; 10 11 12 13 d17 - ; 20 21 22 23 d18 - ; 30 31 32 33 d19 - vtrn.16 d16, d17 - vtrn.16 d18, d19 - - ; generate constant vectors - vdup.16 d20, r0 ; replicate cospi_8_64 - vdup.16 d21, r3 ; replicate cospi_16_64 - - ; 00 10 02 12 d16 - ; 01 11 03 13 d17 - ; 20 30 22 32 d18 - ; 21 31 23 33 d19 - vtrn.32 q8, q9 - ; 00 10 20 30 d16 - ; 01 11 21 31 d17 - ; 02 12 22 32 d18 - ; 03 13 23 33 d19 - - vdup.16 d22, r12 ; replicate cospi_24_64 - - ; do the transform on transposed rows - - ; stage 1 - vadd.s16 d23, d16, d18 ; (input[0] + input[2]) - vsub.s16 d24, d16, d18 ; (input[0] - input[2]) - - vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64 - vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64 - - ; (input[0] + input[2]) * cospi_16_64; - ; (input[0] - input[2]) * cospi_16_64; - vmull.s16 q13, d23, d21 - vmull.s16 q14, d24, d21 - - ; 
input[1] * cospi_24_64 - input[3] * cospi_8_64; - ; input[1] * cospi_8_64 + input[3] * cospi_24_64; - vmlsl.s16 q15, d19, d20 - vmlal.s16 q1, d19, d22 - - ; dct_const_round_shift - vqrshrn.s32 d26, q13, #14 - vqrshrn.s32 d27, q14, #14 - vqrshrn.s32 d29, q15, #14 - vqrshrn.s32 d28, q1, #14 - - ; stage 2 - ; output[0] = step[0] + step[3]; - ; output[1] = step[1] + step[2]; - ; output[3] = step[0] - step[3]; - ; output[2] = step[1] - step[2]; - vadd.s16 q8, q13, q14 - vsub.s16 q9, q13, q14 - vswp d18, d19 - - ; transpose the results - ; 00 01 02 03 d16 - ; 10 11 12 13 d17 - ; 20 21 22 23 d18 - ; 30 31 32 33 d19 - vtrn.16 d16, d17 - vtrn.16 d18, d19 - ; 00 10 02 12 d16 - ; 01 11 03 13 d17 - ; 20 30 22 32 d18 - ; 21 31 23 33 d19 - vtrn.32 q8, q9 - ; 00 10 20 30 d16 - ; 01 11 21 31 d17 - ; 02 12 22 32 d18 - ; 03 13 23 33 d19 - - ; do the transform on columns - - ; stage 1 - vadd.s16 d23, d16, d18 ; (input[0] + input[2]) - vsub.s16 d24, d16, d18 ; (input[0] - input[2]) - - vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64 - vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64 - - ; (input[0] + input[2]) * cospi_16_64; - ; (input[0] - input[2]) * cospi_16_64; - vmull.s16 q13, d23, d21 - vmull.s16 q14, d24, d21 - - ; input[1] * cospi_24_64 - input[3] * cospi_8_64; - ; input[1] * cospi_8_64 + input[3] * cospi_24_64; - vmlsl.s16 q15, d19, d20 - vmlal.s16 q1, d19, d22 - - ; dct_const_round_shift - vqrshrn.s32 d26, q13, #14 - vqrshrn.s32 d27, q14, #14 - vqrshrn.s32 d29, q15, #14 - vqrshrn.s32 d28, q1, #14 - - ; stage 2 - ; output[0] = step[0] + step[3]; - ; output[1] = step[1] + step[2]; - ; output[3] = step[0] - step[3]; - ; output[2] = step[1] - step[2]; - vadd.s16 q8, q13, q14 - vsub.s16 q9, q13, q14 - - ; The results are in two registers, one of them being swapped. This will - ; be taken care of by loading the 'dest' value in a swapped fashion and - ; also storing them in the same swapped fashion. 
- ; temp_out[0, 1] = d16, d17 = q8 - ; temp_out[2, 3] = d19, d18 = q9 swapped - - ; ROUND_POWER_OF_TWO(temp_out[j], 4) - vrshr.s16 q8, q8, #4 - vrshr.s16 q9, q9, #4 - - vld1.32 {d26[0]}, [r1], r2 - vld1.32 {d26[1]}, [r1], r2 - vld1.32 {d27[1]}, [r1], r2 - vld1.32 {d27[0]}, [r1] ; no post-increment - - ; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i] - vaddw.u8 q8, q8, d26 - vaddw.u8 q9, q9, d27 - - ; clip_pixel - vqmovun.s16 d26, q8 - vqmovun.s16 d27, q9 - - ; do the stores in reverse order with negative post-increment, by changing - ; the sign of the stride - rsb r2, r2, #0 - vst1.32 {d27[0]}, [r1], r2 - vst1.32 {d27[1]}, [r1], r2 - vst1.32 {d26[1]}, [r1], r2 - vst1.32 {d26[0]}, [r1] ; no post-increment - bx lr - ENDP ; |vp9_idct4x4_16_add_neon| - - END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.asm deleted file mode 100644 index 421d202d403..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.asm +++ /dev/null @@ -1,88 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - - EXPORT |vp9_idct8x8_1_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;void vp9_idct8x8_1_add_neon(int16_t *input, uint8_t *dest, -; int dest_stride) -; -; r0 int16_t input -; r1 uint8_t *dest -; r2 int dest_stride) - -|vp9_idct8x8_1_add_neon| PROC - ldrsh r0, [r0] - - ; generate cospi_16_64 = 11585 - mov r12, #0x2d00 - add r12, #0x41 - - ; out = dct_const_round_shift(input[0] * cospi_16_64) - mul r0, r0, r12 ; input[0] * cospi_16_64 - add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) - asr r0, r0, #14 ; >> DCT_CONST_BITS - - ; out = dct_const_round_shift(out * cospi_16_64) - mul r0, r0, r12 ; out * cospi_16_64 - mov r12, r1 ; save dest - add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) - asr r0, r0, #14 ; >> DCT_CONST_BITS - - ; a1 = ROUND_POWER_OF_TWO(out, 5) - add r0, r0, #16 ; + (1 <<((5) - 1)) - asr r0, r0, #5 ; >> 5 - - vdup.s16 q0, r0 ; duplicate a1 - - ; load destination data - vld1.64 {d2}, [r1], r2 - vld1.64 {d3}, [r1], r2 - vld1.64 {d4}, [r1], r2 - vld1.64 {d5}, [r1], r2 - vld1.64 {d6}, [r1], r2 - vld1.64 {d7}, [r1], r2 - vld1.64 {d16}, [r1], r2 - vld1.64 {d17}, [r1] - - vaddw.u8 q9, q0, d2 ; dest[x] + a1 - vaddw.u8 q10, q0, d3 ; dest[x] + a1 - vaddw.u8 q11, q0, d4 ; dest[x] + a1 - vaddw.u8 q12, q0, d5 ; dest[x] + a1 - vqmovun.s16 d2, q9 ; clip_pixel - vqmovun.s16 d3, q10 ; clip_pixel - vqmovun.s16 d30, q11 ; clip_pixel - vqmovun.s16 d31, q12 ; clip_pixel - vst1.64 {d2}, [r12], r2 - vst1.64 {d3}, [r12], r2 - vst1.64 {d30}, [r12], r2 - vst1.64 {d31}, [r12], r2 - - vaddw.u8 q9, q0, d6 ; dest[x] + a1 - vaddw.u8 q10, q0, d7 ; dest[x] + a1 - vaddw.u8 q11, q0, d16 ; dest[x] + a1 - vaddw.u8 q12, q0, d17 ; dest[x] + a1 - vqmovun.s16 d2, q9 ; clip_pixel - vqmovun.s16 d3, q10 ; clip_pixel - vqmovun.s16 d30, q11 ; clip_pixel - vqmovun.s16 d31, q12 ; clip_pixel - vst1.64 {d2}, [r12], r2 - vst1.64 {d3}, [r12], r2 - vst1.64 {d30}, [r12], r2 - vst1.64 {d31}, [r12], r2 - - bx lr - ENDP ; |vp9_idct8x8_1_add_neon| - - 
END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm deleted file mode 100644 index ab5bb69202a..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm +++ /dev/null @@ -1,519 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp9_idct8x8_64_add_neon| - EXPORT |vp9_idct8x8_12_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - ; Parallel 1D IDCT on all the columns of a 8x8 16bit data matrix which are - ; loaded in q8-q15. The output will be stored back into q8-q15 registers. - ; This macro will touch q0-q7 registers and use them as buffer during - ; calculation. 
- MACRO - IDCT8x8_1D - ; stage 1 - vdup.16 d0, r3 ; duplicate cospi_28_64 - vdup.16 d1, r4 ; duplicate cospi_4_64 - vdup.16 d2, r5 ; duplicate cospi_12_64 - vdup.16 d3, r6 ; duplicate cospi_20_64 - - ; input[1] * cospi_28_64 - vmull.s16 q2, d18, d0 - vmull.s16 q3, d19, d0 - - ; input[5] * cospi_12_64 - vmull.s16 q5, d26, d2 - vmull.s16 q6, d27, d2 - - ; input[1]*cospi_28_64-input[7]*cospi_4_64 - vmlsl.s16 q2, d30, d1 - vmlsl.s16 q3, d31, d1 - - ; input[5] * cospi_12_64 - input[3] * cospi_20_64 - vmlsl.s16 q5, d22, d3 - vmlsl.s16 q6, d23, d3 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d8, q2, #14 ; >> 14 - vqrshrn.s32 d9, q3, #14 ; >> 14 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d10, q5, #14 ; >> 14 - vqrshrn.s32 d11, q6, #14 ; >> 14 - - ; input[1] * cospi_4_64 - vmull.s16 q2, d18, d1 - vmull.s16 q3, d19, d1 - - ; input[5] * cospi_20_64 - vmull.s16 q9, d26, d3 - vmull.s16 q13, d27, d3 - - ; input[1]*cospi_4_64+input[7]*cospi_28_64 - vmlal.s16 q2, d30, d0 - vmlal.s16 q3, d31, d0 - - ; input[5] * cospi_20_64 + input[3] * cospi_12_64 - vmlal.s16 q9, d22, d2 - vmlal.s16 q13, d23, d2 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d14, q2, #14 ; >> 14 - vqrshrn.s32 d15, q3, #14 ; >> 14 - - ; stage 2 & stage 3 - even half - vdup.16 d0, r7 ; duplicate cospi_16_64 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d12, q9, #14 ; >> 14 - vqrshrn.s32 d13, q13, #14 ; >> 14 - - ; input[0] * cospi_16_64 - vmull.s16 q2, d16, d0 - vmull.s16 q3, d17, d0 - - ; input[0] * cospi_16_64 - vmull.s16 q13, d16, d0 - vmull.s16 q15, d17, d0 - - ; (input[0] + input[2]) * cospi_16_64 - vmlal.s16 q2, d24, d0 - vmlal.s16 q3, d25, d0 - - ; (input[0] - input[2]) * cospi_16_64 - vmlsl.s16 q13, d24, d0 - vmlsl.s16 q15, d25, d0 - - vdup.16 d0, r8 ; duplicate cospi_24_64 - vdup.16 d1, r9 ; duplicate cospi_8_64 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d18, q2, #14 ; >> 14 - vqrshrn.s32 d19, q3, #14 ; 
>> 14 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d22, q13, #14 ; >> 14 - vqrshrn.s32 d23, q15, #14 ; >> 14 - - ; input[1] * cospi_24_64 - input[3] * cospi_8_64 - ; input[1] * cospi_24_64 - vmull.s16 q2, d20, d0 - vmull.s16 q3, d21, d0 - - ; input[1] * cospi_8_64 - vmull.s16 q8, d20, d1 - vmull.s16 q12, d21, d1 - - ; input[1] * cospi_24_64 - input[3] * cospi_8_64 - vmlsl.s16 q2, d28, d1 - vmlsl.s16 q3, d29, d1 - - ; input[1] * cospi_8_64 + input[3] * cospi_24_64 - vmlal.s16 q8, d28, d0 - vmlal.s16 q12, d29, d0 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d26, q2, #14 ; >> 14 - vqrshrn.s32 d27, q3, #14 ; >> 14 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d30, q8, #14 ; >> 14 - vqrshrn.s32 d31, q12, #14 ; >> 14 - - vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3] - vadd.s16 q1, q11, q13 ; output[1] = step[1] + step[2] - vsub.s16 q2, q11, q13 ; output[2] = step[1] - step[2] - vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3] - - ; stage 3 -odd half - vdup.16 d16, r7 ; duplicate cospi_16_64 - - ; stage 2 - odd half - vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5] - vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5] - vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7] - vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7] - - ; step2[6] * cospi_16_64 - vmull.s16 q9, d28, d16 - vmull.s16 q10, d29, d16 - - ; step2[6] * cospi_16_64 - vmull.s16 q11, d28, d16 - vmull.s16 q12, d29, d16 - - ; (step2[6] - step2[5]) * cospi_16_64 - vmlsl.s16 q9, d26, d16 - vmlsl.s16 q10, d27, d16 - - ; (step2[5] + step2[6]) * cospi_16_64 - vmlal.s16 q11, d26, d16 - vmlal.s16 q12, d27, d16 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d10, q9, #14 ; >> 14 - vqrshrn.s32 d11, q10, #14 ; >> 14 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d12, q11, #14 ; >> 14 - vqrshrn.s32 d13, q12, #14 ; >> 14 - - ; stage 4 - vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7]; - vadd.s16 
q9, q1, q6 ; output[1] = step1[1] + step1[6]; - vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5]; - vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4]; - vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4]; - vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5]; - vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6]; - vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7]; - MEND - - ; Transpose a 8x8 16bit data matrix. Datas are loaded in q8-q15. - MACRO - TRANSPOSE8X8 - vswp d17, d24 - vswp d23, d30 - vswp d21, d28 - vswp d19, d26 - vtrn.32 q8, q10 - vtrn.32 q9, q11 - vtrn.32 q12, q14 - vtrn.32 q13, q15 - vtrn.16 q8, q9 - vtrn.16 q10, q11 - vtrn.16 q12, q13 - vtrn.16 q14, q15 - MEND - - AREA Block, CODE, READONLY ; name this block of code -;void vp9_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) -; -; r0 int16_t input -; r1 uint8_t *dest -; r2 int dest_stride) - -|vp9_idct8x8_64_add_neon| PROC - push {r4-r9} - vpush {d8-d15} - vld1.s16 {q8,q9}, [r0]! - vld1.s16 {q10,q11}, [r0]! - vld1.s16 {q12,q13}, [r0]! - vld1.s16 {q14,q15}, [r0]! 
- - ; transpose the input data - TRANSPOSE8X8 - - ; generate cospi_28_64 = 3196 - mov r3, #0x0c00 - add r3, #0x7c - - ; generate cospi_4_64 = 16069 - mov r4, #0x3e00 - add r4, #0xc5 - - ; generate cospi_12_64 = 13623 - mov r5, #0x3500 - add r5, #0x37 - - ; generate cospi_20_64 = 9102 - mov r6, #0x2300 - add r6, #0x8e - - ; generate cospi_16_64 = 11585 - mov r7, #0x2d00 - add r7, #0x41 - - ; generate cospi_24_64 = 6270 - mov r8, #0x1800 - add r8, #0x7e - - ; generate cospi_8_64 = 15137 - mov r9, #0x3b00 - add r9, #0x21 - - ; First transform rows - IDCT8x8_1D - - ; Transpose the matrix - TRANSPOSE8X8 - - ; Then transform columns - IDCT8x8_1D - - ; ROUND_POWER_OF_TWO(temp_out[j], 5) - vrshr.s16 q8, q8, #5 - vrshr.s16 q9, q9, #5 - vrshr.s16 q10, q10, #5 - vrshr.s16 q11, q11, #5 - vrshr.s16 q12, q12, #5 - vrshr.s16 q13, q13, #5 - vrshr.s16 q14, q14, #5 - vrshr.s16 q15, q15, #5 - - ; save dest pointer - mov r0, r1 - - ; load destination data - vld1.64 {d0}, [r1], r2 - vld1.64 {d1}, [r1], r2 - vld1.64 {d2}, [r1], r2 - vld1.64 {d3}, [r1], r2 - vld1.64 {d4}, [r1], r2 - vld1.64 {d5}, [r1], r2 - vld1.64 {d6}, [r1], r2 - vld1.64 {d7}, [r1] - - ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i] - vaddw.u8 q8, q8, d0 - vaddw.u8 q9, q9, d1 - vaddw.u8 q10, q10, d2 - vaddw.u8 q11, q11, d3 - vaddw.u8 q12, q12, d4 - vaddw.u8 q13, q13, d5 - vaddw.u8 q14, q14, d6 - vaddw.u8 q15, q15, d7 - - ; clip_pixel - vqmovun.s16 d0, q8 - vqmovun.s16 d1, q9 - vqmovun.s16 d2, q10 - vqmovun.s16 d3, q11 - vqmovun.s16 d4, q12 - vqmovun.s16 d5, q13 - vqmovun.s16 d6, q14 - vqmovun.s16 d7, q15 - - ; store the data - vst1.64 {d0}, [r0], r2 - vst1.64 {d1}, [r0], r2 - vst1.64 {d2}, [r0], r2 - vst1.64 {d3}, [r0], r2 - vst1.64 {d4}, [r0], r2 - vst1.64 {d5}, [r0], r2 - vst1.64 {d6}, [r0], r2 - vst1.64 {d7}, [r0], r2 - - vpop {d8-d15} - pop {r4-r9} - bx lr - ENDP ; |vp9_idct8x8_64_add_neon| - -;void vp9_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) -; -; r0 int16_t input -; r1 
uint8_t *dest -; r2 int dest_stride) - -|vp9_idct8x8_12_add_neon| PROC - push {r4-r9} - vpush {d8-d15} - vld1.s16 {q8,q9}, [r0]! - vld1.s16 {q10,q11}, [r0]! - vld1.s16 {q12,q13}, [r0]! - vld1.s16 {q14,q15}, [r0]! - - ; transpose the input data - TRANSPOSE8X8 - - ; generate cospi_28_64 = 3196 - mov r3, #0x0c00 - add r3, #0x7c - - ; generate cospi_4_64 = 16069 - mov r4, #0x3e00 - add r4, #0xc5 - - ; generate cospi_12_64 = 13623 - mov r5, #0x3500 - add r5, #0x37 - - ; generate cospi_20_64 = 9102 - mov r6, #0x2300 - add r6, #0x8e - - ; generate cospi_16_64 = 11585 - mov r7, #0x2d00 - add r7, #0x41 - - ; generate cospi_24_64 = 6270 - mov r8, #0x1800 - add r8, #0x7e - - ; generate cospi_8_64 = 15137 - mov r9, #0x3b00 - add r9, #0x21 - - ; First transform rows - ; stage 1 - ; The following instructions use vqrdmulh to do the - ; dct_const_round_shift(input[1] * cospi_28_64). vqrdmulh will do doubling - ; multiply and shift the result by 16 bits instead of 14 bits. So we need - ; to double the constants before multiplying to compensate this. 
- mov r12, r3, lsl #1 - vdup.16 q0, r12 ; duplicate cospi_28_64*2 - mov r12, r4, lsl #1 - vdup.16 q1, r12 ; duplicate cospi_4_64*2 - - ; dct_const_round_shift(input[1] * cospi_28_64) - vqrdmulh.s16 q4, q9, q0 - - mov r12, r6, lsl #1 - rsb r12, #0 - vdup.16 q0, r12 ; duplicate -cospi_20_64*2 - - ; dct_const_round_shift(input[1] * cospi_4_64) - vqrdmulh.s16 q7, q9, q1 - - mov r12, r5, lsl #1 - vdup.16 q1, r12 ; duplicate cospi_12_64*2 - - ; dct_const_round_shift(- input[3] * cospi_20_64) - vqrdmulh.s16 q5, q11, q0 - - mov r12, r7, lsl #1 - vdup.16 q0, r12 ; duplicate cospi_16_64*2 - - ; dct_const_round_shift(input[3] * cospi_12_64) - vqrdmulh.s16 q6, q11, q1 - - ; stage 2 & stage 3 - even half - mov r12, r8, lsl #1 - vdup.16 q1, r12 ; duplicate cospi_24_64*2 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrdmulh.s16 q9, q8, q0 - - mov r12, r9, lsl #1 - vdup.16 q0, r12 ; duplicate cospi_8_64*2 - - ; dct_const_round_shift(input[1] * cospi_24_64) - vqrdmulh.s16 q13, q10, q1 - - ; dct_const_round_shift(input[1] * cospi_8_64) - vqrdmulh.s16 q15, q10, q0 - - ; stage 3 -odd half - vdup.16 d16, r7 ; duplicate cospi_16_64 - - vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3] - vadd.s16 q1, q9, q13 ; output[1] = step[1] + step[2] - vsub.s16 q2, q9, q13 ; output[2] = step[1] - step[2] - vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3] - - ; stage 2 - odd half - vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5] - vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5] - vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7] - vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7] - - ; step2[6] * cospi_16_64 - vmull.s16 q9, d28, d16 - vmull.s16 q10, d29, d16 - - ; step2[6] * cospi_16_64 - vmull.s16 q11, d28, d16 - vmull.s16 q12, d29, d16 - - ; (step2[6] - step2[5]) * cospi_16_64 - vmlsl.s16 q9, d26, d16 - vmlsl.s16 q10, d27, d16 - - ; (step2[5] + step2[6]) * cospi_16_64 - vmlal.s16 q11, d26, d16 - vmlal.s16 q12, d27, d16 - - ; dct_const_round_shift(input_dc * 
cospi_16_64) - vqrshrn.s32 d10, q9, #14 ; >> 14 - vqrshrn.s32 d11, q10, #14 ; >> 14 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d12, q11, #14 ; >> 14 - vqrshrn.s32 d13, q12, #14 ; >> 14 - - ; stage 4 - vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7]; - vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6]; - vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5]; - vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4]; - vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4]; - vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5]; - vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6]; - vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7]; - - ; Transpose the matrix - TRANSPOSE8X8 - - ; Then transform columns - IDCT8x8_1D - - ; ROUND_POWER_OF_TWO(temp_out[j], 5) - vrshr.s16 q8, q8, #5 - vrshr.s16 q9, q9, #5 - vrshr.s16 q10, q10, #5 - vrshr.s16 q11, q11, #5 - vrshr.s16 q12, q12, #5 - vrshr.s16 q13, q13, #5 - vrshr.s16 q14, q14, #5 - vrshr.s16 q15, q15, #5 - - ; save dest pointer - mov r0, r1 - - ; load destination data - vld1.64 {d0}, [r1], r2 - vld1.64 {d1}, [r1], r2 - vld1.64 {d2}, [r1], r2 - vld1.64 {d3}, [r1], r2 - vld1.64 {d4}, [r1], r2 - vld1.64 {d5}, [r1], r2 - vld1.64 {d6}, [r1], r2 - vld1.64 {d7}, [r1] - - ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i] - vaddw.u8 q8, q8, d0 - vaddw.u8 q9, q9, d1 - vaddw.u8 q10, q10, d2 - vaddw.u8 q11, q11, d3 - vaddw.u8 q12, q12, d4 - vaddw.u8 q13, q13, d5 - vaddw.u8 q14, q14, d6 - vaddw.u8 q15, q15, d7 - - ; clip_pixel - vqmovun.s16 d0, q8 - vqmovun.s16 d1, q9 - vqmovun.s16 d2, q10 - vqmovun.s16 d3, q11 - vqmovun.s16 d4, q12 - vqmovun.s16 d5, q13 - vqmovun.s16 d6, q14 - vqmovun.s16 d7, q15 - - ; store the data - vst1.64 {d0}, [r0], r2 - vst1.64 {d1}, [r0], r2 - vst1.64 {d2}, [r0], r2 - vst1.64 {d3}, [r0], r2 - vst1.64 {d4}, [r0], r2 - vst1.64 {d5}, [r0], r2 - vst1.64 {d6}, [r0], r2 - vst1.64 {d7}, [r0], r2 - - vpop {d8-d15} - pop {r4-r9} - bx lr - ENDP ; 
|vp9_idct8x8_12_add_neon| - - END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.asm deleted file mode 100644 index 2f326e24c9e..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.asm +++ /dev/null @@ -1,237 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp9_iht4x4_16_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - ; Parallel 1D IDCT on all the columns of a 4x4 16bits data matrix which are - ; loaded in d16-d19. d0 must contain cospi_8_64. d1 must contain - ; cospi_16_64. d2 must contain cospi_24_64. The output will be stored back - ; into d16-d19 registers. This macro will touch q10- q15 registers and use - ; them as buffer during calculation. 
- MACRO - IDCT4x4_1D - ; stage 1 - vadd.s16 d23, d16, d18 ; (input[0] + input[2]) - vsub.s16 d24, d16, d18 ; (input[0] - input[2]) - - vmull.s16 q15, d17, d2 ; input[1] * cospi_24_64 - vmull.s16 q10, d17, d0 ; input[1] * cospi_8_64 - vmull.s16 q13, d23, d1 ; (input[0] + input[2]) * cospi_16_64 - vmull.s16 q14, d24, d1 ; (input[0] - input[2]) * cospi_16_64 - vmlsl.s16 q15, d19, d0 ; input[1] * cospi_24_64 - input[3] * cospi_8_64 - vmlal.s16 q10, d19, d2 ; input[1] * cospi_8_64 + input[3] * cospi_24_64 - - ; dct_const_round_shift - vqrshrn.s32 d26, q13, #14 - vqrshrn.s32 d27, q14, #14 - vqrshrn.s32 d29, q15, #14 - vqrshrn.s32 d28, q10, #14 - - ; stage 2 - ; output[0] = step[0] + step[3]; - ; output[1] = step[1] + step[2]; - ; output[3] = step[0] - step[3]; - ; output[2] = step[1] - step[2]; - vadd.s16 q8, q13, q14 - vsub.s16 q9, q13, q14 - vswp d18, d19 - MEND - - ; Parallel 1D IADST on all the columns of a 4x4 16bits data matrix which - ; loaded in d16-d19. d3 must contain sinpi_1_9. d4 must contain sinpi_2_9. - ; d5 must contain sinpi_4_9. d6 must contain sinpi_3_9. The output will be - ; stored back into d16-d19 registers. This macro will touch q11,q12,q13, - ; q14,q15 registers and use them as buffer during calculation. 
- MACRO - IADST4x4_1D - vmull.s16 q10, d3, d16 ; s0 = sinpi_1_9 * x0 - vmull.s16 q11, d4, d16 ; s1 = sinpi_2_9 * x0 - vmull.s16 q12, d6, d17 ; s2 = sinpi_3_9 * x1 - vmull.s16 q13, d5, d18 ; s3 = sinpi_4_9 * x2 - vmull.s16 q14, d3, d18 ; s4 = sinpi_1_9 * x2 - vmovl.s16 q15, d16 ; expand x0 from 16 bit to 32 bit - vaddw.s16 q15, q15, d19 ; x0 + x3 - vmull.s16 q8, d4, d19 ; s5 = sinpi_2_9 * x3 - vsubw.s16 q15, q15, d18 ; s7 = x0 + x3 - x2 - vmull.s16 q9, d5, d19 ; s6 = sinpi_4_9 * x3 - - vadd.s32 q10, q10, q13 ; x0 = s0 + s3 + s5 - vadd.s32 q10, q10, q8 - vsub.s32 q11, q11, q14 ; x1 = s1 - s4 - s6 - vdup.32 q8, r0 ; duplicate sinpi_3_9 - vsub.s32 q11, q11, q9 - vmul.s32 q15, q15, q8 ; x2 = sinpi_3_9 * s7 - - vadd.s32 q13, q10, q12 ; s0 = x0 + x3 - vadd.s32 q10, q10, q11 ; x0 + x1 - vadd.s32 q14, q11, q12 ; s1 = x1 + x3 - vsub.s32 q10, q10, q12 ; s3 = x0 + x1 - x3 - - ; dct_const_round_shift - vqrshrn.s32 d16, q13, #14 - vqrshrn.s32 d17, q14, #14 - vqrshrn.s32 d18, q15, #14 - vqrshrn.s32 d19, q10, #14 - MEND - - ; Generate cosine constants in d6 - d8 for the IDCT - MACRO - GENERATE_COSINE_CONSTANTS - ; cospi_8_64 = 15137 = 0x3b21 - mov r0, #0x3b00 - add r0, #0x21 - ; cospi_16_64 = 11585 = 0x2d41 - mov r3, #0x2d00 - add r3, #0x41 - ; cospi_24_64 = 6270 = 0x187e - mov r12, #0x1800 - add r12, #0x7e - - ; generate constant vectors - vdup.16 d0, r0 ; duplicate cospi_8_64 - vdup.16 d1, r3 ; duplicate cospi_16_64 - vdup.16 d2, r12 ; duplicate cospi_24_64 - MEND - - ; Generate sine constants in d1 - d4 for the IADST. 
- MACRO - GENERATE_SINE_CONSTANTS - ; sinpi_1_9 = 5283 = 0x14A3 - mov r0, #0x1400 - add r0, #0xa3 - ; sinpi_2_9 = 9929 = 0x26C9 - mov r3, #0x2600 - add r3, #0xc9 - ; sinpi_4_9 = 15212 = 0x3B6C - mov r12, #0x3b00 - add r12, #0x6c - - ; generate constant vectors - vdup.16 d3, r0 ; duplicate sinpi_1_9 - - ; sinpi_3_9 = 13377 = 0x3441 - mov r0, #0x3400 - add r0, #0x41 - - vdup.16 d4, r3 ; duplicate sinpi_2_9 - vdup.16 d5, r12 ; duplicate sinpi_4_9 - vdup.16 q3, r0 ; duplicate sinpi_3_9 - MEND - - ; Transpose a 4x4 16bits data matrix. Datas are loaded in d16-d19. - MACRO - TRANSPOSE4X4 - vtrn.16 d16, d17 - vtrn.16 d18, d19 - vtrn.32 q8, q9 - MEND - - AREA Block, CODE, READONLY ; name this block of code -;void vp9_iht4x4_16_add_neon(int16_t *input, uint8_t *dest, -; int dest_stride, int tx_type) -; -; r0 int16_t input -; r1 uint8_t *dest -; r2 int dest_stride -; r3 int tx_type) -; This function will only handle tx_type of 1,2,3. -|vp9_iht4x4_16_add_neon| PROC - - ; load the inputs into d16-d19 - vld1.s16 {q8,q9}, [r0]! 
- - ; transpose the input data - TRANSPOSE4X4 - - ; decide the type of transform - cmp r3, #2 - beq idct_iadst - cmp r3, #3 - beq iadst_iadst - -iadst_idct - ; generate constants - GENERATE_COSINE_CONSTANTS - GENERATE_SINE_CONSTANTS - - ; first transform rows - IDCT4x4_1D - - ; transpose the matrix - TRANSPOSE4X4 - - ; then transform columns - IADST4x4_1D - - b end_vp9_iht4x4_16_add_neon - -idct_iadst - ; generate constants - GENERATE_COSINE_CONSTANTS - GENERATE_SINE_CONSTANTS - - ; first transform rows - IADST4x4_1D - - ; transpose the matrix - TRANSPOSE4X4 - - ; then transform columns - IDCT4x4_1D - - b end_vp9_iht4x4_16_add_neon - -iadst_iadst - ; generate constants - GENERATE_SINE_CONSTANTS - - ; first transform rows - IADST4x4_1D - - ; transpose the matrix - TRANSPOSE4X4 - - ; then transform columns - IADST4x4_1D - -end_vp9_iht4x4_16_add_neon - ; ROUND_POWER_OF_TWO(temp_out[j], 4) - vrshr.s16 q8, q8, #4 - vrshr.s16 q9, q9, #4 - - vld1.32 {d26[0]}, [r1], r2 - vld1.32 {d26[1]}, [r1], r2 - vld1.32 {d27[0]}, [r1], r2 - vld1.32 {d27[1]}, [r1] - - ; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i] - vaddw.u8 q8, q8, d26 - vaddw.u8 q9, q9, d27 - - ; clip_pixel - vqmovun.s16 d26, q8 - vqmovun.s16 d27, q9 - - ; do the stores in reverse order with negative post-increment, by changing - ; the sign of the stride - rsb r2, r2, #0 - vst1.32 {d27[1]}, [r1], r2 - vst1.32 {d27[0]}, [r1], r2 - vst1.32 {d26[1]}, [r1], r2 - vst1.32 {d26[0]}, [r1] ; no post-increment - bx lr - ENDP ; |vp9_iht4x4_16_add_neon| - - END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.asm deleted file mode 100644 index b41f5661b80..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.asm +++ /dev/null @@ -1,698 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. 
-; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp9_iht8x8_64_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - ; Generate IADST constants in r0 - r12 for the IADST. - MACRO - GENERATE_IADST_CONSTANTS - ; generate cospi_2_64 = 16305 - mov r0, #0x3f00 - add r0, #0xb1 - - ; generate cospi_30_64 = 1606 - mov r1, #0x600 - add r1, #0x46 - - ; generate cospi_10_64 = 14449 - mov r2, #0x3800 - add r2, #0x71 - - ; generate cospi_22_64 = 7723 - mov r3, #0x1e00 - add r3, #0x2b - - ; generate cospi_18_64 = 10394 - mov r4, #0x2800 - add r4, #0x9a - - ; generate cospi_14_64 = 12665 - mov r5, #0x3100 - add r5, #0x79 - - ; generate cospi_26_64 = 4756 - mov r6, #0x1200 - add r6, #0x94 - - ; generate cospi_6_64 = 15679 - mov r7, #0x3d00 - add r7, #0x3f - - ; generate cospi_8_64 = 15137 - mov r8, #0x3b00 - add r8, #0x21 - - ; generate cospi_24_64 = 6270 - mov r9, #0x1800 - add r9, #0x7e - - ; generate 0 - mov r10, #0 - - ; generate cospi_16_64 = 11585 - mov r12, #0x2d00 - add r12, #0x41 - MEND - - ; Generate IDCT constants in r3 - r9 for the IDCT. - MACRO - GENERATE_IDCT_CONSTANTS - ; generate cospi_28_64 = 3196 - mov r3, #0x0c00 - add r3, #0x7c - - ; generate cospi_4_64 = 16069 - mov r4, #0x3e00 - add r4, #0xc5 - - ; generate cospi_12_64 = 13623 - mov r5, #0x3500 - add r5, #0x37 - - ; generate cospi_20_64 = 9102 - mov r6, #0x2300 - add r6, #0x8e - - ; generate cospi_16_64 = 11585 - mov r7, #0x2d00 - add r7, #0x41 - - ; generate cospi_24_64 = 6270 - mov r8, #0x1800 - add r8, #0x7e - - ; generate cospi_8_64 = 15137 - mov r9, #0x3b00 - add r9, #0x21 - MEND - - ; Transpose a 8x8 16bits data matrix. Datas are loaded in q8-q15. 
- MACRO - TRANSPOSE8X8 - vswp d17, d24 - vswp d23, d30 - vswp d21, d28 - vswp d19, d26 - vtrn.32 q8, q10 - vtrn.32 q9, q11 - vtrn.32 q12, q14 - vtrn.32 q13, q15 - vtrn.16 q8, q9 - vtrn.16 q10, q11 - vtrn.16 q12, q13 - vtrn.16 q14, q15 - MEND - - ; Parallel 1D IDCT on all the columns of a 8x8 16bits data matrix which are - ; loaded in q8-q15. The IDCT constants are loaded in r3 - r9. The output - ; will be stored back into q8-q15 registers. This macro will touch q0-q7 - ; registers and use them as buffer during calculation. - MACRO - IDCT8x8_1D - ; stage 1 - vdup.16 d0, r3 ; duplicate cospi_28_64 - vdup.16 d1, r4 ; duplicate cospi_4_64 - vdup.16 d2, r5 ; duplicate cospi_12_64 - vdup.16 d3, r6 ; duplicate cospi_20_64 - - ; input[1] * cospi_28_64 - vmull.s16 q2, d18, d0 - vmull.s16 q3, d19, d0 - - ; input[5] * cospi_12_64 - vmull.s16 q5, d26, d2 - vmull.s16 q6, d27, d2 - - ; input[1]*cospi_28_64-input[7]*cospi_4_64 - vmlsl.s16 q2, d30, d1 - vmlsl.s16 q3, d31, d1 - - ; input[5] * cospi_12_64 - input[3] * cospi_20_64 - vmlsl.s16 q5, d22, d3 - vmlsl.s16 q6, d23, d3 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d8, q2, #14 ; >> 14 - vqrshrn.s32 d9, q3, #14 ; >> 14 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d10, q5, #14 ; >> 14 - vqrshrn.s32 d11, q6, #14 ; >> 14 - - ; input[1] * cospi_4_64 - vmull.s16 q2, d18, d1 - vmull.s16 q3, d19, d1 - - ; input[5] * cospi_20_64 - vmull.s16 q9, d26, d3 - vmull.s16 q13, d27, d3 - - ; input[1]*cospi_4_64+input[7]*cospi_28_64 - vmlal.s16 q2, d30, d0 - vmlal.s16 q3, d31, d0 - - ; input[5] * cospi_20_64 + input[3] * cospi_12_64 - vmlal.s16 q9, d22, d2 - vmlal.s16 q13, d23, d2 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d14, q2, #14 ; >> 14 - vqrshrn.s32 d15, q3, #14 ; >> 14 - - ; stage 2 & stage 3 - even half - vdup.16 d0, r7 ; duplicate cospi_16_64 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d12, q9, #14 ; >> 14 - vqrshrn.s32 d13, q13, #14 ; >> 14 - - ; 
input[0] * cospi_16_64 - vmull.s16 q2, d16, d0 - vmull.s16 q3, d17, d0 - - ; input[0] * cospi_16_64 - vmull.s16 q13, d16, d0 - vmull.s16 q15, d17, d0 - - ; (input[0] + input[2]) * cospi_16_64 - vmlal.s16 q2, d24, d0 - vmlal.s16 q3, d25, d0 - - ; (input[0] - input[2]) * cospi_16_64 - vmlsl.s16 q13, d24, d0 - vmlsl.s16 q15, d25, d0 - - vdup.16 d0, r8 ; duplicate cospi_24_64 - vdup.16 d1, r9 ; duplicate cospi_8_64 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d18, q2, #14 ; >> 14 - vqrshrn.s32 d19, q3, #14 ; >> 14 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d22, q13, #14 ; >> 14 - vqrshrn.s32 d23, q15, #14 ; >> 14 - - ; input[1] * cospi_24_64 - vmull.s16 q2, d20, d0 - vmull.s16 q3, d21, d0 - - ; input[1] * cospi_8_64 - vmull.s16 q8, d20, d1 - vmull.s16 q12, d21, d1 - - ; input[1] * cospi_24_64 - input[3] * cospi_8_64 - vmlsl.s16 q2, d28, d1 - vmlsl.s16 q3, d29, d1 - - ; input[1] * cospi_8_64 + input[3] * cospi_24_64 - vmlal.s16 q8, d28, d0 - vmlal.s16 q12, d29, d0 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d26, q2, #14 ; >> 14 - vqrshrn.s32 d27, q3, #14 ; >> 14 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d30, q8, #14 ; >> 14 - vqrshrn.s32 d31, q12, #14 ; >> 14 - - vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3] - vadd.s16 q1, q11, q13 ; output[1] = step[1] + step[2] - vsub.s16 q2, q11, q13 ; output[2] = step[1] - step[2] - vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3] - - ; stage 3 -odd half - vdup.16 d16, r7 ; duplicate cospi_16_64 - - ; stage 2 - odd half - vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5] - vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5] - vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7] - vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7] - - ; step2[6] * cospi_16_64 - vmull.s16 q9, d28, d16 - vmull.s16 q10, d29, d16 - - ; step2[6] * cospi_16_64 - vmull.s16 q11, d28, d16 - vmull.s16 q12, d29, d16 - - ; (step2[6] - step2[5]) * 
cospi_16_64 - vmlsl.s16 q9, d26, d16 - vmlsl.s16 q10, d27, d16 - - ; (step2[5] + step2[6]) * cospi_16_64 - vmlal.s16 q11, d26, d16 - vmlal.s16 q12, d27, d16 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d10, q9, #14 ; >> 14 - vqrshrn.s32 d11, q10, #14 ; >> 14 - - ; dct_const_round_shift(input_dc * cospi_16_64) - vqrshrn.s32 d12, q11, #14 ; >> 14 - vqrshrn.s32 d13, q12, #14 ; >> 14 - - ; stage 4 - vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7]; - vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6]; - vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5]; - vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4]; - vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4]; - vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5]; - vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6]; - vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7]; - MEND - - ; Parallel 1D IADST on all the columns of a 8x8 16bits data matrix which - ; loaded in q8-q15. IADST constants are loaded in r0 - r12 registers. The - ; output will be stored back into q8-q15 registers. This macro will touch - ; q0 - q7 registers and use them as buffer during calculation. 
- MACRO - IADST8X8_1D - vdup.16 d14, r0 ; duplicate cospi_2_64 - vdup.16 d15, r1 ; duplicate cospi_30_64 - - ; cospi_2_64 * x0 - vmull.s16 q1, d30, d14 - vmull.s16 q2, d31, d14 - - ; cospi_30_64 * x0 - vmull.s16 q3, d30, d15 - vmull.s16 q4, d31, d15 - - vdup.16 d30, r4 ; duplicate cospi_18_64 - vdup.16 d31, r5 ; duplicate cospi_14_64 - - ; s0 = cospi_2_64 * x0 + cospi_30_64 * x1; - vmlal.s16 q1, d16, d15 - vmlal.s16 q2, d17, d15 - - ; s1 = cospi_30_64 * x0 - cospi_2_64 * x1 - vmlsl.s16 q3, d16, d14 - vmlsl.s16 q4, d17, d14 - - ; cospi_18_64 * x4 - vmull.s16 q5, d22, d30 - vmull.s16 q6, d23, d30 - - ; cospi_14_64 * x4 - vmull.s16 q7, d22, d31 - vmull.s16 q8, d23, d31 - - ; s4 = cospi_18_64 * x4 + cospi_14_64 * x5; - vmlal.s16 q5, d24, d31 - vmlal.s16 q6, d25, d31 - - ; s5 = cospi_14_64 * x4 - cospi_18_64 * x5 - vmlsl.s16 q7, d24, d30 - vmlsl.s16 q8, d25, d30 - - ; (s0 + s4) - vadd.s32 q11, q1, q5 - vadd.s32 q12, q2, q6 - - vdup.16 d0, r2 ; duplicate cospi_10_64 - vdup.16 d1, r3 ; duplicate cospi_22_64 - - ; (s0 - s4) - vsub.s32 q1, q1, q5 - vsub.s32 q2, q2, q6 - - ; x0 = dct_const_round_shift(s0 + s4); - vqrshrn.s32 d22, q11, #14 ; >> 14 - vqrshrn.s32 d23, q12, #14 ; >> 14 - - ; (s1 + s5) - vadd.s32 q12, q3, q7 - vadd.s32 q15, q4, q8 - - ; (s1 - s5) - vsub.s32 q3, q3, q7 - vsub.s32 q4, q4, q8 - - ; x4 = dct_const_round_shift(s0 - s4); - vqrshrn.s32 d2, q1, #14 ; >> 14 - vqrshrn.s32 d3, q2, #14 ; >> 14 - - ; x1 = dct_const_round_shift(s1 + s5); - vqrshrn.s32 d24, q12, #14 ; >> 14 - vqrshrn.s32 d25, q15, #14 ; >> 14 - - ; x5 = dct_const_round_shift(s1 - s5); - vqrshrn.s32 d6, q3, #14 ; >> 14 - vqrshrn.s32 d7, q4, #14 ; >> 14 - - ; cospi_10_64 * x2 - vmull.s16 q4, d26, d0 - vmull.s16 q5, d27, d0 - - ; cospi_22_64 * x2 - vmull.s16 q2, d26, d1 - vmull.s16 q6, d27, d1 - - vdup.16 d30, r6 ; duplicate cospi_26_64 - vdup.16 d31, r7 ; duplicate cospi_6_64 - - ; s2 = cospi_10_64 * x2 + cospi_22_64 * x3; - vmlal.s16 q4, d20, d1 - vmlal.s16 q5, d21, d1 - - ; s3 = cospi_22_64 * 
x2 - cospi_10_64 * x3; - vmlsl.s16 q2, d20, d0 - vmlsl.s16 q6, d21, d0 - - ; cospi_26_64 * x6 - vmull.s16 q0, d18, d30 - vmull.s16 q13, d19, d30 - - ; s6 = cospi_26_64 * x6 + cospi_6_64 * x7; - vmlal.s16 q0, d28, d31 - vmlal.s16 q13, d29, d31 - - ; cospi_6_64 * x6 - vmull.s16 q10, d18, d31 - vmull.s16 q9, d19, d31 - - ; s7 = cospi_6_64 * x6 - cospi_26_64 * x7; - vmlsl.s16 q10, d28, d30 - vmlsl.s16 q9, d29, d30 - - ; (s3 + s7) - vadd.s32 q14, q2, q10 - vadd.s32 q15, q6, q9 - - ; (s3 - s7) - vsub.s32 q2, q2, q10 - vsub.s32 q6, q6, q9 - - ; x3 = dct_const_round_shift(s3 + s7); - vqrshrn.s32 d28, q14, #14 ; >> 14 - vqrshrn.s32 d29, q15, #14 ; >> 14 - - ; x7 = dct_const_round_shift(s3 - s7); - vqrshrn.s32 d4, q2, #14 ; >> 14 - vqrshrn.s32 d5, q6, #14 ; >> 14 - - ; (s2 + s6) - vadd.s32 q9, q4, q0 - vadd.s32 q10, q5, q13 - - ; (s2 - s6) - vsub.s32 q4, q4, q0 - vsub.s32 q5, q5, q13 - - vdup.16 d30, r8 ; duplicate cospi_8_64 - vdup.16 d31, r9 ; duplicate cospi_24_64 - - ; x2 = dct_const_round_shift(s2 + s6); - vqrshrn.s32 d18, q9, #14 ; >> 14 - vqrshrn.s32 d19, q10, #14 ; >> 14 - - ; x6 = dct_const_round_shift(s2 - s6); - vqrshrn.s32 d8, q4, #14 ; >> 14 - vqrshrn.s32 d9, q5, #14 ; >> 14 - - ; cospi_8_64 * x4 - vmull.s16 q5, d2, d30 - vmull.s16 q6, d3, d30 - - ; cospi_24_64 * x4 - vmull.s16 q7, d2, d31 - vmull.s16 q0, d3, d31 - - ; s4 = cospi_8_64 * x4 + cospi_24_64 * x5; - vmlal.s16 q5, d6, d31 - vmlal.s16 q6, d7, d31 - - ; s5 = cospi_24_64 * x4 - cospi_8_64 * x5; - vmlsl.s16 q7, d6, d30 - vmlsl.s16 q0, d7, d30 - - ; cospi_8_64 * x7 - vmull.s16 q1, d4, d30 - vmull.s16 q3, d5, d30 - - ; cospi_24_64 * x7 - vmull.s16 q10, d4, d31 - vmull.s16 q2, d5, d31 - - ; s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; - vmlsl.s16 q1, d8, d31 - vmlsl.s16 q3, d9, d31 - - ; s7 = cospi_8_64 * x6 + cospi_24_64 * x7; - vmlal.s16 q10, d8, d30 - vmlal.s16 q2, d9, d30 - - vadd.s16 q8, q11, q9 ; x0 = s0 + s2; - - vsub.s16 q11, q11, q9 ; x2 = s0 - s2; - - vadd.s16 q4, q12, q14 ; x1 = s1 + s3; - - vsub.s16 
q12, q12, q14 ; x3 = s1 - s3; - - ; (s4 + s6) - vadd.s32 q14, q5, q1 - vadd.s32 q15, q6, q3 - - ; (s4 - s6) - vsub.s32 q5, q5, q1 - vsub.s32 q6, q6, q3 - - ; x4 = dct_const_round_shift(s4 + s6); - vqrshrn.s32 d18, q14, #14 ; >> 14 - vqrshrn.s32 d19, q15, #14 ; >> 14 - - ; x6 = dct_const_round_shift(s4 - s6); - vqrshrn.s32 d10, q5, #14 ; >> 14 - vqrshrn.s32 d11, q6, #14 ; >> 14 - - ; (s5 + s7) - vadd.s32 q1, q7, q10 - vadd.s32 q3, q0, q2 - - ; (s5 - s7)) - vsub.s32 q7, q7, q10 - vsub.s32 q0, q0, q2 - - ; x5 = dct_const_round_shift(s5 + s7); - vqrshrn.s32 d28, q1, #14 ; >> 14 - vqrshrn.s32 d29, q3, #14 ; >> 14 - - ; x7 = dct_const_round_shift(s5 - s7); - vqrshrn.s32 d14, q7, #14 ; >> 14 - vqrshrn.s32 d15, q0, #14 ; >> 14 - - vdup.16 d30, r12 ; duplicate cospi_16_64 - - ; cospi_16_64 * x2 - vmull.s16 q2, d22, d30 - vmull.s16 q3, d23, d30 - - ; cospi_6_64 * x6 - vmull.s16 q13, d22, d30 - vmull.s16 q1, d23, d30 - - ; cospi_16_64 * x2 + cospi_16_64 * x3; - vmlal.s16 q2, d24, d30 - vmlal.s16 q3, d25, d30 - - ; cospi_16_64 * x2 - cospi_16_64 * x3; - vmlsl.s16 q13, d24, d30 - vmlsl.s16 q1, d25, d30 - - ; x2 = dct_const_round_shift(s2); - vqrshrn.s32 d4, q2, #14 ; >> 14 - vqrshrn.s32 d5, q3, #14 ; >> 14 - - ;x3 = dct_const_round_shift(s3); - vqrshrn.s32 d24, q13, #14 ; >> 14 - vqrshrn.s32 d25, q1, #14 ; >> 14 - - ; cospi_16_64 * x6 - vmull.s16 q13, d10, d30 - vmull.s16 q1, d11, d30 - - ; cospi_6_64 * x6 - vmull.s16 q11, d10, d30 - vmull.s16 q0, d11, d30 - - ; cospi_16_64 * x6 + cospi_16_64 * x7; - vmlal.s16 q13, d14, d30 - vmlal.s16 q1, d15, d30 - - ; cospi_16_64 * x6 - cospi_16_64 * x7; - vmlsl.s16 q11, d14, d30 - vmlsl.s16 q0, d15, d30 - - ; x6 = dct_const_round_shift(s6); - vqrshrn.s32 d20, q13, #14 ; >> 14 - vqrshrn.s32 d21, q1, #14 ; >> 14 - - ;x7 = dct_const_round_shift(s7); - vqrshrn.s32 d12, q11, #14 ; >> 14 - vqrshrn.s32 d13, q0, #14 ; >> 14 - - vdup.16 q5, r10 ; duplicate 0 - - vsub.s16 q9, q5, q9 ; output[1] = -x4; - vsub.s16 q11, q5, q2 ; output[3] = -x2; - 
vsub.s16 q13, q5, q6 ; output[5] = -x7; - vsub.s16 q15, q5, q4 ; output[7] = -x1; - MEND - - - AREA Block, CODE, READONLY ; name this block of code -;void vp9_iht8x8_64_add_neon(int16_t *input, uint8_t *dest, -; int dest_stride, int tx_type) -; -; r0 int16_t input -; r1 uint8_t *dest -; r2 int dest_stride -; r3 int tx_type) -; This function will only handle tx_type of 1,2,3. -|vp9_iht8x8_64_add_neon| PROC - - ; load the inputs into d16-d19 - vld1.s16 {q8,q9}, [r0]! - vld1.s16 {q10,q11}, [r0]! - vld1.s16 {q12,q13}, [r0]! - vld1.s16 {q14,q15}, [r0]! - - push {r0-r10} - vpush {d8-d15} - - ; transpose the input data - TRANSPOSE8X8 - - ; decide the type of transform - cmp r3, #2 - beq idct_iadst - cmp r3, #3 - beq iadst_iadst - -iadst_idct - ; generate IDCT constants - GENERATE_IDCT_CONSTANTS - - ; first transform rows - IDCT8x8_1D - - ; transpose the matrix - TRANSPOSE8X8 - - ; generate IADST constants - GENERATE_IADST_CONSTANTS - - ; then transform columns - IADST8X8_1D - - b end_vp9_iht8x8_64_add_neon - -idct_iadst - ; generate IADST constants - GENERATE_IADST_CONSTANTS - - ; first transform rows - IADST8X8_1D - - ; transpose the matrix - TRANSPOSE8X8 - - ; generate IDCT constants - GENERATE_IDCT_CONSTANTS - - ; then transform columns - IDCT8x8_1D - - b end_vp9_iht8x8_64_add_neon - -iadst_iadst - ; generate IADST constants - GENERATE_IADST_CONSTANTS - - ; first transform rows - IADST8X8_1D - - ; transpose the matrix - TRANSPOSE8X8 - - ; then transform columns - IADST8X8_1D - -end_vp9_iht8x8_64_add_neon - vpop {d8-d15} - pop {r0-r10} - - ; ROUND_POWER_OF_TWO(temp_out[j], 5) - vrshr.s16 q8, q8, #5 - vrshr.s16 q9, q9, #5 - vrshr.s16 q10, q10, #5 - vrshr.s16 q11, q11, #5 - vrshr.s16 q12, q12, #5 - vrshr.s16 q13, q13, #5 - vrshr.s16 q14, q14, #5 - vrshr.s16 q15, q15, #5 - - ; save dest pointer - mov r0, r1 - - ; load destination data - vld1.64 {d0}, [r1], r2 - vld1.64 {d1}, [r1], r2 - vld1.64 {d2}, [r1], r2 - vld1.64 {d3}, [r1], r2 - vld1.64 {d4}, [r1], r2 - vld1.64 {d5}, 
[r1], r2 - vld1.64 {d6}, [r1], r2 - vld1.64 {d7}, [r1] - - ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i] - vaddw.u8 q8, q8, d0 - vaddw.u8 q9, q9, d1 - vaddw.u8 q10, q10, d2 - vaddw.u8 q11, q11, d3 - vaddw.u8 q12, q12, d4 - vaddw.u8 q13, q13, d5 - vaddw.u8 q14, q14, d6 - vaddw.u8 q15, q15, d7 - - ; clip_pixel - vqmovun.s16 d0, q8 - vqmovun.s16 d1, q9 - vqmovun.s16 d2, q10 - vqmovun.s16 d3, q11 - vqmovun.s16 d4, q12 - vqmovun.s16 d5, q13 - vqmovun.s16 d6, q14 - vqmovun.s16 d7, q15 - - ; store the data - vst1.64 {d0}, [r0], r2 - vst1.64 {d1}, [r0], r2 - vst1.64 {d2}, [r0], r2 - vst1.64 {d3}, [r0], r2 - vst1.64 {d4}, [r0], r2 - vst1.64 {d5}, [r0], r2 - vst1.64 {d6}, [r0], r2 - vst1.64 {d7}, [r0], r2 - bx lr - ENDP ; |vp9_iht8x8_64_add_neon| - - END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm deleted file mode 100644 index 5b8ec20287d..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm +++ /dev/null @@ -1,199 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - EXPORT |vp9_lpf_horizontal_4_dual_neon| - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p, -; const uint8_t *blimit0, -; const uint8_t *limit0, -; const uint8_t *thresh0, -; const uint8_t *blimit1, -; const uint8_t *limit1, -; const uint8_t *thresh1) -; r0 uint8_t *s, -; r1 int p, -; r2 const uint8_t *blimit0, -; r3 const uint8_t *limit0, -; sp const uint8_t *thresh0, -; sp+4 const uint8_t *blimit1, -; sp+8 const uint8_t *limit1, -; sp+12 const uint8_t *thresh1, - -|vp9_lpf_horizontal_4_dual_neon| PROC - push {lr} - - ldr r12, [sp, #4] ; load thresh0 - vld1.8 {d0}, [r2] ; load blimit0 to first half q - vld1.8 {d2}, [r3] ; load limit0 to first half q - - add r1, r1, r1 ; double pitch - ldr r2, [sp, #8] ; load blimit1 - - vld1.8 {d4}, [r12] ; load thresh0 to first half q - - ldr r3, [sp, #12] ; load limit1 - ldr r12, [sp, #16] ; load thresh1 - vld1.8 {d1}, [r2] ; load blimit1 to 2nd half q - - sub r2, r0, r1, lsl #1 ; s[-4 * p] - - vld1.8 {d3}, [r3] ; load limit1 to 2nd half q - vld1.8 {d5}, [r12] ; load thresh1 to 2nd half q - - vpush {d8-d15} ; save neon registers - - add r3, r2, r1, lsr #1 ; s[-3 * p] - - vld1.u8 {q3}, [r2@64], r1 ; p3 - vld1.u8 {q4}, [r3@64], r1 ; p2 - vld1.u8 {q5}, [r2@64], r1 ; p1 - vld1.u8 {q6}, [r3@64], r1 ; p0 - vld1.u8 {q7}, [r2@64], r1 ; q0 - vld1.u8 {q8}, [r3@64], r1 ; q1 - vld1.u8 {q9}, [r2@64] ; q2 - vld1.u8 {q10}, [r3@64] ; q3 - - sub r2, r2, r1, lsl #1 - sub r3, r3, r1, lsl #1 - - bl vp9_loop_filter_neon_16 - - vst1.u8 {q5}, [r2@64], r1 ; store op1 - vst1.u8 {q6}, [r3@64], r1 ; store op0 - vst1.u8 {q7}, [r2@64], r1 ; store oq0 - vst1.u8 {q8}, [r3@64], r1 ; store oq1 - - vpop {d8-d15} ; restore neon registers - - pop {pc} - ENDP ; |vp9_lpf_horizontal_4_dual_neon| - -; void vp9_loop_filter_neon_16(); -; This is a helper function for the loopfilters. The invidual functions do the -; necessary load, transpose (if necessary) and store. 
This function uses -; registers d8-d15, so the calling function must save those registers. -; -; r0-r3, r12 PRESERVE -; q0 blimit -; q1 limit -; q2 thresh -; q3 p3 -; q4 p2 -; q5 p1 -; q6 p0 -; q7 q0 -; q8 q1 -; q9 q2 -; q10 q3 -; -; Outputs: -; q5 op1 -; q6 op0 -; q7 oq0 -; q8 oq1 -|vp9_loop_filter_neon_16| PROC - - ; filter_mask - vabd.u8 q11, q3, q4 ; m1 = abs(p3 - p2) - vabd.u8 q12, q4, q5 ; m2 = abs(p2 - p1) - vabd.u8 q13, q5, q6 ; m3 = abs(p1 - p0) - vabd.u8 q14, q8, q7 ; m4 = abs(q1 - q0) - vabd.u8 q3, q9, q8 ; m5 = abs(q2 - q1) - vabd.u8 q4, q10, q9 ; m6 = abs(q3 - q2) - - ; only compare the largest value to limit - vmax.u8 q11, q11, q12 ; m7 = max(m1, m2) - vmax.u8 q12, q13, q14 ; m8 = max(m3, m4) - - vabd.u8 q9, q6, q7 ; abs(p0 - q0) - - vmax.u8 q3, q3, q4 ; m9 = max(m5, m6) - - vmov.u8 q10, #0x80 - - vmax.u8 q15, q11, q12 ; m10 = max(m7, m8) - - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - vmax.u8 q15, q15, q3 ; m11 = max(m10, m9) - - vabd.u8 q2, q5, q8 ; a = abs(p1 - q1) - vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2 - - veor q7, q7, q10 ; qs0 - - vcge.u8 q15, q1, q15 ; abs(m11) > limit - - vshr.u8 q2, q2, #1 ; a = a / 2 - veor q6, q6, q10 ; ps0 - - veor q5, q5, q10 ; ps1 - vqadd.u8 q9, q9, q2 ; a = b + a - - veor q8, q8, q10 ; qs1 - - vmov.u16 q4, #3 - - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q11, d15, d13 - - vcge.u8 q9, q0, q9 ; a > blimit - - vqsub.s8 q1, q5, q8 ; filter = clamp(ps1-qs1) - vorr q14, q13, q14 ; hev - - vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0) - vmul.i16 q11, q11, q4 - - vand q1, q1, q14 ; filter &= hev - vand q15, q15, q9 ; mask - - vmov.u8 q4, #3 - - vaddw.s8 q2, q2, d2 ; filter + 3 * (qs0 - ps0) - vaddw.s8 q11, q11, d3 - - vmov.u8 q9, #4 - - ; filter = clamp(filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d2, q2 - vqmovn.s16 d3, q11 - vand q1, q1, q15 ; filter &= mask - - vqadd.s8 q2, q1, q4 ; filter2 = clamp(filter+3) - vqadd.s8 q1, q1, q9 ; filter1 = clamp(filter+4) - 
vshr.s8 q2, q2, #3 ; filter2 >>= 3 - vshr.s8 q1, q1, #3 ; filter1 >>= 3 - - - vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + filter2) - vqsub.s8 q0, q7, q1 ; u = clamp(qs0 - filter1) - - ; outer tap adjustments - vrshr.s8 q1, q1, #1 ; filter = ++filter1 >> 1 - - veor q7, q0, q10 ; *oq0 = u^0x80 - - vbic q1, q1, q14 ; filter &= ~hev - - vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + filter) - vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - filter) - - veor q6, q11, q10 ; *op0 = u^0x80 - veor q5, q13, q10 ; *op1 = u^0x80 - veor q8, q12, q10 ; *oq1 = u^0x80 - - bx lr - ENDP ; |vp9_loop_filter_neon_16| - - END diff --git a/media/libvpx/vp9/common/arm/neon/vp9_loopfilter_16_neon.c b/media/libvpx/vp9/common/arm/neon/vp9_loopfilter_16_neon.c deleted file mode 100644 index bc6a17cd16f..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_loopfilter_16_neon.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "./vp9_rtcd.h" -#include "vpx/vpx_integer.h" - -void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_lpf_horizontal_8(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_horizontal_8(s + 8, p, blimit1, limit1, thresh1, 1); -} - -void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); -} - -void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); -} - -void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh); - vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh); -} diff --git a/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.asm b/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.asm deleted file mode 100644 index dc9856fa887..00000000000 --- a/media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.asm +++ /dev/null @@ -1,634 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. 
All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp9_v_predictor_4x4_neon| - EXPORT |vp9_v_predictor_8x8_neon| - EXPORT |vp9_v_predictor_16x16_neon| - EXPORT |vp9_v_predictor_32x32_neon| - EXPORT |vp9_h_predictor_4x4_neon| - EXPORT |vp9_h_predictor_8x8_neon| - EXPORT |vp9_h_predictor_16x16_neon| - EXPORT |vp9_h_predictor_32x32_neon| - EXPORT |vp9_tm_predictor_4x4_neon| - EXPORT |vp9_tm_predictor_8x8_neon| - EXPORT |vp9_tm_predictor_16x16_neon| - EXPORT |vp9_tm_predictor_32x32_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;void vp9_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_v_predictor_4x4_neon| PROC - vld1.32 {d0[0]}, [r2] - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - bx lr - ENDP ; |vp9_v_predictor_4x4_neon| - -;void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_v_predictor_8x8_neon| PROC - vld1.8 {d0}, [r2] - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - bx lr - ENDP ; |vp9_v_predictor_8x8_neon| - -;void vp9_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_v_predictor_16x16_neon| PROC - vld1.8 {q0}, [r2] - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], 
r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - bx lr - ENDP ; |vp9_v_predictor_16x16_neon| - -;void vp9_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_v_predictor_32x32_neon| PROC - vld1.8 {q0, q1}, [r2] - mov r2, #2 -loop_v - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - subs r2, r2, #1 - bgt loop_v - bx lr - ENDP ; |vp9_v_predictor_32x32_neon| - -;void vp9_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_h_predictor_4x4_neon| PROC - vld1.32 {d1[0]}, [r3] - vdup.8 d0, d1[0] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[1] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[2] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[3] - vst1.32 {d0[0]}, [r0], r1 - bx lr - ENDP ; |vp9_h_predictor_4x4_neon| - -;void vp9_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_h_predictor_8x8_neon| PROC - vld1.64 {d1}, [r3] - vdup.8 d0, d1[0] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[1] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[2] - vst1.64 {d0}, [r0], r1 - 
vdup.8 d0, d1[3] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[4] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[5] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[6] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[7] - vst1.64 {d0}, [r0], r1 - bx lr - ENDP ; |vp9_h_predictor_8x8_neon| - -;void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_h_predictor_16x16_neon| PROC - vld1.8 {q1}, [r3] - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0], r1 - bx lr - ENDP ; |vp9_h_predictor_16x16_neon| - -;void vp9_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_h_predictor_32x32_neon| PROC - sub r1, r1, #16 - mov r2, #2 -loop_h - vld1.8 {q1}, [r3]! - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0]! 
- vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - subs r2, r2, #1 - bgt loop_h - bx lr - ENDP ; |vp9_h_predictor_32x32_neon| - -;void vp9_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_tm_predictor_4x4_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 d0, r12 - - ; Load above 4 pixels - vld1.32 {d2[0]}, [r2] - - ; Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - ; Load left row by row and compute left + (above - ytop_left) - ; 1st row and 2nd row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - - ; 3rd row and 4th row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - bx lr - ENDP ; |vp9_tm_predictor_4x4_neon| - -;void vp9_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left 
- -|vp9_tm_predictor_8x8_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 d0, r12 - - ; preload 8 left - vld1.8 {d30}, [r3] - - ; Load above 8 pixels - vld1.64 {d2}, [r2] - - vmovl.u8 q10, d30 - - ; Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - ; Load left row by row and compute left + (above - ytop_left) - ; 1st row and 2nd row - vdup.16 q0, d20[0] - vdup.16 q1, d20[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - ; 3rd row and 4th row - vdup.16 q8, d20[2] - vdup.16 q9, d20[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - ; 5th row and 6th row - vdup.16 q0, d21[0] - vdup.16 q1, d21[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - ; 7th row and 8th row - vdup.16 q8, d21[2] - vdup.16 q9, d21[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - bx lr - ENDP ; |vp9_tm_predictor_8x8_neon| - -;void vp9_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_tm_predictor_16x16_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 q0, r12 - - ; Load above 8 pixels - vld1.8 {q1}, [r2] - - ; preload 8 left into r12 - vld1.8 {d18}, [r3]! - - ; Compute above - ytop_left - vsubl.u8 q2, d2, d0 - vsubl.u8 q3, d3, d1 - - vmovl.u8 q10, d18 - - ; Load left row by row and compute left + (above - ytop_left) - ; Process 8 rows in each single loop and loop 2 times to process 16 rows. - mov r2, #2 - -loop_16x16_neon - ; Process two rows. 
- vdup.16 q0, d20[0] - vdup.16 q8, d20[1] - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d20[2] ; proload next 2 rows data - vdup.16 q8, d20[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - ; Process two rows. - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[0] ; proload next 2 rows data - vdup.16 q8, d21[1] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[2] ; proload next 2 rows data - vdup.16 q8, d21[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vld1.8 {d18}, [r3]! ; preload 8 left into r12 - vmovl.u8 q10, d18 - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - subs r2, r2, #1 - bgt loop_16x16_neon - - bx lr - ENDP ; |vp9_tm_predictor_16x16_neon| - -;void vp9_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vp9_tm_predictor_32x32_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 q0, r12 - - ; Load above 32 pixels - vld1.8 {q1}, [r2]! - vld1.8 {q2}, [r2] - - ; preload 8 left pixels - vld1.8 {d26}, [r3]! 
- - ; Compute above - ytop_left - vsubl.u8 q8, d2, d0 - vsubl.u8 q9, d3, d1 - vsubl.u8 q10, d4, d0 - vsubl.u8 q11, d5, d1 - - vmovl.u8 q3, d26 - - ; Load left row by row and compute left + (above - ytop_left) - ; Process 8 rows in each single loop and loop 4 times to process 32 rows. - mov r2, #4 - -loop_32x32_neon - ; Process two rows. - vdup.16 q0, d6[0] - vdup.16 q2, d6[1] - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q1, d6[2] - vdup.16 q2, d6[3] - vst1.64 {d24-d27}, [r0], r1 - - ; Process two rows. - vadd.s16 q12, q1, q8 - vadd.s16 q13, q1, q9 - vadd.s16 q14, q1, q10 - vadd.s16 q15, q1, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[0] - vdup.16 q2, d7[1] - vst1.64 {d24-d27}, [r0], r1 - - ; Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[2] - vdup.16 q2, d7[3] - vst1.64 {d24-d27}, [r0], r1 - - ; Process two rows. 
- vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vld1.8 {d0}, [r3]! ; preload 8 left pixels - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vmovl.u8 q3, d0 - vst1.64 {d24-d27}, [r0], r1 - - subs r2, r2, #1 - bgt loop_32x32_neon - - bx lr - ENDP ; |vp9_tm_predictor_32x32_neon| - - END diff --git a/media/libvpx/vp9/common/vp9_blockd.c b/media/libvpx/vp9/common/vp9_blockd.c deleted file mode 100644 index e13445fd1c3..00000000000 --- a/media/libvpx/vp9/common/vp9_blockd.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vp9/common/vp9_blockd.h" - -PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *left_mi, int b) { - if (b == 0 || b == 2) { - if (!left_mi || is_inter_block(&left_mi->mbmi)) - return DC_PRED; - - return get_y_mode(left_mi, b + 1); - } else { - assert(b == 1 || b == 3); - return cur_mi->bmi[b - 1].as_mode; - } -} - -PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *above_mi, int b) { - if (b == 0 || b == 1) { - if (!above_mi || is_inter_block(&above_mi->mbmi)) - return DC_PRED; - - return get_y_mode(above_mi, b + 2); - } else { - assert(b == 2 || b == 3); - return cur_mi->bmi[b - 2].as_mode; - } -} - -void vp9_foreach_transformed_block_in_plane( - const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, - foreach_transformed_block_visitor visit, void *arg) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const MB_MODE_INFO* mbmi = &xd->mi[0].src_mi->mbmi; - // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") - // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 - // transform size varies per plane, look it up in a common way. - const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) - : mbmi->tx_size; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; - const int step = 1 << (tx_size << 1); - int i; - - // If mb_to_right_edge is < 0 we are in a situation in which - // the current block size extends into the UMV and we won't - // visit the sub blocks that are wholly within the UMV. - if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { - int r, c; - - int max_blocks_wide = num_4x4_w; - int max_blocks_high = num_4x4_h; - - // xd->mb_to_right_edge is in units of pixels * 8. This converts - // it to 4x4 block sizes. 
- if (xd->mb_to_right_edge < 0) - max_blocks_wide += (xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - - if (xd->mb_to_bottom_edge < 0) - max_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - - i = 0; - // Unlike the normal case - in here we have to keep track of the - // row and column of the blocks we use so that we know if we are in - // the unrestricted motion border. - for (r = 0; r < num_4x4_h; r += (1 << tx_size)) { - for (c = 0; c < num_4x4_w; c += (1 << tx_size)) { - if (r < max_blocks_high && c < max_blocks_wide) - visit(plane, i, plane_bsize, tx_size, arg); - i += step; - } - } - } else { - for (i = 0; i < num_4x4_w * num_4x4_h; i += step) - visit(plane, i, plane_bsize, tx_size, arg); - } -} - -void vp9_foreach_transformed_block(const MACROBLOCKD* const xd, - BLOCK_SIZE bsize, - foreach_transformed_block_visitor visit, - void *arg) { - int plane; - - for (plane = 0; plane < MAX_MB_PLANE; plane++) - vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg); -} - -void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, - int aoff, int loff) { - ENTROPY_CONTEXT *const a = pd->above_context + aoff; - ENTROPY_CONTEXT *const l = pd->left_context + loff; - const int tx_size_in_blocks = 1 << tx_size; - - // above - if (has_eob && xd->mb_to_right_edge < 0) { - int i; - const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] + - (xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - int above_contexts = tx_size_in_blocks; - if (above_contexts + aoff > blocks_wide) - above_contexts = blocks_wide - aoff; - - for (i = 0; i < above_contexts; ++i) - a[i] = has_eob; - for (i = above_contexts; i < tx_size_in_blocks; ++i) - a[i] = 0; - } else { - vpx_memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); - } - - // left - if (has_eob && xd->mb_to_bottom_edge < 0) { - int i; - const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] + - 
(xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - int left_contexts = tx_size_in_blocks; - if (left_contexts + loff > blocks_high) - left_contexts = blocks_high - loff; - - for (i = 0; i < left_contexts; ++i) - l[i] = has_eob; - for (i = left_contexts; i < tx_size_in_blocks; ++i) - l[i] = 0; - } else { - vpx_memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); - } -} - -void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) { - int i; - - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y; - xd->plane[i].subsampling_x = i ? ss_x : 0; - xd->plane[i].subsampling_y = i ? ss_y : 0; - } -} diff --git a/media/libvpx/vp9/common/vp9_frame_buffers.c b/media/libvpx/vp9/common/vp9_frame_buffers.c deleted file mode 100644 index 34795b74ec2..00000000000 --- a/media/libvpx/vp9/common/vp9_frame_buffers.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include - -#include "vp9/common/vp9_frame_buffers.h" -#include "vpx_mem/vpx_mem.h" - -int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) { - assert(list != NULL); - vp9_free_internal_frame_buffers(list); - - list->num_internal_frame_buffers = - VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; - list->int_fb = - (InternalFrameBuffer *)vpx_calloc(list->num_internal_frame_buffers, - sizeof(*list->int_fb)); - return (list->int_fb == NULL); -} - -void vp9_free_internal_frame_buffers(InternalFrameBufferList *list) { - int i; - - assert(list != NULL); - - for (i = 0; i < list->num_internal_frame_buffers; ++i) { - vpx_free(list->int_fb[i].data); - list->int_fb[i].data = NULL; - } - vpx_free(list->int_fb); - list->int_fb = NULL; -} - -int vp9_get_frame_buffer(void *cb_priv, size_t min_size, - vpx_codec_frame_buffer_t *fb) { - int i; - InternalFrameBufferList *const int_fb_list = - (InternalFrameBufferList *)cb_priv; - if (int_fb_list == NULL) - return -1; - - // Find a free frame buffer. - for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) { - if (!int_fb_list->int_fb[i].in_use) - break; - } - - if (i == int_fb_list->num_internal_frame_buffers) - return -1; - - if (int_fb_list->int_fb[i].size < min_size) { - int_fb_list->int_fb[i].data = - (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size); - if (!int_fb_list->int_fb[i].data) - return -1; - - // This memset is needed for fixing valgrind error from C loop filter - // due to access uninitialized memory in frame border. It could be - // removed if border is totally removed. - vpx_memset(int_fb_list->int_fb[i].data, 0, min_size); - int_fb_list->int_fb[i].size = min_size; - } - - fb->data = int_fb_list->int_fb[i].data; - fb->size = int_fb_list->int_fb[i].size; - int_fb_list->int_fb[i].in_use = 1; - - // Set the frame buffer's private data to point at the internal frame buffer. 
- fb->priv = &int_fb_list->int_fb[i]; - return 0; -} - -int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) { - InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv; - (void)cb_priv; - if (int_fb) - int_fb->in_use = 0; - return 0; -} diff --git a/media/libvpx/vp9/common/vp9_frame_buffers.h b/media/libvpx/vp9/common/vp9_frame_buffers.h deleted file mode 100644 index e2cfe61b662..00000000000 --- a/media/libvpx/vp9/common/vp9_frame_buffers.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_FRAME_BUFFERS_H_ -#define VP9_COMMON_VP9_FRAME_BUFFERS_H_ - -#include "vpx/vpx_frame_buffer.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct InternalFrameBuffer { - uint8_t *data; - size_t size; - int in_use; -} InternalFrameBuffer; - -typedef struct InternalFrameBufferList { - int num_internal_frame_buffers; - InternalFrameBuffer *int_fb; -} InternalFrameBufferList; - -// Initializes |list|. Returns 0 on success. -int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list); - -// Free any data allocated to the frame buffers. -void vp9_free_internal_frame_buffers(InternalFrameBufferList *list); - -// Callback used by libvpx to request an external frame buffer. |cb_priv| -// Callback private data, which points to an InternalFrameBufferList. -// |min_size| is the minimum size in bytes needed to decode the next frame. -// |fb| pointer to the frame buffer. 
-int vp9_get_frame_buffer(void *cb_priv, size_t min_size, - vpx_codec_frame_buffer_t *fb); - -// Callback used by libvpx when there are no references to the frame buffer. -// |cb_priv| is not used. |fb| pointer to the frame buffer. -int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_FRAME_BUFFERS_H_ diff --git a/media/libvpx/vp9/common/vp9_prob.c b/media/libvpx/vp9/common/vp9_prob.c deleted file mode 100644 index a1befc63e88..00000000000 --- a/media/libvpx/vp9/common/vp9_prob.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vp9/common/vp9_prob.h" - -const uint8_t vp9_norm[256] = { - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - - -static unsigned int tree_merge_probs_impl(unsigned int i, - const vp9_tree_index *tree, - const vp9_prob *pre_probs, - const unsigned int *counts, - unsigned int count_sat, - unsigned int max_update, - vp9_prob *probs) { - const int l = tree[i]; - const unsigned int left_count = (l <= 0) - ? counts[-l] - : tree_merge_probs_impl(l, tree, pre_probs, counts, - count_sat, max_update, probs); - const int r = tree[i + 1]; - const unsigned int right_count = (r <= 0) - ? 
counts[-r] - : tree_merge_probs_impl(r, tree, pre_probs, counts, - count_sat, max_update, probs); - const unsigned int ct[2] = { left_count, right_count }; - probs[i >> 1] = merge_probs(pre_probs[i >> 1], ct, - count_sat, max_update); - return left_count + right_count; -} - -void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs, - const unsigned int *counts, unsigned int count_sat, - unsigned int max_update_factor, vp9_prob *probs) { - tree_merge_probs_impl(0, tree, pre_probs, counts, count_sat, - max_update_factor, probs); -} diff --git a/media/libvpx/vp9/common/vp9_prob.h b/media/libvpx/vp9/common/vp9_prob.h deleted file mode 100644 index fa0e36da472..00000000000 --- a/media/libvpx/vp9/common/vp9_prob.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_PROB_H_ -#define VP9_COMMON_VP9_PROB_H_ - -#include "./vpx_config.h" - -#include "vpx_ports/mem.h" -#include "vpx/vpx_integer.h" - -#include "vp9/common/vp9_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef uint8_t vp9_prob; - -#define MAX_PROB 255 - -#define vp9_prob_half ((vp9_prob) 128) - -typedef int8_t vp9_tree_index; - -#define TREE_SIZE(leaf_count) (2 * (leaf_count) - 2) - -#define vp9_complement(x) (255 - x) - -/* We build coding trees compactly in arrays. - Each node of the tree is a pair of vp9_tree_indices. - Array index often references a corresponding probability table. - Index <= 0 means done encoding/decoding and value = -Index, - Index > 0 means need another bit, specification at index. 
- Nonnegative indices are always even; processing begins at node 0. */ - -typedef const vp9_tree_index vp9_tree[]; - -static INLINE vp9_prob clip_prob(int p) { - return (p > 255) ? 255 : (p < 1) ? 1 : p; -} - -static INLINE vp9_prob get_prob(int num, int den) { - return (den == 0) ? 128u : clip_prob(((int64_t)num * 256 + (den >> 1)) / den); -} - -static INLINE vp9_prob get_binary_prob(int n0, int n1) { - return get_prob(n0, n0 + n1); -} - -/* This function assumes prob1 and prob2 are already within [1,255] range. */ -static INLINE vp9_prob weighted_prob(int prob1, int prob2, int factor) { - return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8); -} - -static INLINE vp9_prob merge_probs(vp9_prob pre_prob, - const unsigned int ct[2], - unsigned int count_sat, - unsigned int max_update_factor) { - const vp9_prob prob = get_binary_prob(ct[0], ct[1]); - const unsigned int count = MIN(ct[0] + ct[1], count_sat); - const unsigned int factor = max_update_factor * count / count_sat; - return weighted_prob(pre_prob, prob, factor); -} - -void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs, - const unsigned int *counts, unsigned int count_sat, - unsigned int max_update_factor, vp9_prob *probs); - - -DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_PROB_H_ diff --git a/media/libvpx/vp9/common/vp9_thread.c b/media/libvpx/vp9/common/vp9_thread.c deleted file mode 100644 index 1c6aec032fe..00000000000 --- a/media/libvpx/vp9/common/vp9_thread.c +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
-// ----------------------------------------------------------------------------- -// -// Multi-threaded worker -// -// Original source: -// http://git.chromium.org/webm/libwebp.git -// 100644 blob 264210ba2807e4da47eb5d18c04cf869d89b9784 src/utils/thread.c - -#include -#include // for memset() -#include "./vp9_thread.h" -#include "vpx_mem/vpx_mem.h" - -#if CONFIG_MULTITHREAD - -struct VP9WorkerImpl { - pthread_mutex_t mutex_; - pthread_cond_t condition_; - pthread_t thread_; -}; - -//------------------------------------------------------------------------------ - -static void execute(VP9Worker *const worker); // Forward declaration. - -static THREADFN thread_loop(void *ptr) { - VP9Worker *const worker = (VP9Worker*)ptr; - int done = 0; - while (!done) { - pthread_mutex_lock(&worker->impl_->mutex_); - while (worker->status_ == OK) { // wait in idling mode - pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); - } - if (worker->status_ == WORK) { - execute(worker); - worker->status_ = OK; - } else if (worker->status_ == NOT_OK) { // finish the worker - done = 1; - } - // signal to the main thread that we're done (for sync()) - pthread_cond_signal(&worker->impl_->condition_); - pthread_mutex_unlock(&worker->impl_->mutex_); - } - return THREAD_RETURN(NULL); // Thread is finished -} - -// main thread state control -static void change_state(VP9Worker *const worker, - VP9WorkerStatus new_status) { - // No-op when attempting to change state on a thread that didn't come up. - // Checking status_ without acquiring the lock first would result in a data - // race. 
- if (worker->impl_ == NULL) return; - - pthread_mutex_lock(&worker->impl_->mutex_); - if (worker->status_ >= OK) { - // wait for the worker to finish - while (worker->status_ != OK) { - pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); - } - // assign new status and release the working thread if needed - if (new_status != OK) { - worker->status_ = new_status; - pthread_cond_signal(&worker->impl_->condition_); - } - } - pthread_mutex_unlock(&worker->impl_->mutex_); -} - -#endif // CONFIG_MULTITHREAD - -//------------------------------------------------------------------------------ - -static void init(VP9Worker *const worker) { - memset(worker, 0, sizeof(*worker)); - worker->status_ = NOT_OK; -} - -static int sync(VP9Worker *const worker) { -#if CONFIG_MULTITHREAD - change_state(worker, OK); -#endif - assert(worker->status_ <= OK); - return !worker->had_error; -} - -static int reset(VP9Worker *const worker) { - int ok = 1; - worker->had_error = 0; - if (worker->status_ < OK) { -#if CONFIG_MULTITHREAD - worker->impl_ = (VP9WorkerImpl*)vpx_calloc(1, sizeof(*worker->impl_)); - if (worker->impl_ == NULL) { - return 0; - } - if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) { - goto Error; - } - if (pthread_cond_init(&worker->impl_->condition_, NULL)) { - pthread_mutex_destroy(&worker->impl_->mutex_); - goto Error; - } - pthread_mutex_lock(&worker->impl_->mutex_); - ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker); - if (ok) worker->status_ = OK; - pthread_mutex_unlock(&worker->impl_->mutex_); - if (!ok) { - pthread_mutex_destroy(&worker->impl_->mutex_); - pthread_cond_destroy(&worker->impl_->condition_); - Error: - vpx_free(worker->impl_); - worker->impl_ = NULL; - return 0; - } -#else - worker->status_ = OK; -#endif - } else if (worker->status_ > OK) { - ok = sync(worker); - } - assert(!ok || (worker->status_ == OK)); - return ok; -} - -static void execute(VP9Worker *const worker) { - if (worker->hook != NULL) { - 
worker->had_error |= !worker->hook(worker->data1, worker->data2); - } -} - -static void launch(VP9Worker *const worker) { -#if CONFIG_MULTITHREAD - change_state(worker, WORK); -#else - execute(worker); -#endif -} - -static void end(VP9Worker *const worker) { -#if CONFIG_MULTITHREAD - if (worker->impl_ != NULL) { - change_state(worker, NOT_OK); - pthread_join(worker->impl_->thread_, NULL); - pthread_mutex_destroy(&worker->impl_->mutex_); - pthread_cond_destroy(&worker->impl_->condition_); - vpx_free(worker->impl_); - worker->impl_ = NULL; - } -#else - worker->status_ = NOT_OK; - assert(worker->impl_ == NULL); -#endif - assert(worker->status_ == NOT_OK); -} - -//------------------------------------------------------------------------------ - -static VP9WorkerInterface g_worker_interface = { - init, reset, sync, launch, execute, end -}; - -int vp9_set_worker_interface(const VP9WorkerInterface* const winterface) { - if (winterface == NULL || - winterface->init == NULL || winterface->reset == NULL || - winterface->sync == NULL || winterface->launch == NULL || - winterface->execute == NULL || winterface->end == NULL) { - return 0; - } - g_worker_interface = *winterface; - return 1; -} - -const VP9WorkerInterface *vp9_get_worker_interface(void) { - return &g_worker_interface; -} - -//------------------------------------------------------------------------------ diff --git a/media/libvpx/vp9/common/vp9_thread.h b/media/libvpx/vp9/common/vp9_thread.h deleted file mode 100644 index 864579c03c3..00000000000 --- a/media/libvpx/vp9/common/vp9_thread.h +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
-// ----------------------------------------------------------------------------- -// -// Multi-threaded worker -// -// Original source: -// http://git.chromium.org/webm/libwebp.git -// 100644 blob 7bd451b124ae3b81596abfbcc823e3cb129d3a38 src/utils/thread.h - -#ifndef VP9_DECODER_VP9_THREAD_H_ -#define VP9_DECODER_VP9_THREAD_H_ - -#include "./vpx_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#if CONFIG_MULTITHREAD - -#if defined(_WIN32) -#include // NOLINT -#include // NOLINT -#include // NOLINT -typedef HANDLE pthread_t; -typedef CRITICAL_SECTION pthread_mutex_t; -typedef struct { - HANDLE waiting_sem_; - HANDLE received_sem_; - HANDLE signal_event_; -} pthread_cond_t; - -//------------------------------------------------------------------------------ -// simplistic pthread emulation layer - -// _beginthreadex requires __stdcall -#define THREADFN unsigned int __stdcall -#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val) - -static INLINE int pthread_create(pthread_t* const thread, const void* attr, - unsigned int (__stdcall *start)(void*), - void* arg) { - (void)attr; - *thread = (pthread_t)_beginthreadex(NULL, /* void *security */ - 0, /* unsigned stack_size */ - start, - arg, - 0, /* unsigned initflag */ - NULL); /* unsigned *thrdaddr */ - if (*thread == NULL) return 1; - SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL); - return 0; -} - -static INLINE int pthread_join(pthread_t thread, void** value_ptr) { - (void)value_ptr; - return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 || - CloseHandle(thread) == 0); -} - -// Mutex -static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, - void* mutexattr) { - (void)mutexattr; - InitializeCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { - return TryEnterCriticalSection(mutex) ? 
0 : EBUSY; -} - -static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { - EnterCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { - LeaveCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { - DeleteCriticalSection(mutex); - return 0; -} - -// Condition -static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { - int ok = 1; - ok &= (CloseHandle(condition->waiting_sem_) != 0); - ok &= (CloseHandle(condition->received_sem_) != 0); - ok &= (CloseHandle(condition->signal_event_) != 0); - return !ok; -} - -static INLINE int pthread_cond_init(pthread_cond_t *const condition, - void* cond_attr) { - (void)cond_attr; - condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL); - condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL); - condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); - if (condition->waiting_sem_ == NULL || - condition->received_sem_ == NULL || - condition->signal_event_ == NULL) { - pthread_cond_destroy(condition); - return 1; - } - return 0; -} - -static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { - int ok = 1; - if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { - // a thread is waiting in pthread_cond_wait: allow it to be notified - ok = SetEvent(condition->signal_event_); - // wait until the event is consumed so the signaler cannot consume - // the event via its own pthread_cond_wait. 
- ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != - WAIT_OBJECT_0); - } - return !ok; -} - -static INLINE int pthread_cond_wait(pthread_cond_t *const condition, - pthread_mutex_t *const mutex) { - int ok; - // note that there is a consumer available so the signal isn't dropped in - // pthread_cond_signal - if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) - return 1; - // now unlock the mutex so pthread_cond_signal may be issued - pthread_mutex_unlock(mutex); - ok = (WaitForSingleObject(condition->signal_event_, INFINITE) == - WAIT_OBJECT_0); - ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL); - pthread_mutex_lock(mutex); - return !ok; -} -#else // _WIN32 -#include // NOLINT -# define THREADFN void* -# define THREAD_RETURN(val) val -#endif - -#endif // CONFIG_MULTITHREAD - -// State of the worker thread object -typedef enum { - NOT_OK = 0, // object is unusable - OK, // ready to work - WORK // busy finishing the current task -} VP9WorkerStatus; - -// Function to be called by the worker thread. Takes two opaque pointers as -// arguments (data1 and data2), and should return false in case of error. -typedef int (*VP9WorkerHook)(void*, void*); - -// Platform-dependent implementation details for the worker. -typedef struct VP9WorkerImpl VP9WorkerImpl; - -// Synchronization object used to launch job in the worker thread -typedef struct { - VP9WorkerImpl *impl_; - VP9WorkerStatus status_; - VP9WorkerHook hook; // hook to call - void *data1; // first argument passed to 'hook' - void *data2; // second argument passed to 'hook' - int had_error; // return value of the last call to 'hook' -} VP9Worker; - -// The interface for all thread-worker related functions. All these functions -// must be implemented. -typedef struct { - // Must be called first, before any other method. - void (*init)(VP9Worker *const worker); - // Must be called to initialize the object and spawn the thread. Re-entrant. - // Will potentially launch the thread. 
Returns false in case of error. - int (*reset)(VP9Worker *const worker); - // Makes sure the previous work is finished. Returns true if worker->had_error - // was not set and no error condition was triggered by the working thread. - int (*sync)(VP9Worker *const worker); - // Triggers the thread to call hook() with data1 and data2 arguments. These - // hook/data1/data2 values can be changed at any time before calling this - // function, but not be changed afterward until the next call to Sync(). - void (*launch)(VP9Worker *const worker); - // This function is similar to launch() except that it calls the - // hook directly instead of using a thread. Convenient to bypass the thread - // mechanism while still using the VP9Worker structs. sync() must - // still be called afterward (for error reporting). - void (*execute)(VP9Worker *const worker); - // Kill the thread and terminate the object. To use the object again, one - // must call reset() again. - void (*end)(VP9Worker *const worker); -} VP9WorkerInterface; - -// Install a new set of threading functions, overriding the defaults. This -// should be done before any workers are started, i.e., before any encoding or -// decoding takes place. The contents of the interface struct are copied, it -// is safe to free the corresponding memory after this call. This function is -// not thread-safe. Return false in case of invalid pointer or methods. -int vp9_set_worker_interface(const VP9WorkerInterface *const winterface); - -// Retrieve the currently set thread worker interface. 
-const VP9WorkerInterface *vp9_get_worker_interface(void); - -//------------------------------------------------------------------------------ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_THREAD_H_ diff --git a/media/libvpx/vp9/common/x86/vp9_high_intrapred_sse2.asm b/media/libvpx/vp9/common/x86/vp9_high_intrapred_sse2.asm deleted file mode 100644 index ff450711ec3..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_high_intrapred_sse2.asm +++ /dev/null @@ -1,476 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA -pw_4: times 8 dw 4 -pw_8: times 8 dw 8 -pw_16: times 4 dd 16 -pw_32: times 4 dd 32 - -SECTION .text -INIT_MMX sse -cglobal high_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset - GET_GOT goffsetq - - movq m0, [aboveq] - movq m2, [leftq] - DEFINE_ARGS dst, stride, one - mov oned, 0x0001 - pxor m1, m1 - movd m3, oned - pshufw m3, m3, 0x0 - paddw m0, m2 - pmaddwd m0, m3 - packssdw m0, m1 - pmaddwd m0, m3 - paddw m0, [GLOBAL(pw_4)] - psraw m0, 3 - pshufw m0, m0, 0x0 - movq [dstq ], m0 - movq [dstq+strideq*2], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq*2], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal high_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - mova m2, [leftq] - DEFINE_ARGS dst, stride, stride3, one - mov oned, 0x00010001 - lea stride3q, [strideq*3] - movd m3, oned - pshufd m3, m3, 0x0 - paddw m0, m2 - pmaddwd m0, m3 - packssdw m0, m1 - pmaddwd m0, m3 - packssdw m0, m1 - pmaddwd m0, m3 - paddw 
m0, [GLOBAL(pw_8)] - psrlw m0, 4 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - mova [dstq ], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+stride3q*2], m0 - lea dstq, [dstq+strideq*8] - mova [dstq ], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+stride3q*2], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal high_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - mova m3, [aboveq+16] - mova m2, [leftq] - mova m4, [leftq+16] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 4 - paddw m0, m2 - paddw m0, m3 - paddw m0, m4 - movhlps m2, m0 - paddw m0, m2 - punpcklwd m0, m1 - movhlps m2, m0 - paddd m0, m2 - punpckldq m0, m1 - movhlps m2, m0 - paddd m0, m2 - paddd m0, [GLOBAL(pw_16)] - psrad m0, 5 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq +16], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2 +16], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+strideq*4 +16], m0 - mova [dstq+stride3q*2 ], m0 - mova [dstq+stride3q*2+16], m0 - lea dstq, [dstq+strideq*8] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -%if ARCH_X86_64 -INIT_XMM sse2 -cglobal high_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - mova m2, [aboveq+16] - mova m3, [aboveq+32] - mova m4, [aboveq+48] - mova m5, [leftq] - mova m6, [leftq+16] - mova m7, [leftq+32] - mova m8, [leftq+48] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 8 - paddw m0, m2 - paddw m0, m3 - paddw m0, m4 - paddw m0, m5 - paddw m0, m6 - paddw m0, m7 - paddw m0, m8 - movhlps m2, m0 - paddw m0, m2 - punpcklwd m0, m1 - movhlps m2, m0 - paddd m0, m2 - punpckldq m0, m1 - movhlps m2, m0 - paddd m0, m2 - paddd m0, [GLOBAL(pw_32)] - psrad m0, 6 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq +16 ], m0 - 
mova [dstq +32 ], m0 - mova [dstq +48 ], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16 ], m0 - mova [dstq+strideq*2+32 ], m0 - mova [dstq+strideq*2+48 ], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+strideq*4+16 ], m0 - mova [dstq+strideq*4+32 ], m0 - mova [dstq+strideq*4+48 ], m0 - mova [dstq+stride3q*2 ], m0 - mova [dstq+stride3q*2 +16], m0 - mova [dstq+stride3q*2 +32], m0 - mova [dstq+stride3q*2 +48], m0 - lea dstq, [dstq+strideq*8] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET -%endif - -INIT_MMX sse -cglobal high_v_predictor_4x4, 3, 3, 1, dst, stride, above - movq m0, [aboveq] - movq [dstq ], m0 - movq [dstq+strideq*2], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq*2], m0 - RET - -INIT_XMM sse2 -cglobal high_v_predictor_8x8, 3, 3, 1, dst, stride, above - mova m0, [aboveq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - mova [dstq ], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+stride3q*2], m0 - lea dstq, [dstq+strideq*8] - mova [dstq ], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+stride3q*2], m0 - RET - -INIT_XMM sse2 -cglobal high_v_predictor_16x16, 3, 4, 2, dst, stride, above - mova m0, [aboveq] - mova m1, [aboveq+16] - DEFINE_ARGS dst, stride, stride3, nlines4 - lea stride3q, [strideq*3] - mov nlines4d, 4 -.loop: - mova [dstq ], m0 - mova [dstq +16], m1 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2 +16], m1 - mova [dstq+strideq*4 ], m0 - mova [dstq+strideq*4 +16], m1 - mova [dstq+stride3q*2 ], m0 - mova [dstq+stride3q*2+16], m1 - lea dstq, [dstq+strideq*8] - dec nlines4d - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal high_v_predictor_32x32, 3, 4, 4, dst, stride, above - mova m0, [aboveq] - mova m1, [aboveq+16] - mova m2, [aboveq+32] - mova m3, [aboveq+48] - DEFINE_ARGS dst, stride, stride3, nlines4 - lea stride3q, [strideq*3] - mov nlines4d, 8 -.loop: - mova [dstq ], m0 - mova [dstq +16], m1 - mova [dstq +32], m2 - mova [dstq +48], 
m3 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2 +16], m1 - mova [dstq+strideq*2 +32], m2 - mova [dstq+strideq*2 +48], m3 - mova [dstq+strideq*4 ], m0 - mova [dstq+strideq*4 +16], m1 - mova [dstq+strideq*4 +32], m2 - mova [dstq+strideq*4 +48], m3 - mova [dstq+stride3q*2 ], m0 - mova [dstq+stride3q*2 +16], m1 - mova [dstq+stride3q*2 +32], m2 - mova [dstq+stride3q*2 +48], m3 - lea dstq, [dstq+strideq*8] - dec nlines4d - jnz .loop - REP_RET - -INIT_MMX sse -cglobal high_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one - movd m1, [aboveq-2] - movq m0, [aboveq] - pshufw m1, m1, 0x0 - ; Get the values to compute the maximum value at this bit depth - mov oned, 1 - movd m3, oned - movd m4, bpsd - pshufw m3, m3, 0x0 - DEFINE_ARGS dst, stride, line, left - mov lineq, -2 - mova m2, m3 - psllw m3, m4 - add leftq, 8 - psubw m3, m2 ; max possible value - pxor m4, m4 ; min possible value - psubw m0, m1 -.loop: - movq m1, [leftq+lineq*4] - movq m2, [leftq+lineq*4+2] - pshufw m1, m1, 0x0 - pshufw m2, m2, 0x0 - paddw m1, m0 - paddw m2, m0 - ;Clamp to the bit-depth - pminsw m1, m3 - pminsw m2, m3 - pmaxsw m1, m4 - pmaxsw m2, m4 - ;Store the values - movq [dstq ], m1 - movq [dstq+strideq*2], m2 - lea dstq, [dstq+strideq*4] - inc lineq - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal high_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one - movd m1, [aboveq-2] - mova m0, [aboveq] - pshuflw m1, m1, 0x0 - ; Get the values to compute the maximum value at this bit depth - mov oned, 1 - pxor m3, m3 - pxor m4, m4 - pinsrw m3, oned, 0 - pinsrw m4, bpsd, 0 - pshuflw m3, m3, 0x0 - DEFINE_ARGS dst, stride, line, left - punpcklqdq m3, m3 - mov lineq, -4 - mova m2, m3 - punpcklqdq m1, m1 - psllw m3, m4 - add leftq, 16 - psubw m3, m2 ; max possible value - pxor m4, m4 ; min possible value - psubw m0, m1 -.loop: - movd m1, [leftq+lineq*4] - movd m2, [leftq+lineq*4+2] - pshuflw m1, m1, 0x0 - pshuflw m2, m2, 0x0 - punpcklqdq m1, m1 - punpcklqdq m2, m2 - paddw m1, m0 - 
paddw m2, m0 - ;Clamp to the bit-depth - pminsw m1, m3 - pminsw m2, m3 - pmaxsw m1, m4 - pmaxsw m2, m4 - ;Store the values - mova [dstq ], m1 - mova [dstq+strideq*2], m2 - lea dstq, [dstq+strideq*4] - inc lineq - jnz .loop - REP_RET - -%if ARCH_X86_64 -INIT_XMM sse2 -cglobal high_tm_predictor_16x16, 5, 6, 8, dst, stride, above, left, bps, one - movd m2, [aboveq-2] - mova m0, [aboveq] - mova m1, [aboveq+16] - pshuflw m2, m2, 0x0 - ; Get the values to compute the maximum value at this bit depth - mov oned, 1 - pxor m7, m7 - pxor m8, m8 - pinsrw m7, oned, 0 - pinsrw m8, bpsd, 0 - pshuflw m7, m7, 0x0 - DEFINE_ARGS dst, stride, line, left - punpcklqdq m7, m7 - mov lineq, -8 - mova m5, m7 - punpcklqdq m2, m2 - psllw m7, m8 - add leftq, 32 - psubw m7, m5 ; max possible value - pxor m8, m8 ; min possible value - psubw m0, m2 - psubw m1, m2 -.loop: - movd m2, [leftq+lineq*4] - movd m3, [leftq+lineq*4+2] - pshuflw m2, m2, 0x0 - pshuflw m3, m3, 0x0 - punpcklqdq m2, m2 - punpcklqdq m3, m3 - paddw m4, m2, m0 - paddw m5, m3, m0 - paddw m2, m1 - paddw m3, m1 - ;Clamp to the bit-depth - pminsw m4, m7 - pminsw m5, m7 - pminsw m2, m7 - pminsw m3, m7 - pmaxsw m4, m8 - pmaxsw m5, m8 - pmaxsw m2, m8 - pmaxsw m3, m8 - ;Store the values - mova [dstq ], m4 - mova [dstq+strideq*2 ], m5 - mova [dstq +16], m2 - mova [dstq+strideq*2+16], m3 - lea dstq, [dstq+strideq*4] - inc lineq - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal high_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one - movd m0, [aboveq-2] - mova m1, [aboveq] - mova m2, [aboveq+16] - mova m3, [aboveq+32] - mova m4, [aboveq+48] - pshuflw m0, m0, 0x0 - ; Get the values to compute the maximum value at this bit depth - mov oned, 1 - pxor m10, m10 - pxor m11, m11 - pinsrw m10, oned, 0 - pinsrw m11, bpsd, 0 - pshuflw m10, m10, 0x0 - DEFINE_ARGS dst, stride, line, left - punpcklqdq m10, m10 - mov lineq, -16 - mova m5, m10 - punpcklqdq m0, m0 - psllw m10, m11 - add leftq, 64 - psubw m10, m5 ; max possible value - pxor 
m11, m11 ; min possible value - psubw m1, m0 - psubw m2, m0 - psubw m3, m0 - psubw m4, m0 -.loop: - movd m5, [leftq+lineq*4] - movd m6, [leftq+lineq*4+2] - pshuflw m5, m5, 0x0 - pshuflw m6, m6, 0x0 - punpcklqdq m5, m5 - punpcklqdq m6, m6 - paddw m7, m5, m1 - paddw m8, m5, m2 - paddw m9, m5, m3 - paddw m5, m4 - ;Clamp these values to the bit-depth - pminsw m7, m10 - pminsw m8, m10 - pminsw m9, m10 - pminsw m5, m10 - pmaxsw m7, m11 - pmaxsw m8, m11 - pmaxsw m9, m11 - pmaxsw m5, m11 - ;Store these values - mova [dstq ], m7 - mova [dstq +16], m8 - mova [dstq +32], m9 - mova [dstq +48], m5 - paddw m7, m6, m1 - paddw m8, m6, m2 - paddw m9, m6, m3 - paddw m6, m4 - ;Clamp these values to the bit-depth - pminsw m7, m10 - pminsw m8, m10 - pminsw m9, m10 - pminsw m6, m10 - pmaxsw m7, m11 - pmaxsw m8, m11 - pmaxsw m9, m11 - pmaxsw m6, m11 - ;Store these values - mova [dstq+strideq*2 ], m7 - mova [dstq+strideq*2+16], m8 - mova [dstq+strideq*2+32], m9 - mova [dstq+strideq*2+48], m6 - lea dstq, [dstq+strideq*4] - inc lineq - jnz .loop - REP_RET -%endif diff --git a/media/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c b/media/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c deleted file mode 100644 index 32e4b2012f7..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c +++ /dev/null @@ -1,1119 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include // SSE2 - -#include "./vp9_rtcd.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vpx_ports/emmintrin_compat.h" - -static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) { - __m128i ubounded; - __m128i lbounded; - __m128i retval; - - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi16(1); - const __m128i t80 = _mm_slli_epi16(_mm_set1_epi16(0x80), bd - 8); - const __m128i max = _mm_subs_epi16( - _mm_subs_epi16(_mm_slli_epi16(one, bd), one), t80); - const __m128i min = _mm_subs_epi16(zero, t80); - ubounded = _mm_cmpgt_epi16(value, max); - lbounded = _mm_cmplt_epi16(value, min); - retval = _mm_andnot_si128(_mm_or_si128(ubounded, lbounded), value); - ubounded = _mm_and_si128(ubounded, max); - lbounded = _mm_and_si128(lbounded, min); - retval = _mm_or_si128(retval, ubounded); - retval = _mm_or_si128(retval, lbounded); - return retval; -} - -// TODO(debargha, peter): Break up large functions into smaller ones -// in this file. -static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s, - int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int bd) { - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi16(1); - const __m128i blimit = _mm_slli_epi16( - _mm_unpacklo_epi8( - _mm_load_si128((const __m128i *)_blimit), zero), bd - 8); - const __m128i limit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero), bd - 8); - const __m128i thresh = _mm_slli_epi16( - _mm_unpacklo_epi8( - _mm_load_si128((const __m128i *)_thresh), zero), bd - 8); - __m128i q7, p7, q6, p6, q5, p5, q4, p4, q3, p3, q2, p2, q1, p1, q0, p0; - __m128i mask, hev, flat, flat2, abs_p1p0, abs_q1q0; - __m128i ps1, qs1, ps0, qs0; - __m128i abs_p0q0, abs_p1q1, ffff, work; - __m128i filt, work_a, filter1, filter2; - __m128i flat2_q6, flat2_p6, flat2_q5, flat2_p5, flat2_q4, flat2_p4; - __m128i flat2_q3, flat2_p3, flat2_q2, flat2_p2, flat2_q1, flat2_p1; - __m128i 
flat2_q0, flat2_p0; - __m128i flat_q2, flat_p2, flat_q1, flat_p1, flat_q0, flat_p0; - __m128i pixelFilter_p, pixelFilter_q; - __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0; - __m128i sum_p7, sum_q7, sum_p3, sum_q3; - __m128i t4, t3, t80, t1; - __m128i eight, four; - - q4 = _mm_load_si128((__m128i *)(s + 4 * p)); - p4 = _mm_load_si128((__m128i *)(s - 5 * p)); - q3 = _mm_load_si128((__m128i *)(s + 3 * p)); - p3 = _mm_load_si128((__m128i *)(s - 4 * p)); - q2 = _mm_load_si128((__m128i *)(s + 2 * p)); - p2 = _mm_load_si128((__m128i *)(s - 3 * p)); - q1 = _mm_load_si128((__m128i *)(s + 1 * p)); - p1 = _mm_load_si128((__m128i *)(s - 2 * p)); - q0 = _mm_load_si128((__m128i *)(s + 0 * p)); - p0 = _mm_load_si128((__m128i *)(s - 1 * p)); - - // high_filter_mask - abs_p1p0 = _mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1)); - abs_q1q0 = _mm_or_si128(_mm_subs_epu16(q1, q0), _mm_subs_epu16(q0, q1)); - - ffff = _mm_cmpeq_epi16(abs_p1p0, abs_p1p0); - - abs_p0q0 = _mm_or_si128(_mm_subs_epu16(p0, q0), _mm_subs_epu16(q0, p0)); - abs_p1q1 = _mm_or_si128(_mm_subs_epu16(p1, q1), _mm_subs_epu16(q1, p1)); - - // high_hev_mask (in C code this is actually called from high_filter4) - flat = _mm_max_epi16(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu16(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi16(hev, zero), ffff); - - abs_p0q0 =_mm_adds_epu16(abs_p0q0, abs_p0q0); // abs(p0 - q0) * 2 - abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); // abs(p1 - q1) / 2 - mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff); - mask = _mm_and_si128(mask, _mm_adds_epu16(limit, one)); - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p1, p0), - _mm_subs_epu16(p0, p1)), - _mm_or_si128(_mm_subs_epu16(q1, q0), - _mm_subs_epu16(q0, q1))); - mask = _mm_max_epi16(work, mask); - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p2, p1), - _mm_subs_epu16(p1, p2)), - _mm_or_si128(_mm_subs_epu16(q2, q1), - _mm_subs_epu16(q1, q2))); - mask = 
_mm_max_epi16(work, mask); - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p3, p2), - _mm_subs_epu16(p2, p3)), - _mm_or_si128(_mm_subs_epu16(q3, q2), - _mm_subs_epu16(q2, q3))); - mask = _mm_max_epi16(work, mask); - - mask = _mm_subs_epu16(mask, limit); - mask = _mm_cmpeq_epi16(mask, zero); // return ~mask - - // lp filter - // high_filter4 - t4 = _mm_set1_epi16(4); - t3 = _mm_set1_epi16(3); - t80 = _mm_slli_epi16(_mm_set1_epi16(0x80), bd - 8); - t1 = _mm_set1_epi16(0x1); - - ps1 = _mm_subs_epi16(p1, t80); - qs1 = _mm_subs_epi16(q1, t80); - ps0 = _mm_subs_epi16(p0, t80); - qs0 = _mm_subs_epi16(q0, t80); - - filt = _mm_and_si128( - signed_char_clamp_bd_sse2(_mm_subs_epi16(ps1, qs1), bd), hev); - work_a = _mm_subs_epi16(qs0, ps0); - filt = _mm_adds_epi16(filt, work_a); - filt = _mm_adds_epi16(filt, work_a); - filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd); - filt = _mm_and_si128(filt, mask); - - filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd); - filter2 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t3), bd); - - // Filter1 >> 3 - filter1 = _mm_srai_epi16(filter1, 0x3); - filter2 = _mm_srai_epi16(filter2, 0x3); - - qs0 = _mm_adds_epi16( - signed_char_clamp_bd_sse2(_mm_subs_epi16(qs0, filter1), bd), - t80); - ps0 = _mm_adds_epi16( - signed_char_clamp_bd_sse2(_mm_adds_epi16(ps0, filter2), bd), - t80); - filt = _mm_adds_epi16(filter1, t1); - filt = _mm_srai_epi16(filt, 1); - filt = _mm_andnot_si128(hev, filt); - - qs1 = _mm_adds_epi16( - signed_char_clamp_bd_sse2(_mm_subs_epi16(qs1, filt), bd), - t80); - ps1 = _mm_adds_epi16( - signed_char_clamp_bd_sse2(_mm_adds_epi16(ps1, filt), bd), - t80); - // end high_filter4 - // loopfilter done - - // high_flat_mask4 - flat = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p2, p0), - _mm_subs_epu16(p0, p2)), - _mm_or_si128(_mm_subs_epu16(p3, p0), - _mm_subs_epu16(p0, p3))); - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(q2, q0), - _mm_subs_epu16(q0, q2)), - _mm_or_si128(_mm_subs_epu16(q3, 
q0), - _mm_subs_epu16(q0, q3))); - flat = _mm_max_epi16(work, flat); - work = _mm_max_epi16(abs_p1p0, abs_q1q0); - flat = _mm_max_epi16(work, flat); - flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, bd - 8)); - flat = _mm_cmpeq_epi16(flat, zero); - // end flat_mask4 - - // flat & mask = flat && mask (as used in filter8) - // (because, in both vars, each block of 16 either all 1s or all 0s) - flat = _mm_and_si128(flat, mask); - - p5 = _mm_load_si128((__m128i *)(s - 6 * p)); - q5 = _mm_load_si128((__m128i *)(s + 5 * p)); - p6 = _mm_load_si128((__m128i *)(s - 7 * p)); - q6 = _mm_load_si128((__m128i *)(s + 6 * p)); - p7 = _mm_load_si128((__m128i *)(s - 8 * p)); - q7 = _mm_load_si128((__m128i *)(s + 7 * p)); - - // high_flat_mask5 (arguments passed in are p0, q0, p4-p7, q4-q7 - // but referred to as p0-p4 & q0-q4 in fn) - flat2 = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p4, p0), - _mm_subs_epu16(p0, p4)), - _mm_or_si128(_mm_subs_epu16(q4, q0), - _mm_subs_epu16(q0, q4))); - - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p5, p0), - _mm_subs_epu16(p0, p5)), - _mm_or_si128(_mm_subs_epu16(q5, q0), - _mm_subs_epu16(q0, q5))); - flat2 = _mm_max_epi16(work, flat2); - - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p6, p0), - _mm_subs_epu16(p0, p6)), - _mm_or_si128(_mm_subs_epu16(q6, q0), - _mm_subs_epu16(q0, q6))); - flat2 = _mm_max_epi16(work, flat2); - - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p7, p0), - _mm_subs_epu16(p0, p7)), - _mm_or_si128(_mm_subs_epu16(q7, q0), - _mm_subs_epu16(q0, q7))); - flat2 = _mm_max_epi16(work, flat2); - - flat2 = _mm_subs_epu16(flat2, _mm_slli_epi16(one, bd - 8)); - flat2 = _mm_cmpeq_epi16(flat2, zero); - flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask - // end high_flat_mask5 - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // flat and wide flat calculations - eight = _mm_set1_epi16(8); - four = _mm_set1_epi16(4); - - pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6, p5), - _mm_add_epi16(p4, p3)); - 
pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6, q5), - _mm_add_epi16(q4, q3)); - - pixetFilter_p2p1p0 = _mm_add_epi16(p0, _mm_add_epi16(p2, p1)); - pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); - - pixetFilter_q2q1q0 = _mm_add_epi16(q0, _mm_add_epi16(q2, q1)); - pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); - pixelFilter_p = _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, - pixelFilter_q)); - pixetFilter_p2p1p0 = _mm_add_epi16(four, - _mm_add_epi16(pixetFilter_p2p1p0, - pixetFilter_q2q1q0)); - flat2_p0 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(p7, p0)), 4); - flat2_q0 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(q7, q0)), 4); - flat_p0 = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(p3, p0)), 3); - flat_q0 = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(q3, q0)), 3); - - sum_p7 = _mm_add_epi16(p7, p7); - sum_q7 = _mm_add_epi16(q7, q7); - sum_p3 = _mm_add_epi16(p3, p3); - sum_q3 = _mm_add_epi16(q3, q3); - - pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6); - flat2_p1 = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p1)), 4); - flat2_q1 = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q1)), 4); - - pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2); - pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2); - flat_p1 = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(sum_p3, p1)), 3); - flat_q1 = _mm_srli_epi16(_mm_add_epi16(pixetFilter_q2q1q0, - _mm_add_epi16(sum_q3, q1)), 3); - - sum_p7 = _mm_add_epi16(sum_p7, p7); - sum_q7 = _mm_add_epi16(sum_q7, q7); - sum_p3 = _mm_add_epi16(sum_p3, p3); - sum_q3 = _mm_add_epi16(sum_q3, q3); - - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5); - flat2_p2 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p2)), 4); - 
flat2_q2 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q2)), 4); - - pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1); - pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1); - flat_p2 = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(sum_p3, p2)), 3); - flat_q2 = _mm_srli_epi16(_mm_add_epi16(pixetFilter_q2q1q0, - _mm_add_epi16(sum_q3, q2)), 3); - - sum_p7 = _mm_add_epi16(sum_p7, p7); - sum_q7 = _mm_add_epi16(sum_q7, q7); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4); - flat2_p3 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p3)), 4); - flat2_q3 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q3)), 4); - - sum_p7 = _mm_add_epi16(sum_p7, p7); - sum_q7 = _mm_add_epi16(sum_q7, q7); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3); - flat2_p4 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p4)), 4); - flat2_q4 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q4)), 4); - - sum_p7 = _mm_add_epi16(sum_p7, p7); - sum_q7 = _mm_add_epi16(sum_q7, q7); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2); - flat2_p5 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p5)), 4); - flat2_q5 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q5)), 4); - - sum_p7 = _mm_add_epi16(sum_p7, p7); - sum_q7 = _mm_add_epi16(sum_q7, q7); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1); - flat2_p6 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p6)), 4); - flat2_q6 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q6)), 4); - - // wide flat - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - // high_filter8 - p2 = _mm_andnot_si128(flat, p2); - // p2 
remains unchanged if !(flat && mask) - flat_p2 = _mm_and_si128(flat, flat_p2); - // when (flat && mask) - p2 = _mm_or_si128(p2, flat_p2); // full list of p2 values - q2 = _mm_andnot_si128(flat, q2); - flat_q2 = _mm_and_si128(flat, flat_q2); - q2 = _mm_or_si128(q2, flat_q2); // full list of q2 values - - ps1 = _mm_andnot_si128(flat, ps1); - // p1 takes the value assigned to in in filter4 if !(flat && mask) - flat_p1 = _mm_and_si128(flat, flat_p1); - // when (flat && mask) - p1 = _mm_or_si128(ps1, flat_p1); // full list of p1 values - qs1 = _mm_andnot_si128(flat, qs1); - flat_q1 = _mm_and_si128(flat, flat_q1); - q1 = _mm_or_si128(qs1, flat_q1); // full list of q1 values - - ps0 = _mm_andnot_si128(flat, ps0); - // p0 takes the value assigned to in in filter4 if !(flat && mask) - flat_p0 = _mm_and_si128(flat, flat_p0); - // when (flat && mask) - p0 = _mm_or_si128(ps0, flat_p0); // full list of p0 values - qs0 = _mm_andnot_si128(flat, qs0); - flat_q0 = _mm_and_si128(flat, flat_q0); - q0 = _mm_or_si128(qs0, flat_q0); // full list of q0 values - // end high_filter8 - - // high_filter16 - p6 = _mm_andnot_si128(flat2, p6); - // p6 remains unchanged if !(flat2 && flat && mask) - flat2_p6 = _mm_and_si128(flat2, flat2_p6); - // get values for when (flat2 && flat && mask) - p6 = _mm_or_si128(p6, flat2_p6); // full list of p6 values - q6 = _mm_andnot_si128(flat2, q6); - // q6 remains unchanged if !(flat2 && flat && mask) - flat2_q6 = _mm_and_si128(flat2, flat2_q6); - // get values for when (flat2 && flat && mask) - q6 = _mm_or_si128(q6, flat2_q6); // full list of q6 values - _mm_store_si128((__m128i *)(s - 7 * p), p6); - _mm_store_si128((__m128i *)(s + 6 * p), q6); - - p5 = _mm_andnot_si128(flat2, p5); - // p5 remains unchanged if !(flat2 && flat && mask) - flat2_p5 = _mm_and_si128(flat2, flat2_p5); - // get values for when (flat2 && flat && mask) - p5 = _mm_or_si128(p5, flat2_p5); - // full list of p5 values - q5 = _mm_andnot_si128(flat2, q5); - // q5 remains unchanged if 
!(flat2 && flat && mask) - flat2_q5 = _mm_and_si128(flat2, flat2_q5); - // get values for when (flat2 && flat && mask) - q5 = _mm_or_si128(q5, flat2_q5); - // full list of q5 values - _mm_store_si128((__m128i *)(s - 6 * p), p5); - _mm_store_si128((__m128i *)(s + 5 * p), q5); - - p4 = _mm_andnot_si128(flat2, p4); - // p4 remains unchanged if !(flat2 && flat && mask) - flat2_p4 = _mm_and_si128(flat2, flat2_p4); - // get values for when (flat2 && flat && mask) - p4 = _mm_or_si128(p4, flat2_p4); // full list of p4 values - q4 = _mm_andnot_si128(flat2, q4); - // q4 remains unchanged if !(flat2 && flat && mask) - flat2_q4 = _mm_and_si128(flat2, flat2_q4); - // get values for when (flat2 && flat && mask) - q4 = _mm_or_si128(q4, flat2_q4); // full list of q4 values - _mm_store_si128((__m128i *)(s - 5 * p), p4); - _mm_store_si128((__m128i *)(s + 4 * p), q4); - - p3 = _mm_andnot_si128(flat2, p3); - // p3 takes value from high_filter8 if !(flat2 && flat && mask) - flat2_p3 = _mm_and_si128(flat2, flat2_p3); - // get values for when (flat2 && flat && mask) - p3 = _mm_or_si128(p3, flat2_p3); // full list of p3 values - q3 = _mm_andnot_si128(flat2, q3); - // q3 takes value from high_filter8 if !(flat2 && flat && mask) - flat2_q3 = _mm_and_si128(flat2, flat2_q3); - // get values for when (flat2 && flat && mask) - q3 = _mm_or_si128(q3, flat2_q3); // full list of q3 values - _mm_store_si128((__m128i *)(s - 4 * p), p3); - _mm_store_si128((__m128i *)(s + 3 * p), q3); - - p2 = _mm_andnot_si128(flat2, p2); - // p2 takes value from high_filter8 if !(flat2 && flat && mask) - flat2_p2 = _mm_and_si128(flat2, flat2_p2); - // get values for when (flat2 && flat && mask) - p2 = _mm_or_si128(p2, flat2_p2); - // full list of p2 values - q2 = _mm_andnot_si128(flat2, q2); - // q2 takes value from high_filter8 if !(flat2 && flat && mask) - flat2_q2 = _mm_and_si128(flat2, flat2_q2); - // get values for when (flat2 && flat && mask) - q2 = _mm_or_si128(q2, flat2_q2); // full list of q2 values - 
_mm_store_si128((__m128i *)(s - 3 * p), p2); - _mm_store_si128((__m128i *)(s + 2 * p), q2); - - p1 = _mm_andnot_si128(flat2, p1); - // p1 takes value from high_filter8 if !(flat2 && flat && mask) - flat2_p1 = _mm_and_si128(flat2, flat2_p1); - // get values for when (flat2 && flat && mask) - p1 = _mm_or_si128(p1, flat2_p1); // full list of p1 values - q1 = _mm_andnot_si128(flat2, q1); - // q1 takes value from high_filter8 if !(flat2 && flat && mask) - flat2_q1 = _mm_and_si128(flat2, flat2_q1); - // get values for when (flat2 && flat && mask) - q1 = _mm_or_si128(q1, flat2_q1); // full list of q1 values - _mm_store_si128((__m128i *)(s - 2 * p), p1); - _mm_store_si128((__m128i *)(s + 1 * p), q1); - - p0 = _mm_andnot_si128(flat2, p0); - // p0 takes value from high_filter8 if !(flat2 && flat && mask) - flat2_p0 = _mm_and_si128(flat2, flat2_p0); - // get values for when (flat2 && flat && mask) - p0 = _mm_or_si128(p0, flat2_p0); // full list of p0 values - q0 = _mm_andnot_si128(flat2, q0); - // q0 takes value from high_filter8 if !(flat2 && flat && mask) - flat2_q0 = _mm_and_si128(flat2, flat2_q0); - // get values for when (flat2 && flat && mask) - q0 = _mm_or_si128(q0, flat2_q0); // full list of q0 values - _mm_store_si128((__m128i *)(s - 1 * p), p0); - _mm_store_si128((__m128i *)(s - 0 * p), q0); -} - -static void highbd_mb_lpf_horizontal_edge_w_sse2_16(uint16_t *s, - int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int bd) { - highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd); - highbd_mb_lpf_horizontal_edge_w_sse2_8(s + 8, p, _blimit, _limit, _thresh, - bd); -} - -// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly. 
-void vp9_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int count, int bd) { - if (count == 1) - highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd); - else - highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd); -} - -void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int count, int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_op2, 16); - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_op1, 16); - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_op0, 16); - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_oq2, 16); - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_oq1, 16); - DECLARE_ALIGNED_ARRAY(16, uint16_t, flat_oq0, 16); - const __m128i zero = _mm_set1_epi16(0); - const __m128i blimit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero), - bd - 8); - const __m128i limit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero), - bd - 8); - const __m128i thresh = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero), - bd - 8); - __m128i mask, hev, flat; - __m128i p3 = _mm_load_si128((__m128i *)(s - 4 * p)); - __m128i q3 = _mm_load_si128((__m128i *)(s + 3 * p)); - __m128i p2 = _mm_load_si128((__m128i *)(s - 3 * p)); - __m128i q2 = _mm_load_si128((__m128i *)(s + 2 * p)); - __m128i p1 = _mm_load_si128((__m128i *)(s - 2 * p)); - __m128i q1 = _mm_load_si128((__m128i *)(s + 1 * p)); - __m128i p0 = _mm_load_si128((__m128i *)(s - 1 * p)); - __m128i q0 = _mm_load_si128((__m128i *)(s + 0 * p)); - const __m128i one = _mm_set1_epi16(1); - const __m128i ffff = _mm_cmpeq_epi16(one, one); - __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work; - const __m128i four = _mm_set1_epi16(4); - __m128i workp_a, workp_b, workp_shft; - - const __m128i t4 = _mm_set1_epi16(4); - const __m128i t3 = 
_mm_set1_epi16(3); - const __m128i t80 = _mm_slli_epi16(_mm_set1_epi16(0x80), bd - 8); - const __m128i t1 = _mm_set1_epi16(0x1); - const __m128i ps1 = _mm_subs_epi16(p1, t80); - const __m128i ps0 = _mm_subs_epi16(p0, t80); - const __m128i qs0 = _mm_subs_epi16(q0, t80); - const __m128i qs1 = _mm_subs_epi16(q1, t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - - (void)count; - - // filter_mask and hev_mask - abs_p1p0 = _mm_or_si128(_mm_subs_epu16(p1, p0), - _mm_subs_epu16(p0, p1)); - abs_q1q0 = _mm_or_si128(_mm_subs_epu16(q1, q0), - _mm_subs_epu16(q0, q1)); - - abs_p0q0 = _mm_or_si128(_mm_subs_epu16(p0, q0), - _mm_subs_epu16(q0, p0)); - abs_p1q1 = _mm_or_si128(_mm_subs_epu16(p1, q1), - _mm_subs_epu16(q1, p1)); - flat = _mm_max_epi16(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu16(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi16(hev, zero), ffff); - - abs_p0q0 =_mm_adds_epu16(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); - mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - // So taking maximums continues to work: - mask = _mm_and_si128(mask, _mm_adds_epu16(limit, one)); - mask = _mm_max_epi16(abs_p1p0, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - mask = _mm_max_epi16(abs_q1q0, mask); - // mask |= (abs(q1 - q0) > limit) * -1; - - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p2, p1), - _mm_subs_epu16(p1, p2)), - _mm_or_si128(_mm_subs_epu16(q2, q1), - _mm_subs_epu16(q1, q2))); - mask = _mm_max_epi16(work, mask); - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p3, p2), - _mm_subs_epu16(p2, p3)), - _mm_or_si128(_mm_subs_epu16(q3, q2), - _mm_subs_epu16(q2, q3))); - mask = _mm_max_epi16(work, mask); - mask = _mm_subs_epu16(mask, limit); - mask = _mm_cmpeq_epi16(mask, zero); - - // flat_mask4 - flat = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p2, p0), - _mm_subs_epu16(p0, p2)), - 
_mm_or_si128(_mm_subs_epu16(q2, q0), - _mm_subs_epu16(q0, q2))); - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p3, p0), - _mm_subs_epu16(p0, p3)), - _mm_or_si128(_mm_subs_epu16(q3, q0), - _mm_subs_epu16(q0, q3))); - flat = _mm_max_epi16(work, flat); - flat = _mm_max_epi16(abs_p1p0, flat); - flat = _mm_max_epi16(abs_q1q0, flat); - flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, bd - 8)); - flat = _mm_cmpeq_epi16(flat, zero); - flat = _mm_and_si128(flat, mask); // flat & mask - - // Added before shift for rounding part of ROUND_POWER_OF_TWO - - workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); - workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); - workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_store_si128((__m128i *)&flat_op2[0], workp_shft); - - workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_store_si128((__m128i *)&flat_op1[0], workp_shft); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_store_si128((__m128i *)&flat_op0[0], workp_shft); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_store_si128((__m128i *)&flat_oq0[0], workp_shft); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_store_si128((__m128i *)&flat_oq1[0], workp_shft); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_store_si128((__m128i 
*)&flat_oq2[0], workp_shft); - - // lp filter - filt = signed_char_clamp_bd_sse2(_mm_subs_epi16(ps1, qs1), bd); - filt = _mm_and_si128(filt, hev); - work_a = _mm_subs_epi16(qs0, ps0); - filt = _mm_adds_epi16(filt, work_a); - filt = _mm_adds_epi16(filt, work_a); - filt = _mm_adds_epi16(filt, work_a); - // (vp9_filter + 3 * (qs0 - ps0)) & mask - filt = signed_char_clamp_bd_sse2(filt, bd); - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi16(filt, t4); - filter2 = _mm_adds_epi16(filt, t3); - - // Filter1 >> 3 - filter1 = signed_char_clamp_bd_sse2(filter1, bd); - filter1 = _mm_srai_epi16(filter1, 3); - - // Filter2 >> 3 - filter2 = signed_char_clamp_bd_sse2(filter2, bd); - filter2 = _mm_srai_epi16(filter2, 3); - - // filt >> 1 - filt = _mm_adds_epi16(filter1, t1); - filt = _mm_srai_epi16(filt, 1); - // filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; - filt = _mm_andnot_si128(hev, filt); - - work_a = signed_char_clamp_bd_sse2(_mm_subs_epi16(qs0, filter1), bd); - work_a = _mm_adds_epi16(work_a, t80); - q0 = _mm_load_si128((__m128i *)flat_oq0); - work_a = _mm_andnot_si128(flat, work_a); - q0 = _mm_and_si128(flat, q0); - q0 = _mm_or_si128(work_a, q0); - - work_a = signed_char_clamp_bd_sse2(_mm_subs_epi16(qs1, filt), bd); - work_a = _mm_adds_epi16(work_a, t80); - q1 = _mm_load_si128((__m128i *)flat_oq1); - work_a = _mm_andnot_si128(flat, work_a); - q1 = _mm_and_si128(flat, q1); - q1 = _mm_or_si128(work_a, q1); - - work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q2 = _mm_load_si128((__m128i *)flat_oq2); - work_a = _mm_andnot_si128(flat, work_a); - q2 = _mm_and_si128(flat, q2); - q2 = _mm_or_si128(work_a, q2); - - work_a = signed_char_clamp_bd_sse2(_mm_adds_epi16(ps0, filter2), bd); - work_a = _mm_adds_epi16(work_a, t80); - p0 = _mm_load_si128((__m128i *)flat_op0); - work_a = _mm_andnot_si128(flat, work_a); - p0 = _mm_and_si128(flat, p0); - p0 = _mm_or_si128(work_a, p0); - - work_a = signed_char_clamp_bd_sse2(_mm_adds_epi16(ps1, filt), bd); - work_a = 
_mm_adds_epi16(work_a, t80); - p1 = _mm_load_si128((__m128i *)flat_op1); - work_a = _mm_andnot_si128(flat, work_a); - p1 = _mm_and_si128(flat, p1); - p1 = _mm_or_si128(work_a, p1); - - work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p2 = _mm_load_si128((__m128i *)flat_op2); - work_a = _mm_andnot_si128(flat, work_a); - p2 = _mm_and_si128(flat, p2); - p2 = _mm_or_si128(work_a, p2); - - _mm_store_si128((__m128i *)(s - 3 * p), p2); - _mm_store_si128((__m128i *)(s - 2 * p), p1); - _mm_store_si128((__m128i *)(s - 1 * p), p0); - _mm_store_si128((__m128i *)(s + 0 * p), q0); - _mm_store_si128((__m128i *)(s + 1 * p), q1); - _mm_store_si128((__m128i *)(s + 2 * p), q2); -} - -void vp9_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p, - const uint8_t *_blimit0, - const uint8_t *_limit0, - const uint8_t *_thresh0, - const uint8_t *_blimit1, - const uint8_t *_limit1, - const uint8_t *_thresh1, - int bd) { - vp9_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd); - vp9_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, - 1, bd); -} - -void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int count, int bd) { - const __m128i zero = _mm_set1_epi16(0); - const __m128i blimit = _mm_slli_epi16( - _mm_unpacklo_epi8( - _mm_load_si128((const __m128i *)_blimit), zero), bd - 8); - const __m128i limit = _mm_slli_epi16( - _mm_unpacklo_epi8( - _mm_load_si128((const __m128i *)_limit), zero), bd - 8); - const __m128i thresh = _mm_slli_epi16( - _mm_unpacklo_epi8( - _mm_load_si128((const __m128i *)_thresh), zero), bd - 8); - __m128i mask, hev, flat; - __m128i p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - __m128i p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - __m128i p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - __m128i p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - __m128i q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - __m128i q1 = _mm_loadu_si128((__m128i *)(s + 1 
* p)); - __m128i q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - __m128i q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); - const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu16(p1, p0), - _mm_subs_epu16(p0, p1)); - const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu16(q1, q0), - _mm_subs_epu16(q0, q1)); - const __m128i ffff = _mm_cmpeq_epi16(abs_p1p0, abs_p1p0); - const __m128i one = _mm_set1_epi16(1); - __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu16(p0, q0), - _mm_subs_epu16(q0, p0)); - __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu16(p1, q1), - _mm_subs_epu16(q1, p1)); - __m128i work; - const __m128i t4 = _mm_set1_epi16(4); - const __m128i t3 = _mm_set1_epi16(3); - const __m128i t80 = _mm_slli_epi16(_mm_set1_epi16(0x80), bd - 8); - const __m128i tff80 = _mm_slli_epi16(_mm_set1_epi16(0xff80), bd - 8); - const __m128i tffe0 = _mm_slli_epi16(_mm_set1_epi16(0xffe0), bd - 8); - const __m128i t1f = _mm_srli_epi16(_mm_set1_epi16(0x1fff), 16 - bd); - // equivalent to shifting 0x1f left by bitdepth - 8 - // and setting new bits to 1 - const __m128i t1 = _mm_set1_epi16(0x1); - const __m128i t7f = _mm_srli_epi16(_mm_set1_epi16(0x7fff), 16 - bd); - // equivalent to shifting 0x7f left by bitdepth - 8 - // and setting new bits to 1 - const __m128i ps1 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s - 2 * p)), - t80); - const __m128i ps0 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s - 1 * p)), - t80); - const __m128i qs0 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s + 0 * p)), - t80); - const __m128i qs1 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s + 1 * p)), - t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - - (void)count; - - // filter_mask and hev_mask - flat = _mm_max_epi16(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu16(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi16(hev, zero), ffff); - - abs_p0q0 =_mm_adds_epu16(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); - mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit); - mask = 
_mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - // So taking maximums continues to work: - mask = _mm_and_si128(mask, _mm_adds_epu16(limit, one)); - mask = _mm_max_epi16(flat, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(p2, p1), - _mm_subs_epu16(p1, p2)), - _mm_or_si128(_mm_subs_epu16(p3, p2), - _mm_subs_epu16(p2, p3))); - mask = _mm_max_epi16(work, mask); - work = _mm_max_epi16(_mm_or_si128(_mm_subs_epu16(q2, q1), - _mm_subs_epu16(q1, q2)), - _mm_or_si128(_mm_subs_epu16(q3, q2), - _mm_subs_epu16(q2, q3))); - mask = _mm_max_epi16(work, mask); - mask = _mm_subs_epu16(mask, limit); - mask = _mm_cmpeq_epi16(mask, zero); - - // filter4 - filt = signed_char_clamp_bd_sse2(_mm_subs_epi16(ps1, qs1), bd); - filt = _mm_and_si128(filt, hev); - work_a = _mm_subs_epi16(qs0, ps0); - filt = _mm_adds_epi16(filt, work_a); - filt = _mm_adds_epi16(filt, work_a); - filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd); - // (vp9_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - - filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd); - filter2 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t3), bd); - - // Filter1 >> 3 - work_a = _mm_cmpgt_epi16(zero, filter1); // get the values that are <0 - filter1 = _mm_srli_epi16(filter1, 3); - work_a = _mm_and_si128(work_a, tffe0); // sign bits for the values < 0 - filter1 = _mm_and_si128(filter1, t1f); // clamp the range - filter1 = _mm_or_si128(filter1, work_a); // reinsert the sign bits - - // Filter2 >> 3 - work_a = _mm_cmpgt_epi16(zero, filter2); - filter2 = _mm_srli_epi16(filter2, 3); - work_a = _mm_and_si128(work_a, tffe0); - filter2 = _mm_and_si128(filter2, t1f); - filter2 = _mm_or_si128(filter2, work_a); - - // filt >> 1 - filt = _mm_adds_epi16(filter1, t1); - work_a = _mm_cmpgt_epi16(zero, filt); - filt = 
_mm_srli_epi16(filt, 1); - work_a = _mm_and_si128(work_a, tff80); - filt = _mm_and_si128(filt, t7f); - filt = _mm_or_si128(filt, work_a); - - filt = _mm_andnot_si128(hev, filt); - - q0 = _mm_adds_epi16( - signed_char_clamp_bd_sse2(_mm_subs_epi16(qs0, filter1), bd), t80); - q1 = _mm_adds_epi16( - signed_char_clamp_bd_sse2(_mm_subs_epi16(qs1, filt), bd), t80); - p0 = _mm_adds_epi16( - signed_char_clamp_bd_sse2(_mm_adds_epi16(ps0, filter2), bd), t80); - p1 = _mm_adds_epi16( - signed_char_clamp_bd_sse2(_mm_adds_epi16(ps1, filt), bd), t80); - - _mm_storeu_si128((__m128i *)(s - 2 * p), p1); - _mm_storeu_si128((__m128i *)(s - 1 * p), p0); - _mm_storeu_si128((__m128i *)(s + 0 * p), q0); - _mm_storeu_si128((__m128i *)(s + 1 * p), q1); -} - -void vp9_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p, - const uint8_t *_blimit0, - const uint8_t *_limit0, - const uint8_t *_thresh0, - const uint8_t *_blimit1, - const uint8_t *_limit1, - const uint8_t *_thresh1, - int bd) { - vp9_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd); - vp9_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1, - bd); -} - -static INLINE void highbd_transpose(uint16_t *src[], int in_p, - uint16_t *dst[], int out_p, - int num_8x8_to_transpose) { - int idx8x8 = 0; - __m128i p0, p1, p2, p3, p4, p5, p6, p7, x0, x1, x2, x3, x4, x5, x6, x7; - do { - uint16_t *in = src[idx8x8]; - uint16_t *out = dst[idx8x8]; - - p0 = _mm_loadu_si128((__m128i *)(in + 0*in_p)); // 00 01 02 03 04 05 06 07 - p1 = _mm_loadu_si128((__m128i *)(in + 1*in_p)); // 10 11 12 13 14 15 16 17 - p2 = _mm_loadu_si128((__m128i *)(in + 2*in_p)); // 20 21 22 23 24 25 26 27 - p3 = _mm_loadu_si128((__m128i *)(in + 3*in_p)); // 30 31 32 33 34 35 36 37 - p4 = _mm_loadu_si128((__m128i *)(in + 4*in_p)); // 40 41 42 43 44 45 46 47 - p5 = _mm_loadu_si128((__m128i *)(in + 5*in_p)); // 50 51 52 53 54 55 56 57 - p6 = _mm_loadu_si128((__m128i *)(in + 6*in_p)); // 60 61 62 63 64 65 66 67 - p7 = 
_mm_loadu_si128((__m128i *)(in + 7*in_p)); // 70 71 72 73 74 75 76 77 - // 00 10 01 11 02 12 03 13 - x0 = _mm_unpacklo_epi16(p0, p1); - // 20 30 21 31 22 32 23 33 - x1 = _mm_unpacklo_epi16(p2, p3); - // 40 50 41 51 42 52 43 53 - x2 = _mm_unpacklo_epi16(p4, p5); - // 60 70 61 71 62 72 63 73 - x3 = _mm_unpacklo_epi16(p6, p7); - // 00 10 20 30 01 11 21 31 - x4 = _mm_unpacklo_epi32(x0, x1); - // 40 50 60 70 41 51 61 71 - x5 = _mm_unpacklo_epi32(x2, x3); - // 00 10 20 30 40 50 60 70 - x6 = _mm_unpacklo_epi64(x4, x5); - // 01 11 21 31 41 51 61 71 - x7 = _mm_unpackhi_epi64(x4, x5); - - _mm_storeu_si128((__m128i *)(out + 0*out_p), x6); - // 00 10 20 30 40 50 60 70 - _mm_storeu_si128((__m128i *)(out + 1*out_p), x7); - // 01 11 21 31 41 51 61 71 - - // 02 12 22 32 03 13 23 33 - x4 = _mm_unpackhi_epi32(x0, x1); - // 42 52 62 72 43 53 63 73 - x5 = _mm_unpackhi_epi32(x2, x3); - // 02 12 22 32 42 52 62 72 - x6 = _mm_unpacklo_epi64(x4, x5); - // 03 13 23 33 43 53 63 73 - x7 = _mm_unpackhi_epi64(x4, x5); - - _mm_storeu_si128((__m128i *)(out + 2*out_p), x6); - // 02 12 22 32 42 52 62 72 - _mm_storeu_si128((__m128i *)(out + 3*out_p), x7); - // 03 13 23 33 43 53 63 73 - - // 04 14 05 15 06 16 07 17 - x0 = _mm_unpackhi_epi16(p0, p1); - // 24 34 25 35 26 36 27 37 - x1 = _mm_unpackhi_epi16(p2, p3); - // 44 54 45 55 46 56 47 57 - x2 = _mm_unpackhi_epi16(p4, p5); - // 64 74 65 75 66 76 67 77 - x3 = _mm_unpackhi_epi16(p6, p7); - // 04 14 24 34 05 15 25 35 - x4 = _mm_unpacklo_epi32(x0, x1); - // 44 54 64 74 45 55 65 75 - x5 = _mm_unpacklo_epi32(x2, x3); - // 04 14 24 34 44 54 64 74 - x6 = _mm_unpacklo_epi64(x4, x5); - // 05 15 25 35 45 55 65 75 - x7 = _mm_unpackhi_epi64(x4, x5); - - _mm_storeu_si128((__m128i *)(out + 4*out_p), x6); - // 04 14 24 34 44 54 64 74 - _mm_storeu_si128((__m128i *)(out + 5*out_p), x7); - // 05 15 25 35 45 55 65 75 - - // 06 16 26 36 07 17 27 37 - x4 = _mm_unpackhi_epi32(x0, x1); - // 46 56 66 76 47 57 67 77 - x5 = _mm_unpackhi_epi32(x2, x3); - // 06 16 26 36 46 56 
66 76 - x6 = _mm_unpacklo_epi64(x4, x5); - // 07 17 27 37 47 57 67 77 - x7 = _mm_unpackhi_epi64(x4, x5); - - _mm_storeu_si128((__m128i *)(out + 6*out_p), x6); - // 06 16 26 36 46 56 66 76 - _mm_storeu_si128((__m128i *)(out + 7*out_p), x7); - // 07 17 27 37 47 57 67 77 - } while (++idx8x8 < num_8x8_to_transpose); -} - -static INLINE void highbd_transpose8x16(uint16_t *in0, uint16_t *in1, - int in_p, uint16_t *out, int out_p) { - uint16_t *src0[1]; - uint16_t *src1[1]; - uint16_t *dest0[1]; - uint16_t *dest1[1]; - src0[0] = in0; - src1[0] = in1; - dest0[0] = out; - dest1[0] = out + 8; - highbd_transpose(src0, in_p, dest0, out_p, 1); - highbd_transpose(src1, in_p, dest1, out_p, 1); -} - -void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count, int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 8 * 8); - uint16_t *src[1]; - uint16_t *dst[1]; - (void)count; - - // Transpose 8x8 - src[0] = s - 4; - dst[0] = t_dst; - - highbd_transpose(src, p, dst, 8, 1); - - // Loop filtering - vp9_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1, - bd); - - src[0] = t_dst; - dst[0] = s - 4; - - // Transpose back - highbd_transpose(src, 8, dst, p, 1); -} - -void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1, - int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 16 * 8); - uint16_t *src[2]; - uint16_t *dst[2]; - - // Transpose 8x16 - highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); - - // Loop filtering - vp9_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, - thresh0, blimit1, limit1, thresh1, bd); - src[0] = t_dst; - src[1] = t_dst + 8; - dst[0] = s - 4; - dst[1] = s - 4 + p * 8; - - // Transpose back - highbd_transpose(src, 16, dst, p, 2); -} - -void 
vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count, int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 8 * 8); - uint16_t *src[1]; - uint16_t *dst[1]; - (void)count; - - // Transpose 8x8 - src[0] = s - 4; - dst[0] = t_dst; - - highbd_transpose(src, p, dst, 8, 1); - - // Loop filtering - vp9_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1, - bd); - - src[0] = t_dst; - dst[0] = s - 4; - - // Transpose back - highbd_transpose(src, 8, dst, p, 1); -} - -void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1, - int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 16 * 8); - uint16_t *src[2]; - uint16_t *dst[2]; - - // Transpose 8x16 - highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); - - // Loop filtering - vp9_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, - thresh0, blimit1, limit1, thresh1, bd); - src[0] = t_dst; - src[1] = t_dst + 8; - - dst[0] = s - 4; - dst[1] = s - 4 + p * 8; - - // Transpose back - highbd_transpose(src, 16, dst, p, 2); -} - -void vp9_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 8 * 16); - uint16_t *src[2]; - uint16_t *dst[2]; - - src[0] = s - 8; - src[1] = s; - dst[0] = t_dst; - dst[1] = t_dst + 8 * 8; - - // Transpose 16x8 - highbd_transpose(src, p, dst, 8, 2); - - // Loop filtering - highbd_mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, - thresh, bd); - src[0] = t_dst; - src[1] = t_dst + 8 * 8; - dst[0] = s - 8; - dst[1] = s; - - // Transpose back - highbd_transpose(src, 8, dst, p, 2); -} - -void vp9_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, - int p, - const uint8_t *blimit, - const uint8_t 
*limit, - const uint8_t *thresh, - int bd) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, t_dst, 256); - - // Transpose 16x16 - highbd_transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16); - highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); - - // Loop filtering - highbd_mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit, - thresh, bd); - - // Transpose back - highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); - highbd_transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * p, p); -} diff --git a/media/libvpx/vp9/common/x86/vp9_high_subpixel_8t_sse2.asm b/media/libvpx/vp9/common/x86/vp9_high_subpixel_8t_sse2.asm deleted file mode 100644 index 4bdbb83f4af..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_high_subpixel_8t_sse2.asm +++ /dev/null @@ -1,962 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;Note: tap3 and tap4 have to be applied and added after other taps to avoid -;overflow. 
- -%macro HIGH_GET_FILTERS_4 0 - mov rdx, arg(5) ;filter ptr - mov rcx, 0x00000040 - - movdqa xmm7, [rdx] ;load filters - pshuflw xmm0, xmm7, 0b ;k0 - pshuflw xmm1, xmm7, 01010101b ;k1 - pshuflw xmm2, xmm7, 10101010b ;k2 - pshuflw xmm3, xmm7, 11111111b ;k3 - psrldq xmm7, 8 - pshuflw xmm4, xmm7, 0b ;k4 - pshuflw xmm5, xmm7, 01010101b ;k5 - pshuflw xmm6, xmm7, 10101010b ;k6 - pshuflw xmm7, xmm7, 11111111b ;k7 - - punpcklwd xmm0, xmm6 - punpcklwd xmm2, xmm5 - punpcklwd xmm3, xmm4 - punpcklwd xmm1, xmm7 - - movdqa k0k6, xmm0 - movdqa k2k5, xmm2 - movdqa k3k4, xmm3 - movdqa k1k7, xmm1 - - movq xmm6, rcx - pshufd xmm6, xmm6, 0 - movdqa krd, xmm6 - - ;Compute max and min values of a pixel - mov rdx, 0x00010001 - movsxd rcx, DWORD PTR arg(6) ;bps - movq xmm0, rdx - movq xmm1, rcx - pshufd xmm0, xmm0, 0b - movdqa xmm2, xmm0 - psllw xmm0, xmm1 - psubw xmm0, xmm2 - pxor xmm1, xmm1 - movdqa max, xmm0 ;max value (for clamping) - movdqa min, xmm1 ;min value (for clamping) - -%endm - -%macro HIGH_APPLY_FILTER_4 1 - punpcklwd xmm0, xmm6 ;two row in one register - punpcklwd xmm1, xmm7 - punpcklwd xmm2, xmm5 - punpcklwd xmm3, xmm4 - - pmaddwd xmm0, k0k6 ;multiply the filter factors - pmaddwd xmm1, k1k7 - pmaddwd xmm2, k2k5 - pmaddwd xmm3, k3k4 - - paddd xmm0, xmm1 ;sum - paddd xmm0, xmm2 - paddd xmm0, xmm3 - - paddd xmm0, krd ;rounding - psrad xmm0, 7 ;shift - packssdw xmm0, xmm0 ;pack to word - - ;clamp the values - pminsw xmm0, max - pmaxsw xmm0, min - -%if %1 - movq xmm1, [rdi] - pavgw xmm0, xmm1 -%endif - movq [rdi], xmm0 -%endm - -%macro HIGH_GET_FILTERS 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x00000040 - - movdqa xmm7, [rdx] ;load filters - pshuflw xmm0, xmm7, 0b ;k0 - pshuflw xmm1, xmm7, 01010101b ;k1 - pshuflw xmm2, xmm7, 10101010b ;k2 - pshuflw xmm3, xmm7, 11111111b ;k3 - pshufhw xmm4, xmm7, 0b ;k4 - pshufhw xmm5, xmm7, 01010101b ;k5 - pshufhw xmm6, xmm7, 10101010b ;k6 - pshufhw xmm7, xmm7, 11111111b ;k7 - 
punpcklqdq xmm2, xmm2 - punpcklqdq xmm3, xmm3 - punpcklwd xmm0, xmm1 - punpckhwd xmm6, xmm7 - punpckhwd xmm2, xmm5 - punpckhwd xmm3, xmm4 - - movdqa k0k1, xmm0 ;store filter factors on stack - movdqa k6k7, xmm6 - movdqa k2k5, xmm2 - movdqa k3k4, xmm3 - - movq xmm6, rcx - pshufd xmm6, xmm6, 0 - movdqa krd, xmm6 ;rounding - - ;Compute max and min values of a pixel - mov rdx, 0x00010001 - movsxd rcx, DWORD PTR arg(6) ;bps - movq xmm0, rdx - movq xmm1, rcx - pshufd xmm0, xmm0, 0b - movdqa xmm2, xmm0 - psllw xmm0, xmm1 - psubw xmm0, xmm2 - pxor xmm1, xmm1 - movdqa max, xmm0 ;max value (for clamping) - movdqa min, xmm1 ;min value (for clamping) -%endm - -%macro LOAD_VERT_8 1 - movdqu xmm0, [rsi + %1] ;0 - movdqu xmm1, [rsi + rax + %1] ;1 - movdqu xmm6, [rsi + rdx * 2 + %1] ;6 - lea rsi, [rsi + rax] - movdqu xmm7, [rsi + rdx * 2 + %1] ;7 - movdqu xmm2, [rsi + rax + %1] ;2 - movdqu xmm3, [rsi + rax * 2 + %1] ;3 - movdqu xmm4, [rsi + rdx + %1] ;4 - movdqu xmm5, [rsi + rax * 4 + %1] ;5 -%endm - -%macro HIGH_APPLY_FILTER_8 2 - movdqu temp, xmm4 - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm1 - punpckhwd xmm4, xmm1 - movdqa xmm1, xmm6 - punpcklwd xmm6, xmm7 - punpckhwd xmm1, xmm7 - movdqa xmm7, xmm2 - punpcklwd xmm2, xmm5 - punpckhwd xmm7, xmm5 - - movdqu xmm5, temp - movdqu temp, xmm4 - movdqa xmm4, xmm3 - punpcklwd xmm3, xmm5 - punpckhwd xmm4, xmm5 - movdqu xmm5, temp - - pmaddwd xmm0, k0k1 - pmaddwd xmm5, k0k1 - pmaddwd xmm6, k6k7 - pmaddwd xmm1, k6k7 - pmaddwd xmm2, k2k5 - pmaddwd xmm7, k2k5 - pmaddwd xmm3, k3k4 - pmaddwd xmm4, k3k4 - - paddd xmm0, xmm6 - paddd xmm0, xmm2 - paddd xmm0, xmm3 - paddd xmm5, xmm1 - paddd xmm5, xmm7 - paddd xmm5, xmm4 - - paddd xmm0, krd ;rounding - paddd xmm5, krd - psrad xmm0, 7 ;shift - psrad xmm5, 7 - packssdw xmm0, xmm5 ;pack back to word - - ;clamp the values - pminsw xmm0, max - pmaxsw xmm0, min - -%if %1 - movdqu xmm1, [rdi + %2] - pavgw xmm0, xmm1 -%endif - movdqu [rdi + %2], xmm0 -%endm - -;void vp9_filter_block1d4_v8_sse2 -;( -; unsigned 
char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vp9_high_filter_block1d4_v8_sse2) PRIVATE -sym(vp9_high_filter_block1d4_v8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 7 - %define k0k6 [rsp + 16 * 0] - %define k2k5 [rsp + 16 * 1] - %define k3k4 [rsp + 16 * 2] - %define k1k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define max [rsp + 16 * 5] - %define min [rsp + 16 * 6] - - HIGH_GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rbx, [rbx + rbx] - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movq xmm0, [rsi] ;load src: row 0 - movq xmm1, [rsi + rax] ;1 - movq xmm6, [rsi + rdx * 2] ;6 - lea rsi, [rsi + rax] - movq xmm7, [rsi + rdx * 2] ;7 - movq xmm2, [rsi + rax] ;2 - movq xmm3, [rsi + rax * 2] ;3 - movq xmm4, [rsi + rdx] ;4 - movq xmm5, [rsi + rax * 4] ;5 - - HIGH_APPLY_FILTER_4 0 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 7 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_filter_block1d8_v8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vp9_high_filter_block1d8_v8_sse2) PRIVATE -sym(vp9_high_filter_block1d8_v8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 8 - %define k0k1 [rsp + 16 * 0] - %define k6k7 [rsp + 16 * 1] - %define k2k5 [rsp + 16 * 2] - %define k3k4 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - 
%define temp [rsp + 16 * 5] - %define max [rsp + 16 * 6] - %define min [rsp + 16 * 7] - - HIGH_GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rbx, [rbx + rbx] - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - LOAD_VERT_8 0 - HIGH_APPLY_FILTER_8 0, 0 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 8 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_filter_block1d16_v8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vp9_high_filter_block1d16_v8_sse2) PRIVATE -sym(vp9_high_filter_block1d16_v8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 8 - %define k0k1 [rsp + 16 * 0] - %define k6k7 [rsp + 16 * 1] - %define k2k5 [rsp + 16 * 2] - %define k3k4 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define temp [rsp + 16 * 5] - %define max [rsp + 16 * 6] - %define min [rsp + 16 * 7] - - HIGH_GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rbx, [rbx + rbx] - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - LOAD_VERT_8 0 - HIGH_APPLY_FILTER_8 0, 0 - sub rsi, rax - - LOAD_VERT_8 16 - HIGH_APPLY_FILTER_8 0, 16 - add rdi, rbx - - dec rcx - jnz .loop - - add rsp, 16 * 8 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_high_filter_block1d4_v8_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d4_v8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - 
ALIGN_STACK 16, rax - sub rsp, 16 * 7 - %define k0k6 [rsp + 16 * 0] - %define k2k5 [rsp + 16 * 1] - %define k3k4 [rsp + 16 * 2] - %define k1k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define max [rsp + 16 * 5] - %define min [rsp + 16 * 6] - - HIGH_GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rbx, [rbx + rbx] - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movq xmm0, [rsi] ;load src: row 0 - movq xmm1, [rsi + rax] ;1 - movq xmm6, [rsi + rdx * 2] ;6 - lea rsi, [rsi + rax] - movq xmm7, [rsi + rdx * 2] ;7 - movq xmm2, [rsi + rax] ;2 - movq xmm3, [rsi + rax * 2] ;3 - movq xmm4, [rsi + rdx] ;4 - movq xmm5, [rsi + rax * 4] ;5 - - HIGH_APPLY_FILTER_4 1 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 7 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_high_filter_block1d8_v8_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d8_v8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 8 - %define k0k1 [rsp + 16 * 0] - %define k6k7 [rsp + 16 * 1] - %define k2k5 [rsp + 16 * 2] - %define k3k4 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define temp [rsp + 16 * 5] - %define max [rsp + 16 * 6] - %define min [rsp + 16 * 7] - - HIGH_GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rbx, [rbx + rbx] - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height -.loop: - LOAD_VERT_8 0 - HIGH_APPLY_FILTER_8 1, 0 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 8 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - 
-global sym(vp9_high_filter_block1d16_v8_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d16_v8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 8 - %define k0k1 [rsp + 16 * 0] - %define k6k7 [rsp + 16 * 1] - %define k2k5 [rsp + 16 * 2] - %define k3k4 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define temp [rsp + 16 * 5] - %define max [rsp + 16 * 6] - %define min [rsp + 16 * 7] - - HIGH_GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rbx, [rbx + rbx] - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height -.loop: - LOAD_VERT_8 0 - HIGH_APPLY_FILTER_8 1, 0 - sub rsi, rax - - LOAD_VERT_8 16 - HIGH_APPLY_FILTER_8 1, 16 - add rdi, rbx - - dec rcx - jnz .loop - - add rsp, 16 * 8 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_filter_block1d4_h8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vp9_high_filter_block1d4_h8_sse2) PRIVATE -sym(vp9_high_filter_block1d4_h8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 7 - %define k0k6 [rsp + 16 * 0] - %define k2k5 [rsp + 16 * 1] - %define k3k4 [rsp + 16 * 2] - %define k1k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define max [rsp + 16 * 5] - %define min [rsp + 16 * 6] - - HIGH_GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rdx, [rdx + rdx] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 6] 
;load src - movdqu xmm4, [rsi + 2] - movdqa xmm1, xmm0 - movdqa xmm6, xmm4 - movdqa xmm7, xmm4 - movdqa xmm2, xmm0 - movdqa xmm3, xmm0 - movdqa xmm5, xmm4 - - psrldq xmm1, 2 - psrldq xmm6, 4 - psrldq xmm7, 6 - psrldq xmm2, 4 - psrldq xmm3, 6 - psrldq xmm5, 2 - - HIGH_APPLY_FILTER_4 0 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 7 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_filter_block1d8_h8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vp9_high_filter_block1d8_h8_sse2) PRIVATE -sym(vp9_high_filter_block1d8_h8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 8 - %define k0k1 [rsp + 16 * 0] - %define k6k7 [rsp + 16 * 1] - %define k2k5 [rsp + 16 * 2] - %define k3k4 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define temp [rsp + 16 * 5] - %define max [rsp + 16 * 6] - %define min [rsp + 16 * 7] - - HIGH_GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rdx, [rdx + rdx] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 6] ;load src - movdqu xmm1, [rsi - 4] - movdqu xmm2, [rsi - 2] - movdqu xmm3, [rsi] - movdqu xmm4, [rsi + 2] - movdqu xmm5, [rsi + 4] - movdqu xmm6, [rsi + 6] - movdqu xmm7, [rsi + 8] - - HIGH_APPLY_FILTER_8 0, 0 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 8 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_filter_block1d16_h8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int 
output_height, -; short *filter -;) -global sym(vp9_high_filter_block1d16_h8_sse2) PRIVATE -sym(vp9_high_filter_block1d16_h8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 8 - %define k0k1 [rsp + 16 * 0] - %define k6k7 [rsp + 16 * 1] - %define k2k5 [rsp + 16 * 2] - %define k3k4 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define temp [rsp + 16 * 5] - %define max [rsp + 16 * 6] - %define min [rsp + 16 * 7] - - HIGH_GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rdx, [rdx + rdx] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 6] ;load src - movdqu xmm1, [rsi - 4] - movdqu xmm2, [rsi - 2] - movdqu xmm3, [rsi] - movdqu xmm4, [rsi + 2] - movdqu xmm5, [rsi + 4] - movdqu xmm6, [rsi + 6] - movdqu xmm7, [rsi + 8] - - HIGH_APPLY_FILTER_8 0, 0 - - movdqu xmm0, [rsi + 10] ;load src - movdqu xmm1, [rsi + 12] - movdqu xmm2, [rsi + 14] - movdqu xmm3, [rsi + 16] - movdqu xmm4, [rsi + 18] - movdqu xmm5, [rsi + 20] - movdqu xmm6, [rsi + 22] - movdqu xmm7, [rsi + 24] - - HIGH_APPLY_FILTER_8 0, 16 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 8 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_high_filter_block1d4_h8_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d4_h8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 7 - %define k0k6 [rsp + 16 * 0] - %define k2k5 [rsp + 16 * 1] - %define k3k4 [rsp + 16 * 2] - %define k1k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define max [rsp + 16 * 5] - %define min [rsp + 16 * 6] - - HIGH_GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line 
- movsxd rdx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rdx, [rdx + rdx] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 6] ;load src - movdqu xmm4, [rsi + 2] - movdqa xmm1, xmm0 - movdqa xmm6, xmm4 - movdqa xmm7, xmm4 - movdqa xmm2, xmm0 - movdqa xmm3, xmm0 - movdqa xmm5, xmm4 - - psrldq xmm1, 2 - psrldq xmm6, 4 - psrldq xmm7, 6 - psrldq xmm2, 4 - psrldq xmm3, 6 - psrldq xmm5, 2 - - HIGH_APPLY_FILTER_4 1 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 7 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_high_filter_block1d8_h8_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d8_h8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 8 - %define k0k1 [rsp + 16 * 0] - %define k6k7 [rsp + 16 * 1] - %define k2k5 [rsp + 16 * 2] - %define k3k4 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define temp [rsp + 16 * 5] - %define max [rsp + 16 * 6] - %define min [rsp + 16 * 7] - - HIGH_GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rdx, [rdx + rdx] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 6] ;load src - movdqu xmm1, [rsi - 4] - movdqu xmm2, [rsi - 2] - movdqu xmm3, [rsi] - movdqu xmm4, [rsi + 2] - movdqu xmm5, [rsi + 4] - movdqu xmm6, [rsi + 6] - movdqu xmm7, [rsi + 8] - - HIGH_APPLY_FILTER_8 1, 0 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 8 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_high_filter_block1d16_h8_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d16_h8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - 
- ALIGN_STACK 16, rax - sub rsp, 16 * 8 - %define k0k1 [rsp + 16 * 0] - %define k6k7 [rsp + 16 * 1] - %define k2k5 [rsp + 16 * 2] - %define k3k4 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define temp [rsp + 16 * 5] - %define max [rsp + 16 * 6] - %define min [rsp + 16 * 7] - - HIGH_GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - lea rax, [rax + rax] ;bytes per line - lea rdx, [rdx + rdx] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 6] ;load src - movdqu xmm1, [rsi - 4] - movdqu xmm2, [rsi - 2] - movdqu xmm3, [rsi] - movdqu xmm4, [rsi + 2] - movdqu xmm5, [rsi + 4] - movdqu xmm6, [rsi + 6] - movdqu xmm7, [rsi + 8] - - HIGH_APPLY_FILTER_8 1, 0 - - movdqu xmm0, [rsi + 10] ;load src - movdqu xmm1, [rsi + 12] - movdqu xmm2, [rsi + 14] - movdqu xmm3, [rsi + 16] - movdqu xmm4, [rsi + 18] - movdqu xmm5, [rsi + 20] - movdqu xmm6, [rsi + 22] - movdqu xmm7, [rsi + 24] - - HIGH_APPLY_FILTER_8 1, 16 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 8 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/media/libvpx/vp9/common/x86/vp9_high_subpixel_bilinear_sse2.asm b/media/libvpx/vp9/common/x86/vp9_high_subpixel_bilinear_sse2.asm deleted file mode 100644 index b7d4a61ffec..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_high_subpixel_bilinear_sse2.asm +++ /dev/null @@ -1,494 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - -%include "vpx_ports/x86_abi_support.asm" - -%macro HIGH_GET_PARAM_4 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x00000040 - - movdqa xmm3, [rdx] ;load filters - pshuflw xmm4, xmm3, 11111111b ;k3 - psrldq xmm3, 8 - pshuflw xmm3, xmm3, 0b ;k4 - punpcklwd xmm4, xmm3 ;k3k4 - - movq xmm3, rcx ;rounding - pshufd xmm3, xmm3, 0 - - mov rdx, 0x00010001 - movsxd rcx, DWORD PTR arg(6) ;bps - movq xmm5, rdx - movq xmm2, rcx - pshufd xmm5, xmm5, 0b - movdqa xmm1, xmm5 - psllw xmm5, xmm2 - psubw xmm5, xmm1 ;max value (for clamping) - pxor xmm2, xmm2 ;min value (for clamping) - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro HIGH_APPLY_FILTER_4 1 - - punpcklwd xmm0, xmm1 ;two row in one register - pmaddwd xmm0, xmm4 ;multiply the filter factors - - paddd xmm0, xmm3 ;rounding - psrad xmm0, 7 ;shift - packssdw xmm0, xmm0 ;pack to word - - ;clamp the values - pminsw xmm0, xmm5 - pmaxsw xmm0, xmm2 - -%if %1 - movq xmm1, [rdi] - pavgw xmm0, xmm1 -%endif - - movq [rdi], xmm0 - lea rsi, [rsi + 2*rax] - lea rdi, [rdi + 2*rdx] - dec rcx -%endm - -%if ARCH_X86_64 -%macro HIGH_GET_PARAM 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x00000040 - - movdqa xmm6, [rdx] ;load filters - - pshuflw xmm7, xmm6, 11111111b ;k3 - pshufhw xmm6, xmm6, 0b ;k4 - psrldq xmm6, 8 - punpcklwd xmm7, xmm6 ;k3k4k3k4k3k4k3k4 - - movq xmm4, rcx ;rounding - pshufd xmm4, xmm4, 0 - - mov rdx, 0x00010001 - movsxd rcx, DWORD PTR arg(6) ;bps - movq xmm8, rdx - movq xmm5, rcx - pshufd xmm8, xmm8, 0b - movdqa xmm1, xmm8 - psllw xmm8, xmm5 - psubw xmm8, xmm1 ;max value (for clamping) - pxor xmm5, xmm5 ;min value (for clamping) - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro HIGH_APPLY_FILTER_8 1 - 
movdqa xmm6, xmm0 - punpckhwd xmm6, xmm1 - punpcklwd xmm0, xmm1 - pmaddwd xmm6, xmm7 - pmaddwd xmm0, xmm7 - - paddd xmm6, xmm4 ;rounding - paddd xmm0, xmm4 ;rounding - psrad xmm6, 7 ;shift - psrad xmm0, 7 ;shift - packssdw xmm0, xmm6 ;pack back to word - - ;clamp the values - pminsw xmm0, xmm8 - pmaxsw xmm0, xmm5 - -%if %1 - movdqu xmm1, [rdi] - pavgw xmm0, xmm1 -%endif - movdqu [rdi], xmm0 ;store the result - - lea rsi, [rsi + 2*rax] - lea rdi, [rdi + 2*rdx] - dec rcx -%endm - -%macro HIGH_APPLY_FILTER_16 1 - movdqa xmm9, xmm0 - movdqa xmm6, xmm2 - punpckhwd xmm9, xmm1 - punpckhwd xmm6, xmm3 - punpcklwd xmm0, xmm1 - punpcklwd xmm2, xmm3 - - pmaddwd xmm9, xmm7 - pmaddwd xmm6, xmm7 - pmaddwd xmm0, xmm7 - pmaddwd xmm2, xmm7 - - paddd xmm9, xmm4 ;rounding - paddd xmm6, xmm4 - paddd xmm0, xmm4 - paddd xmm2, xmm4 - - psrad xmm9, 7 ;shift - psrad xmm6, 7 - psrad xmm0, 7 - psrad xmm2, 7 - - packssdw xmm0, xmm9 ;pack back to word - packssdw xmm2, xmm6 ;pack back to word - - ;clamp the values - pminsw xmm0, xmm8 - pmaxsw xmm0, xmm5 - pminsw xmm2, xmm8 - pmaxsw xmm2, xmm5 - -%if %1 - movdqu xmm1, [rdi] - movdqu xmm3, [rdi + 16] - pavgw xmm0, xmm1 - pavgw xmm2, xmm3 -%endif - movdqu [rdi], xmm0 ;store the result - movdqu [rdi + 16], xmm2 ;store the result - - lea rsi, [rsi + 2*rax] - lea rdi, [rdi + 2*rdx] - dec rcx -%endm -%endif - -global sym(vp9_high_filter_block1d4_v2_sse2) PRIVATE -sym(vp9_high_filter_block1d4_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM_4 -.loop: - movq xmm0, [rsi] ;load src - movq xmm1, [rsi + 2*rax] - - HIGH_APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -%if ARCH_X86_64 -global sym(vp9_high_filter_block1d8_v2_sse2) PRIVATE -sym(vp9_high_filter_block1d8_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 8 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, 
[rsi + 2*rax] ;1 - - HIGH_APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_high_filter_block1d16_v2_sse2) PRIVATE -sym(vp9_high_filter_block1d16_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 9 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm2, [rsi + 16] - movdqu xmm1, [rsi + 2*rax] ;1 - movdqu xmm3, [rsi + 2*rax + 16] - - HIGH_APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -%endif - -global sym(vp9_high_filter_block1d4_v2_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d4_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM_4 -.loop: - movq xmm0, [rsi] ;load src - movq xmm1, [rsi + 2*rax] - - HIGH_APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -%if ARCH_X86_64 -global sym(vp9_high_filter_block1d8_v2_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d8_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 8 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + 2*rax] ;1 - - HIGH_APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_high_filter_block1d16_v2_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d16_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 9 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + 2*rax] ;1 - movdqu xmm2, [rsi + 16] - movdqu xmm3, [rsi + 2*rax + 16] - - HIGH_APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -%endif - -global sym(vp9_high_filter_block1d4_h2_sse2) PRIVATE 
-sym(vp9_high_filter_block1d4_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 2 - - HIGH_APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -%if ARCH_X86_64 -global sym(vp9_high_filter_block1d8_h2_sse2) PRIVATE -sym(vp9_high_filter_block1d8_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 8 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 2] - - HIGH_APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_high_filter_block1d16_h2_sse2) PRIVATE -sym(vp9_high_filter_block1d16_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 9 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 2] - movdqu xmm2, [rsi + 16] - movdqu xmm3, [rsi + 18] - - HIGH_APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -%endif - -global sym(vp9_high_filter_block1d4_h2_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d4_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 2 - - HIGH_APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -%if ARCH_X86_64 -global sym(vp9_high_filter_block1d8_h2_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d8_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 8 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 2] - - HIGH_APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi 
- RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_high_filter_block1d16_h2_avg_sse2) PRIVATE -sym(vp9_high_filter_block1d16_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 9 - push rsi - push rdi - ; end prolog - - HIGH_GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 2] - movdqu xmm2, [rsi + 16] - movdqu xmm3, [rsi + 18] - - HIGH_APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -%endif diff --git a/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h b/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h deleted file mode 100644 index 0f179b49a57..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include // SSE2 -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_idct.h" - -// perform 8x8 transpose -static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { - const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); - const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); - const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]); - const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]); - const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); - const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]); - const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]); - const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]); - - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5); - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); - const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5); - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3); - const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3); - const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); - - res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1); - res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1); - res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3); - res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3); - res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5); - res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5); - res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7); - res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7); -} - -#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \ - { \ - const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ - \ - in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \ - in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \ - } - -static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) { - const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); - const __m128i 
tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); - const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); - const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]); - - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); - - out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4); - out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4); - out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6); - out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6); -} - -static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) { - __m128i tbuf[8]; - array_transpose_8x8(res0, res0); - array_transpose_8x8(res1, tbuf); - array_transpose_8x8(res0 + 8, res1); - array_transpose_8x8(res1 + 8, res1 + 8); - - res0[8] = tbuf[0]; - res0[9] = tbuf[1]; - res0[10] = tbuf[2]; - res0[11] = tbuf[3]; - res0[12] = tbuf[4]; - res0[13] = tbuf[5]; - res0[14] = tbuf[6]; - res0[15] = tbuf[7]; -} - -static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) { - in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16)); - in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16)); - in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16)); - in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16)); - in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16)); - in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16)); - in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16)); - in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16)); - - in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16)); - in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16)); - in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16)); - in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16)); - in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16)); - in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16)); - in[14] = _mm_load_si128((const __m128i *)(input + 14 * 
16)); - in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16)); -} - -#define RECON_AND_STORE(dest, in_x) \ - { \ - __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ - d0 = _mm_unpacklo_epi8(d0, zero); \ - d0 = _mm_add_epi16(in_x, d0); \ - d0 = _mm_packus_epi16(d0, d0); \ - _mm_storel_epi64((__m128i *)(dest), d0); \ - dest += stride; \ - } - -static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { - const __m128i final_rounding = _mm_set1_epi16(1<<5); - const __m128i zero = _mm_setzero_si128(); - // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - in[8] = _mm_adds_epi16(in[8], final_rounding); - in[9] = _mm_adds_epi16(in[9], final_rounding); - in[10] = _mm_adds_epi16(in[10], final_rounding); - in[11] = _mm_adds_epi16(in[11], final_rounding); - in[12] = _mm_adds_epi16(in[12], final_rounding); - in[13] = _mm_adds_epi16(in[13], final_rounding); - in[14] = _mm_adds_epi16(in[14], final_rounding); - in[15] = _mm_adds_epi16(in[15], final_rounding); - - in[0] = _mm_srai_epi16(in[0], 6); - in[1] = _mm_srai_epi16(in[1], 6); - in[2] = _mm_srai_epi16(in[2], 6); - in[3] = _mm_srai_epi16(in[3], 6); - in[4] = _mm_srai_epi16(in[4], 6); - in[5] = _mm_srai_epi16(in[5], 6); - in[6] = _mm_srai_epi16(in[6], 6); - in[7] = _mm_srai_epi16(in[7], 6); - in[8] = _mm_srai_epi16(in[8], 6); - in[9] = _mm_srai_epi16(in[9], 6); - in[10] = _mm_srai_epi16(in[10], 6); - in[11] = _mm_srai_epi16(in[11], 6); - in[12] = _mm_srai_epi16(in[12], 6); - in[13] = _mm_srai_epi16(in[13], 6); - in[14] = _mm_srai_epi16(in[14], 6); - in[15] = _mm_srai_epi16(in[15], 6); - - RECON_AND_STORE(dest, in[0]); - 
RECON_AND_STORE(dest, in[1]); - RECON_AND_STORE(dest, in[2]); - RECON_AND_STORE(dest, in[3]); - RECON_AND_STORE(dest, in[4]); - RECON_AND_STORE(dest, in[5]); - RECON_AND_STORE(dest, in[6]); - RECON_AND_STORE(dest, in[7]); - RECON_AND_STORE(dest, in[8]); - RECON_AND_STORE(dest, in[9]); - RECON_AND_STORE(dest, in[10]); - RECON_AND_STORE(dest, in[11]); - RECON_AND_STORE(dest, in[12]); - RECON_AND_STORE(dest, in[13]); - RECON_AND_STORE(dest, in[14]); - RECON_AND_STORE(dest, in[15]); -} diff --git a/media/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c b/media/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c deleted file mode 100644 index 73bf5d1d78e..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c +++ /dev/null @@ -1,762 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#if defined(_MSC_VER) && _MSC_VER <= 1500 -// Need to include math.h before calling tmmintrin.h/intrin.h -// in certain versions of MSVS. 
-#include -#endif -#include // SSSE3 -#include "vp9/common/x86/vp9_idct_intrin_sse2.h" - -static void idct16_8col(__m128i *in, int round) { - const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); - const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i k__cospi_p16_p16_x2 = pair_set_epi16(23170, 23170); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); - const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i v[16], u[16], s[16], t[16]; - - // stage 1 - s[0] = in[0]; - s[1] = in[8]; - s[2] = in[4]; - s[3] = in[12]; - s[4] = in[2]; - s[5] = in[10]; - s[6] = in[6]; - s[7] = in[14]; - s[8] = in[1]; - s[9] = in[9]; - s[10] = in[5]; - s[11] = in[13]; - s[12] = 
in[3]; - s[13] = in[11]; - s[14] = in[7]; - s[15] = in[15]; - - // stage 2 - u[0] = _mm_unpacklo_epi16(s[8], s[15]); - u[1] = _mm_unpackhi_epi16(s[8], s[15]); - u[2] = _mm_unpacklo_epi16(s[9], s[14]); - u[3] = _mm_unpackhi_epi16(s[9], s[14]); - u[4] = _mm_unpacklo_epi16(s[10], s[13]); - u[5] = _mm_unpackhi_epi16(s[10], s[13]); - u[6] = _mm_unpacklo_epi16(s[11], s[12]); - u[7] = _mm_unpackhi_epi16(s[11], s[12]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p30_m02); - v[1] = _mm_madd_epi16(u[1], k__cospi_p30_m02); - v[2] = _mm_madd_epi16(u[0], k__cospi_p02_p30); - v[3] = _mm_madd_epi16(u[1], k__cospi_p02_p30); - v[4] = _mm_madd_epi16(u[2], k__cospi_p14_m18); - v[5] = _mm_madd_epi16(u[3], k__cospi_p14_m18); - v[6] = _mm_madd_epi16(u[2], k__cospi_p18_p14); - v[7] = _mm_madd_epi16(u[3], k__cospi_p18_p14); - v[8] = _mm_madd_epi16(u[4], k__cospi_p22_m10); - v[9] = _mm_madd_epi16(u[5], k__cospi_p22_m10); - v[10] = _mm_madd_epi16(u[4], k__cospi_p10_p22); - v[11] = _mm_madd_epi16(u[5], k__cospi_p10_p22); - v[12] = _mm_madd_epi16(u[6], k__cospi_p06_m26); - v[13] = _mm_madd_epi16(u[7], k__cospi_p06_m26); - v[14] = _mm_madd_epi16(u[6], k__cospi_p26_p06); - v[15] = _mm_madd_epi16(u[7], k__cospi_p26_p06); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(v[14], 
k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - s[8] = _mm_packs_epi32(u[0], u[1]); - s[15] = _mm_packs_epi32(u[2], u[3]); - s[9] = _mm_packs_epi32(u[4], u[5]); - s[14] = _mm_packs_epi32(u[6], u[7]); - s[10] = _mm_packs_epi32(u[8], u[9]); - s[13] = _mm_packs_epi32(u[10], u[11]); - s[11] = _mm_packs_epi32(u[12], u[13]); - s[12] = _mm_packs_epi32(u[14], u[15]); - - // stage 3 - t[0] = s[0]; - t[1] = s[1]; - t[2] = s[2]; - t[3] = s[3]; - u[0] = _mm_unpacklo_epi16(s[4], s[7]); - u[1] = _mm_unpackhi_epi16(s[4], s[7]); - u[2] = _mm_unpacklo_epi16(s[5], s[6]); - u[3] = _mm_unpackhi_epi16(s[5], s[6]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p28_m04); - v[1] = _mm_madd_epi16(u[1], k__cospi_p28_m04); - v[2] = _mm_madd_epi16(u[0], k__cospi_p04_p28); - v[3] = _mm_madd_epi16(u[1], k__cospi_p04_p28); - v[4] = _mm_madd_epi16(u[2], k__cospi_p12_m20); - v[5] = _mm_madd_epi16(u[3], k__cospi_p12_m20); - v[6] = _mm_madd_epi16(u[2], k__cospi_p20_p12); - v[7] = _mm_madd_epi16(u[3], k__cospi_p20_p12); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], 
k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - - t[4] = _mm_packs_epi32(u[0], u[1]); - t[7] = _mm_packs_epi32(u[2], u[3]); - t[5] = _mm_packs_epi32(u[4], u[5]); - t[6] = _mm_packs_epi32(u[6], u[7]); - t[8] = _mm_add_epi16(s[8], s[9]); - t[9] = _mm_sub_epi16(s[8], s[9]); - t[10] = _mm_sub_epi16(s[11], s[10]); - t[11] = _mm_add_epi16(s[10], s[11]); - t[12] = _mm_add_epi16(s[12], s[13]); - t[13] = _mm_sub_epi16(s[12], s[13]); - t[14] = _mm_sub_epi16(s[15], s[14]); - t[15] = _mm_add_epi16(s[14], s[15]); - - // stage 4 - u[0] = _mm_add_epi16(t[0], t[1]); - u[1] = _mm_sub_epi16(t[0], t[1]); - u[2] = _mm_unpacklo_epi16(t[2], t[3]); - u[3] = _mm_unpackhi_epi16(t[2], t[3]); - u[4] = _mm_unpacklo_epi16(t[9], t[14]); - u[5] = _mm_unpackhi_epi16(t[9], t[14]); - u[6] = _mm_unpacklo_epi16(t[10], t[13]); - u[7] = _mm_unpackhi_epi16(t[10], t[13]); - - s[0] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2); - s[1] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2); - v[4] = _mm_madd_epi16(u[2], k__cospi_p24_m08); - v[5] = _mm_madd_epi16(u[3], k__cospi_p24_m08); - v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); - v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); - v[8] = _mm_madd_epi16(u[4], k__cospi_m08_p24); - v[9] = _mm_madd_epi16(u[5], k__cospi_m08_p24); - v[10] = _mm_madd_epi16(u[4], k__cospi_p24_p08); - v[11] = _mm_madd_epi16(u[5], k__cospi_p24_p08); - v[12] = _mm_madd_epi16(u[6], k__cospi_m24_m08); - v[13] = _mm_madd_epi16(u[7], 
k__cospi_m24_m08); - v[14] = _mm_madd_epi16(u[6], k__cospi_m08_p24); - v[15] = _mm_madd_epi16(u[7], k__cospi_m08_p24); - - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); - - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - s[2] = _mm_packs_epi32(u[4], u[5]); - s[3] = _mm_packs_epi32(u[6], u[7]); - s[4] = _mm_add_epi16(t[4], t[5]); - s[5] = _mm_sub_epi16(t[4], t[5]); - s[6] = _mm_sub_epi16(t[7], t[6]); - s[7] = _mm_add_epi16(t[6], t[7]); - s[8] = t[8]; - s[15] = t[15]; - s[9] = _mm_packs_epi32(u[8], u[9]); - s[14] = _mm_packs_epi32(u[10], u[11]); - s[10] = _mm_packs_epi32(u[12], u[13]); - s[13] = _mm_packs_epi32(u[14], u[15]); - s[11] = t[11]; - s[12] = t[12]; - - // stage 5 - t[0] = _mm_add_epi16(s[0], s[3]); - t[1] = _mm_add_epi16(s[1], s[2]); - t[2] = _mm_sub_epi16(s[1], s[2]); - t[3] = _mm_sub_epi16(s[0], s[3]); - t[4] = s[4]; - t[7] = s[7]; - - u[0] = 
_mm_sub_epi16(s[6], s[5]); - u[1] = _mm_add_epi16(s[6], s[5]); - t[5] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2); - t[6] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2); - - t[8] = _mm_add_epi16(s[8], s[11]); - t[9] = _mm_add_epi16(s[9], s[10]); - t[10] = _mm_sub_epi16(s[9], s[10]); - t[11] = _mm_sub_epi16(s[8], s[11]); - t[12] = _mm_sub_epi16(s[15], s[12]); - t[13] = _mm_sub_epi16(s[14], s[13]); - t[14] = _mm_add_epi16(s[13], s[14]); - t[15] = _mm_add_epi16(s[12], s[15]); - - // stage 6 - if (round == 1) { - s[0] = _mm_add_epi16(t[0], t[7]); - s[1] = _mm_add_epi16(t[1], t[6]); - s[2] = _mm_add_epi16(t[2], t[5]); - s[3] = _mm_add_epi16(t[3], t[4]); - s[4] = _mm_sub_epi16(t[3], t[4]); - s[5] = _mm_sub_epi16(t[2], t[5]); - s[6] = _mm_sub_epi16(t[1], t[6]); - s[7] = _mm_sub_epi16(t[0], t[7]); - s[8] = t[8]; - s[9] = t[9]; - - u[0] = _mm_unpacklo_epi16(t[10], t[13]); - u[1] = _mm_unpackhi_epi16(t[10], t[13]); - u[2] = _mm_unpacklo_epi16(t[11], t[12]); - u[3] = _mm_unpackhi_epi16(t[11], t[12]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); - v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16); - v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16); - v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16); - v[4] = _mm_madd_epi16(u[2], k__cospi_m16_p16); - v[5] = _mm_madd_epi16(u[3], k__cospi_m16_p16); - v[6] = _mm_madd_epi16(u[2], k__cospi_p16_p16); - v[7] = _mm_madd_epi16(u[3], k__cospi_p16_p16); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], 
DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - - s[10] = _mm_packs_epi32(u[0], u[1]); - s[13] = _mm_packs_epi32(u[2], u[3]); - s[11] = _mm_packs_epi32(u[4], u[5]); - s[12] = _mm_packs_epi32(u[6], u[7]); - s[14] = t[14]; - s[15] = t[15]; - } else { - s[0] = _mm_add_epi16(t[0], t[7]); - s[1] = _mm_add_epi16(t[1], t[6]); - s[2] = _mm_add_epi16(t[2], t[5]); - s[3] = _mm_add_epi16(t[3], t[4]); - s[4] = _mm_sub_epi16(t[3], t[4]); - s[5] = _mm_sub_epi16(t[2], t[5]); - s[6] = _mm_sub_epi16(t[1], t[6]); - s[7] = _mm_sub_epi16(t[0], t[7]); - s[8] = t[8]; - s[9] = t[9]; - - u[0] = _mm_sub_epi16(t[13], t[10]); - u[1] = _mm_add_epi16(t[13], t[10]); - u[2] = _mm_sub_epi16(t[12], t[11]); - u[3] = _mm_add_epi16(t[12], t[11]); - - s[10] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2); - s[13] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2); - s[11] = _mm_mulhrs_epi16(u[2], k__cospi_p16_p16_x2); - s[12] = _mm_mulhrs_epi16(u[3], k__cospi_p16_p16_x2); - s[14] = t[14]; - s[15] = t[15]; - } - - // stage 7 - in[0] = _mm_add_epi16(s[0], s[15]); - in[1] = _mm_add_epi16(s[1], s[14]); - in[2] = _mm_add_epi16(s[2], s[13]); - in[3] = _mm_add_epi16(s[3], s[12]); - in[4] = _mm_add_epi16(s[4], s[11]); - in[5] = _mm_add_epi16(s[5], s[10]); - in[6] = _mm_add_epi16(s[6], s[9]); - in[7] = _mm_add_epi16(s[7], s[8]); - in[8] = _mm_sub_epi16(s[7], s[8]); - in[9] = _mm_sub_epi16(s[6], s[9]); - in[10] = _mm_sub_epi16(s[5], s[10]); - in[11] = _mm_sub_epi16(s[4], s[11]); - in[12] = _mm_sub_epi16(s[3], s[12]); - in[13] = _mm_sub_epi16(s[2], s[13]); - in[14] = _mm_sub_epi16(s[1], s[14]); - in[15] = _mm_sub_epi16(s[0], s[15]); -} - -static void idct16_sse2(__m128i *in0, __m128i *in1, int round) { - array_transpose_16x16(in0, in1); - idct16_8col(in0, round); - idct16_8col(in1, round); -} - -void vp9_idct16x16_256_add_ssse3(const int16_t *input, uint8_t 
*dest, - int stride) { - __m128i in0[16], in1[16]; - - load_buffer_8x16(input, in0); - input += 8; - load_buffer_8x16(input, in1); - - idct16_sse2(in0, in1, 0); - idct16_sse2(in0, in1, 1); - - write_buffer_8x16(dest, in0, stride); - dest += 8; - write_buffer_8x16(dest, in1, stride); -} - -static void idct16_10_r1(__m128i *in, __m128i *l) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i zero = _mm_setzero_si128(); - - const __m128i stg2_01 = dual_set_epi16(3212, 32610); - const __m128i stg2_67 = dual_set_epi16(-9512, 31358); - const __m128i stg3_01 = dual_set_epi16(6392, 32138); - const __m128i stg4_01 = dual_set_epi16(23170, 23170); - - - - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64); - - __m128i stp1_0, stp1_1, stp1_4, stp1_6, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4; - - // Stage2 - { - const __m128i lo_1_15 = _mm_unpackhi_epi64(in[0], in[0]); - const __m128i lo_13_3 = _mm_unpackhi_epi64(in[1], in[1]); - - stp2_8 = _mm_mulhrs_epi16(lo_1_15, stg2_01); - stp2_11 = _mm_mulhrs_epi16(lo_13_3, stg2_67); - } - - // Stage3 - { - const __m128i lo_2_14 = _mm_unpacklo_epi64(in[1], in[1]); - stp1_4 = _mm_mulhrs_epi16(lo_2_14, stg3_01); - - stp1_13 = _mm_unpackhi_epi64(stp2_11, zero); - stp1_14 = _mm_unpackhi_epi64(stp2_8, zero); - } - - // Stage4 - { - const __m128i lo_0_8 = _mm_unpacklo_epi64(in[0], in[0]); - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp1_14); - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp1_13); - - tmp0 = _mm_mulhrs_epi16(lo_0_8, stg4_01); - tmp1 = _mm_madd_epi16(lo_9_14, stg4_4); - tmp3 = 
_mm_madd_epi16(lo_9_14, stg4_5); - tmp2 = _mm_madd_epi16(lo_10_13, stg4_6); - tmp4 = _mm_madd_epi16(lo_10_13, stg4_7); - - tmp1 = _mm_add_epi32(tmp1, rounding); - tmp3 = _mm_add_epi32(tmp3, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - - stp1_0 = _mm_unpacklo_epi64(tmp0, tmp0); - stp1_1 = _mm_unpackhi_epi64(tmp0, tmp0); - stp2_9 = _mm_packs_epi32(tmp1, tmp3); - stp2_10 = _mm_packs_epi32(tmp2, tmp4); - - stp2_6 = _mm_unpackhi_epi64(stp1_4, zero); - } - - // Stage5 and Stage6 - { - tmp0 = _mm_add_epi16(stp2_8, stp2_11); - tmp1 = _mm_sub_epi16(stp2_8, stp2_11); - tmp2 = _mm_add_epi16(stp2_9, stp2_10); - tmp3 = _mm_sub_epi16(stp2_9, stp2_10); - - stp1_9 = _mm_unpacklo_epi64(tmp2, zero); - stp1_10 = _mm_unpacklo_epi64(tmp3, zero); - stp1_8 = _mm_unpacklo_epi64(tmp0, zero); - stp1_11 = _mm_unpacklo_epi64(tmp1, zero); - - stp1_13 = _mm_unpackhi_epi64(tmp3, zero); - stp1_14 = _mm_unpackhi_epi64(tmp2, zero); - stp1_12 = _mm_unpackhi_epi64(tmp1, zero); - stp1_15 = _mm_unpackhi_epi64(tmp0, zero); - } - - // Stage6 - { - const __m128i lo_6_5 = _mm_add_epi16(stp2_6, stp1_4); - const __m128i lo_6_6 = _mm_sub_epi16(stp2_6, stp1_4); - const __m128i lo_10_13 = _mm_sub_epi16(stp1_13, stp1_10); - const __m128i lo_10_14 = _mm_add_epi16(stp1_13, stp1_10); - const __m128i lo_11_12 = _mm_sub_epi16(stp1_12, stp1_11); - const __m128i lo_11_13 = _mm_add_epi16(stp1_12, stp1_11); - - tmp1 = _mm_unpacklo_epi64(lo_6_5, lo_6_6); - tmp0 = _mm_unpacklo_epi64(lo_10_13, lo_10_14); - tmp4 = _mm_unpacklo_epi64(lo_11_12, lo_11_13); - - stp1_6 = _mm_mulhrs_epi16(tmp1, stg4_01); - tmp0 = _mm_mulhrs_epi16(tmp0, stg4_01); - tmp4 = _mm_mulhrs_epi16(tmp4, stg4_01); - - stp2_10 = _mm_unpacklo_epi64(tmp0, zero); - stp2_13 = _mm_unpackhi_epi64(tmp0, zero); - stp2_11 = 
_mm_unpacklo_epi64(tmp4, zero); - stp2_12 = _mm_unpackhi_epi64(tmp4, zero); - - tmp0 = _mm_add_epi16(stp1_0, stp1_4); - tmp1 = _mm_sub_epi16(stp1_0, stp1_4); - tmp2 = _mm_add_epi16(stp1_1, stp1_6); - tmp3 = _mm_sub_epi16(stp1_1, stp1_6); - - stp2_0 = _mm_unpackhi_epi64(tmp0, zero); - stp2_1 = _mm_unpacklo_epi64(tmp2, zero); - stp2_2 = _mm_unpackhi_epi64(tmp2, zero); - stp2_3 = _mm_unpacklo_epi64(tmp0, zero); - stp2_4 = _mm_unpacklo_epi64(tmp1, zero); - stp2_5 = _mm_unpackhi_epi64(tmp3, zero); - stp2_6 = _mm_unpacklo_epi64(tmp3, zero); - stp2_7 = _mm_unpackhi_epi64(tmp1, zero); - } - - // Stage7. Left 8x16 only. - l[0] = _mm_add_epi16(stp2_0, stp1_15); - l[1] = _mm_add_epi16(stp2_1, stp1_14); - l[2] = _mm_add_epi16(stp2_2, stp2_13); - l[3] = _mm_add_epi16(stp2_3, stp2_12); - l[4] = _mm_add_epi16(stp2_4, stp2_11); - l[5] = _mm_add_epi16(stp2_5, stp2_10); - l[6] = _mm_add_epi16(stp2_6, stp1_9); - l[7] = _mm_add_epi16(stp2_7, stp1_8); - l[8] = _mm_sub_epi16(stp2_7, stp1_8); - l[9] = _mm_sub_epi16(stp2_6, stp1_9); - l[10] = _mm_sub_epi16(stp2_5, stp2_10); - l[11] = _mm_sub_epi16(stp2_4, stp2_11); - l[12] = _mm_sub_epi16(stp2_3, stp2_12); - l[13] = _mm_sub_epi16(stp2_2, stp2_13); - l[14] = _mm_sub_epi16(stp2_1, stp1_14); - l[15] = _mm_sub_epi16(stp2_0, stp1_15); -} - -static void idct16_10_r2(__m128i *in) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - - const __m128i stg2_0 = dual_set_epi16(3212, 3212); - const __m128i stg2_1 = dual_set_epi16(32610, 32610); - const __m128i stg2_6 = dual_set_epi16(-9512, -9512); - const __m128i stg2_7 = dual_set_epi16(31358, 31358); - const __m128i stg3_0 = dual_set_epi16(6392, 6392); - const __m128i stg3_1 = dual_set_epi16(32138, 32138); - const __m128i stg4_01 = dual_set_epi16(23170, 23170); - - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i stg4_7 = 
pair_set_epi16(-cospi_8_64, cospi_24_64); - - __m128i stp1_0, stp1_2, stp1_3, stp1_5, stp1_6, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_8_0, stp1_12_0; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - - /* Stage2 */ - { - stp1_8_0 = _mm_mulhrs_epi16(in[1], stg2_0); - stp1_15 = _mm_mulhrs_epi16(in[1], stg2_1); - stp1_11 = _mm_mulhrs_epi16(in[3], stg2_6); - stp1_12_0 = _mm_mulhrs_epi16(in[3], stg2_7); - } - - /* Stage3 */ - { - stp2_4 = _mm_mulhrs_epi16(in[2], stg3_0); - stp2_7 = _mm_mulhrs_epi16(in[2], stg3_1); - - stp1_9 = stp1_8_0; - stp1_10 = stp1_11; - - stp1_13 = stp1_12_0; - stp1_14 = stp1_15; - } - - /* Stage4 */ - { - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); - - stp1_0 = _mm_mulhrs_epi16(in[0], stg4_01); - - stp2_5 = stp2_4; - stp2_6 = stp2_7; - - - tmp0 = _mm_madd_epi16(lo_9_14, stg4_4); - tmp1 = _mm_madd_epi16(hi_9_14, stg4_4); - tmp2 = _mm_madd_epi16(lo_9_14, stg4_5); - tmp3 = _mm_madd_epi16(hi_9_14, stg4_5); - tmp4 = _mm_madd_epi16(lo_10_13, stg4_6); - tmp5 = _mm_madd_epi16(hi_10_13, stg4_6); - tmp6 = _mm_madd_epi16(lo_10_13, stg4_7); - tmp7 = _mm_madd_epi16(hi_10_13, stg4_7); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp1 = _mm_add_epi32(tmp1, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp3 = _mm_add_epi32(tmp3, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp5 = _mm_add_epi32(tmp5, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); - tmp7 = _mm_add_epi32(tmp7, rounding); - - tmp0 = _mm_srai_epi32(tmp0, 14); - tmp1 = _mm_srai_epi32(tmp1, 14); - tmp2 = _mm_srai_epi32(tmp2, 14); - tmp3 = _mm_srai_epi32(tmp3, 14); - tmp4 = _mm_srai_epi32(tmp4, 14); - 
tmp5 = _mm_srai_epi32(tmp5, 14); - tmp6 = _mm_srai_epi32(tmp6, 14); - tmp7 = _mm_srai_epi32(tmp7, 14); - - stp2_9 = _mm_packs_epi32(tmp0, tmp1); - stp2_14 = _mm_packs_epi32(tmp2, tmp3); - stp2_10 = _mm_packs_epi32(tmp4, tmp5); - stp2_13 = _mm_packs_epi32(tmp6, tmp7); - } - - /* Stage5 */ - { - stp1_2 = stp1_0; - stp1_3 = stp1_0; - - tmp0 = _mm_sub_epi16(stp2_6, stp2_5); - tmp1 = _mm_add_epi16(stp2_6, stp2_5); - - stp1_5 = _mm_mulhrs_epi16(tmp0, stg4_01); - stp1_6 = _mm_mulhrs_epi16(tmp1, stg4_01); - - stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); - stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); - - stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); - stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); - } - - /* Stage6 */ - { - stp2_0 = _mm_add_epi16(stp1_0, stp2_7); - stp2_1 = _mm_add_epi16(stp1_0, stp1_6); - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); - stp2_3 = _mm_add_epi16(stp1_3, stp2_4); - - tmp0 = _mm_sub_epi16(stp1_13, stp1_10); - tmp1 = _mm_add_epi16(stp1_13, stp1_10); - tmp2 = _mm_sub_epi16(stp1_12, stp1_11); - tmp3 = _mm_add_epi16(stp1_12, stp1_11); - - stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); - stp2_6 = _mm_sub_epi16(stp1_0, stp1_6); - stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); - - stp2_10 = _mm_mulhrs_epi16(tmp0, stg4_01); - stp2_13 = _mm_mulhrs_epi16(tmp1, stg4_01); - stp2_11 = _mm_mulhrs_epi16(tmp2, stg4_01); - stp2_12 = _mm_mulhrs_epi16(tmp3, stg4_01); - } - - // Stage7 - in[0] = _mm_add_epi16(stp2_0, stp1_15); - in[1] = _mm_add_epi16(stp2_1, stp1_14); - in[2] = _mm_add_epi16(stp2_2, stp2_13); - in[3] = _mm_add_epi16(stp2_3, stp2_12); - in[4] = _mm_add_epi16(stp2_4, stp2_11); - in[5] = _mm_add_epi16(stp2_5, stp2_10); - in[6] = _mm_add_epi16(stp2_6, stp1_9); - in[7] = _mm_add_epi16(stp2_7, stp1_8); - in[8] = _mm_sub_epi16(stp2_7, stp1_8); - in[9] = 
_mm_sub_epi16(stp2_6, stp1_9); - in[10] = _mm_sub_epi16(stp2_5, stp2_10); - in[11] = _mm_sub_epi16(stp2_4, stp2_11); - in[12] = _mm_sub_epi16(stp2_3, stp2_12); - in[13] = _mm_sub_epi16(stp2_2, stp2_13); - in[14] = _mm_sub_epi16(stp2_1, stp1_14); - in[15] = _mm_sub_epi16(stp2_0, stp1_15); -} - -void vp9_idct16x16_10_add_ssse3(const int16_t *input, uint8_t *dest, - int stride) { - const __m128i final_rounding = _mm_set1_epi16(1<<5); - const __m128i zero = _mm_setzero_si128(); - __m128i in[16], l[16]; - - int i; - // First 1-D inverse DCT - // Load input data. - in[0] = _mm_load_si128((const __m128i *)input); - in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4)); - in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6)); - - TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]); - - idct16_10_r1(in, l); - - // Second 1-D inverse transform, performed per 8x16 block - for (i = 0; i < 2; i++) { - array_transpose_4X8(l + 8*i, in); - - idct16_10_r2(in); - - // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - in[8] = _mm_adds_epi16(in[8], final_rounding); - in[9] = _mm_adds_epi16(in[9], final_rounding); - in[10] = _mm_adds_epi16(in[10], final_rounding); - in[11] = _mm_adds_epi16(in[11], final_rounding); - in[12] = _mm_adds_epi16(in[12], final_rounding); - in[13] = _mm_adds_epi16(in[13], final_rounding); - in[14] = _mm_adds_epi16(in[14], final_rounding); - in[15] = _mm_adds_epi16(in[15], final_rounding); - - in[0] = _mm_srai_epi16(in[0], 6); - in[1] = _mm_srai_epi16(in[1], 6); - in[2] = _mm_srai_epi16(in[2], 6); - in[3] = _mm_srai_epi16(in[3], 
6); - in[4] = _mm_srai_epi16(in[4], 6); - in[5] = _mm_srai_epi16(in[5], 6); - in[6] = _mm_srai_epi16(in[6], 6); - in[7] = _mm_srai_epi16(in[7], 6); - in[8] = _mm_srai_epi16(in[8], 6); - in[9] = _mm_srai_epi16(in[9], 6); - in[10] = _mm_srai_epi16(in[10], 6); - in[11] = _mm_srai_epi16(in[11], 6); - in[12] = _mm_srai_epi16(in[12], 6); - in[13] = _mm_srai_epi16(in[13], 6); - in[14] = _mm_srai_epi16(in[14], 6); - in[15] = _mm_srai_epi16(in[15], 6); - - RECON_AND_STORE(dest, in[0]); - RECON_AND_STORE(dest, in[1]); - RECON_AND_STORE(dest, in[2]); - RECON_AND_STORE(dest, in[3]); - RECON_AND_STORE(dest, in[4]); - RECON_AND_STORE(dest, in[5]); - RECON_AND_STORE(dest, in[6]); - RECON_AND_STORE(dest, in[7]); - RECON_AND_STORE(dest, in[8]); - RECON_AND_STORE(dest, in[9]); - RECON_AND_STORE(dest, in[10]); - RECON_AND_STORE(dest, in[11]); - RECON_AND_STORE(dest, in[12]); - RECON_AND_STORE(dest, in[13]); - RECON_AND_STORE(dest, in[14]); - RECON_AND_STORE(dest, in[15]); - - dest += 8 - (stride * 16); - } -} diff --git a/media/libvpx/vp9/common/x86/vp9_idct_ssse3_x86_64.asm b/media/libvpx/vp9/common/x86/vp9_idct_ssse3_x86_64.asm deleted file mode 100644 index 2c1060710cc..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_idct_ssse3_x86_64.asm +++ /dev/null @@ -1,300 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; -%include "third_party/x86inc/x86inc.asm" - -; This file provides SSSE3 version of the inverse transformation. Part -; of the functions are originally derived from the ffmpeg project. -; Note that the current version applies to x86 64-bit only. 
- -SECTION_RODATA - -pw_11585x2: times 8 dw 23170 -pd_8192: times 4 dd 8192 -pw_16: times 8 dw 16 - -%macro TRANSFORM_COEFFS 2 -pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 -pw_m%2_%1: dw -%2, %1, -%2, %1, -%2, %1, -%2, %1 -%endmacro - -TRANSFORM_COEFFS 6270, 15137 -TRANSFORM_COEFFS 3196, 16069 -TRANSFORM_COEFFS 13623, 9102 - -%macro PAIR_PP_COEFFS 2 -dpw_%1_%2: dw %1, %1, %1, %1, %2, %2, %2, %2 -%endmacro - -%macro PAIR_MP_COEFFS 2 -dpw_m%1_%2: dw -%1, -%1, -%1, -%1, %2, %2, %2, %2 -%endmacro - -%macro PAIR_MM_COEFFS 2 -dpw_m%1_m%2: dw -%1, -%1, -%1, -%1, -%2, -%2, -%2, -%2 -%endmacro - -PAIR_PP_COEFFS 30274, 12540 -PAIR_PP_COEFFS 6392, 32138 -PAIR_MP_COEFFS 18204, 27246 - -PAIR_PP_COEFFS 12540, 12540 -PAIR_PP_COEFFS 30274, 30274 -PAIR_PP_COEFFS 6392, 6392 -PAIR_PP_COEFFS 32138, 32138 -PAIR_MM_COEFFS 18204, 18204 -PAIR_PP_COEFFS 27246, 27246 - -SECTION .text - -%if ARCH_X86_64 -%macro SUM_SUB 3 - psubw m%3, m%1, m%2 - paddw m%1, m%2 - SWAP %2, %3 -%endmacro - -; butterfly operation -%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2 - pmaddwd m%1, m%3, %5 - pmaddwd m%2, m%3, %6 - paddd m%1, %4 - paddd m%2, %4 - psrad m%1, 14 - psrad m%2, 14 -%endmacro - -%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2 - punpckhwd m%6, m%2, m%1 - MUL_ADD_2X %7, %6, %6, %5, [pw_m%4_%3], [pw_%3_%4] - punpcklwd m%2, m%1 - MUL_ADD_2X %1, %2, %2, %5, [pw_m%4_%3], [pw_%3_%4] - packssdw m%1, m%7 - packssdw m%2, m%6 -%endmacro - -; matrix transpose -%macro INTERLEAVE_2X 4 - punpckh%1 m%4, m%2, m%3 - punpckl%1 m%2, m%3 - SWAP %3, %4 -%endmacro - -%macro TRANSPOSE8X8 9 - INTERLEAVE_2X wd, %1, %2, %9 - INTERLEAVE_2X wd, %3, %4, %9 - INTERLEAVE_2X wd, %5, %6, %9 - INTERLEAVE_2X wd, %7, %8, %9 - - INTERLEAVE_2X dq, %1, %3, %9 - INTERLEAVE_2X dq, %2, %4, %9 - INTERLEAVE_2X dq, %5, %7, %9 - INTERLEAVE_2X dq, %6, %8, %9 - - INTERLEAVE_2X qdq, %1, %5, %9 - INTERLEAVE_2X qdq, %3, %7, %9 - INTERLEAVE_2X qdq, %2, %6, %9 - INTERLEAVE_2X qdq, %4, %8, %9 - - SWAP %2, %5 - 
SWAP %4, %7 -%endmacro - -%macro IDCT8_1D 0 - SUM_SUB 0, 4, 9 - BUTTERFLY_4X 2, 6, 6270, 15137, m8, 9, 10 - pmulhrsw m0, m12 - pmulhrsw m4, m12 - BUTTERFLY_4X 1, 7, 3196, 16069, m8, 9, 10 - BUTTERFLY_4X 5, 3, 13623, 9102, m8, 9, 10 - - SUM_SUB 1, 5, 9 - SUM_SUB 7, 3, 9 - SUM_SUB 0, 6, 9 - SUM_SUB 4, 2, 9 - SUM_SUB 3, 5, 9 - pmulhrsw m3, m12 - pmulhrsw m5, m12 - - SUM_SUB 0, 7, 9 - SUM_SUB 4, 3, 9 - SUM_SUB 2, 5, 9 - SUM_SUB 6, 1, 9 - - SWAP 3, 6 - SWAP 1, 4 -%endmacro - -; This macro handles 8 pixels per line -%macro ADD_STORE_8P_2X 5; src1, src2, tmp1, tmp2, zero - paddw m%1, m11 - paddw m%2, m11 - psraw m%1, 5 - psraw m%2, 5 - - movh m%3, [outputq] - movh m%4, [outputq + strideq] - punpcklbw m%3, m%5 - punpcklbw m%4, m%5 - paddw m%3, m%1 - paddw m%4, m%2 - packuswb m%3, m%5 - packuswb m%4, m%5 - movh [outputq], m%3 - movh [outputq + strideq], m%4 -%endmacro - -INIT_XMM ssse3 -; full inverse 8x8 2D-DCT transform -cglobal idct8x8_64_add, 3, 5, 13, input, output, stride - mova m8, [pd_8192] - mova m11, [pw_16] - mova m12, [pw_11585x2] - - lea r3, [2 * strideq] - - mova m0, [inputq + 0] - mova m1, [inputq + 16] - mova m2, [inputq + 32] - mova m3, [inputq + 48] - mova m4, [inputq + 64] - mova m5, [inputq + 80] - mova m6, [inputq + 96] - mova m7, [inputq + 112] - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - IDCT8_1D - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - IDCT8_1D - - pxor m12, m12 - ADD_STORE_8P_2X 0, 1, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 2, 3, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 4, 5, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 6, 7, 9, 10, 12 - - RET - -; inverse 8x8 2D-DCT transform with only first 10 coeffs non-zero -cglobal idct8x8_12_add, 3, 5, 13, input, output, stride - mova m8, [pd_8192] - mova m11, [pw_16] - mova m12, [pw_11585x2] - - lea r3, [2 * strideq] - - mova m0, [inputq + 0] - mova m1, [inputq + 16] - mova m2, [inputq + 32] - mova m3, [inputq + 48] - - punpcklwd m0, m1 - punpcklwd m2, m3 - 
punpckhdq m9, m0, m2 - punpckldq m0, m2 - SWAP 2, 9 - - ; m0 -> [0], [0] - ; m1 -> [1], [1] - ; m2 -> [2], [2] - ; m3 -> [3], [3] - punpckhqdq m10, m0, m0 - punpcklqdq m0, m0 - punpckhqdq m9, m2, m2 - punpcklqdq m2, m2 - SWAP 1, 10 - SWAP 3, 9 - - pmulhrsw m0, m12 - pmulhrsw m2, [dpw_30274_12540] - pmulhrsw m1, [dpw_6392_32138] - pmulhrsw m3, [dpw_m18204_27246] - - SUM_SUB 0, 2, 9 - SUM_SUB 1, 3, 9 - - punpcklqdq m9, m3, m3 - punpckhqdq m5, m3, m9 - - SUM_SUB 3, 5, 9 - punpckhqdq m5, m3 - pmulhrsw m5, m12 - - punpckhqdq m9, m1, m5 - punpcklqdq m1, m5 - SWAP 5, 9 - - SUM_SUB 0, 5, 9 - SUM_SUB 2, 1, 9 - - punpckhqdq m3, m0, m0 - punpckhqdq m4, m1, m1 - punpckhqdq m6, m5, m5 - punpckhqdq m7, m2, m2 - - punpcklwd m0, m3 - punpcklwd m7, m2 - punpcklwd m1, m4 - punpcklwd m6, m5 - - punpckhdq m4, m0, m7 - punpckldq m0, m7 - punpckhdq m10, m1, m6 - punpckldq m5, m1, m6 - - punpckhqdq m1, m0, m5 - punpcklqdq m0, m5 - punpckhqdq m3, m4, m10 - punpcklqdq m2, m4, m10 - - - pmulhrsw m0, m12 - pmulhrsw m6, m2, [dpw_30274_30274] - pmulhrsw m4, m2, [dpw_12540_12540] - - pmulhrsw m7, m1, [dpw_32138_32138] - pmulhrsw m1, [dpw_6392_6392] - pmulhrsw m5, m3, [dpw_m18204_m18204] - pmulhrsw m3, [dpw_27246_27246] - - mova m2, m0 - SUM_SUB 0, 6, 9 - SUM_SUB 2, 4, 9 - SUM_SUB 1, 5, 9 - SUM_SUB 7, 3, 9 - - SUM_SUB 3, 5, 9 - pmulhrsw m3, m12 - pmulhrsw m5, m12 - - SUM_SUB 0, 7, 9 - SUM_SUB 2, 3, 9 - SUM_SUB 4, 5, 9 - SUM_SUB 6, 1, 9 - - SWAP 3, 6 - SWAP 1, 2 - SWAP 2, 4 - - - pxor m12, m12 - ADD_STORE_8P_2X 0, 1, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 2, 3, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 4, 5, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 6, 7, 9, 10, 12 - - RET - -%endif diff --git a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c deleted file mode 100644 index 3bc7d3918b7..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c +++ /dev/null @@ 
-1,544 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "vpx_ports/mem.h" - -// filters for 16_h8 and 16_v8 -DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = { - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -}; - -DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = { - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 -}; - -DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = { - 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, - 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 -}; - -DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 -}; - -#if defined(__clang__) -# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \ - (defined(__APPLE__) && __clang_major__ == 5 && __clang_minor__ == 0) -# define MM256_BROADCASTSI128_SI256(x) \ - _mm_broadcastsi128_si256((__m128i const *)&(x)) -# else // clang > 3.3, and not 5.0 on macosx. 
-# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) -# endif // clang <= 3.3 -#elif defined(__GNUC__) -# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6) -# define MM256_BROADCASTSI128_SI256(x) \ - _mm_broadcastsi128_si256((__m128i const *)&(x)) -# elif __GNUC__ == 4 && __GNUC_MINOR__ == 7 -# define MM256_BROADCASTSI128_SI256(x) _mm_broadcastsi128_si256(x) -# else // gcc > 4.7 -# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) -# endif // gcc <= 4.6 -#else // !(gcc || clang) -# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) -#endif // __clang__ - -void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i filtersReg; - __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg; - __m256i firstFilters, secondFilters, thirdFilters, forthFilters; - __m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3; - __m256i srcReg32b1, srcReg32b2, filtersReg32; - unsigned int i; - unsigned int src_stride, dst_stride; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. 
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - // have the same data in both lanes of a 256 bit register - filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 256 bit register - firstFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 256 bit register - secondFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 256 bit register - thirdFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 256 bit register - forthFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x706u)); - - filt1Reg = _mm256_load_si256((__m256i const *)filt1_global_avx2); - filt2Reg = _mm256_load_si256((__m256i const *)filt2_global_avx2); - filt3Reg = _mm256_load_si256((__m256i const *)filt3_global_avx2); - filt4Reg = _mm256_load_si256((__m256i const *)filt4_global_avx2); - - // multiple the size of the source and destination stride by two - src_stride = src_pixels_per_line << 1; - dst_stride = output_pitch << 1; - for (i = output_height; i > 1; i-=2) { - // load the 2 strides of source - srcReg32b1 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr-3))); - srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, - _mm_loadu_si128((__m128i *) - (src_ptr+src_pixels_per_line-3)), 1); - - // filter the source buffer - srcRegFilt32b1_1= _mm256_shuffle_epi8(srcReg32b1, filt1Reg); - srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); - - // add and saturate the results together - srcRegFilt32b1_1 = 
_mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2); - - // filter the source buffer - srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt2Reg); - srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); - - // add and saturate the results together - srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, - _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2)); - - // reading 2 strides of the next 16 bytes - // (part of it was being read by earlier read) - srcReg32b2 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+5))); - srcReg32b2 = _mm256_inserti128_si256(srcReg32b2, - _mm_loadu_si128((__m128i *) - (src_ptr+src_pixels_per_line+5)), 1); - - // add and saturate the results together - srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, - _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2)); - - // filter the source buffer - srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg); - srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); - - // add and saturate the results together - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2); - - // filter the source buffer - srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt2Reg); - srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b2, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); - - // add and saturate the results together - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, - 
_mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2)); - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, - _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2)); - - - srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg64); - - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 7); - srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, - srcRegFilt32b2_1); - - src_ptr+=src_stride; - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, - _mm256_castsi256_si128(srcRegFilt32b1_1)); - - // save the next 16 bits - _mm_store_si128((__m128i*)(output_ptr+output_pitch), - _mm256_extractf128_si256(srcRegFilt32b1_1, 1)); - output_ptr+=dst_stride; - } - - // if the number of strides is odd. 
- // process only 16 bytes - if (i > 0) { - __m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1; - __m128i srcRegFilt2, srcRegFilt3; - - srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3)); - - // filter the source buffer - srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt1Reg)); - srcRegFilt2 = _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt4Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, - _mm256_castsi256_si128(firstFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(forthFilters)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt2Reg)); - srcRegFilt2= _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt3Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, - _mm256_castsi256_si128(secondFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(thirdFilters)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - - // reading the next 16 bytes - // (part of it was being read by earlier read) - srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - // filter the source buffer - srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt1Reg)); - srcRegFilt2 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt4Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, - _mm256_castsi256_si128(firstFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - 
_mm256_castsi256_si128(forthFilters)); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt2Reg)); - srcRegFilt2 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt3Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, - _mm256_castsi256_si128(secondFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(thirdFilters)); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm256_castsi256_si128(addFilterReg64)); - - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm256_castsi256_si128(addFilterReg64)); - - // shift by 7 bit each 16 bit - srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7); - srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1); - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1); - } -} - -void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i filtersReg; - __m256i addFilterReg64; - __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5; - __m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10; - __m256i srcReg32b11, srcReg32b12, filtersReg32; - __m256i firstFilters, secondFilters, thirdFilters, forthFilters; - unsigned int i; - unsigned int src_stride, dst_stride; - - 
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the - // same data in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - // have the same data in both lanes of a 256 bit register - filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 256 bit register - firstFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 256 bit register - secondFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 256 bit register - thirdFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 256 bit register - forthFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x706u)); - - // multiple the size of the source and destination stride by two - src_stride = src_pitch << 1; - dst_stride = out_pitch << 1; - - // load 16 bytes 7 times in stride of src_pitch - srcReg32b1 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr))); - srcReg32b2 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch))); - srcReg32b3 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*2))); - srcReg32b4 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*3))); - srcReg32b5 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*4))); - srcReg32b6 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*5))); - srcReg32b7 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*6))); - - // have each consecutive 
loads on the same 256 register - srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, - _mm256_castsi256_si128(srcReg32b2), 1); - srcReg32b2 = _mm256_inserti128_si256(srcReg32b2, - _mm256_castsi256_si128(srcReg32b3), 1); - srcReg32b3 = _mm256_inserti128_si256(srcReg32b3, - _mm256_castsi256_si128(srcReg32b4), 1); - srcReg32b4 = _mm256_inserti128_si256(srcReg32b4, - _mm256_castsi256_si128(srcReg32b5), 1); - srcReg32b5 = _mm256_inserti128_si256(srcReg32b5, - _mm256_castsi256_si128(srcReg32b6), 1); - srcReg32b6 = _mm256_inserti128_si256(srcReg32b6, - _mm256_castsi256_si128(srcReg32b7), 1); - - // merge every two consecutive registers except the last one - srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2); - srcReg32b1 = _mm256_unpackhi_epi8(srcReg32b1, srcReg32b2); - - // save - srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4); - - // save - srcReg32b3 = _mm256_unpackhi_epi8(srcReg32b3, srcReg32b4); - - // save - srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6); - - // save - srcReg32b5 = _mm256_unpackhi_epi8(srcReg32b5, srcReg32b6); - - - for (i = output_height; i > 1; i-=2) { - // load the last 2 loads of 16 bytes and have every two - // consecutive loads in the same 256 bit register - srcReg32b8 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7))); - srcReg32b7 = _mm256_inserti128_si256(srcReg32b7, - _mm256_castsi256_si128(srcReg32b8), 1); - srcReg32b9 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*8))); - srcReg32b8 = _mm256_inserti128_si256(srcReg32b8, - _mm256_castsi256_si128(srcReg32b9), 1); - - // merge every two consecutive registers - // save - srcReg32b4 = _mm256_unpacklo_epi8(srcReg32b7, srcReg32b8); - srcReg32b7 = _mm256_unpackhi_epi8(srcReg32b7, srcReg32b8); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters); - srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters); - - // add and saturate 
the results together - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters); - srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters); - - // add and saturate the results together - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, - _mm256_min_epi16(srcReg32b8, srcReg32b12)); - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, - _mm256_max_epi16(srcReg32b8, srcReg32b12)); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters); - srcReg32b6 = _mm256_maddubs_epi16(srcReg32b7, forthFilters); - - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b6); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b8 = _mm256_maddubs_epi16(srcReg32b3, secondFilters); - srcReg32b12 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters); - - // add and saturate the results together - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, - _mm256_min_epi16(srcReg32b8, srcReg32b12)); - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, - _mm256_max_epi16(srcReg32b8, srcReg32b12)); - - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64); - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 7); - srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcReg32b1 = _mm256_packus_epi16(srcReg32b10, srcReg32b1); - - src_ptr+=src_stride; - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, - _mm256_castsi256_si128(srcReg32b1)); - - // save the next 16 bits - _mm_store_si128((__m128i*)(output_ptr+out_pitch), - _mm256_extractf128_si256(srcReg32b1, 1)); - - output_ptr+=dst_stride; - - // save part of the registers for next strides - 
srcReg32b10 = srcReg32b11; - srcReg32b1 = srcReg32b3; - srcReg32b11 = srcReg32b2; - srcReg32b3 = srcReg32b5; - srcReg32b2 = srcReg32b4; - srcReg32b5 = srcReg32b7; - srcReg32b7 = srcReg32b9; - } - if (i > 0) { - __m128i srcRegFilt1, srcRegFilt3, srcRegFilt4, srcRegFilt5; - __m128i srcRegFilt6, srcRegFilt7, srcRegFilt8; - // load the last 16 bytes - srcRegFilt8 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7)); - - // merge the last 2 results together - srcRegFilt4 = _mm_unpacklo_epi8( - _mm256_castsi256_si128(srcReg32b7), srcRegFilt8); - srcRegFilt7 = _mm_unpackhi_epi8( - _mm256_castsi256_si128(srcReg32b7), srcRegFilt8); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b10), - _mm256_castsi256_si128(firstFilters)); - srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, - _mm256_castsi256_si128(forthFilters)); - srcRegFilt3 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b1), - _mm256_castsi256_si128(firstFilters)); - srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, - _mm256_castsi256_si128(forthFilters)); - - // add and saturate the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, srcRegFilt7); - - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt4 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b11), - _mm256_castsi256_si128(secondFilters)); - srcRegFilt5 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b3), - _mm256_castsi256_si128(secondFilters)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt6 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b2), - _mm256_castsi256_si128(thirdFilters)); - srcRegFilt7 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b5), - _mm256_castsi256_si128(thirdFilters)); - - // add and saturate the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_min_epi16(srcRegFilt4, srcRegFilt6)); - srcRegFilt3 
= _mm_adds_epi16(srcRegFilt3, - _mm_min_epi16(srcRegFilt5, srcRegFilt7)); - - // add and saturate the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_max_epi16(srcRegFilt4, srcRegFilt6)); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, - _mm_max_epi16(srcRegFilt5, srcRegFilt7)); - - - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm256_castsi256_si128(addFilterReg64)); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, - _mm256_castsi256_si128(addFilterReg64)); - - // shift by 7 bit each 16 bit - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3); - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); - } -} diff --git a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c b/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c deleted file mode 100644 index c4efa6565f3..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c +++ /dev/null @@ -1,492 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include "vpx_ports/mem.h" -#include "vpx_ports/emmintrin_compat.h" - -// filters only for the 4_h8 convolution -DECLARE_ALIGNED(16, static const uint8_t, filt1_4_h8[16]) = { - 0, 1, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 5, 6 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt2_4_h8[16]) = { - 4, 5, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 9, 10 -}; - -// filters for 8_h8 and 16_h8 -DECLARE_ALIGNED(16, static const uint8_t, filt1_global[16]) = { - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt2_global[16]) = { - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt3_global[16]) = { - 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = { - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 -}; - -void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i firstFilters, secondFilters, shuffle1, shuffle2; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; - __m128i addFilterReg64, filtersReg, srcReg, minReg; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 =_mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. 
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter into the first lane - firstFilters = _mm_shufflelo_epi16(filtersReg, 0); - // duplicate only the third 16 bit in the filter into the first lane - secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu); - // duplicate only the seconds 16 bits in the filter into the second lane - // firstFilters: k0 k1 k0 k1 k0 k1 k0 k1 k2 k3 k2 k3 k2 k3 k2 k3 - firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u); - // duplicate only the forth 16 bits in the filter into the second lane - // secondFilters: k4 k5 k4 k5 k4 k5 k4 k5 k6 k7 k6 k7 k6 k7 k6 k7 - secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu); - - // loading the local filters - shuffle1 =_mm_load_si128((__m128i const *)filt1_4_h8); - shuffle2 = _mm_load_si128((__m128i const *)filt2_4_h8); - - for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3)); - - // filter the source buffer - srcRegFilt1= _mm_shuffle_epi8(srcReg, shuffle1); - srcRegFilt2= _mm_shuffle_epi8(srcReg, shuffle2); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); - - // extract the higher half of the lane - srcRegFilt3 = _mm_srli_si128(srcRegFilt1, 8); - srcRegFilt4 = _mm_srli_si128(srcRegFilt2, 8); - - minReg = _mm_min_epi16(srcRegFilt3, srcRegFilt2); - - // add and saturate all the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - srcRegFilt3 = _mm_max_epi16(srcRegFilt3, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bits - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - 
src_ptr+=src_pixels_per_line; - - // save only 4 bytes - *((int*)&output_ptr[0])= _mm_cvtsi128_si32(srcRegFilt1); - - output_ptr+=output_pitch; - } -} - -void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg; - __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; - __m128i addFilterReg64, filtersReg, minReg; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 128 bit register - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 128 bit register - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 128 bit register - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 128 bit register - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - filt1Reg = _mm_load_si128((__m128i const *)filt1_global); - filt2Reg = _mm_load_si128((__m128i const *)filt2_global); - filt3Reg = _mm_load_si128((__m128i const *)filt3_global); - filt4Reg = _mm_load_si128((__m128i const *)filt4_global); - - for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3)); - - // filter the source buffer - srcRegFilt1= 
_mm_shuffle_epi8(srcReg, filt1Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg, filt2Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg, filt3Reg); - srcRegFilt4= _mm_shuffle_epi8(srcReg, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, thirdFilters); - srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters); - - // add and saturate all the results together - minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - - srcRegFilt2= _mm_max_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bits - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - - src_ptr+=src_pixels_per_line; - - // save only 8 bytes - _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1); - - output_ptr+=output_pitch; - } -} - -void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i addFilterReg64, filtersReg, srcReg1, srcReg2; - __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt1_1, srcRegFilt2_1, srcRegFilt2, srcRegFilt3; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 
16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 128 bit register - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 128 bit register - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 128 bit register - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 128 bit register - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - filt1Reg = _mm_load_si128((__m128i const *)filt1_global); - filt2Reg = _mm_load_si128((__m128i const *)filt2_global); - filt3Reg = _mm_load_si128((__m128i const *)filt3_global); - filt4Reg = _mm_load_si128((__m128i const *)filt4_global); - - for (i = 0; i < output_height; i++) { - srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3)); - - // filter the source buffer - srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, forthFilters); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg1, filt2Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); - - // add and saturate the results together - srcRegFilt1_1 = 
_mm_adds_epi16(srcRegFilt1_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - - // reading the next 16 bytes. - // (part of it was being read by earlier read) - srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - // filter the source buffer - srcRegFilt2_1= _mm_shuffle_epi8(srcReg2, filt1Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, forthFilters); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg2, filt2Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, addFilterReg64); - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7); - srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1); - - src_ptr+=src_pixels_per_line; - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1); - - output_ptr+=output_pitch; - } -} - -void 
vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i addFilterReg64, filtersReg, minReg, srcRegFilt6; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4, srcRegFilt5; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits in the filter - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits in the filter - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits in the filter - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - for (i = 0; i < output_height; i++) { - // load the first 8 bytes - srcRegFilt1 = _mm_loadl_epi64((__m128i *)&src_ptr[0]); - // load the next 8 bytes in stride of src_pitch - srcRegFilt2 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch)[0]); - srcRegFilt3 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*2)[0]); - srcRegFilt4 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*3)[0]); - - // merge the result together - srcRegFilt1 = _mm_unpacklo_epi8(srcRegFilt1, srcRegFilt2); - srcRegFilt3 = _mm_unpacklo_epi8(srcRegFilt3, srcRegFilt4); - - // load the next 8 bytes in stride of src_pitch - srcRegFilt2 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*4)[0]); - srcRegFilt4 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*5)[0]); - srcRegFilt5 
= _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*6)[0]); - srcRegFilt6 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*7)[0]); - - // merge the result together - srcRegFilt2 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt4); - srcRegFilt5 = _mm_unpacklo_epi8(srcRegFilt5, srcRegFilt6); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); - srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, forthFilters); - - // add and saturate the results together - minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt5); - srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - - src_ptr+=src_pitch; - - // save only 8 bytes convolve result - _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1); - - output_ptr+=out_pitch; - } -} - -void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt2, srcRegFilt3; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt4, srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the 
same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits in the filter - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits in the filter - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits in the filter - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - for (i = 0; i < output_height; i++) { - // load the first 16 bytes - srcRegFilt1 = _mm_loadu_si128((__m128i *)(src_ptr)); - // load the next 16 bytes in stride of src_pitch - srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch)); - srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*6)); - srcRegFilt4 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7)); - - // merge the result together - srcRegFilt5 = _mm_unpacklo_epi8(srcRegFilt1, srcRegFilt2); - srcRegFilt6 = _mm_unpacklo_epi8(srcRegFilt3, srcRegFilt4); - srcRegFilt1 = _mm_unpackhi_epi8(srcRegFilt1, srcRegFilt2); - srcRegFilt3 = _mm_unpackhi_epi8(srcRegFilt3, srcRegFilt4); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, firstFilters); - srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, forthFilters); - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters); - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, srcRegFilt6); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3); - - // load the next 16 bytes in stride of two/three src_pitch - srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*2)); - srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*3)); - - // merge the result together - srcRegFilt4 = 
_mm_unpacklo_epi8(srcRegFilt2, srcRegFilt3); - srcRegFilt6 = _mm_unpackhi_epi8(srcRegFilt2, srcRegFilt3); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, secondFilters); - srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, secondFilters); - - // load the next 16 bytes in stride of four/five src_pitch - srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*4)); - srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*5)); - - // merge the result together - srcRegFilt7 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt3); - srcRegFilt8 = _mm_unpackhi_epi8(srcRegFilt2, srcRegFilt3); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, thirdFilters); - srcRegFilt8 = _mm_maddubs_epi16(srcRegFilt8, thirdFilters); - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, - _mm_min_epi16(srcRegFilt4, srcRegFilt7)); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_min_epi16(srcRegFilt6, srcRegFilt8)); - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, - _mm_max_epi16(srcRegFilt4, srcRegFilt7)); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_max_epi16(srcRegFilt6, srcRegFilt8)); - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, addFilterReg64); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt5 = _mm_srai_epi16(srcRegFilt5, 7); - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1 = _mm_packus_epi16(srcRegFilt5, srcRegFilt1); - - src_ptr+=src_pitch; - - // save 16 bytes convolve result - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); - - output_ptr+=out_pitch; - } -} diff --git a/media/libvpx/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm 
b/media/libvpx/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm deleted file mode 100644 index d94ccf2e9b7..00000000000 --- a/media/libvpx/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm +++ /dev/null @@ -1,448 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -%macro GET_PARAM_4 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm3, [rdx] ;load filters - pshuflw xmm4, xmm3, 11111111b ;k3 - psrldq xmm3, 8 - pshuflw xmm3, xmm3, 0b ;k4 - punpcklqdq xmm4, xmm3 ;k3k4 - - movq xmm3, rcx ;rounding - pshufd xmm3, xmm3, 0 - - pxor xmm2, xmm2 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_4 1 - - punpckldq xmm0, xmm1 ;two row in one register - punpcklbw xmm0, xmm2 ;unpack to word - pmullw xmm0, xmm4 ;multiply the filter factors - - movdqa xmm1, xmm0 - psrldq xmm1, 8 - paddsw xmm0, xmm1 - - paddsw xmm0, xmm3 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack to byte - -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - - movd [rdi], xmm0 - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro GET_PARAM 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm7, [rdx] ;load filters - - pshuflw xmm6, xmm7, 11111111b ;k3 - pshufhw xmm7, xmm7, 0b ;k4 - punpcklwd xmm6, xmm6 - punpckhwd xmm7, xmm7 - - movq xmm4, rcx ;rounding - pshufd xmm4, xmm4, 0 - - pxor xmm5, xmm5 - - movsxd rax, DWORD PTR arg(1) 
;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_8 1 - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - - pmullw xmm0, xmm6 - pmullw xmm1, xmm7 - paddsw xmm0, xmm1 - paddsw xmm0, xmm4 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack back to byte -%if %1 - movq xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movq [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro APPLY_FILTER_16 1 - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - punpckhbw xmm2, xmm5 - punpckhbw xmm3, xmm5 - - pmullw xmm0, xmm6 - pmullw xmm1, xmm7 - pmullw xmm2, xmm6 - pmullw xmm3, xmm7 - - paddsw xmm0, xmm1 - paddsw xmm2, xmm3 - - paddsw xmm0, xmm4 ;rounding - paddsw xmm2, xmm4 - psraw xmm0, 7 ;shift - psraw xmm2, 7 - packuswb xmm0, xmm2 ;pack back to byte -%if %1 - movdqu xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movdqu [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -global sym(vp9_filter_block1d4_v2_sse2) PRIVATE -sym(vp9_filter_block1d4_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d8_v2_sse2) PRIVATE -sym(vp9_filter_block1d8_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d16_v2_sse2) PRIVATE -sym(vp9_filter_block1d16_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] 
;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d4_v2_avg_sse2) PRIVATE -sym(vp9_filter_block1d4_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d8_v2_avg_sse2) PRIVATE -sym(vp9_filter_block1d8_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d16_v2_avg_sse2) PRIVATE -sym(vp9_filter_block1d16_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d4_h2_sse2) PRIVATE -sym(vp9_filter_block1d4_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d8_h2_sse2) PRIVATE -sym(vp9_filter_block1d8_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, 
xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d16_h2_sse2) PRIVATE -sym(vp9_filter_block1d16_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d4_h2_avg_sse2) PRIVATE -sym(vp9_filter_block1d4_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d8_h2_avg_sse2) PRIVATE -sym(vp9_filter_block1d8_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d16_h2_avg_sse2) PRIVATE -sym(vp9_filter_block1d16_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/media/libvpx/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm b/media/libvpx/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm deleted file mode 100644 index b5e18fe6d4a..00000000000 --- 
a/media/libvpx/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm +++ /dev/null @@ -1,422 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -%macro GET_PARAM_4 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm3, [rdx] ;load filters - psrldq xmm3, 6 - packsswb xmm3, xmm3 - pshuflw xmm3, xmm3, 0b ;k3_k4 - - movq xmm2, rcx ;rounding - pshufd xmm2, xmm2, 0 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_4 1 - punpcklbw xmm0, xmm1 - pmaddubsw xmm0, xmm3 - - paddsw xmm0, xmm2 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack to byte - -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movd [rdi], xmm0 - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro GET_PARAM 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm7, [rdx] ;load filters - psrldq xmm7, 6 - packsswb xmm7, xmm7 - pshuflw xmm7, xmm7, 0b ;k3_k4 - punpcklwd xmm7, xmm7 - - movq xmm6, rcx ;rounding - pshufd xmm6, xmm6, 0 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_8 1 - punpcklbw xmm0, xmm1 - pmaddubsw xmm0, xmm7 - - paddsw xmm0, xmm6 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack back to byte - -%if %1 - movq xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movq [rdi], xmm0 ;store the result 
- - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro APPLY_FILTER_16 1 - punpcklbw xmm0, xmm1 - punpckhbw xmm2, xmm1 - pmaddubsw xmm0, xmm7 - pmaddubsw xmm2, xmm7 - - paddsw xmm0, xmm6 ;rounding - paddsw xmm2, xmm6 - psraw xmm0, 7 ;shift - psraw xmm2, 7 - packuswb xmm0, xmm2 ;pack back to byte - -%if %1 - movdqu xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movdqu [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -global sym(vp9_filter_block1d4_v2_ssse3) PRIVATE -sym(vp9_filter_block1d4_v2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d8_v2_ssse3) PRIVATE -sym(vp9_filter_block1d8_v2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d16_v2_ssse3) PRIVATE -sym(vp9_filter_block1d16_v2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d4_v2_avg_ssse3) PRIVATE -sym(vp9_filter_block1d4_v2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global 
sym(vp9_filter_block1d8_v2_avg_ssse3) PRIVATE -sym(vp9_filter_block1d8_v2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d16_v2_avg_ssse3) PRIVATE -sym(vp9_filter_block1d16_v2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d4_h2_ssse3) PRIVATE -sym(vp9_filter_block1d4_h2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d8_h2_ssse3) PRIVATE -sym(vp9_filter_block1d8_h2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d16_h2_ssse3) PRIVATE -sym(vp9_filter_block1d16_h2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global 
sym(vp9_filter_block1d4_h2_avg_ssse3) PRIVATE -sym(vp9_filter_block1d4_h2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d8_h2_avg_ssse3) PRIVATE -sym(vp9_filter_block1d8_h2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_filter_block1d16_h2_avg_ssse3) PRIVATE -sym(vp9_filter_block1d16_h2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/media/libvpx/vp9/decoder/vp9_decodeframe.c b/media/libvpx/vp9/decoder/vp9_decodeframe.c deleted file mode 100644 index 4e85caf45cd..00000000000 --- a/media/libvpx/vp9/decoder/vp9_decodeframe.c +++ /dev/null @@ -1,1522 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include // qsort() - -#include "./vp9_rtcd.h" -#include "./vpx_scale_rtcd.h" - -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem_ops.h" -#include "vpx_scale/vpx_scale.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_idct.h" -#include "vp9/common/vp9_pred_common.h" -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_reconintra.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_thread.h" -#include "vp9/common/vp9_tile_common.h" - -#include "vp9/decoder/vp9_decodeframe.h" -#include "vp9/decoder/vp9_detokenize.h" -#include "vp9/decoder/vp9_decodemv.h" -#include "vp9/decoder/vp9_decoder.h" -#include "vp9/decoder/vp9_dsubexp.h" -#include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_read_bit_buffer.h" -#include "vp9/decoder/vp9_reader.h" - -#define MAX_VP9_HEADER_SIZE 80 - -static int is_compound_reference_allowed(const VP9_COMMON *cm) { - int i; - for (i = 1; i < REFS_PER_FRAME; ++i) - if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) - return 1; - - return 0; -} - -static void setup_compound_reference_mode(VP9_COMMON *cm) { - if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[GOLDEN_FRAME]) { - cm->comp_fixed_ref = ALTREF_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = GOLDEN_FRAME; - } else if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[ALTREF_FRAME]) { - cm->comp_fixed_ref = GOLDEN_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } else { - cm->comp_fixed_ref = LAST_FRAME; - cm->comp_var_ref[0] = GOLDEN_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } -} - -static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) { - return len != 0 && len <= (size_t)(end - start); -} - -static int 
decode_unsigned_max(struct vp9_read_bit_buffer *rb, int max) { - const int data = vp9_rb_read_literal(rb, get_unsigned_bits(max)); - return data > max ? max : data; -} - -static TX_MODE read_tx_mode(vp9_reader *r) { - TX_MODE tx_mode = vp9_read_literal(r, 2); - if (tx_mode == ALLOW_32X32) - tx_mode += vp9_read_bit(r); - return tx_mode; -} - -static void read_tx_mode_probs(struct tx_probs *tx_probs, vp9_reader *r) { - int i, j; - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 3; ++j) - vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 2; ++j) - vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 1; ++j) - vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); -} - -static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) { - int i, j; - for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) - for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) - vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); -} - -static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) { - int i, j; - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - for (j = 0; j < INTER_MODES - 1; ++j) - vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); -} - -static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm, - vp9_reader *r) { - if (is_compound_reference_allowed(cm)) { - return vp9_read_bit(r) ? (vp9_read_bit(r) ? 
REFERENCE_MODE_SELECT - : COMPOUND_REFERENCE) - : SINGLE_REFERENCE; - } else { - return SINGLE_REFERENCE; - } -} - -static void read_frame_reference_mode_probs(VP9_COMMON *cm, vp9_reader *r) { - FRAME_CONTEXT *const fc = &cm->fc; - int i; - - if (cm->reference_mode == REFERENCE_MODE_SELECT) - for (i = 0; i < COMP_INTER_CONTEXTS; ++i) - vp9_diff_update_prob(r, &fc->comp_inter_prob[i]); - - if (cm->reference_mode != COMPOUND_REFERENCE) - for (i = 0; i < REF_CONTEXTS; ++i) { - vp9_diff_update_prob(r, &fc->single_ref_prob[i][0]); - vp9_diff_update_prob(r, &fc->single_ref_prob[i][1]); - } - - if (cm->reference_mode != SINGLE_REFERENCE) - for (i = 0; i < REF_CONTEXTS; ++i) - vp9_diff_update_prob(r, &fc->comp_ref_prob[i]); -} - -static void update_mv_probs(vp9_prob *p, int n, vp9_reader *r) { - int i; - for (i = 0; i < n; ++i) - if (vp9_read(r, MV_UPDATE_PROB)) - p[i] = (vp9_read_literal(r, 7) << 1) | 1; -} - -static void read_mv_probs(nmv_context *ctx, int allow_hp, vp9_reader *r) { - int i, j; - - update_mv_probs(ctx->joints, MV_JOINTS - 1, r); - - for (i = 0; i < 2; ++i) { - nmv_component *const comp_ctx = &ctx->comps[i]; - update_mv_probs(&comp_ctx->sign, 1, r); - update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r); - update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r); - update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r); - } - - for (i = 0; i < 2; ++i) { - nmv_component *const comp_ctx = &ctx->comps[i]; - for (j = 0; j < CLASS0_SIZE; ++j) - update_mv_probs(comp_ctx->class0_fp[j], MV_FP_SIZE - 1, r); - update_mv_probs(comp_ctx->fp, 3, r); - } - - if (allow_hp) { - for (i = 0; i < 2; ++i) { - nmv_component *const comp_ctx = &ctx->comps[i]; - update_mv_probs(&comp_ctx->class0_hp, 1, r); - update_mv_probs(&comp_ctx->hp, 1, r); - } - } -} - -static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { - int i; - xd->plane[0].dequant = cm->y_dequant[q_index]; - - for (i = 1; i < MAX_MB_PLANE; i++) - xd->plane[i].dequant = cm->uv_dequant[q_index]; 
-} - -static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, - TX_SIZE tx_size, uint8_t *dst, int stride, - int eob) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - if (eob > 0) { - TX_TYPE tx_type = DCT_DCT; - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - if (xd->lossless) { - tx_type = DCT_DCT; - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - const PLANE_TYPE plane_type = pd->plane_type; - switch (tx_size) { - case TX_4X4: - tx_type = get_tx_type_4x4(plane_type, xd, block); - vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_8X8: - tx_type = get_tx_type(plane_type, xd); - vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_16X16: - tx_type = get_tx_type(plane_type, xd); - vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_32X32: - tx_type = DCT_DCT; - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - } - } - - if (eob == 1) { - vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0])); - } else { - if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) - vpx_memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); - else if (tx_size == TX_32X32 && eob <= 34) - vpx_memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); - else - vpx_memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); - } - } -} - -struct intra_args { - VP9_COMMON *cm; - MACROBLOCKD *xd; - vp9_reader *r; -}; - -static void predict_and_reconstruct_intra_block(int plane, int block, - BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - struct intra_args *const args = (struct intra_args *)arg; - VP9_COMMON *const cm = args->cm; - MACROBLOCKD *const xd = args->xd; - struct macroblockd_plane *const pd = &xd->plane[plane]; - MODE_INFO *const mi = xd->mi[0].src_mi; - const PREDICTION_MODE mode = (plane == 0) ? 
get_y_mode(mi, block) - : mi->mbmi.uv_mode; - int x, y; - uint8_t *dst; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); - dst = &pd->dst.buf[4 * y * pd->dst.stride + 4 * x]; - - vp9_predict_intra_block(xd, block >> (tx_size << 1), - b_width_log2(plane_bsize), tx_size, mode, - dst, pd->dst.stride, dst, pd->dst.stride, - x, y, plane); - - if (!mi->mbmi.skip) { - const int eob = vp9_decode_block_tokens(cm, xd, plane, block, - plane_bsize, x, y, tx_size, - args->r); - inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride, - eob); - } -} - -struct inter_args { - VP9_COMMON *cm; - MACROBLOCKD *xd; - vp9_reader *r; - int *eobtotal; -}; - -static void reconstruct_inter_block(int plane, int block, - BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - struct inter_args *args = (struct inter_args *)arg; - VP9_COMMON *const cm = args->cm; - MACROBLOCKD *const xd = args->xd; - struct macroblockd_plane *const pd = &xd->plane[plane]; - int x, y, eob; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); - eob = vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y, - tx_size, args->r); - inverse_transform_block(xd, plane, block, tx_size, - &pd->dst.buf[4 * y * pd->dst.stride + 4 * x], - pd->dst.stride, eob); - *args->eobtotal += eob; -} - -static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - BLOCK_SIZE bsize, int mi_row, int mi_col) { - const int bw = num_8x8_blocks_wide_lookup[bsize]; - const int bh = num_8x8_blocks_high_lookup[bsize]; - const int x_mis = MIN(bw, cm->mi_cols - mi_col); - const int y_mis = MIN(bh, cm->mi_rows - mi_row); - const int offset = mi_row * cm->mi_stride + mi_col; - int x, y; - - xd->mi = cm->mi + offset; - xd->mi[0].src_mi = &xd->mi[0]; // Point to self. 
- xd->mi[0].mbmi.sb_type = bsize; - - for (y = 0; y < y_mis; ++y) - for (x = !y; x < x_mis; ++x) { - xd->mi[y * cm->mi_stride + x].src_mi = &xd->mi[0]; - } - - set_skip_context(xd, mi_row, mi_col); - - // Distance of Mb to the various image edges. These are specified to 8th pel - // as they are always compared to values that are in 1/8th pel units - set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - - vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); - return &xd->mi[0].mbmi; -} - -static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, - int idx, int mi_row, int mi_col) { - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; - RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME]; - xd->block_refs[idx] = ref_buffer; - if (!vp9_is_valid_scale(&ref_buffer->sf)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid scale factors"); - if (ref_buffer->buf->corrupted) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Block reference is corrupt"); - vp9_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col, - &ref_buffer->sf); - xd->corrupted |= ref_buffer->buf->corrupted; -} - -static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - int mi_row, int mi_col, - vp9_reader *r, BLOCK_SIZE bsize) { - const int less8x8 = bsize < BLOCK_8X8; - MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col); - vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r); - - if (less8x8) - bsize = BLOCK_8X8; - - if (mbmi->skip) { - reset_skip_context(xd, bsize); - } else { - if (cm->seg.enabled) - setup_plane_dequants(cm, xd, vp9_get_qindex(&cm->seg, mbmi->segment_id, - cm->base_qindex)); - } - - if (!is_inter_block(mbmi)) { - struct intra_args arg = { cm, xd, r }; - vp9_foreach_transformed_block(xd, bsize, - predict_and_reconstruct_intra_block, &arg); - } else { - // Setup - set_ref(cm, xd, 0, mi_row, mi_col); - if 
(has_second_ref(mbmi)) - set_ref(cm, xd, 1, mi_row, mi_col); - - // Prediction - vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); - - // Reconstruction - if (!mbmi->skip) { - int eobtotal = 0; - struct inter_args arg = { cm, xd, r, &eobtotal }; - vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg); - if (!less8x8 && eobtotal == 0) - mbmi->skip = 1; // skip loopfilter - } - } - - xd->corrupted |= vp9_reader_has_error(r); -} - -static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, - int mi_row, int mi_col, BLOCK_SIZE bsize, - vp9_reader *r) { - const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); - const vp9_prob *const probs = get_partition_probs(cm, ctx); - const int has_rows = (mi_row + hbs) < cm->mi_rows; - const int has_cols = (mi_col + hbs) < cm->mi_cols; - PARTITION_TYPE p; - - if (has_rows && has_cols) - p = (PARTITION_TYPE)vp9_read_tree(r, vp9_partition_tree, probs); - else if (!has_rows && has_cols) - p = vp9_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; - else if (has_rows && !has_cols) - p = vp9_read(r, probs[2]) ? 
PARTITION_SPLIT : PARTITION_VERT; - else - p = PARTITION_SPLIT; - - if (!cm->frame_parallel_decoding_mode) - ++cm->counts.partition[ctx][p]; - - return p; -} - -static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - int mi_row, int mi_col, - vp9_reader* r, BLOCK_SIZE bsize) { - const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; - PARTITION_TYPE partition; - BLOCK_SIZE subsize, uv_subsize; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; - - partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r); - subsize = get_subsize(bsize, partition); - uv_subsize = ss_size_lookup[subsize][cm->subsampling_x][cm->subsampling_y]; - if (subsize >= BLOCK_8X8 && uv_subsize == BLOCK_INVALID) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid block size."); - if (subsize < BLOCK_8X8) { - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); - } else { - switch (partition) { - case PARTITION_NONE: - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); - break; - case PARTITION_HORZ: - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); - if (mi_row + hbs < cm->mi_rows) - decode_block(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); - break; - case PARTITION_VERT: - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); - if (mi_col + hbs < cm->mi_cols) - decode_block(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); - break; - case PARTITION_SPLIT: - decode_partition(cm, xd, tile, mi_row, mi_col, r, subsize); - decode_partition(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); - decode_partition(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); - decode_partition(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); - break; - default: - assert(0 && "Invalid partition type"); - } - } - - // update partition context - if (bsize >= BLOCK_8X8 && - (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - update_partition_context(xd, mi_row, mi_col, subsize, bsize); -} - -static void 
setup_token_decoder(const uint8_t *data, - const uint8_t *data_end, - size_t read_size, - struct vpx_internal_error_info *error_info, - vp9_reader *r, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - // Validate the calculated partition length. If the buffer - // described by the partition can't be fully read, then restrict - // it to the portion that can be (for EC mode) or throw an error. - if (!read_is_valid(data, read_size, data_end)) - vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile length"); - - if (vp9_reader_init(r, data, read_size, decrypt_cb, decrypt_state)) - vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder %d", 1); -} - -static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs, - vp9_reader *r) { - int i, j, k, l, m; - - if (vp9_read_bit(r)) - for (i = 0; i < PLANE_TYPES; ++i) - for (j = 0; j < REF_TYPES; ++j) - for (k = 0; k < COEF_BANDS; ++k) - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) - for (m = 0; m < UNCONSTRAINED_NODES; ++m) - vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]); -} - -static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, - vp9_reader *r) { - const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; - TX_SIZE tx_size; - for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - read_coef_probs_common(fc->coef_probs[tx_size], r); -} - -static void setup_segmentation(struct segmentation *seg, - struct vp9_read_bit_buffer *rb) { - int i, j; - - seg->update_map = 0; - seg->update_data = 0; - - seg->enabled = vp9_rb_read_bit(rb); - if (!seg->enabled) - return; - - // Segmentation map update - seg->update_map = vp9_rb_read_bit(rb); - if (seg->update_map) { - for (i = 0; i < SEG_TREE_PROBS; i++) - seg->tree_probs[i] = vp9_rb_read_bit(rb) ? 
vp9_rb_read_literal(rb, 8) - : MAX_PROB; - - seg->temporal_update = vp9_rb_read_bit(rb); - if (seg->temporal_update) { - for (i = 0; i < PREDICTION_PROBS; i++) - seg->pred_probs[i] = vp9_rb_read_bit(rb) ? vp9_rb_read_literal(rb, 8) - : MAX_PROB; - } else { - for (i = 0; i < PREDICTION_PROBS; i++) - seg->pred_probs[i] = MAX_PROB; - } - } - - // Segmentation data update - seg->update_data = vp9_rb_read_bit(rb); - if (seg->update_data) { - seg->abs_delta = vp9_rb_read_bit(rb); - - vp9_clearall_segfeatures(seg); - - for (i = 0; i < MAX_SEGMENTS; i++) { - for (j = 0; j < SEG_LVL_MAX; j++) { - int data = 0; - const int feature_enabled = vp9_rb_read_bit(rb); - if (feature_enabled) { - vp9_enable_segfeature(seg, i, j); - data = decode_unsigned_max(rb, vp9_seg_feature_data_max(j)); - if (vp9_is_segfeature_signed(j)) - data = vp9_rb_read_bit(rb) ? -data : data; - } - vp9_set_segdata(seg, i, j, data); - } - } - } -} - -static void setup_loopfilter(struct loopfilter *lf, - struct vp9_read_bit_buffer *rb) { - lf->filter_level = vp9_rb_read_literal(rb, 6); - lf->sharpness_level = vp9_rb_read_literal(rb, 3); - - // Read in loop filter deltas applied at the MB level based on mode or ref - // frame. - lf->mode_ref_delta_update = 0; - - lf->mode_ref_delta_enabled = vp9_rb_read_bit(rb); - if (lf->mode_ref_delta_enabled) { - lf->mode_ref_delta_update = vp9_rb_read_bit(rb); - if (lf->mode_ref_delta_update) { - int i; - - for (i = 0; i < MAX_REF_LF_DELTAS; i++) - if (vp9_rb_read_bit(rb)) - lf->ref_deltas[i] = vp9_rb_read_signed_literal(rb, 6); - - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) - if (vp9_rb_read_bit(rb)) - lf->mode_deltas[i] = vp9_rb_read_signed_literal(rb, 6); - } - } -} - -static int read_delta_q(struct vp9_read_bit_buffer *rb, int *delta_q) { - const int old = *delta_q; - *delta_q = vp9_rb_read_bit(rb) ? 
vp9_rb_read_signed_literal(rb, 4) : 0; - return old != *delta_q; -} - -static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, - struct vp9_read_bit_buffer *rb) { - int update = 0; - - cm->base_qindex = vp9_rb_read_literal(rb, QINDEX_BITS); - update |= read_delta_q(rb, &cm->y_dc_delta_q); - update |= read_delta_q(rb, &cm->uv_dc_delta_q); - update |= read_delta_q(rb, &cm->uv_ac_delta_q); - if (update) - vp9_init_dequantizer(cm); - - xd->lossless = cm->base_qindex == 0 && - cm->y_dc_delta_q == 0 && - cm->uv_dc_delta_q == 0 && - cm->uv_ac_delta_q == 0; -} - -static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) { - const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH, - EIGHTTAP, - EIGHTTAP_SHARP, - BILINEAR }; - return vp9_rb_read_bit(rb) ? SWITCHABLE - : literal_to_filter[vp9_rb_read_literal(rb, 2)]; -} - -void vp9_read_frame_size(struct vp9_read_bit_buffer *rb, - int *width, int *height) { - const int w = vp9_rb_read_literal(rb, 16) + 1; - const int h = vp9_rb_read_literal(rb, 16) + 1; - *width = w; - *height = h; -} - -static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { - cm->display_width = cm->width; - cm->display_height = cm->height; - if (vp9_rb_read_bit(rb)) - vp9_read_frame_size(rb, &cm->display_width, &cm->display_height); -} - -static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { -#if CONFIG_SIZE_LIMIT - if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Width and height beyond allowed size."); -#endif - if (cm->width != width || cm->height != height) { - const int new_mi_rows = - ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2; - const int new_mi_cols = - ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2; - - // Allocations in vp9_alloc_context_buffers() depend on individual - // dimensions as well as the overall size. 
- if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) { - if (vp9_alloc_context_buffers(cm, width, height)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate context buffers"); - } else { - vp9_set_mb_mi(cm, width, height); - } - vp9_init_context_buffers(cm); - cm->width = width; - cm->height = height; - } -} - -static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { - int width, height; - vp9_read_frame_size(rb, &width, &height); - resize_context_buffers(cm, width, height); - setup_display_size(cm, rb); - - if (vp9_realloc_frame_buffer( - get_frame_new_buffer(cm), cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_DEC_BORDER_IN_PIXELS, - &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb, - cm->cb_priv)) { - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffer"); - } - cm->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; -} - -static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth, - int ref_xss, int ref_yss, - vpx_bit_depth_t this_bit_depth, - int this_xss, int this_yss) { - return ref_bit_depth == this_bit_depth && ref_xss == this_xss && - ref_yss == this_yss; -} - -static void setup_frame_size_with_refs(VP9_COMMON *cm, - struct vp9_read_bit_buffer *rb) { - int width, height; - int found = 0, i; - int has_valid_ref_frame = 0; - for (i = 0; i < REFS_PER_FRAME; ++i) { - if (vp9_rb_read_bit(rb)) { - YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; - width = buf->y_crop_width; - height = buf->y_crop_height; - if (buf->corrupted) { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Frame reference is corrupt"); - } - found = 1; - break; - } - } - - if (!found) - vp9_read_frame_size(rb, &width, &height); - - if (width <=0 || height <= 0) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame size"); - - // Check 
to make sure at least one of frames that this frame references - // has valid dimensions. - for (i = 0; i < REFS_PER_FRAME; ++i) { - RefBuffer *const ref_frame = &cm->frame_refs[i]; - has_valid_ref_frame |= valid_ref_frame_size(ref_frame->buf->y_crop_width, - ref_frame->buf->y_crop_height, - width, height); - } - if (!has_valid_ref_frame) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Referenced frame has invalid size"); - for (i = 0; i < REFS_PER_FRAME; ++i) { - RefBuffer *const ref_frame = &cm->frame_refs[i]; - if (!valid_ref_frame_img_fmt( - ref_frame->buf->bit_depth, - ref_frame->buf->uv_crop_width < ref_frame->buf->y_crop_width, - ref_frame->buf->uv_crop_height < ref_frame->buf->y_crop_height, - cm->bit_depth, - cm->subsampling_x, - cm->subsampling_y)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Referenced frame has incompatible color space"); - } - - resize_context_buffers(cm, width, height); - setup_display_size(cm, rb); - - if (vp9_realloc_frame_buffer( - get_frame_new_buffer(cm), cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_DEC_BORDER_IN_PIXELS, - &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb, - cm->cb_priv)) { - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffer"); - } - cm->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; -} - -static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { - int min_log2_tile_cols, max_log2_tile_cols, max_ones; - vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); - - // columns - max_ones = max_log2_tile_cols - min_log2_tile_cols; - cm->log2_tile_cols = min_log2_tile_cols; - while (max_ones-- && vp9_rb_read_bit(rb)) - cm->log2_tile_cols++; - - if (cm->log2_tile_cols > 6) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid number of tile columns"); - - // rows - 
cm->log2_tile_rows = vp9_rb_read_bit(rb); - if (cm->log2_tile_rows) - cm->log2_tile_rows += vp9_rb_read_bit(rb); -} - -typedef struct TileBuffer { - const uint8_t *data; - size_t size; - int col; // only used with multi-threaded decoding -} TileBuffer; - -// Reads the next tile returning its size and adjusting '*data' accordingly -// based on 'is_last'. -static void get_tile_buffer(const uint8_t *const data_end, - int is_last, - struct vpx_internal_error_info *error_info, - const uint8_t **data, - vpx_decrypt_cb decrypt_cb, void *decrypt_state, - TileBuffer *buf) { - size_t size; - - if (!is_last) { - if (!read_is_valid(*data, 4, data_end)) - vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile length"); - - if (decrypt_cb) { - uint8_t be_data[4]; - decrypt_cb(decrypt_state, *data, be_data, 4); - size = mem_get_be32(be_data); - } else { - size = mem_get_be32(*data); - } - *data += 4; - - if (size > (size_t)(data_end - *data)) - vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile size"); - } else { - size = data_end - *data; - } - - buf->data = *data; - buf->size = size; - - *data += size; -} - -static void get_tile_buffers(VP9Decoder *pbi, - const uint8_t *data, const uint8_t *data_end, - int tile_cols, int tile_rows, - TileBuffer (*tile_buffers)[1 << 6]) { - int r, c; - - for (r = 0; r < tile_rows; ++r) { - for (c = 0; c < tile_cols; ++c) { - const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1); - TileBuffer *const buf = &tile_buffers[r][c]; - buf->col = c; - get_tile_buffer(data_end, is_last, &pbi->common.error, &data, - pbi->decrypt_cb, pbi->decrypt_state, buf); - } - } -} - -static const uint8_t *decode_tiles(VP9Decoder *pbi, - const uint8_t *data, - const uint8_t *data_end) { - VP9_COMMON *const cm = &pbi->common; - const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); - const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); - const int tile_cols 
= 1 << cm->log2_tile_cols; - const int tile_rows = 1 << cm->log2_tile_rows; - TileBuffer tile_buffers[4][1 << 6]; - int tile_row, tile_col; - int mi_row, mi_col; - TileData *tile_data = NULL; - - if (cm->lf.filter_level && pbi->lf_worker.data1 == NULL) { - CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, - vpx_memalign(32, sizeof(LFWorkerData))); - pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; - if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Loop filter thread creation failed"); - } - } - - if (cm->lf.filter_level) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - // Be sure to sync as we might be resuming after a failed frame decode. - winterface->sync(&pbi->lf_worker); - lf_data->frame_buffer = get_frame_new_buffer(cm); - lf_data->cm = cm; - vp9_copy(lf_data->planes, pbi->mb.plane); - lf_data->stop = 0; - lf_data->y_only = 0; - vp9_loop_filter_frame_init(cm, cm->lf.filter_level); - } - - assert(tile_rows <= 4); - assert(tile_cols <= (1 << 6)); - - // Note: this memset assumes above_context[0], [1] and [2] - // are allocated as part of the same buffer. - vpx_memset(cm->above_context, 0, - sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); - - vpx_memset(cm->above_seg_context, 0, - sizeof(*cm->above_seg_context) * aligned_cols); - - get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); - - if (pbi->tile_data == NULL || - (tile_cols * tile_rows) != pbi->total_tiles) { - vpx_free(pbi->tile_data); - CHECK_MEM_ERROR( - cm, - pbi->tile_data, - vpx_memalign(32, tile_cols * tile_rows * (sizeof(*pbi->tile_data)))); - pbi->total_tiles = tile_rows * tile_cols; - } - - // Load all tile information into tile_data. 
- for (tile_row = 0; tile_row < tile_rows; ++tile_row) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - TileInfo tile; - const TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; - tile_data = pbi->tile_data + tile_cols * tile_row + tile_col; - tile_data->cm = cm; - tile_data->xd = pbi->mb; - tile_data->xd.corrupted = 0; - vp9_tile_init(&tile, tile_data->cm, tile_row, tile_col); - setup_token_decoder(buf->data, data_end, buf->size, &cm->error, - &tile_data->bit_reader, pbi->decrypt_cb, - pbi->decrypt_state); - init_macroblockd(cm, &tile_data->xd); - vp9_zero(tile_data->xd.dqcoeff); - } - } - - for (tile_row = 0; tile_row < tile_rows; ++tile_row) { - TileInfo tile; - vp9_tile_set_row(&tile, cm, tile_row); - for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; - mi_row += MI_BLOCK_SIZE) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - const int col = pbi->inv_tile_order ? - tile_cols - tile_col - 1 : tile_col; - tile_data = pbi->tile_data + tile_cols * tile_row + col; - vp9_tile_set_col(&tile, tile_data->cm, col); - vp9_zero(tile_data->xd.left_context); - vp9_zero(tile_data->xd.left_seg_context); - for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; - mi_col += MI_BLOCK_SIZE) { - decode_partition(tile_data->cm, &tile_data->xd, &tile, mi_row, mi_col, - &tile_data->bit_reader, BLOCK_64X64); - } - pbi->mb.corrupted |= tile_data->xd.corrupted; - } - // Loopfilter one row. - if (cm->lf.filter_level && !pbi->mb.corrupted) { - const int lf_start = mi_row - MI_BLOCK_SIZE; - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - - // delay the loopfilter by 1 macroblock row. - if (lf_start < 0) continue; - - // decoding has completed: finish up the loop filter in this thread. 
- if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue; - - winterface->sync(&pbi->lf_worker); - lf_data->start = lf_start; - lf_data->stop = mi_row; - if (pbi->max_threads > 1) { - winterface->launch(&pbi->lf_worker); - } else { - winterface->execute(&pbi->lf_worker); - } - } - } - } - - // Loopfilter remaining rows in the frame. - if (cm->lf.filter_level && !pbi->mb.corrupted) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - winterface->sync(&pbi->lf_worker); - lf_data->start = lf_data->stop; - lf_data->stop = cm->mi_rows; - winterface->execute(&pbi->lf_worker); - } - - // Get last tile data. - tile_data = pbi->tile_data + tile_cols * tile_rows - 1; - - return vp9_reader_find_end(&tile_data->bit_reader); -} - -static int tile_worker_hook(TileWorkerData *const tile_data, - const TileInfo *const tile) { - int mi_row, mi_col; - - for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; - mi_row += MI_BLOCK_SIZE) { - vp9_zero(tile_data->xd.left_context); - vp9_zero(tile_data->xd.left_seg_context); - for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; - mi_col += MI_BLOCK_SIZE) { - decode_partition(tile_data->cm, &tile_data->xd, tile, - mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); - } - } - return !tile_data->xd.corrupted; -} - -// sorts in descending order -static int compare_tile_buffers(const void *a, const void *b) { - const TileBuffer *const buf1 = (const TileBuffer*)a; - const TileBuffer *const buf2 = (const TileBuffer*)b; - if (buf1->size < buf2->size) { - return 1; - } else if (buf1->size == buf2->size) { - return 0; - } else { - return -1; - } -} - -static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, - const uint8_t *data, - const uint8_t *data_end) { - VP9_COMMON *const cm = &pbi->common; - const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); - const uint8_t *bit_reader_end = NULL; - const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - const int tile_cols = 1 << 
cm->log2_tile_cols; - const int tile_rows = 1 << cm->log2_tile_rows; - const int num_workers = MIN(pbi->max_threads & ~1, tile_cols); - TileBuffer tile_buffers[1][1 << 6]; - int n; - int final_worker = -1; - - assert(tile_cols <= (1 << 6)); - assert(tile_rows == 1); - (void)tile_rows; - - // TODO(jzern): See if we can remove the restriction of passing in max - // threads to the decoder. - if (pbi->num_tile_workers == 0) { - const int num_threads = pbi->max_threads & ~1; - int i; - // TODO(jzern): Allocate one less worker, as in the current code we only - // use num_threads - 1 workers. - CHECK_MEM_ERROR(cm, pbi->tile_workers, - vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); - for (i = 0; i < num_threads; ++i) { - VP9Worker *const worker = &pbi->tile_workers[i]; - ++pbi->num_tile_workers; - - winterface->init(worker); - CHECK_MEM_ERROR(cm, worker->data1, - vpx_memalign(32, sizeof(TileWorkerData))); - CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo))); - if (i < num_threads - 1 && !winterface->reset(worker)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Tile decoder thread creation failed"); - } - } - } - - // Reset tile decoding hook - for (n = 0; n < num_workers; ++n) { - winterface->sync(&pbi->tile_workers[n]); - pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook; - } - - // Note: this memset assumes above_context[0], [1] and [2] - // are allocated as part of the same buffer. - vpx_memset(cm->above_context, 0, - sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); - vpx_memset(cm->above_seg_context, 0, - sizeof(*cm->above_seg_context) * aligned_mi_cols); - - // Load tile data into tile_buffers - get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); - - // Sort the buffers based on size in descending order. 
- qsort(tile_buffers[0], tile_cols, sizeof(tile_buffers[0][0]), - compare_tile_buffers); - - // Rearrange the tile buffers such that per-tile group the largest, and - // presumably the most difficult, tile will be decoded in the main thread. - // This should help minimize the number of instances where the main thread is - // waiting for a worker to complete. - { - int group_start = 0; - while (group_start < tile_cols) { - const TileBuffer largest = tile_buffers[0][group_start]; - const int group_end = MIN(group_start + num_workers, tile_cols) - 1; - memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1, - (group_end - group_start) * sizeof(tile_buffers[0][0])); - tile_buffers[0][group_end] = largest; - group_start = group_end + 1; - } - } - - n = 0; - while (n < tile_cols) { - int i; - for (i = 0; i < num_workers && n < tile_cols; ++i) { - VP9Worker *const worker = &pbi->tile_workers[i]; - TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; - TileInfo *const tile = (TileInfo*)worker->data2; - TileBuffer *const buf = &tile_buffers[0][n]; - - tile_data->cm = cm; - tile_data->xd = pbi->mb; - tile_data->xd.corrupted = 0; - vp9_tile_init(tile, tile_data->cm, 0, buf->col); - setup_token_decoder(buf->data, data_end, buf->size, &cm->error, - &tile_data->bit_reader, pbi->decrypt_cb, - pbi->decrypt_state); - init_macroblockd(cm, &tile_data->xd); - vp9_zero(tile_data->xd.dqcoeff); - - worker->had_error = 0; - if (i == num_workers - 1 || n == tile_cols - 1) { - winterface->execute(worker); - } else { - winterface->launch(worker); - } - - if (buf->col == tile_cols - 1) { - final_worker = i; - } - - ++n; - } - - for (; i > 0; --i) { - VP9Worker *const worker = &pbi->tile_workers[i - 1]; - pbi->mb.corrupted |= !winterface->sync(worker); - } - if (final_worker > -1) { - TileWorkerData *const tile_data = - (TileWorkerData*)pbi->tile_workers[final_worker].data1; - bit_reader_end = vp9_reader_find_end(&tile_data->bit_reader); - final_worker = -1; - } 
- } - - return bit_reader_end; -} - -static void error_handler(void *data) { - VP9_COMMON *const cm = (VP9_COMMON *)data; - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); -} - -int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb) { - return vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 && - vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 && - vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2; -} - -BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) { - int profile = vp9_rb_read_bit(rb); - profile |= vp9_rb_read_bit(rb) << 1; - if (profile > 2) - profile += vp9_rb_read_bit(rb); - return (BITSTREAM_PROFILE) profile; -} - -static void read_bitdepth_colorspace_sampling( - VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { - if (cm->profile >= PROFILE_2) - cm->bit_depth = vp9_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10; - cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3); - if (cm->color_space != SRGB) { - vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range - if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { - cm->subsampling_x = vp9_rb_read_bit(rb); - cm->subsampling_y = vp9_rb_read_bit(rb); - if (cm->subsampling_x == 1 && cm->subsampling_y == 1) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "4:2:0 color not supported in profile 1 or 3"); - if (vp9_rb_read_bit(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Reserved bit set"); - } else { - cm->subsampling_y = cm->subsampling_x = 1; - } - } else { - if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { - // Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed. - // 4:2:2 or 4:4:0 chroma sampling is not allowed. 
- cm->subsampling_y = cm->subsampling_x = 0; - if (vp9_rb_read_bit(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Reserved bit set"); - } else { - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "4:4:4 color not supported in profile 0 or 2"); - } - } -} - -static size_t read_uncompressed_header(VP9Decoder *pbi, - struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; - size_t sz; - int i; - - cm->last_frame_type = cm->frame_type; - - if (vp9_rb_read_literal(rb, 2) != VP9_FRAME_MARKER) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame marker"); - - cm->profile = vp9_read_profile(rb); - - if (cm->profile >= MAX_PROFILES) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Unsupported bitstream profile"); - - cm->show_existing_frame = vp9_rb_read_bit(rb); - if (cm->show_existing_frame) { - // Show an existing frame directly. - const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)]; - - if (frame_to_show < 0 || cm->frame_bufs[frame_to_show].ref_count < 1) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Buffer %d does not contain a decoded frame", - frame_to_show); - - ref_cnt_fb(cm->frame_bufs, &cm->new_fb_idx, frame_to_show); - pbi->refresh_frame_flags = 0; - cm->lf.filter_level = 0; - cm->show_frame = 1; - return 0; - } - - cm->frame_type = (FRAME_TYPE) vp9_rb_read_bit(rb); - cm->show_frame = vp9_rb_read_bit(rb); - cm->error_resilient_mode = vp9_rb_read_bit(rb); - - if (cm->frame_type == KEY_FRAME) { - if (!vp9_read_sync_code(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame sync code"); - - read_bitdepth_colorspace_sampling(cm, rb); - pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1; - - for (i = 0; i < REFS_PER_FRAME; ++i) { - cm->frame_refs[i].idx = -1; - cm->frame_refs[i].buf = NULL; - } - - setup_frame_size(cm, rb); - pbi->need_resync = 0; - } else { - cm->intra_only = cm->show_frame ? 
0 : vp9_rb_read_bit(rb); - - cm->reset_frame_context = cm->error_resilient_mode ? - 0 : vp9_rb_read_literal(rb, 2); - - if (cm->intra_only) { - if (!vp9_read_sync_code(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame sync code"); - if (cm->profile > PROFILE_0) { - read_bitdepth_colorspace_sampling(cm, rb); - } else { - // NOTE: The intra-only frame header does not include the specification - // of either the color format or color sub-sampling in profile 0. VP9 - // specifies that the default color space should be YUV 4:2:0 in this - // case (normative). - cm->color_space = BT_601; - cm->subsampling_y = cm->subsampling_x = 1; - } - - pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); - setup_frame_size(cm, rb); - pbi->need_resync = 0; - } else { - pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); - for (i = 0; i < REFS_PER_FRAME; ++i) { - const int ref = vp9_rb_read_literal(rb, REF_FRAMES_LOG2); - const int idx = cm->ref_frame_map[ref]; - RefBuffer *const ref_frame = &cm->frame_refs[i]; - ref_frame->idx = idx; - ref_frame->buf = &cm->frame_bufs[idx].buf; - cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb); - } - - setup_frame_size_with_refs(cm, rb); - - cm->allow_high_precision_mv = vp9_rb_read_bit(rb); - cm->interp_filter = read_interp_filter(rb); - - for (i = 0; i < REFS_PER_FRAME; ++i) { - RefBuffer *const ref_buf = &cm->frame_refs[i]; -#if CONFIG_VP9_HIGHBITDEPTH - vp9_setup_scale_factors_for_frame(&ref_buf->sf, - ref_buf->buf->y_crop_width, - ref_buf->buf->y_crop_height, - cm->width, cm->height, - cm->use_highbitdepth); -#else - vp9_setup_scale_factors_for_frame(&ref_buf->sf, - ref_buf->buf->y_crop_width, - ref_buf->buf->y_crop_height, - cm->width, cm->height); -#endif - if (vp9_is_scaled(&ref_buf->sf)) - vp9_extend_frame_borders(ref_buf->buf); - } - } - } - - if (pbi->need_resync) { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Keyframe / intra-only frame required to reset 
decoder" - " state"); - } - - if (!cm->error_resilient_mode) { - cm->refresh_frame_context = vp9_rb_read_bit(rb); - cm->frame_parallel_decoding_mode = vp9_rb_read_bit(rb); - } else { - cm->refresh_frame_context = 0; - cm->frame_parallel_decoding_mode = 1; - } - - // This flag will be overridden by the call to vp9_setup_past_independence - // below, forcing the use of context 0 for those frame types. - cm->frame_context_idx = vp9_rb_read_literal(rb, FRAME_CONTEXTS_LOG2); - - if (frame_is_intra_only(cm) || cm->error_resilient_mode) - vp9_setup_past_independence(cm); - - setup_loopfilter(&cm->lf, rb); - setup_quantization(cm, &pbi->mb, rb); - setup_segmentation(&cm->seg, rb); - - setup_tile_info(cm, rb); - sz = vp9_rb_read_literal(rb, 16); - - if (sz == 0) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid header size"); - - return sz; -} - -static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, - size_t partition_size) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - FRAME_CONTEXT *const fc = &cm->fc; - vp9_reader r; - int k; - - if (vp9_reader_init(&r, data, partition_size, pbi->decrypt_cb, - pbi->decrypt_state)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder 0"); - - cm->tx_mode = xd->lossless ? 
ONLY_4X4 : read_tx_mode(&r); - if (cm->tx_mode == TX_MODE_SELECT) - read_tx_mode_probs(&fc->tx_probs, &r); - read_coef_probs(fc, cm->tx_mode, &r); - - for (k = 0; k < SKIP_CONTEXTS; ++k) - vp9_diff_update_prob(&r, &fc->skip_probs[k]); - - if (!frame_is_intra_only(cm)) { - nmv_context *const nmvc = &fc->nmvc; - int i, j; - - read_inter_mode_probs(fc, &r); - - if (cm->interp_filter == SWITCHABLE) - read_switchable_interp_probs(fc, &r); - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]); - - cm->reference_mode = read_frame_reference_mode(cm, &r); - if (cm->reference_mode != SINGLE_REFERENCE) - setup_compound_reference_mode(cm); - read_frame_reference_mode_probs(cm, &r); - - for (j = 0; j < BLOCK_SIZE_GROUPS; j++) - for (i = 0; i < INTRA_MODES - 1; ++i) - vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]); - - for (j = 0; j < PARTITION_CONTEXTS; ++j) - for (i = 0; i < PARTITION_TYPES - 1; ++i) - vp9_diff_update_prob(&r, &fc->partition_prob[j][i]); - - read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); - } - - return vp9_reader_has_error(&r); -} - -void vp9_init_dequantizer(VP9_COMMON *cm) { - int q; - - for (q = 0; q < QINDEX_RANGE; q++) { - cm->y_dequant[q][0] = vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth); - cm->y_dequant[q][1] = vp9_ac_quant(q, 0, cm->bit_depth); - - cm->uv_dequant[q][0] = vp9_dc_quant(q, cm->uv_dc_delta_q, cm->bit_depth); - cm->uv_dequant[q][1] = vp9_ac_quant(q, cm->uv_ac_delta_q, cm->bit_depth); - } -} - -#ifdef NDEBUG -#define debug_check_frame_counts(cm) (void)0 -#else // !NDEBUG -// Counts should only be incremented when frame_parallel_decoding_mode and -// error_resilient_mode are disabled. 
-static void debug_check_frame_counts(const VP9_COMMON *const cm) { - FRAME_COUNTS zero_counts; - vp9_zero(zero_counts); - assert(cm->frame_parallel_decoding_mode || cm->error_resilient_mode); - assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode, - sizeof(cm->counts.y_mode))); - assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode, - sizeof(cm->counts.uv_mode))); - assert(!memcmp(cm->counts.partition, zero_counts.partition, - sizeof(cm->counts.partition))); - assert(!memcmp(cm->counts.coef, zero_counts.coef, - sizeof(cm->counts.coef))); - assert(!memcmp(cm->counts.eob_branch, zero_counts.eob_branch, - sizeof(cm->counts.eob_branch))); - assert(!memcmp(cm->counts.switchable_interp, zero_counts.switchable_interp, - sizeof(cm->counts.switchable_interp))); - assert(!memcmp(cm->counts.inter_mode, zero_counts.inter_mode, - sizeof(cm->counts.inter_mode))); - assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter, - sizeof(cm->counts.intra_inter))); - assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter, - sizeof(cm->counts.comp_inter))); - assert(!memcmp(cm->counts.single_ref, zero_counts.single_ref, - sizeof(cm->counts.single_ref))); - assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref, - sizeof(cm->counts.comp_ref))); - assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx))); - assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip))); - assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); -} -#endif // NDEBUG - -static struct vp9_read_bit_buffer* init_read_bit_buffer( - VP9Decoder *pbi, - struct vp9_read_bit_buffer *rb, - const uint8_t *data, - const uint8_t *data_end, - uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) { - rb->bit_offset = 0; - rb->error_handler = error_handler; - rb->error_handler_data = &pbi->common; - if (pbi->decrypt_cb) { - const int n = (int)MIN(MAX_VP9_HEADER_SIZE, data_end - data); - pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); - rb->bit_buffer 
= clear_data; - rb->bit_buffer_end = clear_data + n; - } else { - rb->bit_buffer = data; - rb->bit_buffer_end = data_end; - } - return rb; -} - -void vp9_decode_frame(VP9Decoder *pbi, - const uint8_t *data, const uint8_t *data_end, - const uint8_t **p_data_end) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - struct vp9_read_bit_buffer rb = { NULL, NULL, 0, NULL, 0}; - - uint8_t clear_data[MAX_VP9_HEADER_SIZE]; - const size_t first_partition_size = read_uncompressed_header(pbi, - init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); - const int tile_rows = 1 << cm->log2_tile_rows; - const int tile_cols = 1 << cm->log2_tile_cols; - YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); - xd->cur_buf = new_fb; - - if (!first_partition_size) { - // showing a frame directly - *p_data_end = data + (cm->profile <= PROFILE_2 ? 1 : 2); - return; - } - - data += vp9_rb_bytes_read(&rb); - if (!read_is_valid(data, first_partition_size, data_end)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt header length"); - - init_macroblockd(cm, &pbi->mb); - - if (!cm->error_resilient_mode) - set_prev_mi(cm); - else - cm->prev_mi = NULL; - - setup_plane_dequants(cm, xd, cm->base_qindex); - vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); - - cm->fc = cm->frame_contexts[cm->frame_context_idx]; - vp9_zero(cm->counts); - vp9_zero(xd->dqcoeff); - - xd->corrupted = 0; - new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); - - // TODO(jzern): remove frame_parallel_decoding_mode restriction for - // single-frame tile decoding. - if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 && - cm->frame_parallel_decoding_mode) { - *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); - if (!xd->corrupted) { - // If multiple threads are used to decode tiles, then we use those threads - // to do parallel loopfiltering. 
- vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0); - } - } else { - *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); - } - - new_fb->corrupted |= xd->corrupted; - - if (!new_fb->corrupted) { - if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { - vp9_adapt_coef_probs(cm); - - if (!frame_is_intra_only(cm)) { - vp9_adapt_mode_probs(cm); - vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); - } - } else { - debug_check_frame_counts(cm); - } - } else { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Decode failed. Frame data is corrupted."); - } - - if (cm->refresh_frame_context) - cm->frame_contexts[cm->frame_context_idx] = cm->fc; -} diff --git a/media/libvpx/vp9/decoder/vp9_decodeframe.h b/media/libvpx/vp9/decoder/vp9_decodeframe.h deleted file mode 100644 index 10a9e34629b..00000000000 --- a/media/libvpx/vp9/decoder/vp9_decodeframe.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP9_DECODER_VP9_DECODEFRAME_H_ -#define VP9_DECODER_VP9_DECODEFRAME_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; -struct VP9Decoder; -struct vp9_read_bit_buffer; - -void vp9_init_dequantizer(struct VP9Common *cm); - -void vp9_decode_frame(struct VP9Decoder *pbi, - const uint8_t *data, const uint8_t *data_end, - const uint8_t **p_data_end); - -int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb); -void vp9_read_frame_size(struct vp9_read_bit_buffer *rb, - int *width, int *height); -BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DECODEFRAME_H_ diff --git a/media/libvpx/vp9/decoder/vp9_decoder.c b/media/libvpx/vp9/decoder/vp9_decoder.c deleted file mode 100644 index 6ee3d7037fa..00000000000 --- a/media/libvpx/vp9/decoder/vp9_decoder.c +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include -#include - -#include "./vpx_scale_rtcd.h" - -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/vpx_timer.h" -#include "vpx_scale/vpx_scale.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_onyxc_int.h" -#if CONFIG_VP9_POSTPROC -#include "vp9/common/vp9_postproc.h" -#endif -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_reconintra.h" -#include "vp9/common/vp9_systemdependent.h" - -#include "vp9/decoder/vp9_decodeframe.h" -#include "vp9/decoder/vp9_decoder.h" -#include "vp9/decoder/vp9_detokenize.h" -#include "vp9/decoder/vp9_dthread.h" - -static void initialize_dec() { - static int init_done = 0; - - if (!init_done) { - vp9_rtcd(); - vp9_init_neighbors(); - vp9_init_intra_predictors(); - init_done = 1; - } -} - -VP9Decoder *vp9_decoder_create() { - VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi)); - VP9_COMMON *const cm = pbi ? &pbi->common : NULL; - - if (!cm) - return NULL; - - vp9_zero(*pbi); - - if (setjmp(cm->error.jmp)) { - cm->error.setjmp = 0; - vp9_decoder_remove(pbi); - return NULL; - } - - cm->error.setjmp = 1; - pbi->need_resync = 1; - initialize_dec(); - - // Initialize the references to not point to any frame buffers. - vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - - cm->current_video_frame = 0; - pbi->ready_for_new_data = 1; - cm->bit_depth = VPX_BITS_8; - - // vp9_init_dequantizer() is first called here. Add check in - // frame_init_dequantizer() to avoid unnecessary calling of - // vp9_init_dequantizer() for every frame. 
- vp9_init_dequantizer(cm); - - vp9_loop_filter_init(cm); - - cm->error.setjmp = 0; - - vp9_get_worker_interface()->init(&pbi->lf_worker); - - return pbi; -} - -void vp9_decoder_remove(VP9Decoder *pbi) { - VP9_COMMON *const cm = &pbi->common; - int i; - - vp9_get_worker_interface()->end(&pbi->lf_worker); - vpx_free(pbi->lf_worker.data1); - vpx_free(pbi->tile_data); - for (i = 0; i < pbi->num_tile_workers; ++i) { - VP9Worker *const worker = &pbi->tile_workers[i]; - vp9_get_worker_interface()->end(worker); - vpx_free(worker->data1); - vpx_free(worker->data2); - } - vpx_free(pbi->tile_workers); - - if (pbi->num_tile_workers > 0) { - vp9_loop_filter_dealloc(&pbi->lf_row_sync); - } - - vp9_remove_common(cm); - vpx_free(pbi); -} - -static int equal_dimensions(const YV12_BUFFER_CONFIG *a, - const YV12_BUFFER_CONFIG *b) { - return a->y_height == b->y_height && a->y_width == b->y_width && - a->uv_height == b->uv_height && a->uv_width == b->uv_width; -} - -vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { - VP9_COMMON *cm = &pbi->common; - - /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the - * encoder is using the frame buffers for. This is just a stub to keep the - * vpxenc --test-decode functionality working, and will be replaced in a - * later commit that adds VP9-specific controls for this functionality. 
- */ - if (ref_frame_flag == VP9_LAST_FLAG) { - const YV12_BUFFER_CONFIG *const cfg = get_ref_frame(cm, 0); - if (cfg == NULL) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "No 'last' reference frame"); - return VPX_CODEC_ERROR; - } - if (!equal_dimensions(cfg, sd)) - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - else - vp8_yv12_copy_frame(cfg, sd); - } else { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Invalid reference frame"); - } - - return cm->error.error_code; -} - - -vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { - RefBuffer *ref_buf = NULL; - - // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the - // encoder is using the frame buffers for. This is just a stub to keep the - // vpxenc --test-decode functionality working, and will be replaced in a - // later commit that adds VP9-specific controls for this functionality. - if (ref_frame_flag == VP9_LAST_FLAG) { - ref_buf = &cm->frame_refs[0]; - } else if (ref_frame_flag == VP9_GOLD_FLAG) { - ref_buf = &cm->frame_refs[1]; - } else if (ref_frame_flag == VP9_ALT_FLAG) { - ref_buf = &cm->frame_refs[2]; - } else { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Invalid reference frame"); - return cm->error.error_code; - } - - if (!equal_dimensions(ref_buf->buf, sd)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - } else { - int *ref_fb_ptr = &ref_buf->idx; - - // Find an empty frame buffer. - const int free_fb = get_free_fb(cm); - // Decrease ref_count since it will be increased again in - // ref_cnt_fb() below. - cm->frame_bufs[free_fb].ref_count--; - - // Manage the reference counters and copy image. 
- ref_cnt_fb(cm->frame_bufs, ref_fb_ptr, free_fb); - ref_buf->buf = &cm->frame_bufs[*ref_fb_ptr].buf; - vp8_yv12_copy_frame(sd, ref_buf->buf); - } - - return cm->error.error_code; -} - -/* If any buffer updating is signaled it should be done here. */ -static void swap_frame_buffers(VP9Decoder *pbi) { - int ref_index = 0, mask; - VP9_COMMON *const cm = &pbi->common; - - for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { - if (mask & 1) { - const int old_idx = cm->ref_frame_map[ref_index]; - ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[ref_index], - cm->new_fb_idx); - if (old_idx >= 0 && cm->frame_bufs[old_idx].ref_count == 0) - cm->release_fb_cb(cm->cb_priv, - &cm->frame_bufs[old_idx].raw_frame_buffer); - } - ++ref_index; - } - - cm->frame_to_show = get_frame_new_buffer(cm); - cm->frame_bufs[cm->new_fb_idx].ref_count--; - - // Invalidate these references until the next frame starts. - for (ref_index = 0; ref_index < 3; ref_index++) - cm->frame_refs[ref_index].idx = INT_MAX; -} - -int vp9_receive_compressed_data(VP9Decoder *pbi, - size_t size, const uint8_t **psource) { - VP9_COMMON *const cm = &pbi->common; - const uint8_t *source = *psource; - int retcode = 0; - - cm->error.error_code = VPX_CODEC_OK; - - if (size == 0) { - // This is used to signal that we are missing frames. - // We do not know if the missing frame(s) was supposed to update - // any of the reference buffers, but we act conservative and - // mark only the last buffer as corrupted. - // - // TODO(jkoleszar): Error concealment is undefined and non-normative - // at this point, but if it becomes so, [0] may not always be the correct - // thing to do here. - if (cm->frame_refs[0].idx != INT_MAX) - cm->frame_refs[0].buf->corrupted = 1; - } - - // Check if the previous frame was a frame without any references to it. 
- if (cm->new_fb_idx >= 0 && cm->frame_bufs[cm->new_fb_idx].ref_count == 0) - cm->release_fb_cb(cm->cb_priv, - &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer); - cm->new_fb_idx = get_free_fb(cm); - - if (setjmp(cm->error.jmp)) { - pbi->need_resync = 1; - cm->error.setjmp = 0; - vp9_clear_system_state(); - - // We do not know if the missing frame(s) was supposed to update - // any of the reference buffers, but we act conservative and - // mark only the last buffer as corrupted. - // - // TODO(jkoleszar): Error concealment is undefined and non-normative - // at this point, but if it becomes so, [0] may not always be the correct - // thing to do here. - if (cm->frame_refs[0].idx != INT_MAX && cm->frame_refs[0].buf != NULL) - cm->frame_refs[0].buf->corrupted = 1; - - if (cm->new_fb_idx > 0 && cm->frame_bufs[cm->new_fb_idx].ref_count > 0) - cm->frame_bufs[cm->new_fb_idx].ref_count--; - - return -1; - } - - cm->error.setjmp = 1; - - vp9_decode_frame(pbi, source, source + size, psource); - - swap_frame_buffers(pbi); - - vp9_clear_system_state(); - - cm->last_width = cm->width; - cm->last_height = cm->height; - - if (!cm->show_existing_frame) - cm->last_show_frame = cm->show_frame; - if (cm->show_frame) { - if (!cm->show_existing_frame) - vp9_swap_mi_and_prev_mi(cm); - - cm->current_video_frame++; - } - - pbi->ready_for_new_data = 0; - - cm->error.setjmp = 0; - return retcode; -} - -int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, - vp9_ppflags_t *flags) { - VP9_COMMON *const cm = &pbi->common; - int ret = -1; -#if !CONFIG_VP9_POSTPROC - (void)*flags; -#endif - - if (pbi->ready_for_new_data == 1) - return ret; - - /* no raw frame to show!!! 
*/ - if (!cm->show_frame) - return ret; - - pbi->ready_for_new_data = 1; - -#if CONFIG_VP9_POSTPROC - if (!cm->show_existing_frame) { - ret = vp9_post_proc_frame(cm, sd, flags); - } else { - *sd = *cm->frame_to_show; - ret = 0; - } -#else - *sd = *cm->frame_to_show; - ret = 0; -#endif /*!CONFIG_POSTPROC*/ - vp9_clear_system_state(); - return ret; -} - -vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, - size_t data_sz, - uint32_t sizes[8], int *count, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - // A chunk ending with a byte matching 0xc0 is an invalid chunk unless - // it is a super frame index. If the last byte of real video compression - // data is 0xc0 the encoder must add a 0 byte. If we have the marker but - // not the associated matching marker byte at the front of the index we have - // an invalid bitstream and need to return an error. - - uint8_t marker; - - assert(data_sz); - marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1); - *count = 0; - - if ((marker & 0xe0) == 0xc0) { - const uint32_t frames = (marker & 0x7) + 1; - const uint32_t mag = ((marker >> 3) & 0x3) + 1; - const size_t index_sz = 2 + mag * frames; - - // This chunk is marked as having a superframe index but doesn't have - // enough data for it, thus it's an invalid superframe index. - if (data_sz < index_sz) - return VPX_CODEC_CORRUPT_FRAME; - - { - const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state, - data + data_sz - index_sz); - - // This chunk is marked as having a superframe index but doesn't have - // the matching marker byte at the front of the index therefore it's an - // invalid chunk. - if (marker != marker2) - return VPX_CODEC_CORRUPT_FRAME; - } - - { - // Found a valid superframe index. - uint32_t i, j; - const uint8_t *x = &data[data_sz - index_sz + 1]; - - // Frames has a maximum of 8 and mag has a maximum of 4. 
- uint8_t clear_buffer[32]; - assert(sizeof(clear_buffer) >= frames * mag); - if (decrypt_cb) { - decrypt_cb(decrypt_state, x, clear_buffer, frames * mag); - x = clear_buffer; - } - - for (i = 0; i < frames; ++i) { - uint32_t this_sz = 0; - - for (j = 0; j < mag; ++j) - this_sz |= (*x++) << (j * 8); - sizes[i] = this_sz; - } - *count = frames; - } - } - return VPX_CODEC_OK; -} diff --git a/media/libvpx/vp9/decoder/vp9_decoder.h b/media/libvpx/vp9/decoder/vp9_decoder.h deleted file mode 100644 index 4f52bb9c473..00000000000 --- a/media/libvpx/vp9/decoder/vp9_decoder.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_DECODER_VP9_DECODER_H_ -#define VP9_DECODER_VP9_DECODER_H_ - -#include "./vpx_config.h" - -#include "vpx/vpx_codec.h" -#include "vpx_scale/yv12config.h" - -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_ppflags.h" -#include "vp9/common/vp9_thread.h" - -#include "vp9/decoder/vp9_dthread.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// TODO(hkuang): combine this with TileWorkerData. -typedef struct TileData { - VP9_COMMON *cm; - vp9_reader bit_reader; - DECLARE_ALIGNED(16, MACROBLOCKD, xd); -} TileData; - -typedef struct VP9Decoder { - DECLARE_ALIGNED(16, MACROBLOCKD, mb); - - DECLARE_ALIGNED(16, VP9_COMMON, common); - - int ready_for_new_data; - - int refresh_frame_flags; - - int frame_parallel_decode; // frame-based threading. 
- - VP9Worker lf_worker; - VP9Worker *tile_workers; - int num_tile_workers; - - TileData *tile_data; - int total_tiles; - - VP9LfSync lf_row_sync; - - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - - int max_threads; - int inv_tile_order; - int need_resync; // wait for key/intra-only frame -} VP9Decoder; - -int vp9_receive_compressed_data(struct VP9Decoder *pbi, - size_t size, const uint8_t **dest); - -int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, - vp9_ppflags_t *flags); - -vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - -vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - -struct VP9Decoder *vp9_decoder_create(); - -void vp9_decoder_remove(struct VP9Decoder *pbi); - -static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb, - void *decrypt_state, - const uint8_t *data) { - if (decrypt_cb) { - uint8_t marker; - decrypt_cb(decrypt_state, data, &marker, 1); - return marker; - } - return *data; -} - -// This function is exposed for use in tests, as well as the inlined function -// "read_marker". -vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, - size_t data_sz, - uint32_t sizes[8], int *count, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DECODER_H_ diff --git a/media/libvpx/vp9/decoder/vp9_dthread.c b/media/libvpx/vp9/decoder/vp9_dthread.c deleted file mode 100644 index 62ea6c14d25..00000000000 --- a/media/libvpx/vp9/decoder/vp9_dthread.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" - -#include "vpx_mem/vpx_mem.h" - -#include "vp9/common/vp9_reconinter.h" - -#include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_decoder.h" - -#if CONFIG_MULTITHREAD -static INLINE void mutex_lock(pthread_mutex_t *const mutex) { - const int kMaxTryLocks = 4000; - int locked = 0; - int i; - - for (i = 0; i < kMaxTryLocks; ++i) { - if (!pthread_mutex_trylock(mutex)) { - locked = 1; - break; - } - } - - if (!locked) - pthread_mutex_lock(mutex); -} -#endif // CONFIG_MULTITHREAD - -static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) { -#if CONFIG_MULTITHREAD - const int nsync = lf_sync->sync_range; - - if (r && !(c & (nsync - 1))) { - pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1]; - mutex_lock(mutex); - - while (c > lf_sync->cur_sb_col[r - 1] - nsync) { - pthread_cond_wait(&lf_sync->cond_[r - 1], mutex); - } - pthread_mutex_unlock(mutex); - } -#else - (void)lf_sync; - (void)r; - (void)c; -#endif // CONFIG_MULTITHREAD -} - -static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c, - const int sb_cols) { -#if CONFIG_MULTITHREAD - const int nsync = lf_sync->sync_range; - int cur; - // Only signal when there are enough filtered SB for next row to run. - int sig = 1; - - if (c < sb_cols - 1) { - cur = c; - if (c % nsync) - sig = 0; - } else { - cur = sb_cols + nsync; - } - - if (sig) { - mutex_lock(&lf_sync->mutex_[r]); - - lf_sync->cur_sb_col[r] = cur; - - pthread_cond_signal(&lf_sync->cond_[r]); - pthread_mutex_unlock(&lf_sync->mutex_[r]); - } -#else - (void)lf_sync; - (void)r; - (void)c; - (void)sb_cols; -#endif // CONFIG_MULTITHREAD -} - -// Implement row loopfiltering for each thread. 
-static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, - VP9_COMMON *const cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only, - VP9LfSync *const lf_sync, int num_lf_workers) { - const int num_planes = y_only ? 1 : MAX_MB_PLANE; - int r, c; // SB row and col - const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; - - for (r = start; r < stop; r += num_lf_workers) { - const int mi_row = r << MI_BLOCK_SIZE_LOG2; - MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride; - - for (c = 0; c < sb_cols; ++c) { - const int mi_col = c << MI_BLOCK_SIZE_LOG2; - LOOP_FILTER_MASK lfm; - int plane; - - sync_read(lf_sync, r, c); - - vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); - vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); - - for (plane = 0; plane < num_planes; ++plane) { - vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); - } - - sync_write(lf_sync, r, c, sb_cols); - } - } -} - -// Row-based multi-threaded loopfilter hook -static int loop_filter_row_worker(TileWorkerData *const tile_data, - void *unused) { - LFWorkerData *const lf_data = &tile_data->lfdata; - (void)unused; - loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes, - lf_data->start, lf_data->stop, lf_data->y_only, - lf_data->lf_sync, lf_data->num_lf_workers); - return 1; -} - -// VP9 decoder: Implement multi-threaded loopfilter that uses the tile -// threads. 
-void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, - VP9Decoder *pbi, VP9_COMMON *cm, - int frame_filter_level, - int y_only) { - VP9LfSync *const lf_sync = &pbi->lf_row_sync; - const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); - // Number of superblock rows and cols - const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; - const int tile_cols = 1 << cm->log2_tile_cols; - const int num_workers = MIN(pbi->max_threads & ~1, tile_cols); - int i; - - if (!frame_filter_level) return; - - if (!lf_sync->sync_range || cm->last_height != cm->height) { - vp9_loop_filter_dealloc(lf_sync); - vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width); - } - - vp9_loop_filter_frame_init(cm, frame_filter_level); - - // Initialize cur_sb_col to -1 for all SB rows. - vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); - - // Set up loopfilter thread data. - // The decoder is using num_workers instead of pbi->num_tile_workers - // because it has been observed that using more threads on the - // loopfilter, than there are tile columns in the frame will hurt - // performance on Android. This is because the system will only - // schedule the tile decode workers on cores equal to the number - // of tile columns. Then if the decoder tries to use more threads for the - // loopfilter, it will hurt performance because of contention. If the - // multithreading code changes in the future then the number of workers - // used by the loopfilter should be revisited. 
- for (i = 0; i < num_workers; ++i) { - VP9Worker *const worker = &pbi->tile_workers[i]; - TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; - LFWorkerData *const lf_data = &tile_data->lfdata; - - worker->hook = (VP9WorkerHook)loop_filter_row_worker; - - // Loopfilter data - lf_data->frame_buffer = frame; - lf_data->cm = cm; - vp9_copy(lf_data->planes, pbi->mb.plane); - lf_data->start = i; - lf_data->stop = sb_rows; - lf_data->y_only = y_only; // always do all planes in decoder - - lf_data->lf_sync = lf_sync; - lf_data->num_lf_workers = num_workers; - - // Start loopfiltering - if (i == num_workers - 1) { - winterface->execute(worker); - } else { - winterface->launch(worker); - } - } - - // Wait till all rows are finished - for (i = 0; i < num_workers; ++i) { - winterface->sync(&pbi->tile_workers[i]); - } -} - -// Set up nsync by width. -static int get_sync_range(int width) { - // nsync numbers are picked by testing. For example, for 4k - // video, using 4 gives best performance. - if (width < 640) - return 1; - else if (width <= 1280) - return 2; - else if (width <= 4096) - return 4; - else - return 8; -} - -// Allocate memory for lf row synchronization -void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, - int width) { - lf_sync->rows = rows; -#if CONFIG_MULTITHREAD - { - int i; - - CHECK_MEM_ERROR(cm, lf_sync->mutex_, - vpx_malloc(sizeof(*lf_sync->mutex_) * rows)); - for (i = 0; i < rows; ++i) { - pthread_mutex_init(&lf_sync->mutex_[i], NULL); - } - - CHECK_MEM_ERROR(cm, lf_sync->cond_, - vpx_malloc(sizeof(*lf_sync->cond_) * rows)); - for (i = 0; i < rows; ++i) { - pthread_cond_init(&lf_sync->cond_[i], NULL); - } - } -#endif // CONFIG_MULTITHREAD - - CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, - vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); - - // Set up nsync. 
- lf_sync->sync_range = get_sync_range(width); -} - -// Deallocate lf synchronization related mutex and data -void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { - if (lf_sync != NULL) { -#if CONFIG_MULTITHREAD - int i; - - if (lf_sync->mutex_ != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_mutex_destroy(&lf_sync->mutex_[i]); - } - vpx_free(lf_sync->mutex_); - } - if (lf_sync->cond_ != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_cond_destroy(&lf_sync->cond_[i]); - } - vpx_free(lf_sync->cond_); - } -#endif // CONFIG_MULTITHREAD - vpx_free(lf_sync->cur_sb_col); - // clear the structure as the source of this call may be a resize in which - // case this call will be followed by an _alloc() which may fail. - vp9_zero(*lf_sync); - } -} diff --git a/media/libvpx/vp9/decoder/vp9_dthread.h b/media/libvpx/vp9/decoder/vp9_dthread.h deleted file mode 100644 index b1fbdeb74a0..00000000000 --- a/media/libvpx/vp9/decoder/vp9_dthread.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_DECODER_VP9_DTHREAD_H_ -#define VP9_DECODER_VP9_DTHREAD_H_ - -#include "./vpx_config.h" -#include "vp9/common/vp9_thread.h" -#include "vp9/decoder/vp9_reader.h" - -struct VP9Common; -struct VP9Decoder; - -typedef struct TileWorkerData { - struct VP9Common *cm; - vp9_reader bit_reader; - DECLARE_ALIGNED(16, struct macroblockd, xd); - - // Row-based parallel loopfilter data - LFWorkerData lfdata; -} TileWorkerData; - -// Loopfilter row synchronization -typedef struct VP9LfSyncData { -#if CONFIG_MULTITHREAD - pthread_mutex_t *mutex_; - pthread_cond_t *cond_; -#endif - // Allocate memory to store the loop-filtered superblock index in each row. - int *cur_sb_col; - // The optimal sync_range for different resolution and platform should be - // determined by testing. Currently, it is chosen to be a power-of-2 number. - int sync_range; - int rows; -} VP9LfSync; - -// Allocate memory for loopfilter row synchronization. -void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, - int width); - -// Deallocate loopfilter synchronization related mutex and data. -void vp9_loop_filter_dealloc(VP9LfSync *lf_sync); - -// Multi-threaded loopfilter that uses the tile threads. -void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, - struct VP9Decoder *pbi, - struct VP9Common *cm, - int frame_filter_level, - int y_only); - -#endif // VP9_DECODER_VP9_DTHREAD_H_ diff --git a/media/libvpx/vp9/decoder/vp9_read_bit_buffer.c b/media/libvpx/vp9/decoder/vp9_read_bit_buffer.c deleted file mode 100644 index 3eef72844c1..00000000000 --- a/media/libvpx/vp9/decoder/vp9_read_bit_buffer.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "vp9/decoder/vp9_read_bit_buffer.h" - -size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb) { - return (rb->bit_offset + CHAR_BIT - 1) / CHAR_BIT; -} - -int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb) { - const size_t off = rb->bit_offset; - const size_t p = off / CHAR_BIT; - const int q = CHAR_BIT - 1 - (int)off % CHAR_BIT; - if (rb->bit_buffer + p >= rb->bit_buffer_end) { - rb->error_handler(rb->error_handler_data); - return 0; - } else { - const int bit = (rb->bit_buffer[p] & (1 << q)) >> q; - rb->bit_offset = off + 1; - return bit; - } -} - -int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits) { - int value = 0, bit; - for (bit = bits - 1; bit >= 0; bit--) - value |= vp9_rb_read_bit(rb) << bit; - return value; -} - -int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb, - int bits) { - const int value = vp9_rb_read_literal(rb, bits); - return vp9_rb_read_bit(rb) ? -value : value; -} diff --git a/media/libvpx/vp9/decoder/vp9_reader.c b/media/libvpx/vp9/decoder/vp9_reader.c deleted file mode 100644 index 6bb4f9f732b..00000000000 --- a/media/libvpx/vp9/decoder/vp9_reader.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_ports/mem.h" -#include "vpx_mem/vpx_mem.h" - -#include "vp9/decoder/vp9_reader.h" - -// This is meant to be a large, positive constant that can still be efficiently -// loaded as an immediate (on platforms like ARM, for example). -// Even relatively modest values like 100 would work fine. 
-#define LOTS_OF_BITS 0x40000000 - -int vp9_reader_init(vp9_reader *r, - const uint8_t *buffer, - size_t size, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - if (size && !buffer) { - return 1; - } else { - r->buffer_end = buffer + size; - r->buffer = buffer; - r->value = 0; - r->count = -8; - r->range = 255; - r->decrypt_cb = decrypt_cb; - r->decrypt_state = decrypt_state; - vp9_reader_fill(r); - return vp9_read_bit(r) != 0; // marker bit - } -} - -void vp9_reader_fill(vp9_reader *r) { - const uint8_t *const buffer_end = r->buffer_end; - const uint8_t *buffer = r->buffer; - const uint8_t *buffer_start = buffer; - BD_VALUE value = r->value; - int count = r->count; - int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); - int loop_end = 0; - const size_t bytes_left = buffer_end - buffer; - const size_t bits_left = bytes_left * CHAR_BIT; - const int x = (int)(shift + CHAR_BIT - bits_left); - - if (r->decrypt_cb) { - size_t n = MIN(sizeof(r->clear_buffer), bytes_left); - r->decrypt_cb(r->decrypt_state, buffer, r->clear_buffer, (int)n); - buffer = r->clear_buffer; - buffer_start = r->clear_buffer; - } - - if (x >= 0) { - count += LOTS_OF_BITS; - loop_end = x; - } - - if (x < 0 || bits_left) { - while (shift >= loop_end) { - count += CHAR_BIT; - value |= (BD_VALUE)*buffer++ << shift; - shift -= CHAR_BIT; - } - } - - // NOTE: Variable 'buffer' may not relate to 'r->buffer' after decryption, - // so we increase 'r->buffer' by the amount that 'buffer' moved, rather than - // assign 'buffer' to 'r->buffer'. - r->buffer += buffer - buffer_start; - r->value = value; - r->count = count; -} - -const uint8_t *vp9_reader_find_end(vp9_reader *r) { - // Find the end of the coded buffer - while (r->count > CHAR_BIT && r->count < BD_VALUE_SIZE) { - r->count -= CHAR_BIT; - r->buffer--; - } - return r->buffer; -} - -int vp9_reader_has_error(vp9_reader *r) { - // Check if we have reached the end of the buffer. 
- // - // Variable 'count' stores the number of bits in the 'value' buffer, minus - // 8. The top byte is part of the algorithm, and the remainder is buffered - // to be shifted into it. So if count == 8, the top 16 bits of 'value' are - // occupied, 8 for the algorithm and 8 in the buffer. - // - // When reading a byte from the user's buffer, count is filled with 8 and - // one byte is filled into the value buffer. When we reach the end of the - // data, count is additionally filled with LOTS_OF_BITS. So when - // count == LOTS_OF_BITS - 1, the user's data has been exhausted. - // - // 1 if we have tried to decode bits after the end of stream was encountered. - // 0 No error. - return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS; -} diff --git a/media/libvpx/vp9/decoder/vp9_reader.h b/media/libvpx/vp9/decoder/vp9_reader.h deleted file mode 100644 index 2d9eccfbf93..00000000000 --- a/media/libvpx/vp9/decoder/vp9_reader.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_DECODER_VP9_READER_H_ -#define VP9_DECODER_VP9_READER_H_ - -#include -#include - -#include "./vpx_config.h" -#include "vpx_ports/mem.h" -#include "vpx/vp8dx.h" -#include "vpx/vpx_integer.h" - -#include "vp9/common/vp9_prob.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef size_t BD_VALUE; - -#define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT) - -typedef struct { - const uint8_t *buffer_end; - const uint8_t *buffer; - uint8_t clear_buffer[sizeof(BD_VALUE) + 1]; - BD_VALUE value; - int count; - unsigned int range; - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; -} vp9_reader; - -int vp9_reader_init(vp9_reader *r, - const uint8_t *buffer, - size_t size, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state); - -void vp9_reader_fill(vp9_reader *r); - -int vp9_reader_has_error(vp9_reader *r); - -const uint8_t *vp9_reader_find_end(vp9_reader *r); - -static INLINE int vp9_read(vp9_reader *r, int prob) { - unsigned int bit = 0; - BD_VALUE value; - BD_VALUE bigsplit; - int count; - unsigned int range; - unsigned int split = (r->range * prob + (256 - prob)) >> CHAR_BIT; - - if (r->count < 0) - vp9_reader_fill(r); - - value = r->value; - count = r->count; - - bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT); - - range = split; - - if (value >= bigsplit) { - range = r->range - split; - value = value - bigsplit; - bit = 1; - } - - { - register unsigned int shift = vp9_norm[range]; - range <<= shift; - value <<= shift; - count -= shift; - } - r->value = value; - r->count = count; - r->range = range; - - return bit; -} - -static INLINE int vp9_read_bit(vp9_reader *r) { - return vp9_read(r, 128); // vp9_prob_half -} - -static INLINE int vp9_read_literal(vp9_reader *r, int bits) { - int literal = 0, bit; - - for (bit = bits - 1; bit >= 0; bit--) - literal |= vp9_read_bit(r) << bit; - - return literal; -} - -static INLINE int vp9_read_tree(vp9_reader *r, const vp9_tree_index *tree, - const vp9_prob *probs) { - vp9_tree_index i = 0; - - while 
((i = tree[i + vp9_read(r, probs[i >> 1])]) > 0) - continue; - - return -i; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_READER_H_ diff --git a/media/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c b/media/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c deleted file mode 100644 index 6c66f5d5bc9..00000000000 --- a/media/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "./vp9_rtcd.h" -#include "./vpx_config.h" - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_idct.h" - -void vp9_fdct8x8_1_neon(const int16_t *input, int16_t *output, int stride) { - int r; - int16x8_t sum = vld1q_s16(&input[0]); - for (r = 1; r < 8; ++r) { - const int16x8_t input_00 = vld1q_s16(&input[r * stride]); - sum = vaddq_s16(sum, input_00); - } - { - const int32x4_t a = vpaddlq_s16(sum); - const int64x2_t b = vpaddlq_s32(a); - const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), - vreinterpret_s32_s64(vget_high_s64(b))); - output[0] = vget_lane_s16(vreinterpret_s16_s32(c), 0); - output[1] = 0; - } -} - -void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) { - int i; - // stage 1 - int16x8_t input_0 = vshlq_n_s16(vld1q_s16(&input[0 * stride]), 2); - int16x8_t input_1 = vshlq_n_s16(vld1q_s16(&input[1 * stride]), 2); - int16x8_t input_2 = vshlq_n_s16(vld1q_s16(&input[2 * stride]), 2); - int16x8_t input_3 = vshlq_n_s16(vld1q_s16(&input[3 * stride]), 2); - int16x8_t input_4 = vshlq_n_s16(vld1q_s16(&input[4 * stride]), 2); - int16x8_t input_5 = 
vshlq_n_s16(vld1q_s16(&input[5 * stride]), 2); - int16x8_t input_6 = vshlq_n_s16(vld1q_s16(&input[6 * stride]), 2); - int16x8_t input_7 = vshlq_n_s16(vld1q_s16(&input[7 * stride]), 2); - for (i = 0; i < 2; ++i) { - int16x8_t out_0, out_1, out_2, out_3, out_4, out_5, out_6, out_7; - const int16x8_t v_s0 = vaddq_s16(input_0, input_7); - const int16x8_t v_s1 = vaddq_s16(input_1, input_6); - const int16x8_t v_s2 = vaddq_s16(input_2, input_5); - const int16x8_t v_s3 = vaddq_s16(input_3, input_4); - const int16x8_t v_s4 = vsubq_s16(input_3, input_4); - const int16x8_t v_s5 = vsubq_s16(input_2, input_5); - const int16x8_t v_s6 = vsubq_s16(input_1, input_6); - const int16x8_t v_s7 = vsubq_s16(input_0, input_7); - // fdct4(step, step); - int16x8_t v_x0 = vaddq_s16(v_s0, v_s3); - int16x8_t v_x1 = vaddq_s16(v_s1, v_s2); - int16x8_t v_x2 = vsubq_s16(v_s1, v_s2); - int16x8_t v_x3 = vsubq_s16(v_s0, v_s3); - // fdct4(step, step); - int32x4_t v_t0_lo = vaddl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1)); - int32x4_t v_t0_hi = vaddl_s16(vget_high_s16(v_x0), vget_high_s16(v_x1)); - int32x4_t v_t1_lo = vsubl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1)); - int32x4_t v_t1_hi = vsubl_s16(vget_high_s16(v_x0), vget_high_s16(v_x1)); - int32x4_t v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), (int16_t)cospi_24_64); - int32x4_t v_t2_hi = vmull_n_s16(vget_high_s16(v_x2), (int16_t)cospi_24_64); - int32x4_t v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_24_64); - int32x4_t v_t3_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_24_64); - v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), (int16_t)cospi_8_64); - v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), (int16_t)cospi_8_64); - v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), (int16_t)cospi_8_64); - v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x2), (int16_t)cospi_8_64); - v_t0_lo = vmulq_n_s32(v_t0_lo, cospi_16_64); - v_t0_hi = vmulq_n_s32(v_t0_hi, cospi_16_64); - v_t1_lo = vmulq_n_s32(v_t1_lo, cospi_16_64); - v_t1_hi = 
vmulq_n_s32(v_t1_hi, cospi_16_64); - { - const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS); - const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS); - const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS); - const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS); - const int16x4_t e = vrshrn_n_s32(v_t2_lo, DCT_CONST_BITS); - const int16x4_t f = vrshrn_n_s32(v_t2_hi, DCT_CONST_BITS); - const int16x4_t g = vrshrn_n_s32(v_t3_lo, DCT_CONST_BITS); - const int16x4_t h = vrshrn_n_s32(v_t3_hi, DCT_CONST_BITS); - out_0 = vcombine_s16(a, c); // 00 01 02 03 40 41 42 43 - out_2 = vcombine_s16(e, g); // 20 21 22 23 60 61 62 63 - out_4 = vcombine_s16(b, d); // 04 05 06 07 44 45 46 47 - out_6 = vcombine_s16(f, h); // 24 25 26 27 64 65 66 67 - } - // Stage 2 - v_x0 = vsubq_s16(v_s6, v_s5); - v_x1 = vaddq_s16(v_s6, v_s5); - v_t0_lo = vmull_n_s16(vget_low_s16(v_x0), (int16_t)cospi_16_64); - v_t0_hi = vmull_n_s16(vget_high_s16(v_x0), (int16_t)cospi_16_64); - v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), (int16_t)cospi_16_64); - v_t1_hi = vmull_n_s16(vget_high_s16(v_x1), (int16_t)cospi_16_64); - { - const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS); - const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS); - const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS); - const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS); - const int16x8_t ab = vcombine_s16(a, b); - const int16x8_t cd = vcombine_s16(c, d); - // Stage 3 - v_x0 = vaddq_s16(v_s4, ab); - v_x1 = vsubq_s16(v_s4, ab); - v_x2 = vsubq_s16(v_s7, cd); - v_x3 = vaddq_s16(v_s7, cd); - } - // Stage 4 - v_t0_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_4_64); - v_t0_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_4_64); - v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), (int16_t)cospi_28_64); - v_t0_hi = vmlal_n_s16(v_t0_hi, vget_high_s16(v_x0), (int16_t)cospi_28_64); - v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), (int16_t)cospi_12_64); - v_t1_hi = vmull_n_s16(vget_high_s16(v_x1), 
(int16_t)cospi_12_64); - v_t1_lo = vmlal_n_s16(v_t1_lo, vget_low_s16(v_x2), (int16_t)cospi_20_64); - v_t1_hi = vmlal_n_s16(v_t1_hi, vget_high_s16(v_x2), (int16_t)cospi_20_64); - v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), (int16_t)cospi_12_64); - v_t2_hi = vmull_n_s16(vget_high_s16(v_x2), (int16_t)cospi_12_64); - v_t2_lo = vmlsl_n_s16(v_t2_lo, vget_low_s16(v_x1), (int16_t)cospi_20_64); - v_t2_hi = vmlsl_n_s16(v_t2_hi, vget_high_s16(v_x1), (int16_t)cospi_20_64); - v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_28_64); - v_t3_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_28_64); - v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x0), (int16_t)cospi_4_64); - v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x0), (int16_t)cospi_4_64); - { - const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS); - const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS); - const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS); - const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS); - const int16x4_t e = vrshrn_n_s32(v_t2_lo, DCT_CONST_BITS); - const int16x4_t f = vrshrn_n_s32(v_t2_hi, DCT_CONST_BITS); - const int16x4_t g = vrshrn_n_s32(v_t3_lo, DCT_CONST_BITS); - const int16x4_t h = vrshrn_n_s32(v_t3_hi, DCT_CONST_BITS); - out_1 = vcombine_s16(a, c); // 10 11 12 13 50 51 52 53 - out_3 = vcombine_s16(e, g); // 30 31 32 33 70 71 72 73 - out_5 = vcombine_s16(b, d); // 14 15 16 17 54 55 56 57 - out_7 = vcombine_s16(f, h); // 34 35 36 37 74 75 76 77 - } - // transpose 8x8 - { - // 00 01 02 03 40 41 42 43 - // 10 11 12 13 50 51 52 53 - // 20 21 22 23 60 61 62 63 - // 30 31 32 33 70 71 72 73 - // 04 05 06 07 44 45 46 47 - // 14 15 16 17 54 55 56 57 - // 24 25 26 27 64 65 66 67 - // 34 35 36 37 74 75 76 77 - const int32x4x2_t r02_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_0), - vreinterpretq_s32_s16(out_2)); - const int32x4x2_t r13_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_1), - vreinterpretq_s32_s16(out_3)); - const int32x4x2_t r46_s32 = 
vtrnq_s32(vreinterpretq_s32_s16(out_4), - vreinterpretq_s32_s16(out_6)); - const int32x4x2_t r57_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_5), - vreinterpretq_s32_s16(out_7)); - const int16x8x2_t r01_s16 = - vtrnq_s16(vreinterpretq_s16_s32(r02_s32.val[0]), - vreinterpretq_s16_s32(r13_s32.val[0])); - const int16x8x2_t r23_s16 = - vtrnq_s16(vreinterpretq_s16_s32(r02_s32.val[1]), - vreinterpretq_s16_s32(r13_s32.val[1])); - const int16x8x2_t r45_s16 = - vtrnq_s16(vreinterpretq_s16_s32(r46_s32.val[0]), - vreinterpretq_s16_s32(r57_s32.val[0])); - const int16x8x2_t r67_s16 = - vtrnq_s16(vreinterpretq_s16_s32(r46_s32.val[1]), - vreinterpretq_s16_s32(r57_s32.val[1])); - input_0 = r01_s16.val[0]; - input_1 = r01_s16.val[1]; - input_2 = r23_s16.val[0]; - input_3 = r23_s16.val[1]; - input_4 = r45_s16.val[0]; - input_5 = r45_s16.val[1]; - input_6 = r67_s16.val[0]; - input_7 = r67_s16.val[1]; - // 00 10 20 30 40 50 60 70 - // 01 11 21 31 41 51 61 71 - // 02 12 22 32 42 52 62 72 - // 03 13 23 33 43 53 63 73 - // 04 14 24 34 44 54 64 74 - // 05 15 25 35 45 55 65 75 - // 06 16 26 36 46 56 66 76 - // 07 17 27 37 47 57 67 77 - } - } // for - { - // from vp9_dct_sse2.c - // Post-condition (division by two) - // division of two 16 bits signed numbers using shifts - // n / 2 = (n - (n >> 15)) >> 1 - const int16x8_t sign_in0 = vshrq_n_s16(input_0, 15); - const int16x8_t sign_in1 = vshrq_n_s16(input_1, 15); - const int16x8_t sign_in2 = vshrq_n_s16(input_2, 15); - const int16x8_t sign_in3 = vshrq_n_s16(input_3, 15); - const int16x8_t sign_in4 = vshrq_n_s16(input_4, 15); - const int16x8_t sign_in5 = vshrq_n_s16(input_5, 15); - const int16x8_t sign_in6 = vshrq_n_s16(input_6, 15); - const int16x8_t sign_in7 = vshrq_n_s16(input_7, 15); - input_0 = vhsubq_s16(input_0, sign_in0); - input_1 = vhsubq_s16(input_1, sign_in1); - input_2 = vhsubq_s16(input_2, sign_in2); - input_3 = vhsubq_s16(input_3, sign_in3); - input_4 = vhsubq_s16(input_4, sign_in4); - input_5 = vhsubq_s16(input_5, sign_in5); - 
input_6 = vhsubq_s16(input_6, sign_in6); - input_7 = vhsubq_s16(input_7, sign_in7); - // store results - vst1q_s16(&final_output[0 * 8], input_0); - vst1q_s16(&final_output[1 * 8], input_1); - vst1q_s16(&final_output[2 * 8], input_2); - vst1q_s16(&final_output[3 * 8], input_3); - vst1q_s16(&final_output[4 * 8], input_4); - vst1q_s16(&final_output[5 * 8], input_5); - vst1q_s16(&final_output[6 * 8], input_6); - vst1q_s16(&final_output[7 * 8], input_7); - } -} - diff --git a/media/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c b/media/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c deleted file mode 100644 index 8c13d0da672..00000000000 --- a/media/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include - -#include "vpx_mem/vpx_mem.h" - -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_seg_common.h" - -#include "vp9/encoder/vp9_encoder.h" -#include "vp9/encoder/vp9_quantize.h" -#include "vp9/encoder/vp9_rd.h" - -void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, - int skip_block, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, - int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan) { - // TODO(jingning) Decide the need of these arguments after the - // quantization process is completed. 
- (void)zbin_ptr; - (void)quant_shift_ptr; - (void)zbin_oq_value; - (void)scan; - - if (!skip_block) { - // Quantization pass: All coefficients with index >= zero_flag are - // skippable. Note: zero_flag can be zero. - int i; - const int16x8_t v_zero = vdupq_n_s16(0); - const int16x8_t v_one = vdupq_n_s16(1); - int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1); - int16x8_t v_round = vmovq_n_s16(round_ptr[1]); - int16x8_t v_quant = vmovq_n_s16(quant_ptr[1]); - int16x8_t v_dequant = vmovq_n_s16(dequant_ptr[1]); - // adjust for dc - v_round = vsetq_lane_s16(round_ptr[0], v_round, 0); - v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0); - v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0); - // process dc and the first seven ac coeffs - { - const int16x8_t v_iscan = vld1q_s16(&iscan[0]); - const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]); - const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); - const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); - const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp), - vget_low_s16(v_quant)); - const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp), - vget_high_s16(v_quant)); - const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), - vshrn_n_s32(v_tmp_hi, 16)); - const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero); - const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one); - const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); - const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); - const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); - const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); - v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); - vst1q_s16(&qcoeff_ptr[0], v_qcoeff); - vst1q_s16(&dqcoeff_ptr[0], v_dqcoeff); - v_round = vmovq_n_s16(round_ptr[1]); - v_quant = vmovq_n_s16(quant_ptr[1]); - v_dequant = vmovq_n_s16(dequant_ptr[1]); - } - // now process the rest of the ac coeffs - for (i = 8; i < count; i += 8) { - const int16x8_t 
v_iscan = vld1q_s16(&iscan[i]); - const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]); - const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); - const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); - const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp), - vget_low_s16(v_quant)); - const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp), - vget_high_s16(v_quant)); - const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), - vshrn_n_s32(v_tmp_hi, 16)); - const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero); - const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one); - const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); - const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); - const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); - const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); - v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); - vst1q_s16(&qcoeff_ptr[i], v_qcoeff); - vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff); - } - { - const int16x4_t v_eobmax_3210 = - vmax_s16(vget_low_s16(v_eobmax_76543210), - vget_high_s16(v_eobmax_76543210)); - const int64x1_t v_eobmax_xx32 = - vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32); - const int16x4_t v_eobmax_tmp = - vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32)); - const int64x1_t v_eobmax_xxx3 = - vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16); - const int16x4_t v_eobmax_final = - vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3)); - - *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0); - } - } else { - vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t)); - vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t)); - *eob_ptr = 0; - } -} diff --git a/media/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c b/media/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c deleted file mode 100644 index c4cd856804d..00000000000 --- a/media/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2014 The 
WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "./vp9_rtcd.h" -#include "./vpx_config.h" - -#include "vpx/vpx_integer.h" - -static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo, - const uint16x8_t vec_hi) { - const uint32x4_t vec_l_lo = vaddl_u16(vget_low_u16(vec_lo), - vget_high_u16(vec_lo)); - const uint32x4_t vec_l_hi = vaddl_u16(vget_low_u16(vec_hi), - vget_high_u16(vec_hi)); - const uint32x4_t a = vaddq_u32(vec_l_lo, vec_l_hi); - const uint64x2_t b = vpaddlq_u32(a); - const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), - vreinterpret_u32_u64(vget_high_u64(b))); - return vget_lane_u32(c, 0); -} -static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_16x8) { - const uint32x4_t a = vpaddlq_u16(vec_16x8); - const uint64x2_t b = vpaddlq_u32(a); - const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), - vreinterpret_u32_u64(vget_high_u64(b))); - return vget_lane_u32(c, 0); -} - -unsigned int vp9_sad64x64_neon(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride) { - int i; - uint16x8_t vec_accum_lo = vdupq_n_u16(0); - uint16x8_t vec_accum_hi = vdupq_n_u16(0); - for (i = 0; i < 64; ++i) { - const uint8x16_t vec_src_00 = vld1q_u8(src); - const uint8x16_t vec_src_16 = vld1q_u8(src + 16); - const uint8x16_t vec_src_32 = vld1q_u8(src + 32); - const uint8x16_t vec_src_48 = vld1q_u8(src + 48); - const uint8x16_t vec_ref_00 = vld1q_u8(ref); - const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16); - const uint8x16_t vec_ref_32 = vld1q_u8(ref + 32); - const uint8x16_t vec_ref_48 = vld1q_u8(ref + 48); - src += src_stride; - ref += ref_stride; - 
vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_00), - vget_low_u8(vec_ref_00)); - vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_00), - vget_high_u8(vec_ref_00)); - vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_16), - vget_low_u8(vec_ref_16)); - vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_16), - vget_high_u8(vec_ref_16)); - vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_32), - vget_low_u8(vec_ref_32)); - vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_32), - vget_high_u8(vec_ref_32)); - vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_48), - vget_low_u8(vec_ref_48)); - vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_48), - vget_high_u8(vec_ref_48)); - } - return horizontal_long_add_16x8(vec_accum_lo, vec_accum_hi); -} - -unsigned int vp9_sad32x32_neon(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride) { - int i; - uint16x8_t vec_accum_lo = vdupq_n_u16(0); - uint16x8_t vec_accum_hi = vdupq_n_u16(0); - - for (i = 0; i < 32; ++i) { - const uint8x16_t vec_src_00 = vld1q_u8(src); - const uint8x16_t vec_src_16 = vld1q_u8(src + 16); - const uint8x16_t vec_ref_00 = vld1q_u8(ref); - const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16); - src += src_stride; - ref += ref_stride; - vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_00), - vget_low_u8(vec_ref_00)); - vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_00), - vget_high_u8(vec_ref_00)); - vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_16), - vget_low_u8(vec_ref_16)); - vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_16), - vget_high_u8(vec_ref_16)); - } - return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi)); -} - -unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride) { - int i; - uint16x8_t vec_accum_lo = vdupq_n_u16(0); - uint16x8_t vec_accum_hi = vdupq_n_u16(0); - - for (i = 0; i < 16; ++i) { - const 
uint8x16_t vec_src = vld1q_u8(src); - const uint8x16_t vec_ref = vld1q_u8(ref); - src += src_stride; - ref += ref_stride; - vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src), - vget_low_u8(vec_ref)); - vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src), - vget_high_u8(vec_ref)); - } - return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi)); -} - -unsigned int vp9_sad8x8_neon(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride) { - int i; - uint16x8_t vec_accum = vdupq_n_u16(0); - - for (i = 0; i < 8; ++i) { - const uint8x8_t vec_src = vld1_u8(src); - const uint8x8_t vec_ref = vld1_u8(ref); - src += src_stride; - ref += ref_stride; - vec_accum = vabal_u8(vec_accum, vec_src, vec_ref); - } - return horizontal_add_16x8(vec_accum); -} diff --git a/media/libvpx/vp9/encoder/arm/neon/vp9_subtract_neon.c b/media/libvpx/vp9/encoder/arm/neon/vp9_subtract_neon.c deleted file mode 100644 index b4bf567db99..00000000000 --- a/media/libvpx/vp9/encoder/arm/neon/vp9_subtract_neon.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include "./vp9_rtcd.h" -#include "./vpx_config.h" - -#include "vpx/vpx_integer.h" - -void vp9_subtract_block_neon(int rows, int cols, - int16_t *diff, ptrdiff_t diff_stride, - const uint8_t *src, ptrdiff_t src_stride, - const uint8_t *pred, ptrdiff_t pred_stride) { - int r, c; - - if (cols > 16) { - for (r = 0; r < rows; ++r) { - for (c = 0; c < cols; c += 32) { - const uint8x16_t v_src_00 = vld1q_u8(&src[c + 0]); - const uint8x16_t v_src_16 = vld1q_u8(&src[c + 16]); - const uint8x16_t v_pred_00 = vld1q_u8(&pred[c + 0]); - const uint8x16_t v_pred_16 = vld1q_u8(&pred[c + 16]); - const uint16x8_t v_diff_lo_00 = vsubl_u8(vget_low_u8(v_src_00), - vget_low_u8(v_pred_00)); - const uint16x8_t v_diff_hi_00 = vsubl_u8(vget_high_u8(v_src_00), - vget_high_u8(v_pred_00)); - const uint16x8_t v_diff_lo_16 = vsubl_u8(vget_low_u8(v_src_16), - vget_low_u8(v_pred_16)); - const uint16x8_t v_diff_hi_16 = vsubl_u8(vget_high_u8(v_src_16), - vget_high_u8(v_pred_16)); - vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(v_diff_lo_00)); - vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(v_diff_hi_00)); - vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(v_diff_lo_16)); - vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(v_diff_hi_16)); - } - diff += diff_stride; - pred += pred_stride; - src += src_stride; - } - } else if (cols > 8) { - for (r = 0; r < rows; ++r) { - const uint8x16_t v_src = vld1q_u8(&src[0]); - const uint8x16_t v_pred = vld1q_u8(&pred[0]); - const uint16x8_t v_diff_lo = vsubl_u8(vget_low_u8(v_src), - vget_low_u8(v_pred)); - const uint16x8_t v_diff_hi = vsubl_u8(vget_high_u8(v_src), - vget_high_u8(v_pred)); - vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff_lo)); - vst1q_s16(&diff[8], vreinterpretq_s16_u16(v_diff_hi)); - diff += diff_stride; - pred += pred_stride; - src += src_stride; - } - } else if (cols > 4) { - for (r = 0; r < rows; ++r) { - const uint8x8_t v_src = vld1_u8(&src[0]); - const uint8x8_t v_pred = vld1_u8(&pred[0]); - const uint16x8_t v_diff = 
vsubl_u8(v_src, v_pred); - vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff)); - diff += diff_stride; - pred += pred_stride; - src += src_stride; - } - } else { - for (r = 0; r < rows; ++r) { - for (c = 0; c < cols; ++c) - diff[c] = src[c] - pred[c]; - - diff += diff_stride; - pred += pred_stride; - src += src_stride; - } - } -} diff --git a/media/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c b/media/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c deleted file mode 100644 index 816fbda1fbe..00000000000 --- a/media/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include "./vp9_rtcd.h" - -#include "vpx_ports/mem.h" -#include "vpx/vpx_integer.h" - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_filter.h" - -#include "vp9/encoder/vp9_variance.h" - -enum { kWidth8 = 8 }; -enum { kHeight8 = 8 }; -enum { kHeight8PlusOne = 9 }; -enum { kWidth16 = 16 }; -enum { kHeight16 = 16 }; -enum { kHeight16PlusOne = 17 }; -enum { kWidth32 = 32 }; -enum { kHeight32 = 32 }; -enum { kHeight32PlusOne = 33 }; -enum { kPixelStepOne = 1 }; -enum { kAlign16 = 16 }; - -static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) { - const int32x4_t a = vpaddlq_s16(v_16x8); - const int64x2_t b = vpaddlq_s32(a); - const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), - vreinterpret_s32_s64(vget_high_s64(b))); - return vget_lane_s32(c, 0); -} - -static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) { - const int64x2_t b = vpaddlq_s32(v_32x4); - const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), - vreinterpret_s32_s64(vget_high_s64(b))); - return vget_lane_s32(c, 0); -} - -static void variance_neon_w8(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int w, int h, unsigned int *sse, int *sum) { - int i, j; - int16x8_t v_sum = vdupq_n_s16(0); - int32x4_t v_sse_lo = vdupq_n_s32(0); - int32x4_t v_sse_hi = vdupq_n_s32(0); - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 8) { - const uint8x8_t v_a = vld1_u8(&a[j]); - const uint8x8_t v_b = vld1_u8(&b[j]); - const uint16x8_t v_diff = vsubl_u8(v_a, v_b); - const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff); - v_sum = vaddq_s16(v_sum, sv_diff); - v_sse_lo = vmlal_s16(v_sse_lo, - vget_low_s16(sv_diff), - vget_low_s16(sv_diff)); - v_sse_hi = vmlal_s16(v_sse_hi, - vget_high_s16(sv_diff), - vget_high_s16(sv_diff)); - } - a += a_stride; - b += b_stride; - } - - *sum = horizontal_add_s16x8(v_sum); - *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi)); -} - -void vp9_get8x8var_neon(const uint8_t 
*src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, kWidth8, - kHeight8, sse, sum); -} - -unsigned int vp9_variance8x8_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum; - variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum); - return *sse - (((int64_t)sum * sum) / (kWidth8 * kHeight8)); -} - -void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, kWidth16, - kHeight16, sse, sum); -} - -unsigned int vp9_variance16x16_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum; - variance_neon_w8(a, a_stride, b, b_stride, kWidth16, kHeight16, sse, &sum); - return *sse - (((int64_t)sum * sum) / (kWidth16 * kHeight16)); -} - -static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, - uint8_t *output_ptr, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const int16_t *vp9_filter) { - const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]); - const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]); - unsigned int i; - for (i = 0; i < output_height; ++i) { - const uint8x8_t src_0 = vld1_u8(&src_ptr[0]); - const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]); - const uint16x8_t a = vmull_u8(src_0, f0); - const uint16x8_t b = vmlal_u8(a, src_1, f1); - const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS); - vst1_u8(&output_ptr[0], out); - // Next row... 
- src_ptr += src_pixels_per_line; - output_ptr += output_width; - } -} - -static void var_filter_block2d_bil_w16(const uint8_t *src_ptr, - uint8_t *output_ptr, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const int16_t *vp9_filter) { - const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]); - const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]); - unsigned int i, j; - for (i = 0; i < output_height; ++i) { - for (j = 0; j < output_width; j += 16) { - const uint8x16_t src_0 = vld1q_u8(&src_ptr[j]); - const uint8x16_t src_1 = vld1q_u8(&src_ptr[j + pixel_step]); - const uint16x8_t a = vmull_u8(vget_low_u8(src_0), f0); - const uint16x8_t b = vmlal_u8(a, vget_low_u8(src_1), f1); - const uint8x8_t out_lo = vrshrn_n_u16(b, FILTER_BITS); - const uint16x8_t c = vmull_u8(vget_high_u8(src_0), f0); - const uint16x8_t d = vmlal_u8(c, vget_high_u8(src_1), f1); - const uint8x8_t out_hi = vrshrn_n_u16(d, FILTER_BITS); - vst1q_u8(&output_ptr[j], vcombine_u8(out_lo, out_hi)); - } - // Next row... 
- src_ptr += src_pixels_per_line; - output_ptr += output_width; - } -} - -unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src, - int src_stride, - int xoffset, - int yoffset, - const uint8_t *dst, - int dst_stride, - unsigned int *sse) { - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight8 * kWidth8); - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight8PlusOne * kWidth8); - - var_filter_block2d_bil_w8(src, fdata3, src_stride, kPixelStepOne, - kHeight8PlusOne, kWidth8, - BILINEAR_FILTERS_2TAP(xoffset)); - var_filter_block2d_bil_w8(fdata3, temp2, kWidth8, kWidth8, kHeight8, - kWidth8, BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance8x8_neon(temp2, kWidth8, dst, dst_stride, sse); -} - -unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src, - int src_stride, - int xoffset, - int yoffset, - const uint8_t *dst, - int dst_stride, - unsigned int *sse) { - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight16 * kWidth16); - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight16PlusOne * kWidth16); - - var_filter_block2d_bil_w16(src, fdata3, src_stride, kPixelStepOne, - kHeight16PlusOne, kWidth16, - BILINEAR_FILTERS_2TAP(xoffset)); - var_filter_block2d_bil_w16(fdata3, temp2, kWidth16, kWidth16, kHeight16, - kWidth16, BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance16x16_neon(temp2, kWidth16, dst, dst_stride, sse); -} - -void vp9_get32x32var_neon(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, kWidth32, - kHeight32, sse, sum); -} - -unsigned int vp9_variance32x32_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum; - variance_neon_w8(a, a_stride, b, b_stride, kWidth32, kHeight32, sse, &sum); - return *sse - (((int64_t)sum * sum) / (kWidth32 * kHeight32)); -} - -unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src, - int 
src_stride, - int xoffset, - int yoffset, - const uint8_t *dst, - int dst_stride, - unsigned int *sse) { - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight32 * kWidth32); - DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight32PlusOne * kWidth32); - - var_filter_block2d_bil_w16(src, fdata3, src_stride, kPixelStepOne, - kHeight32PlusOne, kWidth32, - BILINEAR_FILTERS_2TAP(xoffset)); - var_filter_block2d_bil_w16(fdata3, temp2, kWidth32, kWidth32, kHeight32, - kWidth32, BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance32x32_neon(temp2, kWidth32, dst, dst_stride, sse); -} diff --git a/media/libvpx/vp9/encoder/vp9_aq_complexity.c b/media/libvpx/vp9/encoder/vp9_aq_complexity.c deleted file mode 100644 index f7fca0cde0a..00000000000 --- a/media/libvpx/vp9/encoder/vp9_aq_complexity.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include - -#include "vp9/common/vp9_seg_common.h" - -#include "vp9/encoder/vp9_segmentation.h" - -#define AQ_C_SEGMENTS 3 -#define AQ_C_STRENGTHS 3 -static const int aq_c_active_segments[AQ_C_STRENGTHS] = {1, 2, 3}; -static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = - {{1.0, 1.0, 1.0}, {1.0, 2.0, 1.0}, {1.0, 1.5, 2.5}}; -static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = - {{1.0, 1.0, 1.0}, {1.0, 0.25, 0.0}, {1.0, 0.5, 0.25}}; - -static int get_aq_c_strength(int q_index, vpx_bit_depth_t bit_depth) { - // Approximate base quatizer (truncated to int) - const int base_quant = vp9_ac_quant(q_index, 0, bit_depth) / 4; - return (base_quant > 20) + (base_quant > 45); -} - -void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - struct segmentation *const seg = &cm->seg; - - // Make SURE use of floating point in this function is safe. - vp9_clear_system_state(); - - if (cm->frame_type == KEY_FRAME || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - int segment; - const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth); - const int active_segments = aq_c_active_segments[aq_strength]; - - // Clear down the segment map. - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - - // Clear down the complexity map used for rd. - vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); - - vp9_clearall_segfeatures(seg); - - // Segmentation only makes sense if the target bits per SB is above a - // threshold. Below this the overheads will usually outweigh any benefit. - if (cpi->rc.sb64_target_rate < 256) { - vp9_disable_segmentation(seg); - return; - } - - vp9_enable_segmentation(seg); - - // Select delta coding method. - seg->abs_delta = SEGMENT_DELTADATA; - - // Segment 0 "Q" feature is disabled so it defaults to the baseline Q. 
- vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); - - // Use some of the segments for in frame Q adjustment. - for (segment = 1; segment < active_segments; ++segment) { - int qindex_delta = - vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, - aq_c_q_adj_factor[aq_strength][segment], - cm->bit_depth); - - // For AQ complexity mode, we dont allow Q0 in a segment if the base - // Q is not 0. Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment - // Q delta is sometimes applied without going back around the rd loop. - // This could lead to an illegal combination of partition size and q. - if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { - qindex_delta = -cm->base_qindex + 1; - } - if ((cm->base_qindex + qindex_delta) > 0) { - vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); - vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); - } - } - } -} - -// Select a segment for the current SB64 block. -// The choice of segment for a block depends on the ratio of the projected -// bits for the block vs a target average. -// An "aq_strength" value determines how many segments are supported, -// the set of transition points to use and the extent of the quantizer -// adjustment for each segment (configured in vp9_setup_in_frame_q_adj()). -void vp9_select_in_frame_q_segment(VP9_COMP *cpi, - int mi_row, int mi_col, - int output_enabled, int projected_rate) { - VP9_COMMON *const cm = &cpi->common; - - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; - const int xmis = MIN(cm->mi_cols - mi_col, bw); - const int ymis = MIN(cm->mi_rows - mi_row, bh); - int complexity_metric = 64; - int x, y; - - unsigned char segment; - - if (!output_enabled) { - segment = 0; - } else { - // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). - // It is converted to bits * 256 units. 
- const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / - (bw * bh); - const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth); - const int active_segments = aq_c_active_segments[aq_strength]; - - // The number of segments considered and the transition points used to - // select them is determined by the "aq_strength" value. - // Currently this loop only supports segments that reduce Q (i.e. where - // there is undershoot. - // The loop counts down towards segment 0 which is the default segment - // with no Q adjustment. - segment = active_segments - 1; - while (segment > 0) { - if (projected_rate < - (target_rate * aq_c_transitions[aq_strength][segment])) { - break; - } - --segment; - } - - if (target_rate > 0) { - complexity_metric = - clamp((int)((projected_rate * 64) / target_rate), 16, 255); - } - } - - // Fill in the entires in the segment map corresponding to this SB64. - for (y = 0; y < ymis; y++) { - for (x = 0; x < xmis; x++) { - cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; - cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = - (unsigned char)complexity_metric; - } - } -} diff --git a/media/libvpx/vp9/encoder/vp9_aq_complexity.h b/media/libvpx/vp9/encoder/vp9_aq_complexity.h deleted file mode 100644 index af031a46c6c..00000000000 --- a/media/libvpx/vp9/encoder/vp9_aq_complexity.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ -#define VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9_COMP; - -// Select a segment for the current SB64. -void vp9_select_in_frame_q_segment(struct VP9_COMP *cpi, int mi_row, int mi_col, - int output_enabled, int projected_rate); - - -// This function sets up a set of segments with delta Q values around -// the baseline frame quantizer. -void vp9_setup_in_frame_q_adj(struct VP9_COMP *cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ diff --git a/media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c deleted file mode 100644 index 514ff7a52ad..00000000000 --- a/media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include - -#include "vp9/encoder/vp9_aq_cyclicrefresh.h" - -#include "vp9/common/vp9_seg_common.h" - -#include "vp9/encoder/vp9_ratectrl.h" -#include "vp9/encoder/vp9_segmentation.h" - -struct CYCLIC_REFRESH { - // Percentage of super-blocks per frame that are targeted as candidates - // for cyclic refresh. - int max_sbs_perframe; - // Maximum q-delta as percentage of base q. - int max_qdelta_perc; - // Block size below which we don't apply cyclic refresh. - BLOCK_SIZE min_block_size; - // Superblock starting index for cycling through the frame. - int sb_index; - // Controls how long a block will need to wait to be refreshed again. 
- int time_for_refresh; - // Actual number of (8x8) blocks that were applied delta-q (segment 1). - int num_seg_blocks; - // Actual encoding bits for segment 1. - int actual_seg_bits; - // RD mult. parameters for segment 1. - int rdmult; - // Cyclic refresh map. - signed char *map; - // Projected rate and distortion for the current superblock. - int64_t projected_rate_sb; - int64_t projected_dist_sb; - // Thresholds applied to projected rate/distortion of the superblock. - int64_t thresh_rate_sb; - int64_t thresh_dist_sb; -}; - -CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { - CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); - if (cr == NULL) - return NULL; - - cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map)); - if (cr->map == NULL) { - vpx_free(cr); - return NULL; - } - - return cr; -} - -void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { - vpx_free(cr->map); - vpx_free(cr); -} - -// Check if we should turn off cyclic refresh based on bitrate condition. -static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm, - const RATE_CONTROL *rc) { - // Turn off cyclic refresh if bits available per frame is not sufficiently - // larger than bit cost of segmentation. Segment map bit cost should scale - // with number of seg blocks, so compare available bits to number of blocks. - // Average bits available per frame = avg_frame_bandwidth - // Number of (8x8) blocks in frame = mi_rows * mi_cols; - const float factor = 0.5; - const int number_blocks = cm->mi_rows * cm->mi_cols; - // The condition below corresponds to turning off at target bitrates: - // ~24kbps for CIF, 72kbps for VGA (at 30fps). - // Also turn off at very small frame sizes, to avoid too large fraction of - // superblocks to be refreshed per frame. Threshold below is less than QCIF. 
- if (rc->avg_frame_bandwidth < factor * number_blocks || - number_blocks / 64 < 5) - return 0; - else - return 1; -} - -// Check if this coding block, of size bsize, should be considered for refresh -// (lower-qp coding). Decision can be based on various factors, such as -// size of the coding block (i.e., below min_block size rejected), coding -// mode, and rate/distortion. -static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, - const MB_MODE_INFO *mbmi, - BLOCK_SIZE bsize, int use_rd) { - if (use_rd) { - // If projected rate is below the thresh_rate (well below target, - // so undershoot expected), accept it for lower-qp coding. - if (cr->projected_rate_sb < cr->thresh_rate_sb) - return 1; - // Otherwise, reject the block for lower-qp coding if any of the following: - // 1) prediction block size is below min_block_size - // 2) mode is non-zero mv and projected distortion is above thresh_dist - // 3) mode is an intra-mode (we may want to allow some of this under - // another thresh_dist) - else if (bsize < cr->min_block_size || - (mbmi->mv[0].as_int != 0 && - cr->projected_dist_sb > cr->thresh_dist_sb) || - !is_inter_block(mbmi)) - return 0; - else - return 1; - } else { - // Rate/distortion not used for update. - if (bsize < cr->min_block_size || - mbmi->mv[0].as_int != 0 || - !is_inter_block(mbmi)) - return 0; - else - return 1; - } -} - -// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), -// check if we should reset the segment_id, and update the cyclic_refresh map -// and segmentation map. 
-void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, - MB_MODE_INFO *const mbmi, - int mi_row, int mi_col, - BLOCK_SIZE bsize, int use_rd) { - const VP9_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - const int bw = num_8x8_blocks_wide_lookup[bsize]; - const int bh = num_8x8_blocks_high_lookup[bsize]; - const int xmis = MIN(cm->mi_cols - mi_col, bw); - const int ymis = MIN(cm->mi_rows - mi_row, bh); - const int block_index = mi_row * cm->mi_cols + mi_col; - const int refresh_this_block = cpi->mb.in_static_area || - candidate_refresh_aq(cr, mbmi, bsize, use_rd); - // Default is to not update the refresh map. - int new_map_value = cr->map[block_index]; - int x = 0; int y = 0; - - // Check if we should reset the segment_id for this block. - if (mbmi->segment_id > 0 && !refresh_this_block) - mbmi->segment_id = 0; - - // Update the cyclic refresh map, to be used for setting segmentation map - // for the next frame. If the block will be refreshed this frame, mark it - // as clean. The magnitude of the -ve influences how long before we consider - // it for refresh again. - if (mbmi->segment_id == 1) { - new_map_value = -cr->time_for_refresh; - } else if (refresh_this_block) { - // Else if it is accepted as candidate for refresh, and has not already - // been refreshed (marked as 1) then mark it as a candidate for cleanup - // for future time (marked as 0), otherwise don't update it. - if (cr->map[block_index] == 1) - new_map_value = 0; - } else { - // Leave it marked as block that is not candidate for refresh. - new_map_value = 1; - } - // Update entries in the cyclic refresh map with new_map_value, and - // copy mbmi->segment_id into global segmentation map. 
- for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) { - cr->map[block_index + y * cm->mi_cols + x] = new_map_value; - cpi->segmentation_map[block_index + y * cm->mi_cols + x] = - mbmi->segment_id; - } - // Keep track of actual number (in units of 8x8) of blocks in segment 1 used - // for encoding this frame. - if (mbmi->segment_id) - cr->num_seg_blocks += xmis * ymis; -} - -// Setup cyclic background refresh: set delta q and segmentation map. -void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { - VP9_COMMON *const cm = &cpi->common; - const RATE_CONTROL *const rc = &cpi->rc; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - struct segmentation *const seg = &cm->seg; - unsigned char *const seg_map = cpi->segmentation_map; - const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); - // Don't apply refresh on key frame or enhancement layer frames. - if (!apply_cyclic_refresh || - (cm->frame_type == KEY_FRAME) || - (cpi->svc.temporal_layer_id > 0)) { - // Set segmentation map to 0 and disable. - vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols); - vp9_disable_segmentation(&cm->seg); - if (cm->frame_type == KEY_FRAME) - cr->sb_index = 0; - return; - } else { - int qindex_delta = 0; - int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; - int xmis, ymis, x, y, qindex2; - - // Rate target ratio to set q delta. - const float rate_ratio_qdelta = 2.0; - const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth); - vp9_clear_system_state(); - // Some of these parameters may be set via codec-control function later. - cr->max_sbs_perframe = 10; - cr->max_qdelta_perc = 50; - cr->min_block_size = BLOCK_8X8; - cr->time_for_refresh = 1; - // Set rate threshold to some fraction of target (and scaled by 256). - cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2; - // Distortion threshold, quadratic in Q, scale factor to be adjusted. 
- cr->thresh_dist_sb = 8 * (int)(q * q); - if (cpi->sf.use_nonrd_pick_mode) { - // May want to be more conservative with thresholds in non-rd mode for now - // as rate/distortion are derived from model based on prediction residual. - cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3; - cr->thresh_dist_sb = 4 * (int)(q * q); - } - - cr->num_seg_blocks = 0; - // Set up segmentation. - // Clear down the segment map. - vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols); - vp9_enable_segmentation(&cm->seg); - vp9_clearall_segfeatures(seg); - // Select delta coding method. - seg->abs_delta = SEGMENT_DELTADATA; - - // Note: setting temporal_update has no effect, as the seg-map coding method - // (temporal or spatial) is determined in vp9_choose_segmap_coding_method(), - // based on the coding cost of each method. For error_resilient mode on the - // last_frame_seg_map is set to 0, so if temporal coding is used, it is - // relative to 0 previous map. - // seg->temporal_update = 0; - - // Segment 0 "Q" feature is disabled so it defaults to the baseline Q. - vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); - // Use segment 1 for in-frame Q adjustment. - vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); - - // Set the q delta for segment 1. - qindex_delta = vp9_compute_qdelta_by_rate(rc, cm->frame_type, - cm->base_qindex, - rate_ratio_qdelta, - cm->bit_depth); - // TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from - // previous encoded frame. - if (-qindex_delta > cr->max_qdelta_perc * cm->base_qindex / 100) - qindex_delta = -cr->max_qdelta_perc * cm->base_qindex / 100; - - // Compute rd-mult for segment 1. 
- qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ); - cr->rdmult = vp9_compute_rd_mult(cpi, qindex2); - - vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qindex_delta); - - sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; - sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; - sbs_in_frame = sb_cols * sb_rows; - // Number of target superblocks to get the q delta (segment 1). - block_count = cr->max_sbs_perframe * sbs_in_frame / 100; - // Set the segmentation map: cycle through the superblocks, starting at - // cr->mb_index, and stopping when either block_count blocks have been found - // to be refreshed, or we have passed through whole frame. - assert(cr->sb_index < sbs_in_frame); - i = cr->sb_index; - do { - int sum_map = 0; - // Get the mi_row/mi_col corresponding to superblock index i. - int sb_row_index = (i / sb_cols); - int sb_col_index = i - sb_row_index * sb_cols; - int mi_row = sb_row_index * MI_BLOCK_SIZE; - int mi_col = sb_col_index * MI_BLOCK_SIZE; - assert(mi_row >= 0 && mi_row < cm->mi_rows); - assert(mi_col >= 0 && mi_col < cm->mi_cols); - bl_index = mi_row * cm->mi_cols + mi_col; - // Loop through all 8x8 blocks in superblock and update map. - xmis = MIN(cm->mi_cols - mi_col, - num_8x8_blocks_wide_lookup[BLOCK_64X64]); - ymis = MIN(cm->mi_rows - mi_row, - num_8x8_blocks_high_lookup[BLOCK_64X64]); - for (y = 0; y < ymis; y++) { - for (x = 0; x < xmis; x++) { - const int bl_index2 = bl_index + y * cm->mi_cols + x; - // If the block is as a candidate for clean up then mark it - // for possible boost/refresh (segment 1). The segment id may get - // reset to 0 later if block gets coded anything other than ZEROMV. - if (cr->map[bl_index2] == 0) { - seg_map[bl_index2] = 1; - sum_map++; - } else if (cr->map[bl_index2] < 0) { - cr->map[bl_index2]++; - } - } - } - // Enforce constant segment over superblock. - // If segment is partial over superblock, reset to either all 1 or 0. 
- if (sum_map > 0 && sum_map < xmis * ymis) { - const int new_value = (sum_map >= xmis * ymis / 2); - for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) - seg_map[bl_index + y * cm->mi_cols + x] = new_value; - } - i++; - if (i == sbs_in_frame) { - i = 0; - } - if (sum_map >= xmis * ymis /2) - block_count--; - } while (block_count && i != cr->sb_index); - cr->sb_index = i; - } -} - -void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, - int64_t rate_sb, int64_t dist_sb) { - cr->projected_rate_sb = rate_sb; - cr->projected_dist_sb = dist_sb; -} - -int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) { - return cr->rdmult; -} diff --git a/media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h deleted file mode 100644 index f556d658bdc..00000000000 --- a/media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ -#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ - -#include "vp9/common/vp9_blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9_COMP; - -struct CYCLIC_REFRESH; -typedef struct CYCLIC_REFRESH CYCLIC_REFRESH; - -CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols); - -void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr); - -// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), -// check if we should reset the segment_id, and update the cyclic_refresh map -// and segmentation map. 
-void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi, - MB_MODE_INFO *const mbmi, - int mi_row, int mi_col, - BLOCK_SIZE bsize, int use_rd); - -// Setup cyclic background refresh: set delta q and segmentation map. -void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi); - -void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, - int64_t rate_sb, int64_t dist_sb); - -int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ diff --git a/media/libvpx/vp9/encoder/vp9_aq_variance.c b/media/libvpx/vp9/encoder/vp9_aq_variance.c deleted file mode 100644 index b96f00fd19c..00000000000 --- a/media/libvpx/vp9/encoder/vp9_aq_variance.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include - -#include "vp9/encoder/vp9_aq_variance.h" - -#include "vp9/common/vp9_seg_common.h" - -#include "vp9/encoder/vp9_ratectrl.h" -#include "vp9/encoder/vp9_rd.h" -#include "vp9/encoder/vp9_segmentation.h" -#include "vp9/common/vp9_systemdependent.h" - -#define ENERGY_MIN (-1) -#define ENERGY_MAX (1) -#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1) -#define ENERGY_IN_BOUNDS(energy)\ - assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX) - -static double q_ratio[MAX_SEGMENTS] = { 1, 1, 1, 1, 1, 1, 1, 1 }; -static double rdmult_ratio[MAX_SEGMENTS] = { 1, 1, 1, 1, 1, 1, 1, 1 }; -static int segment_id[MAX_SEGMENTS] = { 5, 3, 1, 0, 2, 4, 6, 7 }; - -#define Q_RATIO(i) q_ratio[(i) - ENERGY_MIN] -#define RDMULT_RATIO(i) rdmult_ratio[(i) - ENERGY_MIN] -#define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN] - -DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = {0}; - -unsigned int vp9_vaq_segment_id(int energy) { - ENERGY_IN_BOUNDS(energy); - - return SEGMENT_ID(energy); -} - -double vp9_vaq_rdmult_ratio(int energy) { - ENERGY_IN_BOUNDS(energy); - - vp9_clear_system_state(); - - return RDMULT_RATIO(energy); -} - -double vp9_vaq_inv_q_ratio(int energy) { - ENERGY_IN_BOUNDS(energy); - - vp9_clear_system_state(); - - return Q_RATIO(-energy); -} - -void vp9_vaq_init() { - int i; - double base_ratio; - - assert(ENERGY_SPAN <= MAX_SEGMENTS); - - vp9_clear_system_state(); - - base_ratio = 1.5; - - for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) { - Q_RATIO(i) = pow(base_ratio, i/3.0); - } -} - -void vp9_vaq_frame_setup(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - struct segmentation *seg = &cm->seg; - const double base_q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth); - const int base_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + - cm->y_dc_delta_q); - int i; - - if (cm->frame_type == KEY_FRAME || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - vp9_enable_segmentation(seg); - 
vp9_clearall_segfeatures(seg); - - seg->abs_delta = SEGMENT_DELTADATA; - - vp9_clear_system_state(); - - for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) { - int qindex_delta, segment_rdmult; - - if (Q_RATIO(i) == 1) { - // No need to enable SEG_LVL_ALT_Q for this segment - RDMULT_RATIO(i) = 1; - continue; - } - - qindex_delta = vp9_compute_qdelta(&cpi->rc, base_q, base_q * Q_RATIO(i), - cm->bit_depth); - vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta); - vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q); - - segment_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + qindex_delta + - cm->y_dc_delta_q); - - RDMULT_RATIO(i) = (double) segment_rdmult / base_rdmult; - } - } -} - - -static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bs) { - MACROBLOCKD *xd = &x->e_mbd; - unsigned int var, sse; - int right_overflow = (xd->mb_to_right_edge < 0) ? - ((-xd->mb_to_right_edge) >> 3) : 0; - int bottom_overflow = (xd->mb_to_bottom_edge < 0) ? - ((-xd->mb_to_bottom_edge) >> 3) : 0; - - if (right_overflow || bottom_overflow) { - const int bw = 8 * num_8x8_blocks_wide_lookup[bs] - right_overflow; - const int bh = 8 * num_8x8_blocks_high_lookup[bs] - bottom_overflow; - int avg; - variance(x->plane[0].src.buf, x->plane[0].src.stride, - vp9_64_zeros, 0, bw, bh, &sse, &avg); - var = sse - (((int64_t)avg * avg) / (bw * bh)); - return (256 * var) / (bw * bh); - } else { - var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, - x->plane[0].src.stride, - vp9_64_zeros, 0, &sse); - return (256 * var) >> num_pels_log2_lookup[bs]; - } -} - -int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { - double energy; - unsigned int var = block_variance(cpi, x, bs); - - vp9_clear_system_state(); - - energy = 0.9 * (log(var + 1.0) - 10.0); - return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX); -} diff --git a/media/libvpx/vp9/encoder/vp9_aq_variance.h b/media/libvpx/vp9/encoder/vp9_aq_variance.h deleted file mode 100644 index 
d1a459fe9ec..00000000000 --- a/media/libvpx/vp9/encoder/vp9_aq_variance.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_ -#define VP9_ENCODER_VP9_AQ_VARIANCE_H_ - -#include "vp9/encoder/vp9_encoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -unsigned int vp9_vaq_segment_id(int energy); -double vp9_vaq_rdmult_ratio(int energy); -double vp9_vaq_inv_q_ratio(int energy); - -void vp9_vaq_init(); -void vp9_vaq_frame_setup(VP9_COMP *cpi); - -int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_ diff --git a/media/libvpx/vp9/encoder/vp9_context_tree.c b/media/libvpx/vp9/encoder/vp9_context_tree.c deleted file mode 100644 index 12acc51143a..00000000000 --- a/media/libvpx/vp9/encoder/vp9_context_tree.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vp9/encoder/vp9_context_tree.h" -#include "vp9/encoder/vp9_encoder.h" - -static const BLOCK_SIZE square[] = { - BLOCK_8X8, - BLOCK_16X16, - BLOCK_32X32, - BLOCK_64X64, -}; - -static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk, - PICK_MODE_CONTEXT *ctx) { - const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk); - const int num_pix = num_blk << 4; - int i, k; - ctx->num_4x4_blk = num_blk; - - CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, - vpx_calloc(num_4x4_blk, sizeof(uint8_t))); - for (i = 0; i < MAX_MB_PLANE; ++i) { - for (k = 0; k < 3; ++k) { - CHECK_MEM_ERROR(cm, ctx->coeff[i][k], - vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k]))); - CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k], - vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k]))); - CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k], - vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k]))); - CHECK_MEM_ERROR(cm, ctx->eobs[i][k], - vpx_memalign(16, num_pix * sizeof(*ctx->eobs[i][k]))); - ctx->coeff_pbuf[i][k] = ctx->coeff[i][k]; - ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k]; - ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k]; - ctx->eobs_pbuf[i][k] = ctx->eobs[i][k]; - } - } -} - -static void free_mode_context(PICK_MODE_CONTEXT *ctx) { - int i, k; - vpx_free(ctx->zcoeff_blk); - ctx->zcoeff_blk = 0; - for (i = 0; i < MAX_MB_PLANE; ++i) { - for (k = 0; k < 3; ++k) { - vpx_free(ctx->coeff[i][k]); - ctx->coeff[i][k] = 0; - vpx_free(ctx->qcoeff[i][k]); - ctx->qcoeff[i][k] = 0; - vpx_free(ctx->dqcoeff[i][k]); - ctx->dqcoeff[i][k] = 0; - vpx_free(ctx->eobs[i][k]); - ctx->eobs[i][k] = 0; - } - } -} - -static void alloc_tree_contexts(VP9_COMMON *cm, PC_TREE *tree, - int num_4x4_blk) { - alloc_mode_context(cm, num_4x4_blk, &tree->none); - alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[0]); - alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[0]); - - /* TODO(Jbb): for 4x8 and 8x4 these allocated values are not used. - * Figure out a better way to do this. 
*/ - alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[1]); - alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[1]); -} - -static void free_tree_contexts(PC_TREE *tree) { - free_mode_context(&tree->none); - free_mode_context(&tree->horizontal[0]); - free_mode_context(&tree->horizontal[1]); - free_mode_context(&tree->vertical[0]); - free_mode_context(&tree->vertical[1]); -} - -// This function sets up a tree of contexts such that at each square -// partition level. There are contexts for none, horizontal, vertical, and -// split. Along with a block_size value and a selected block_size which -// represents the state of our search. -void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) { - int i, j; - const int leaf_nodes = 64; - const int tree_nodes = 64 + 16 + 4 + 1; - int pc_tree_index = 0; - PC_TREE *this_pc; - PICK_MODE_CONTEXT *this_leaf; - int square_index = 1; - int nodes; - - vpx_free(cpi->leaf_tree); - CHECK_MEM_ERROR(cm, cpi->leaf_tree, vpx_calloc(leaf_nodes, - sizeof(*cpi->leaf_tree))); - vpx_free(cpi->pc_tree); - CHECK_MEM_ERROR(cm, cpi->pc_tree, vpx_calloc(tree_nodes, - sizeof(*cpi->pc_tree))); - - this_pc = &cpi->pc_tree[0]; - this_leaf = &cpi->leaf_tree[0]; - - // 4x4 blocks smaller than 8x8 but in the same 8x8 block share the same - // context so we only need to allocate 1 for each 8x8 block. - for (i = 0; i < leaf_nodes; ++i) - alloc_mode_context(cm, 1, &cpi->leaf_tree[i]); - - // Sets up all the leaf nodes in the tree. - for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) { - PC_TREE *const tree = &cpi->pc_tree[pc_tree_index]; - tree->block_size = square[0]; - alloc_tree_contexts(cm, tree, 4); - tree->leaf_split[0] = this_leaf++; - for (j = 1; j < 4; j++) - tree->leaf_split[j] = tree->leaf_split[0]; - } - - // Each node has 4 leaf nodes, fill each block_size level of the tree - // from leafs to the root. 
- for (nodes = 16; nodes > 0; nodes >>= 2) { - for (i = 0; i < nodes; ++i) { - PC_TREE *const tree = &cpi->pc_tree[pc_tree_index]; - alloc_tree_contexts(cm, tree, 4 << (2 * square_index)); - tree->block_size = square[square_index]; - for (j = 0; j < 4; j++) - tree->split[j] = this_pc++; - ++pc_tree_index; - } - ++square_index; - } - cpi->pc_root = &cpi->pc_tree[tree_nodes - 1]; - cpi->pc_root[0].none.best_mode_index = 2; -} - -void vp9_free_pc_tree(VP9_COMP *cpi) { - const int tree_nodes = 64 + 16 + 4 + 1; - int i; - - // Set up all 4x4 mode contexts - for (i = 0; i < 64; ++i) - free_mode_context(&cpi->leaf_tree[i]); - - // Sets up all the leaf nodes in the tree. - for (i = 0; i < tree_nodes; ++i) - free_tree_contexts(&cpi->pc_tree[i]); - - vpx_free(cpi->pc_tree); - cpi->pc_tree = NULL; - vpx_free(cpi->leaf_tree); - cpi->leaf_tree = NULL; -} diff --git a/media/libvpx/vp9/encoder/vp9_context_tree.h b/media/libvpx/vp9/encoder/vp9_context_tree.h deleted file mode 100644 index 97f07414813..00000000000 --- a/media/libvpx/vp9/encoder/vp9_context_tree.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_ENCODER_VP9_CONTEXT_TREE_H_ -#define VP9_ENCODER_VP9_CONTEXT_TREE_H_ - -#include "vp9/common/vp9_onyxc_int.h" - -struct VP9_COMP; - -// Structure to hold snapshot of coding context during the mode picking process -typedef struct { - MODE_INFO mic; - uint8_t *zcoeff_blk; - tran_low_t *coeff[MAX_MB_PLANE][3]; - tran_low_t *qcoeff[MAX_MB_PLANE][3]; - tran_low_t *dqcoeff[MAX_MB_PLANE][3]; - uint16_t *eobs[MAX_MB_PLANE][3]; - - // dual buffer pointers, 0: in use, 1: best in store - tran_low_t *coeff_pbuf[MAX_MB_PLANE][3]; - tran_low_t *qcoeff_pbuf[MAX_MB_PLANE][3]; - tran_low_t *dqcoeff_pbuf[MAX_MB_PLANE][3]; - uint16_t *eobs_pbuf[MAX_MB_PLANE][3]; - - int is_coded; - int num_4x4_blk; - int skip; - // For current partition, only if all Y, U, and V transform blocks' - // coefficients are quantized to 0, skippable is set to 0. - int skippable; - uint8_t skip_txfm[MAX_MB_PLANE << 2]; - int best_mode_index; - int hybrid_pred_diff; - int comp_pred_diff; - int single_pred_diff; - int64_t tx_rd_diff[TX_MODES]; - int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - -#if CONFIG_VP9_TEMPORAL_DENOISING - unsigned int newmv_sse; - unsigned int zeromv_sse; - PREDICTION_MODE best_sse_inter_mode; - int_mv best_sse_mv; - MV_REFERENCE_FRAME best_reference_frame; - MV_REFERENCE_FRAME best_zeromv_reference_frame; -#endif - - // motion vector cache for adaptive motion search control in partition - // search loop - MV pred_mv[MAX_REF_FRAMES]; - INTERP_FILTER pred_interp_filter; -} PICK_MODE_CONTEXT; - -typedef struct PC_TREE { - int index; - PARTITION_TYPE partitioning; - BLOCK_SIZE block_size; - PICK_MODE_CONTEXT none; - PICK_MODE_CONTEXT horizontal[2]; - PICK_MODE_CONTEXT vertical[2]; - union { - struct PC_TREE *split[4]; - PICK_MODE_CONTEXT *leaf_split[4]; - }; -} PC_TREE; - -void vp9_setup_pc_tree(struct VP9Common *cm, struct VP9_COMP *cpi); -void vp9_free_pc_tree(struct VP9_COMP *cpi); - -#endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */ diff --git 
a/media/libvpx/vp9/encoder/vp9_cost.c b/media/libvpx/vp9/encoder/vp9_cost.c deleted file mode 100644 index 1c3c3d24847..00000000000 --- a/media/libvpx/vp9/encoder/vp9_cost.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/encoder/vp9_cost.h" - -const unsigned int vp9_prob_cost[256] = { - 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, - 1129, 1099, 1072, 1046, 1023, 1000, 979, 959, 940, 922, 905, 889, - 873, 858, 843, 829, 816, 803, 790, 778, 767, 755, 744, 733, - 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625, - 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, - 534, 528, 522, 516, 511, 505, 499, 494, 488, 483, 477, 472, - 467, 462, 457, 452, 447, 442, 437, 433, 428, 424, 419, 415, - 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365, - 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, - 317, 314, 311, 307, 304, 301, 297, 294, 291, 288, 285, 281, - 278, 275, 272, 269, 266, 263, 260, 257, 255, 252, 249, 246, - 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214, - 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, - 181, 179, 177, 174, 172, 170, 168, 165, 163, 161, 159, 156, - 154, 152, 150, 148, 145, 143, 141, 139, 137, 135, 133, 131, - 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, - 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, - 82, 81, 79, 77, 75, 73, 72, 70, 68, 66, 65, 63, - 61, 60, 58, 56, 55, 53, 51, 50, 48, 46, 45, 43, - 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24, - 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, - 4, 3, 1, 1}; - -static void cost(int 
*costs, vp9_tree tree, const vp9_prob *probs, - int i, int c) { - const vp9_prob prob = probs[i / 2]; - int b; - - for (b = 0; b <= 1; ++b) { - const int cc = c + vp9_cost_bit(prob, b); - const vp9_tree_index ii = tree[i + b]; - - if (ii <= 0) - costs[-ii] = cc; - else - cost(costs, tree, probs, ii, cc); - } -} - -void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree) { - cost(costs, tree, probs, 0, 0); -} - -void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree) { - assert(tree[0] <= 0 && tree[1] > 0); - - costs[-tree[0]] = vp9_cost_bit(probs[0], 0); - cost(costs, tree, probs, 2, 0); -} diff --git a/media/libvpx/vp9/encoder/vp9_cost.h b/media/libvpx/vp9/encoder/vp9_cost.h deleted file mode 100644 index 6d2b9400d7e..00000000000 --- a/media/libvpx/vp9/encoder/vp9_cost.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_ENCODER_VP9_COST_H_ -#define VP9_ENCODER_VP9_COST_H_ - -#include "vp9/common/vp9_prob.h" - -#ifdef __cplusplus -extern "C" { -#endif - -extern const unsigned int vp9_prob_cost[256]; - -#define vp9_cost_zero(prob) (vp9_prob_cost[prob]) - -#define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob)) - -#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? 
vp9_complement(prob) \ - : (prob)) - -static INLINE unsigned int cost_branch256(const unsigned int ct[2], - vp9_prob p) { - return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p); -} - -static INLINE int treed_cost(vp9_tree tree, const vp9_prob *probs, - int bits, int len) { - int cost = 0; - vp9_tree_index i = 0; - - do { - const int bit = (bits >> --len) & 1; - cost += vp9_cost_bit(probs[i >> 1], bit); - i = tree[i + bit]; - } while (len); - - return cost; -} - -void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree); -void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_COST_H_ diff --git a/media/libvpx/vp9/encoder/vp9_denoiser.c b/media/libvpx/vp9/encoder/vp9_denoiser.c deleted file mode 100644 index 681b2a575b5..00000000000 --- a/media/libvpx/vp9/encoder/vp9_denoiser.c +++ /dev/null @@ -1,491 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include -#include "vpx_scale/yv12config.h" -#include "vpx/vpx_integer.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/encoder/vp9_context_tree.h" -#include "vp9/encoder/vp9_denoiser.h" - -/* The VP9 denoiser is a work-in-progress. It currently is only designed to work - * with speed 6, though it (inexplicably) seems to also work with speed 5 (one - * would need to modify the source code in vp9_pickmode.c and vp9_encoder.c to - * make the calls to the vp9_denoiser_* functions when in speed 5). - * - * The implementation is very similar to that of the VP8 denoiser. 
While - * choosing the motion vectors / reference frames, the denoiser is run, and if - * it did not modify the signal to much, the denoised block is copied to the - * signal. - */ - -#ifdef OUTPUT_YUV_DENOISED -static void make_grayscale(YV12_BUFFER_CONFIG *yuv); -#endif - -static const int widths[] = {4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64}; -static const int heights[] = {4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64}; - -static int absdiff_thresh(BLOCK_SIZE bs, int increase_denoising) { - (void)bs; - return 3 + (increase_denoising ? 1 : 0); -} - -static int delta_thresh(BLOCK_SIZE bs, int increase_denoising) { - (void)bs; - (void)increase_denoising; - return 4; -} - -static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) { - (void)bs; - (void)increase_denoising; - return 25 * 25; -} - -static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) { - return widths[bs] * heights[bs] * (increase_denoising ? 60 : 40); -} - -static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising, - int mv_row, int mv_col) { - if (mv_row * mv_row + mv_col * mv_col > - noise_motion_thresh(bs, increase_denoising)) { - return 0; - } else { - return widths[bs] * heights[bs] * 20; - } -} - -static int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) { - return widths[bs] * heights[bs] * (increase_denoising ? 3 : 2); -} - -static int total_adj_weak_thresh(BLOCK_SIZE bs, int increase_denoising) { - return widths[bs] * heights[bs] * (increase_denoising ? 
3 : 2); -} - -static VP9_DENOISER_DECISION denoiser_filter(const uint8_t *sig, int sig_stride, - const uint8_t *mc_avg, - int mc_avg_stride, - uint8_t *avg, int avg_stride, - int increase_denoising, - BLOCK_SIZE bs, - int motion_magnitude) { - int r, c; - const uint8_t *sig_start = sig; - const uint8_t *mc_avg_start = mc_avg; - uint8_t *avg_start = avg; - int diff, adj, absdiff, delta; - int adj_val[] = {3, 4, 6}; - int total_adj = 0; - int shift_inc = 1; - - // If motion_magnitude is small, making the denoiser more aggressive by - // increasing the adjustment for each level. Add another increment for - // blocks that are labeled for increase denoising. - if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) { - if (increase_denoising) { - shift_inc = 2; - } - adj_val[0] += shift_inc; - adj_val[1] += shift_inc; - adj_val[2] += shift_inc; - } - - // First attempt to apply a strong temporal denoising filter. - for (r = 0; r < heights[bs]; ++r) { - for (c = 0; c < widths[bs]; ++c) { - diff = mc_avg[c] - sig[c]; - absdiff = abs(diff); - - if (absdiff <= absdiff_thresh(bs, increase_denoising)) { - avg[c] = mc_avg[c]; - total_adj += diff; - } else { - switch (absdiff) { - case 4: case 5: case 6: case 7: - adj = adj_val[0]; - break; - case 8: case 9: case 10: case 11: - case 12: case 13: case 14: case 15: - adj = adj_val[1]; - break; - default: - adj = adj_val[2]; - } - if (diff > 0) { - avg[c] = MIN(UINT8_MAX, sig[c] + adj); - total_adj += adj; - } else { - avg[c] = MAX(0, sig[c] - adj); - total_adj -= adj; - } - } - } - sig += sig_stride; - avg += avg_stride; - mc_avg += mc_avg_stride; - } - - // If the strong filter did not modify the signal too much, we're all set. - if (abs(total_adj) <= total_adj_strong_thresh(bs, increase_denoising)) { - return FILTER_BLOCK; - } - - // Otherwise, we try to dampen the filter if the delta is not too high. 
- delta = ((abs(total_adj) - total_adj_strong_thresh(bs, increase_denoising)) - >> 8) + 1; - - if (delta >= delta_thresh(bs, increase_denoising)) { - return COPY_BLOCK; - } - - mc_avg = mc_avg_start; - avg = avg_start; - sig = sig_start; - for (r = 0; r < heights[bs]; ++r) { - for (c = 0; c < widths[bs]; ++c) { - diff = mc_avg[c] - sig[c]; - adj = abs(diff); - if (adj > delta) { - adj = delta; - } - if (diff > 0) { - // Diff positive means we made positive adjustment above - // (in first try/attempt), so now make negative adjustment to bring - // denoised signal down. - avg[c] = MAX(0, avg[c] - adj); - total_adj -= adj; - } else { - // Diff negative means we made negative adjustment above - // (in first try/attempt), so now make positive adjustment to bring - // denoised signal up. - avg[c] = MIN(UINT8_MAX, avg[c] + adj); - total_adj += adj; - } - } - sig += sig_stride; - avg += avg_stride; - mc_avg += mc_avg_stride; - } - - // We can use the filter if it has been sufficiently dampened - if (abs(total_adj) <= total_adj_weak_thresh(bs, increase_denoising)) { - return FILTER_BLOCK; - } - return COPY_BLOCK; -} - -static uint8_t *block_start(uint8_t *framebuf, int stride, - int mi_row, int mi_col) { - return framebuf + (stride * mi_row * 8) + (mi_col * 8); -} - -static void copy_block(uint8_t *dest, int dest_stride, - const uint8_t *src, int src_stride, BLOCK_SIZE bs) { - int r; - for (r = 0; r < heights[bs]; ++r) { - vpx_memcpy(dest, src, widths[bs]); - dest += dest_stride; - src += src_stride; - } -} - -static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, - MACROBLOCK *mb, - BLOCK_SIZE bs, - int increase_denoising, - int mi_row, - int mi_col, - PICK_MODE_CONTEXT *ctx, - int *motion_magnitude - ) { - int mv_col, mv_row; - int sse_diff = ctx->zeromv_sse - ctx->newmv_sse; - MV_REFERENCE_FRAME frame; - MACROBLOCKD *filter_mbd = &mb->e_mbd; - MB_MODE_INFO *mbmi = &filter_mbd->mi[0].src_mi->mbmi; - - MB_MODE_INFO saved_mbmi; - int i, j; - struct 
buf_2d saved_dst[MAX_MB_PLANE]; - struct buf_2d saved_pre[MAX_MB_PLANE][2]; // 2 pre buffers - - // We will restore these after motion compensation. - saved_mbmi = *mbmi; - for (i = 0; i < MAX_MB_PLANE; ++i) { - for (j = 0; j < 2; ++j) { - saved_pre[i][j] = filter_mbd->plane[i].pre[j]; - } - saved_dst[i] = filter_mbd->plane[i].dst; - } - - mv_col = ctx->best_sse_mv.as_mv.col; - mv_row = ctx->best_sse_mv.as_mv.row; - - *motion_magnitude = mv_row * mv_row + mv_col * mv_col; - - frame = ctx->best_reference_frame; - - // If the best reference frame uses inter-prediction and there is enough of a - // difference in sum-squared-error, use it. - if (frame != INTRA_FRAME && - sse_diff > sse_diff_thresh(bs, increase_denoising, mv_row, mv_col)) { - mbmi->ref_frame[0] = ctx->best_reference_frame; - mbmi->mode = ctx->best_sse_inter_mode; - mbmi->mv[0] = ctx->best_sse_mv; - } else { - // Otherwise, use the zero reference frame. - frame = ctx->best_zeromv_reference_frame; - - mbmi->ref_frame[0] = ctx->best_zeromv_reference_frame; - mbmi->mode = ZEROMV; - mbmi->mv[0].as_int = 0; - - ctx->best_sse_inter_mode = ZEROMV; - ctx->best_sse_mv.as_int = 0; - ctx->newmv_sse = ctx->zeromv_sse; - } - - // Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser - // struct. 
- for (j = 0; j < 2; ++j) { - filter_mbd->plane[0].pre[j].buf = - block_start(denoiser->running_avg_y[frame].y_buffer, - denoiser->running_avg_y[frame].y_stride, - mi_row, mi_col); - filter_mbd->plane[0].pre[j].stride = - denoiser->running_avg_y[frame].y_stride; - filter_mbd->plane[1].pre[j].buf = - block_start(denoiser->running_avg_y[frame].u_buffer, - denoiser->running_avg_y[frame].uv_stride, - mi_row, mi_col); - filter_mbd->plane[1].pre[j].stride = - denoiser->running_avg_y[frame].uv_stride; - filter_mbd->plane[2].pre[j].buf = - block_start(denoiser->running_avg_y[frame].v_buffer, - denoiser->running_avg_y[frame].uv_stride, - mi_row, mi_col); - filter_mbd->plane[2].pre[j].stride = - denoiser->running_avg_y[frame].uv_stride; - } - filter_mbd->plane[0].dst.buf = - block_start(denoiser->mc_running_avg_y.y_buffer, - denoiser->mc_running_avg_y.y_stride, - mi_row, mi_col); - filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride; - filter_mbd->plane[1].dst.buf = - block_start(denoiser->mc_running_avg_y.u_buffer, - denoiser->mc_running_avg_y.uv_stride, - mi_row, mi_col); - filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride; - filter_mbd->plane[2].dst.buf = - block_start(denoiser->mc_running_avg_y.v_buffer, - denoiser->mc_running_avg_y.uv_stride, - mi_row, mi_col); - filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride; - - vp9_build_inter_predictors_sby(filter_mbd, mv_row, mv_col, bs); - - // Restore everything to its original state - *mbmi = saved_mbmi; - for (i = 0; i < MAX_MB_PLANE; ++i) { - for (j = 0; j < 2; ++j) { - filter_mbd->plane[i].pre[j] = saved_pre[i][j]; - } - filter_mbd->plane[i].dst = saved_dst[i]; - } - - mv_row = ctx->best_sse_mv.as_mv.row; - mv_col = ctx->best_sse_mv.as_mv.col; - - if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) { - return COPY_BLOCK; - } - if (mv_row * mv_row + mv_col * mv_col > - 8 * noise_motion_thresh(bs, increase_denoising)) { - return COPY_BLOCK; - } - return 
FILTER_BLOCK; -} - -void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, - int mi_row, int mi_col, BLOCK_SIZE bs, - PICK_MODE_CONTEXT *ctx) { - int motion_magnitude = 0; - VP9_DENOISER_DECISION decision = FILTER_BLOCK; - YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME]; - YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y; - uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col); - uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride, - mi_row, mi_col); - struct buf_2d src = mb->plane[0].src; - - decision = perform_motion_compensation(denoiser, mb, bs, - denoiser->increase_denoising, - mi_row, mi_col, ctx, - &motion_magnitude); - - if (decision == FILTER_BLOCK) { - decision = denoiser_filter(src.buf, src.stride, - mc_avg_start, mc_avg.y_stride, - avg_start, avg.y_stride, - 0, bs, motion_magnitude); - } - - if (decision == FILTER_BLOCK) { - copy_block(src.buf, src.stride, avg_start, avg.y_stride, bs); - } else { // COPY_BLOCK - copy_block(avg_start, avg.y_stride, src.buf, src.stride, bs); - } -} - -static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) { - int r; - const uint8_t *srcbuf = src.y_buffer; - uint8_t *destbuf = dest.y_buffer; - assert(dest.y_width == src.y_width); - assert(dest.y_height == src.y_height); - - for (r = 0; r < dest.y_height; ++r) { - vpx_memcpy(destbuf, srcbuf, dest.y_width); - destbuf += dest.y_stride; - srcbuf += src.y_stride; - } -} - -void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, - YV12_BUFFER_CONFIG src, - FRAME_TYPE frame_type, - int refresh_alt_ref_frame, - int refresh_golden_frame, - int refresh_last_frame) { - if (frame_type == KEY_FRAME) { - int i; - // Start at 1 so as not to overwrite the INTRA_FRAME - for (i = 1; i < MAX_REF_FRAMES; ++i) { - copy_frame(denoiser->running_avg_y[i], src); - } - } else { /* For non key frames */ - if (refresh_alt_ref_frame) { - copy_frame(denoiser->running_avg_y[ALTREF_FRAME], - 
denoiser->running_avg_y[INTRA_FRAME]); - } - if (refresh_golden_frame) { - copy_frame(denoiser->running_avg_y[GOLDEN_FRAME], - denoiser->running_avg_y[INTRA_FRAME]); - } - if (refresh_last_frame) { - copy_frame(denoiser->running_avg_y[LAST_FRAME], - denoiser->running_avg_y[INTRA_FRAME]); - } - } -} - -void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) { - ctx->zeromv_sse = UINT_MAX; - ctx->newmv_sse = UINT_MAX; -} - -void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, unsigned int sse, - PREDICTION_MODE mode, - PICK_MODE_CONTEXT *ctx) { - // TODO(tkopp): Use both MVs if possible - if (mbmi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) { - ctx->zeromv_sse = sse; - ctx->best_zeromv_reference_frame = mbmi->ref_frame[0]; - } - - if (mode == NEWMV) { - ctx->newmv_sse = sse; - ctx->best_sse_inter_mode = mode; - ctx->best_sse_mv = mbmi->mv[0]; - ctx->best_reference_frame = mbmi->ref_frame[0]; - } -} - -int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, - int ssx, int ssy, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border) { - int i, fail; - assert(denoiser != NULL); - - for (i = 0; i < MAX_REF_FRAMES; ++i) { - fail = vp9_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height, - ssx, ssy, -#if CONFIG_VP9_HIGHBITDEPTH - use_highbitdepth, -#endif - border); - if (fail) { - vp9_denoiser_free(denoiser); - return 1; - } -#ifdef OUTPUT_YUV_DENOISED - make_grayscale(&denoiser->running_avg_y[i]); -#endif - } - - fail = vp9_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, - ssx, ssy, -#if CONFIG_VP9_HIGHBITDEPTH - use_highbitdepth, -#endif - border); - if (fail) { - vp9_denoiser_free(denoiser); - return 1; - } -#ifdef OUTPUT_YUV_DENOISED - make_grayscale(&denoiser->running_avg_y[i]); -#endif - denoiser->increase_denoising = 0; - - return 0; -} - -void vp9_denoiser_free(VP9_DENOISER *denoiser) { - int i; - if (denoiser == NULL) { - return; - } - for (i = 0; i < MAX_REF_FRAMES; ++i) { - if 
(&denoiser->running_avg_y[i] != NULL) { - vp9_free_frame_buffer(&denoiser->running_avg_y[i]); - } - } - if (&denoiser->mc_running_avg_y != NULL) { - vp9_free_frame_buffer(&denoiser->mc_running_avg_y); - } -} - -#ifdef OUTPUT_YUV_DENOISED -static void make_grayscale(YV12_BUFFER_CONFIG *yuv) { - int r, c; - uint8_t *u = yuv->u_buffer; - uint8_t *v = yuv->v_buffer; - - // The '/2's are there because we have a 440 buffer, but we want to output - // 420. - for (r = 0; r < yuv->uv_height / 2; ++r) { - for (c = 0; c < yuv->uv_width / 2; ++c) { - u[c] = UINT8_MAX / 2; - v[c] = UINT8_MAX / 2; - } - u += yuv->uv_stride + yuv->uv_width / 2; - v += yuv->uv_stride + yuv->uv_width / 2; - } -} -#endif diff --git a/media/libvpx/vp9/encoder/vp9_denoiser.h b/media/libvpx/vp9/encoder/vp9_denoiser.h deleted file mode 100644 index fa714b13286..00000000000 --- a/media/libvpx/vp9/encoder/vp9_denoiser.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_ENCODER_DENOISER_H_ -#define VP9_ENCODER_DENOISER_H_ - -#include "vp9/encoder/vp9_block.h" -#include "vpx_scale/yv12config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MOTION_MAGNITUDE_THRESHOLD (8 * 3) - -typedef enum vp9_denoiser_decision { - COPY_BLOCK, - FILTER_BLOCK -} VP9_DENOISER_DECISION; - -typedef struct vp9_denoiser { - YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES]; - YV12_BUFFER_CONFIG mc_running_avg_y; - int increase_denoising; -} VP9_DENOISER; - -void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, - YV12_BUFFER_CONFIG src, - FRAME_TYPE frame_type, - int refresh_alt_ref_frame, - int refresh_golden_frame, - int refresh_last_frame); - -void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, - int mi_row, int mi_col, BLOCK_SIZE bs, - PICK_MODE_CONTEXT *ctx); - -void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx); - -void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, - unsigned int sse, PREDICTION_MODE mode, - PICK_MODE_CONTEXT *ctx); - -int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, - int ssx, int ssy, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border); - -void vp9_denoiser_free(VP9_DENOISER *denoiser); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_DENOISER_H_ diff --git a/media/libvpx/vp9/encoder/vp9_encoder.c b/media/libvpx/vp9/encoder/vp9_encoder.c deleted file mode 100644 index 5d1dd4d8ee2..00000000000 --- a/media/libvpx/vp9/encoder/vp9_encoder.c +++ /dev/null @@ -1,3984 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include -#include - -#include "./vpx_config.h" -#include "./vpx_scale_rtcd.h" -#include "vpx/internal/vpx_psnr.h" -#include "vpx_ports/vpx_timer.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_filter.h" -#include "vp9/common/vp9_idct.h" -#if CONFIG_VP9_POSTPROC -#include "vp9/common/vp9_postproc.h" -#endif -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_reconintra.h" -#include "vp9/common/vp9_systemdependent.h" -#include "vp9/common/vp9_tile_common.h" - -#include "vp9/encoder/vp9_aq_complexity.h" -#include "vp9/encoder/vp9_aq_cyclicrefresh.h" -#include "vp9/encoder/vp9_aq_variance.h" -#include "vp9/encoder/vp9_bitstream.h" -#include "vp9/encoder/vp9_context_tree.h" -#include "vp9/encoder/vp9_encodeframe.h" -#include "vp9/encoder/vp9_encodemv.h" -#include "vp9/encoder/vp9_firstpass.h" -#include "vp9/encoder/vp9_mbgraph.h" -#include "vp9/encoder/vp9_encoder.h" -#include "vp9/encoder/vp9_picklpf.h" -#include "vp9/encoder/vp9_ratectrl.h" -#include "vp9/encoder/vp9_rd.h" -#include "vp9/encoder/vp9_segmentation.h" -#include "vp9/encoder/vp9_speed_features.h" -#if CONFIG_INTERNAL_STATS -#include "vp9/encoder/vp9_ssim.h" -#endif -#include "vp9/encoder/vp9_temporal_filter.h" -#include "vp9/encoder/vp9_resize.h" -#include "vp9/encoder/vp9_svc_layercontext.h" - -void vp9_coef_tree_initialize(); - -#define SHARP_FILTER_QTHRESH 0 /* Q threshold for 8-tap sharp filter */ - -#define ALTREF_HIGH_PRECISION_MV 1 // Whether to use high precision mv - // for altref computation. -#define HIGH_PRECISION_MV_QTHRESH 200 // Q threshold for high precision - // mv. Choose a very high value for - // now so that HIGH_PRECISION is always - // chosen. 
- -// #define OUTPUT_YUV_REC - -#ifdef OUTPUT_YUV_DENOISED -FILE *yuv_denoised_file = NULL; -#endif -#ifdef OUTPUT_YUV_REC -FILE *yuv_rec_file; -#endif - -#if 0 -FILE *framepsnr; -FILE *kf_list; -FILE *keyfile; -#endif - -static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { - switch (mode) { - case NORMAL: - *hr = 1; - *hs = 1; - break; - case FOURFIVE: - *hr = 4; - *hs = 5; - break; - case THREEFIVE: - *hr = 3; - *hs = 5; - break; - case ONETWO: - *hr = 1; - *hs = 2; - break; - default: - *hr = 1; - *hs = 1; - assert(0); - break; - } -} - -void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { - MACROBLOCK *const mb = &cpi->mb; - cpi->common.allow_high_precision_mv = allow_high_precision_mv; - if (cpi->common.allow_high_precision_mv) { - mb->mvcost = mb->nmvcost_hp; - mb->mvsadcost = mb->nmvsadcost_hp; - } else { - mb->mvcost = mb->nmvcost; - mb->mvsadcost = mb->nmvsadcost; - } -} - -static void setup_frame(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - // Set up entropy context depending on frame type. The decoder mandates - // the use of the default context, index 0, for keyframes and inter - // frames where the error_resilient_mode or intra_only flag is set. For - // other inter-frames the encoder currently uses only two contexts; - // context 1 for ALTREF frames and context 0 for the others. 
- if (frame_is_intra_only(cm) || cm->error_resilient_mode) { - vp9_setup_past_independence(cm); - } else { - if (!cpi->use_svc) - cm->frame_context_idx = cpi->refresh_alt_ref_frame; - } - - if (cm->frame_type == KEY_FRAME) { - if (!is_two_pass_svc(cpi)) - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 1; - vp9_zero(cpi->interp_filter_selected); - } else { - cm->fc = cm->frame_contexts[cm->frame_context_idx]; - vp9_zero(cpi->interp_filter_selected[0]); - } -} - -void vp9_initialize_enc() { - static int init_done = 0; - - if (!init_done) { - vp9_rtcd(); - vp9_init_neighbors(); - vp9_init_intra_predictors(); - vp9_coef_tree_initialize(); - vp9_tokenize_initialize(); - vp9_init_me_luts(); - vp9_rc_init_minq_luts(); - vp9_entropy_mv_init(); - vp9_entropy_mode_init(); - vp9_temporal_filter_init(); - init_done = 1; - } -} - -static void dealloc_compressor_data(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - int i; - - // Delete sementation map - vpx_free(cpi->segmentation_map); - cpi->segmentation_map = NULL; - vpx_free(cm->last_frame_seg_map); - cm->last_frame_seg_map = NULL; - vpx_free(cpi->coding_context.last_frame_seg_map_copy); - cpi->coding_context.last_frame_seg_map_copy = NULL; - - vpx_free(cpi->complexity_map); - cpi->complexity_map = NULL; - - vpx_free(cpi->nmvcosts[0]); - vpx_free(cpi->nmvcosts[1]); - cpi->nmvcosts[0] = NULL; - cpi->nmvcosts[1] = NULL; - - vpx_free(cpi->nmvcosts_hp[0]); - vpx_free(cpi->nmvcosts_hp[1]); - cpi->nmvcosts_hp[0] = NULL; - cpi->nmvcosts_hp[1] = NULL; - - vpx_free(cpi->nmvsadcosts[0]); - vpx_free(cpi->nmvsadcosts[1]); - cpi->nmvsadcosts[0] = NULL; - cpi->nmvsadcosts[1] = NULL; - - vpx_free(cpi->nmvsadcosts_hp[0]); - vpx_free(cpi->nmvsadcosts_hp[1]); - cpi->nmvsadcosts_hp[0] = NULL; - cpi->nmvsadcosts_hp[1] = NULL; - - vp9_cyclic_refresh_free(cpi->cyclic_refresh); - cpi->cyclic_refresh = NULL; - - vp9_free_ref_frame_buffers(cm); - vp9_free_context_buffers(cm); - - vp9_free_frame_buffer(&cpi->last_frame_uf); - 
vp9_free_frame_buffer(&cpi->scaled_source); - vp9_free_frame_buffer(&cpi->scaled_last_source); - vp9_free_frame_buffer(&cpi->alt_ref_buffer); - vp9_lookahead_destroy(cpi->lookahead); - - vpx_free(cpi->tok); - cpi->tok = 0; - - vp9_free_pc_tree(cpi); - - for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i]; - vpx_free(lc->rc_twopass_stats_in.buf); - lc->rc_twopass_stats_in.buf = NULL; - lc->rc_twopass_stats_in.sz = 0; - } - - if (cpi->source_diff_var != NULL) { - vpx_free(cpi->source_diff_var); - cpi->source_diff_var = NULL; - } - - for (i = 0; i < MAX_LAG_BUFFERS; ++i) { - vp9_free_frame_buffer(&cpi->svc.scaled_frames[i]); - } - vpx_memset(&cpi->svc.scaled_frames[0], 0, - MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0])); -} - -static void save_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - - // Stores a snapshot of key state variables which can subsequently be - // restored with a call to vp9_restore_coding_context. These functions are - // intended for use in a re-code loop in vp9_compress_frame where the - // quantizer value is adjusted between loop iterations. 
- vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); - - vpx_memcpy(cc->nmvcosts[0], cpi->nmvcosts[0], - MV_VALS * sizeof(*cpi->nmvcosts[0])); - vpx_memcpy(cc->nmvcosts[1], cpi->nmvcosts[1], - MV_VALS * sizeof(*cpi->nmvcosts[1])); - vpx_memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0], - MV_VALS * sizeof(*cpi->nmvcosts_hp[0])); - vpx_memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1], - MV_VALS * sizeof(*cpi->nmvcosts_hp[1])); - - vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - - vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, - cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); - - vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); - vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); - - cc->fc = cm->fc; -} - -static void restore_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - - // Restore key state variables to the snapshot state stored in the - // previous call to vp9_save_coding_context. 
- vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); - - vpx_memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], - MV_VALS * sizeof(*cc->nmvcosts[0])); - vpx_memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], - MV_VALS * sizeof(*cc->nmvcosts[1])); - vpx_memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0], - MV_VALS * sizeof(*cc->nmvcosts_hp[0])); - vpx_memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1], - MV_VALS * sizeof(*cc->nmvcosts_hp[1])); - - vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - - vpx_memcpy(cm->last_frame_seg_map, - cpi->coding_context.last_frame_seg_map_copy, - (cm->mi_rows * cm->mi_cols)); - - vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); - vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); - - cm->fc = cc->fc; -} - -static void configure_static_seg_features(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - const RATE_CONTROL *const rc = &cpi->rc; - struct segmentation *const seg = &cm->seg; - - int high_q = (int)(rc->avg_q > 48.0); - int qi_delta; - - // Disable and clear down for KF - if (cm->frame_type == KEY_FRAME) { - // Clear down the global segmentation map - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - seg->update_map = 0; - seg->update_data = 0; - cpi->static_mb_pct = 0; - - // Disable segmentation - vp9_disable_segmentation(seg); - - // Clear down the segment features. - vp9_clearall_segfeatures(seg); - } else if (cpi->refresh_alt_ref_frame) { - // If this is an alt ref frame - // Clear down the global segmentation map - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - seg->update_map = 0; - seg->update_data = 0; - cpi->static_mb_pct = 0; - - // Disable segmentation and individual segment features by default - vp9_disable_segmentation(seg); - vp9_clearall_segfeatures(seg); - - // Scan frames from current to arf frame. - // This function re-enables segmentation if appropriate. 
- vp9_update_mbgraph_stats(cpi); - - // If segmentation was enabled set those features needed for the - // arf itself. - if (seg->enabled) { - seg->update_map = 1; - seg->update_data = 1; - - qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, - cm->bit_depth); - vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2); - vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); - - vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); - vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF); - - // Where relevant assume segment data is delta data - seg->abs_delta = SEGMENT_DELTADATA; - } - } else if (seg->enabled) { - // All other frames if segmentation has been enabled - - // First normal frame in a valid gf or alt ref group - if (rc->frames_since_golden == 0) { - // Set up segment features for normal frames in an arf group - if (rc->source_alt_ref_active) { - seg->update_map = 0; - seg->update_data = 1; - seg->abs_delta = SEGMENT_DELTADATA; - - qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125, - cm->bit_depth); - vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2); - vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); - - vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); - vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF); - - // Segment coding disabled for compred testing - if (high_q || (cpi->static_mb_pct == 100)) { - vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); - vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME); - vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP); - } - } else { - // Disable segmentation and clear down features if alt ref - // is not active for this group - - vp9_disable_segmentation(seg); - - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - - seg->update_map = 0; - seg->update_data = 0; - - vp9_clearall_segfeatures(seg); - } - } else if (rc->is_src_frame_alt_ref) { - // Special case where we are coding over the top of a previous - // alt ref frame. 
- // Segment coding disabled for compred testing - - // Enable ref frame features for segment 0 as well - vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME); - vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME); - - // All mbs should use ALTREF_FRAME - vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME); - vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME); - vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME); - vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); - - // Skip all MBs if high Q (0,0 mv and skip coeffs) - if (high_q) { - vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP); - vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP); - } - // Enable data update - seg->update_data = 1; - } else { - // All other frames. - - // No updates.. leave things as they are. - seg->update_map = 0; - seg->update_data = 0; - } - } -} - -static void update_reference_segmentation_map(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - MODE_INFO *mi_8x8_ptr = cm->mi; - uint8_t *cache_ptr = cm->last_frame_seg_map; - int row, col; - - for (row = 0; row < cm->mi_rows; row++) { - MODE_INFO *mi_8x8 = mi_8x8_ptr; - uint8_t *cache = cache_ptr; - for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++) - cache[0] = mi_8x8[0].src_mi->mbmi.segment_id; - mi_8x8_ptr += cm->mi_stride; - cache_ptr += cm->mi_cols; - } -} - -static void alloc_raw_frame_buffers(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - const VP9EncoderConfig *oxcf = &cpi->oxcf; - - cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - oxcf->lag_in_frames); - if (!cpi->lookahead) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate lag buffers"); - - if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer, - oxcf->width, oxcf->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) - 
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate altref buffer"); -} - -static void alloc_ref_frame_buffers(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if (vp9_alloc_ref_frame_buffers(cm, cm->width, cm->height)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffers"); -} - -static void alloc_util_frame_buffers(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if (vp9_realloc_frame_buffer(&cpi->last_frame_uf, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate last frame buffer"); - - if (vp9_realloc_frame_buffer(&cpi->scaled_source, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate scaled source buffer"); - - if (vp9_realloc_frame_buffer(&cpi->scaled_last_source, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate scaled last source buffer"); -} - -void vp9_alloc_compressor_data(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - vp9_alloc_context_buffers(cm, cm->width, cm->height); - - vpx_free(cpi->tok); - - { - unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols); - CHECK_MEM_ERROR(cm, cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); - } - - vp9_setup_pc_tree(&cpi->common, cpi); -} - -static void update_frame_size(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; - - vp9_set_mb_mi(cm, cm->width, cm->height); - 
vp9_init_context_buffers(cm); - init_macroblockd(cm, xd); - - if (is_two_pass_svc(cpi)) { - if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to reallocate alt_ref_buffer"); - } -} - -void vp9_new_framerate(VP9_COMP *cpi, double framerate) { - cpi->framerate = framerate < 0.1 ? 30 : framerate; - vp9_rc_update_framerate(cpi); -} - -static void set_tile_limits(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - - int min_log2_tile_cols, max_log2_tile_cols; - vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); - - cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns, - min_log2_tile_cols, max_log2_tile_cols); - cm->log2_tile_rows = cpi->oxcf.tile_rows; -} - -static void init_buffer_indices(VP9_COMP *cpi) { - cpi->lst_fb_idx = 0; - cpi->gld_fb_idx = 1; - cpi->alt_fb_idx = 2; -} - -static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { - VP9_COMMON *const cm = &cpi->common; - - cpi->oxcf = *oxcf; - cpi->framerate = oxcf->init_framerate; - - cm->profile = oxcf->profile; - cm->bit_depth = oxcf->bit_depth; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = oxcf->use_highbitdepth; -#endif - cm->color_space = UNKNOWN; - - cm->width = oxcf->width; - cm->height = oxcf->height; - vp9_alloc_compressor_data(cpi); - - // Spatial scalability. - cpi->svc.number_spatial_layers = oxcf->ss_number_layers; - // Temporal scalability. 
- cpi->svc.number_temporal_layers = oxcf->ts_number_layers; - - if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || - ((cpi->svc.number_temporal_layers > 1 || - cpi->svc.number_spatial_layers > 1) && - cpi->oxcf.pass == 2)) { - vp9_init_layer_context(cpi); - } - - // change includes all joint functionality - vp9_change_config(cpi, oxcf); - - cpi->static_mb_pct = 0; - cpi->ref_frame_flags = 0; - - init_buffer_indices(cpi); - - set_tile_limits(cpi); -} - -static void set_rc_buffer_sizes(RATE_CONTROL *rc, - const VP9EncoderConfig *oxcf) { - const int64_t bandwidth = oxcf->target_bandwidth; - const int64_t starting = oxcf->starting_buffer_level_ms; - const int64_t optimal = oxcf->optimal_buffer_level_ms; - const int64_t maximum = oxcf->maximum_buffer_size_ms; - - rc->starting_buffer_level = starting * bandwidth / 1000; - rc->optimal_buffer_level = (optimal == 0) ? bandwidth / 8 - : optimal * bandwidth / 1000; - rc->maximum_buffer_size = (maximum == 0) ? bandwidth / 8 - : maximum * bandwidth / 1000; -} - -#if CONFIG_VP9_HIGHBITDEPTH -#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF) \ - cpi->fn_ptr[BT].sdf = SDF; \ - cpi->fn_ptr[BT].sdaf = SDAF; \ - cpi->fn_ptr[BT].vf = VF; \ - cpi->fn_ptr[BT].svf = SVF; \ - cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx3f = SDX3F; \ - cpi->fn_ptr[BT].sdx8f = SDX8F; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; - -#define MAKE_BFP_SAD_WRAPPER(fnname) \ -static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride); \ -} \ -static unsigned int fnname##_bits10(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2; \ -} \ -static unsigned int fnname##_bits12(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride) { \ - 
return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4; \ -} - -#define MAKE_BFP_SADAVG_WRAPPER(fnname) static unsigned int \ -fnname##_bits8(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - const uint8_t *second_pred) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred); \ -} \ -static unsigned int fnname##_bits10(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - const uint8_t *second_pred) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ - second_pred) >> 2; \ -} \ -static unsigned int fnname##_bits12(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - const uint8_t *second_pred) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ - second_pred) >> 4; \ -} - -#define MAKE_BFP_SAD3_WRAPPER(fnname) \ -static void fnname##_bits8(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sad_array) { \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ -} \ -static void fnname##_bits10(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 3; i++) \ - sad_array[i] >>= 2; \ -} \ -static void fnname##_bits12(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 3; i++) \ - sad_array[i] >>= 4; \ -} - -#define MAKE_BFP_SAD8_WRAPPER(fnname) \ -static void fnname##_bits8(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sad_array) { \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, 
sad_array); \ -} \ -static void fnname##_bits10(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 8; i++) \ - sad_array[i] >>= 2; \ -} \ -static void fnname##_bits12(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 8; i++) \ - sad_array[i] >>= 4; \ -} -#define MAKE_BFP_SAD4D_WRAPPER(fnname) \ -static void fnname##_bits8(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t* const ref_ptr[], \ - int ref_stride, \ - unsigned int *sad_array) { \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ -} \ -static void fnname##_bits10(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t* const ref_ptr[], \ - int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 4; i++) \ - sad_array[i] >>= 2; \ -} \ -static void fnname##_bits12(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t* const ref_ptr[], \ - int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 4; i++) \ - sad_array[i] >>= 4; \ -} - -MAKE_BFP_SAD_WRAPPER(vp9_high_sad32x16) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad32x16_avg) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad32x16x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad16x32) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad16x32_avg) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad16x32x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad64x32) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad64x32_avg) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad64x32x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad32x64) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad32x64_avg) 
-MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad32x64x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad32x32) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad32x32_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_high_sad32x32x3) -MAKE_BFP_SAD8_WRAPPER(vp9_high_sad32x32x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad32x32x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad64x64) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad64x64_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_high_sad64x64x3) -MAKE_BFP_SAD8_WRAPPER(vp9_high_sad64x64x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad64x64x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad16x16) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad16x16_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_high_sad16x16x3) -MAKE_BFP_SAD8_WRAPPER(vp9_high_sad16x16x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad16x16x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad16x8) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad16x8_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_high_sad16x8x3) -MAKE_BFP_SAD8_WRAPPER(vp9_high_sad16x8x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad16x8x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad8x16) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad8x16_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_high_sad8x16x3) -MAKE_BFP_SAD8_WRAPPER(vp9_high_sad8x16x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad8x16x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad8x8) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad8x8_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_high_sad8x8x3) -MAKE_BFP_SAD8_WRAPPER(vp9_high_sad8x8x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad8x8x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad8x4) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad8x4_avg) -MAKE_BFP_SAD8_WRAPPER(vp9_high_sad8x4x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad8x4x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad4x8) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad4x8_avg) -MAKE_BFP_SAD8_WRAPPER(vp9_high_sad4x8x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad4x8x4d) -MAKE_BFP_SAD_WRAPPER(vp9_high_sad4x4) -MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad4x4_avg) -MAKE_BFP_SAD3_WRAPPER(vp9_high_sad4x4x3) -MAKE_BFP_SAD8_WRAPPER(vp9_high_sad4x4x8) -MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad4x4x4d) - -static void highbd_set_var_fns(VP9_COMP *const cpi) { - VP9_COMMON *const cm 
= &cpi->common; - if (cm->use_highbitdepth) { - switch (cm->bit_depth) { - case VPX_BITS_8: - HIGHBD_BFP(BLOCK_32X16, - vp9_high_sad32x16_bits8, - vp9_high_sad32x16_avg_bits8, - vp9_high_variance32x16, - vp9_high_sub_pixel_variance32x16, - vp9_high_sub_pixel_avg_variance32x16, - NULL, - NULL, - vp9_high_sad32x16x4d_bits8) - - HIGHBD_BFP(BLOCK_16X32, - vp9_high_sad16x32_bits8, - vp9_high_sad16x32_avg_bits8, - vp9_high_variance16x32, - vp9_high_sub_pixel_variance16x32, - vp9_high_sub_pixel_avg_variance16x32, - NULL, - NULL, - vp9_high_sad16x32x4d_bits8) - - HIGHBD_BFP(BLOCK_64X32, - vp9_high_sad64x32_bits8, - vp9_high_sad64x32_avg_bits8, - vp9_high_variance64x32, - vp9_high_sub_pixel_variance64x32, - vp9_high_sub_pixel_avg_variance64x32, - NULL, - NULL, - vp9_high_sad64x32x4d_bits8) - - HIGHBD_BFP(BLOCK_32X64, - vp9_high_sad32x64_bits8, - vp9_high_sad32x64_avg_bits8, - vp9_high_variance32x64, - vp9_high_sub_pixel_variance32x64, - vp9_high_sub_pixel_avg_variance32x64, - NULL, - NULL, - vp9_high_sad32x64x4d_bits8) - - HIGHBD_BFP(BLOCK_32X32, - vp9_high_sad32x32_bits8, - vp9_high_sad32x32_avg_bits8, - vp9_high_variance32x32, - vp9_high_sub_pixel_variance32x32, - vp9_high_sub_pixel_avg_variance32x32, - vp9_high_sad32x32x3_bits8, - vp9_high_sad32x32x8_bits8, - vp9_high_sad32x32x4d_bits8) - - HIGHBD_BFP(BLOCK_64X64, - vp9_high_sad64x64_bits8, - vp9_high_sad64x64_avg_bits8, - vp9_high_variance64x64, - vp9_high_sub_pixel_variance64x64, - vp9_high_sub_pixel_avg_variance64x64, - vp9_high_sad64x64x3_bits8, - vp9_high_sad64x64x8_bits8, - vp9_high_sad64x64x4d_bits8) - - HIGHBD_BFP(BLOCK_16X16, - vp9_high_sad16x16_bits8, - vp9_high_sad16x16_avg_bits8, - vp9_high_variance16x16, - vp9_high_sub_pixel_variance16x16, - vp9_high_sub_pixel_avg_variance16x16, - vp9_high_sad16x16x3_bits8, - vp9_high_sad16x16x8_bits8, - vp9_high_sad16x16x4d_bits8) - - HIGHBD_BFP(BLOCK_16X8, - vp9_high_sad16x8_bits8, - vp9_high_sad16x8_avg_bits8, - vp9_high_variance16x8, - vp9_high_sub_pixel_variance16x8, - 
vp9_high_sub_pixel_avg_variance16x8, - vp9_high_sad16x8x3_bits8, - vp9_high_sad16x8x8_bits8, - vp9_high_sad16x8x4d_bits8) - - HIGHBD_BFP(BLOCK_8X16, - vp9_high_sad8x16_bits8, - vp9_high_sad8x16_avg_bits8, - vp9_high_variance8x16, - vp9_high_sub_pixel_variance8x16, - vp9_high_sub_pixel_avg_variance8x16, - vp9_high_sad8x16x3_bits8, - vp9_high_sad8x16x8_bits8, - vp9_high_sad8x16x4d_bits8) - - HIGHBD_BFP(BLOCK_8X8, - vp9_high_sad8x8_bits8, - vp9_high_sad8x8_avg_bits8, - vp9_high_variance8x8, - vp9_high_sub_pixel_variance8x8, - vp9_high_sub_pixel_avg_variance8x8, - vp9_high_sad8x8x3_bits8, - vp9_high_sad8x8x8_bits8, - vp9_high_sad8x8x4d_bits8) - - HIGHBD_BFP(BLOCK_8X4, - vp9_high_sad8x4_bits8, - vp9_high_sad8x4_avg_bits8, - vp9_high_variance8x4, - vp9_high_sub_pixel_variance8x4, - vp9_high_sub_pixel_avg_variance8x4, - NULL, - vp9_high_sad8x4x8_bits8, - vp9_high_sad8x4x4d_bits8) - - HIGHBD_BFP(BLOCK_4X8, - vp9_high_sad4x8_bits8, - vp9_high_sad4x8_avg_bits8, - vp9_high_variance4x8, - vp9_high_sub_pixel_variance4x8, - vp9_high_sub_pixel_avg_variance4x8, - NULL, - vp9_high_sad4x8x8_bits8, - vp9_high_sad4x8x4d_bits8) - - HIGHBD_BFP(BLOCK_4X4, - vp9_high_sad4x4_bits8, - vp9_high_sad4x4_avg_bits8, - vp9_high_variance4x4, - vp9_high_sub_pixel_variance4x4, - vp9_high_sub_pixel_avg_variance4x4, - vp9_high_sad4x4x3_bits8, - vp9_high_sad4x4x8_bits8, - vp9_high_sad4x4x4d_bits8) - break; - - case VPX_BITS_10: - HIGHBD_BFP(BLOCK_32X16, - vp9_high_sad32x16_bits10, - vp9_high_sad32x16_avg_bits10, - vp9_high_10_variance32x16, - vp9_high_10_sub_pixel_variance32x16, - vp9_high_10_sub_pixel_avg_variance32x16, - NULL, - NULL, - vp9_high_sad32x16x4d_bits10) - - HIGHBD_BFP(BLOCK_16X32, - vp9_high_sad16x32_bits10, - vp9_high_sad16x32_avg_bits10, - vp9_high_10_variance16x32, - vp9_high_10_sub_pixel_variance16x32, - vp9_high_10_sub_pixel_avg_variance16x32, - NULL, - NULL, - vp9_high_sad16x32x4d_bits10) - - HIGHBD_BFP(BLOCK_64X32, - vp9_high_sad64x32_bits10, - vp9_high_sad64x32_avg_bits10, - 
vp9_high_10_variance64x32, - vp9_high_10_sub_pixel_variance64x32, - vp9_high_10_sub_pixel_avg_variance64x32, - NULL, - NULL, - vp9_high_sad64x32x4d_bits10) - - HIGHBD_BFP(BLOCK_32X64, - vp9_high_sad32x64_bits10, - vp9_high_sad32x64_avg_bits10, - vp9_high_10_variance32x64, - vp9_high_10_sub_pixel_variance32x64, - vp9_high_10_sub_pixel_avg_variance32x64, - NULL, - NULL, - vp9_high_sad32x64x4d_bits10) - - HIGHBD_BFP(BLOCK_32X32, - vp9_high_sad32x32_bits10, - vp9_high_sad32x32_avg_bits10, - vp9_high_10_variance32x32, - vp9_high_10_sub_pixel_variance32x32, - vp9_high_10_sub_pixel_avg_variance32x32, - vp9_high_sad32x32x3_bits10, - vp9_high_sad32x32x8_bits10, - vp9_high_sad32x32x4d_bits10) - - HIGHBD_BFP(BLOCK_64X64, - vp9_high_sad64x64_bits10, - vp9_high_sad64x64_avg_bits10, - vp9_high_10_variance64x64, - vp9_high_10_sub_pixel_variance64x64, - vp9_high_10_sub_pixel_avg_variance64x64, - vp9_high_sad64x64x3_bits10, - vp9_high_sad64x64x8_bits10, - vp9_high_sad64x64x4d_bits10) - - HIGHBD_BFP(BLOCK_16X16, - vp9_high_sad16x16_bits10, - vp9_high_sad16x16_avg_bits10, - vp9_high_10_variance16x16, - vp9_high_10_sub_pixel_variance16x16, - vp9_high_10_sub_pixel_avg_variance16x16, - vp9_high_sad16x16x3_bits10, - vp9_high_sad16x16x8_bits10, - vp9_high_sad16x16x4d_bits10) - - HIGHBD_BFP(BLOCK_16X8, - vp9_high_sad16x8_bits10, - vp9_high_sad16x8_avg_bits10, - vp9_high_10_variance16x8, - vp9_high_10_sub_pixel_variance16x8, - vp9_high_10_sub_pixel_avg_variance16x8, - vp9_high_sad16x8x3_bits10, - vp9_high_sad16x8x8_bits10, - vp9_high_sad16x8x4d_bits10) - - HIGHBD_BFP(BLOCK_8X16, - vp9_high_sad8x16_bits10, - vp9_high_sad8x16_avg_bits10, - vp9_high_10_variance8x16, - vp9_high_10_sub_pixel_variance8x16, - vp9_high_10_sub_pixel_avg_variance8x16, - vp9_high_sad8x16x3_bits10, - vp9_high_sad8x16x8_bits10, - vp9_high_sad8x16x4d_bits10) - - HIGHBD_BFP(BLOCK_8X8, - vp9_high_sad8x8_bits10, - vp9_high_sad8x8_avg_bits10, - vp9_high_10_variance8x8, - vp9_high_10_sub_pixel_variance8x8, - 
vp9_high_10_sub_pixel_avg_variance8x8, - vp9_high_sad8x8x3_bits10, - vp9_high_sad8x8x8_bits10, - vp9_high_sad8x8x4d_bits10) - - HIGHBD_BFP(BLOCK_8X4, - vp9_high_sad8x4_bits10, - vp9_high_sad8x4_avg_bits10, - vp9_high_10_variance8x4, - vp9_high_10_sub_pixel_variance8x4, - vp9_high_10_sub_pixel_avg_variance8x4, - NULL, - vp9_high_sad8x4x8_bits10, - vp9_high_sad8x4x4d_bits10) - - HIGHBD_BFP(BLOCK_4X8, - vp9_high_sad4x8_bits10, - vp9_high_sad4x8_avg_bits10, - vp9_high_10_variance4x8, - vp9_high_10_sub_pixel_variance4x8, - vp9_high_10_sub_pixel_avg_variance4x8, - NULL, - vp9_high_sad4x8x8_bits10, - vp9_high_sad4x8x4d_bits10) - - HIGHBD_BFP(BLOCK_4X4, - vp9_high_sad4x4_bits10, - vp9_high_sad4x4_avg_bits10, - vp9_high_10_variance4x4, - vp9_high_10_sub_pixel_variance4x4, - vp9_high_10_sub_pixel_avg_variance4x4, - vp9_high_sad4x4x3_bits10, - vp9_high_sad4x4x8_bits10, - vp9_high_sad4x4x4d_bits10) - break; - - case VPX_BITS_12: - HIGHBD_BFP(BLOCK_32X16, - vp9_high_sad32x16_bits12, - vp9_high_sad32x16_avg_bits12, - vp9_high_12_variance32x16, - vp9_high_12_sub_pixel_variance32x16, - vp9_high_12_sub_pixel_avg_variance32x16, - NULL, - NULL, - vp9_high_sad32x16x4d_bits12) - - HIGHBD_BFP(BLOCK_16X32, - vp9_high_sad16x32_bits12, - vp9_high_sad16x32_avg_bits12, - vp9_high_12_variance16x32, - vp9_high_12_sub_pixel_variance16x32, - vp9_high_12_sub_pixel_avg_variance16x32, - NULL, - NULL, - vp9_high_sad16x32x4d_bits12) - - HIGHBD_BFP(BLOCK_64X32, - vp9_high_sad64x32_bits12, - vp9_high_sad64x32_avg_bits12, - vp9_high_12_variance64x32, - vp9_high_12_sub_pixel_variance64x32, - vp9_high_12_sub_pixel_avg_variance64x32, - NULL, - NULL, - vp9_high_sad64x32x4d_bits12) - - HIGHBD_BFP(BLOCK_32X64, - vp9_high_sad32x64_bits12, - vp9_high_sad32x64_avg_bits12, - vp9_high_12_variance32x64, - vp9_high_12_sub_pixel_variance32x64, - vp9_high_12_sub_pixel_avg_variance32x64, - NULL, - NULL, - vp9_high_sad32x64x4d_bits12) - - HIGHBD_BFP(BLOCK_32X32, - vp9_high_sad32x32_bits12, - 
vp9_high_sad32x32_avg_bits12, - vp9_high_12_variance32x32, - vp9_high_12_sub_pixel_variance32x32, - vp9_high_12_sub_pixel_avg_variance32x32, - vp9_high_sad32x32x3_bits12, - vp9_high_sad32x32x8_bits12, - vp9_high_sad32x32x4d_bits12) - - HIGHBD_BFP(BLOCK_64X64, - vp9_high_sad64x64_bits12, - vp9_high_sad64x64_avg_bits12, - vp9_high_12_variance64x64, - vp9_high_12_sub_pixel_variance64x64, - vp9_high_12_sub_pixel_avg_variance64x64, - vp9_high_sad64x64x3_bits12, - vp9_high_sad64x64x8_bits12, - vp9_high_sad64x64x4d_bits12) - - HIGHBD_BFP(BLOCK_16X16, - vp9_high_sad16x16_bits12, - vp9_high_sad16x16_avg_bits12, - vp9_high_12_variance16x16, - vp9_high_12_sub_pixel_variance16x16, - vp9_high_12_sub_pixel_avg_variance16x16, - vp9_high_sad16x16x3_bits12, - vp9_high_sad16x16x8_bits12, - vp9_high_sad16x16x4d_bits12) - - HIGHBD_BFP(BLOCK_16X8, - vp9_high_sad16x8_bits12, - vp9_high_sad16x8_avg_bits12, - vp9_high_12_variance16x8, - vp9_high_12_sub_pixel_variance16x8, - vp9_high_12_sub_pixel_avg_variance16x8, - vp9_high_sad16x8x3_bits12, - vp9_high_sad16x8x8_bits12, - vp9_high_sad16x8x4d_bits12) - - HIGHBD_BFP(BLOCK_8X16, - vp9_high_sad8x16_bits12, - vp9_high_sad8x16_avg_bits12, - vp9_high_12_variance8x16, - vp9_high_12_sub_pixel_variance8x16, - vp9_high_12_sub_pixel_avg_variance8x16, - vp9_high_sad8x16x3_bits12, - vp9_high_sad8x16x8_bits12, - vp9_high_sad8x16x4d_bits12) - - HIGHBD_BFP(BLOCK_8X8, - vp9_high_sad8x8_bits12, - vp9_high_sad8x8_avg_bits12, - vp9_high_12_variance8x8, - vp9_high_12_sub_pixel_variance8x8, - vp9_high_12_sub_pixel_avg_variance8x8, - vp9_high_sad8x8x3_bits12, - vp9_high_sad8x8x8_bits12, - vp9_high_sad8x8x4d_bits12) - - HIGHBD_BFP(BLOCK_8X4, - vp9_high_sad8x4_bits12, - vp9_high_sad8x4_avg_bits12, - vp9_high_12_variance8x4, - vp9_high_12_sub_pixel_variance8x4, - vp9_high_12_sub_pixel_avg_variance8x4, - NULL, - vp9_high_sad8x4x8_bits12, - vp9_high_sad8x4x4d_bits12) - - HIGHBD_BFP(BLOCK_4X8, - vp9_high_sad4x8_bits12, - vp9_high_sad4x8_avg_bits12, - 
vp9_high_12_variance4x8, - vp9_high_12_sub_pixel_variance4x8, - vp9_high_12_sub_pixel_avg_variance4x8, - NULL, - vp9_high_sad4x8x8_bits12, - vp9_high_sad4x8x4d_bits12) - - HIGHBD_BFP(BLOCK_4X4, - vp9_high_sad4x4_bits12, - vp9_high_sad4x4_avg_bits12, - vp9_high_12_variance4x4, - vp9_high_12_sub_pixel_variance4x4, - vp9_high_12_sub_pixel_avg_variance4x4, - vp9_high_sad4x4x3_bits12, - vp9_high_sad4x4x8_bits12, - vp9_high_sad4x4x4d_bits12) - break; - - default: - assert(0 && "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or VPX_BITS_12"); - } - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { - VP9_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - - if (cm->profile != oxcf->profile) - cm->profile = oxcf->profile; - cm->bit_depth = oxcf->bit_depth; - - if (cm->profile <= PROFILE_1) - assert(cm->bit_depth == VPX_BITS_8); - else - assert(cm->bit_depth > VPX_BITS_8); - - cpi->oxcf = *oxcf; -#if CONFIG_VP9_HIGHBITDEPTH - if (cpi->oxcf.use_highbitdepth) { - cpi->mb.e_mbd.bd = (int)cm->bit_depth; - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; - - cpi->refresh_golden_frame = 0; - cpi->refresh_last_frame = 1; - cm->refresh_frame_context = 1; - cm->reset_frame_context = 0; - - vp9_reset_segment_features(&cm->seg); - vp9_set_high_precision_mv(cpi, 0); - - { - int i; - - for (i = 0; i < MAX_SEGMENTS; i++) - cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout; - } - cpi->encode_breakout = cpi->oxcf.encode_breakout; - - set_rc_buffer_sizes(rc, &cpi->oxcf); - - // Under a configuration change, where maximum_buffer_size may change, - // keep buffer level clipped to the maximum allowed buffer size. - rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); - rc->buffer_level = MIN(rc->buffer_level, rc->maximum_buffer_size); - - // Set up frame rate and related parameters rate control values. 
- vp9_new_framerate(cpi, cpi->framerate); - - // Set absolute upper and lower quality limits - rc->worst_quality = cpi->oxcf.worst_allowed_q; - rc->best_quality = cpi->oxcf.best_allowed_q; - - cm->interp_filter = cpi->sf.default_interp_filter; - - cm->display_width = cpi->oxcf.width; - cm->display_height = cpi->oxcf.height; - - if (cpi->initial_width) { - // Increasing the size of the frame beyond the first seen frame, or some - // otherwise signaled maximum size, is not supported. - // TODO(jkoleszar): exit gracefully. - assert(cm->width <= cpi->initial_width); - assert(cm->height <= cpi->initial_height); - } - update_frame_size(cpi); - - if ((cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.rc_mode == VPX_CBR) || - ((cpi->svc.number_temporal_layers > 1 || - cpi->svc.number_spatial_layers > 1) && - cpi->oxcf.pass == 2)) { - vp9_update_layer_context_change_config(cpi, - (int)cpi->oxcf.target_bandwidth); - } - - cpi->alt_ref_source = NULL; - rc->is_src_frame_alt_ref = 0; - -#if 0 - // Experimental RD Code - cpi->frame_distortion = 0; - cpi->last_frame_distortion = 0; -#endif - - set_tile_limits(cpi); - - cpi->ext_refresh_frame_flags_pending = 0; - cpi->ext_refresh_frame_context_pending = 0; - -#if CONFIG_VP9_HIGHBITDEPTH - highbd_set_var_fns(cpi); -#endif - -#if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) { - vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS); - } -#endif -} - -#ifndef M_LOG2_E -#define M_LOG2_E 0.693147180559945309417 -#endif -#define log2f(x) (log (x) / (float) M_LOG2_E) - -static void cal_nmvjointsadcost(int *mvjointsadcost) { - mvjointsadcost[0] = 600; - mvjointsadcost[1] = 300; - mvjointsadcost[2] = 300; - mvjointsadcost[3] = 300; -} - -static void cal_nmvsadcosts(int *mvsadcost[2]) { - int i = 1; - - mvsadcost[0][0] = 0; - mvsadcost[1][0] = 0; - - do { - double z = 256 * (2 * 
(log2f(8 * i) + .6)); - mvsadcost[0][i] = (int)z; - mvsadcost[1][i] = (int)z; - mvsadcost[0][-i] = (int)z; - mvsadcost[1][-i] = (int)z; - } while (++i <= MV_MAX); -} - -static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { - int i = 1; - - mvsadcost[0][0] = 0; - mvsadcost[1][0] = 0; - - do { - double z = 256 * (2 * (log2f(8 * i) + .6)); - mvsadcost[0][i] = (int)z; - mvsadcost[1][i] = (int)z; - mvsadcost[0][-i] = (int)z; - mvsadcost[1][-i] = (int)z; - } while (++i <= MV_MAX); -} - - -VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { - unsigned int i, j; - VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP)); - VP9_COMMON *const cm = cpi != NULL ? &cpi->common : NULL; - - if (!cm) - return NULL; - - vp9_zero(*cpi); - - if (setjmp(cm->error.jmp)) { - cm->error.setjmp = 0; - vp9_remove_compressor(cpi); - return 0; - } - - cm->error.setjmp = 1; - - cpi->use_svc = 0; - - init_config(cpi, oxcf); - vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc); - - cm->current_video_frame = 0; - cpi->skippable_frame = 0; - - // Create the encoder segmentation map and set all entries to 0 - CHECK_MEM_ERROR(cm, cpi->segmentation_map, - vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); - - // Create a complexity map used for rd adjustment - CHECK_MEM_ERROR(cm, cpi->complexity_map, - vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); - - // Create a map used for cyclic background refresh. 
- CHECK_MEM_ERROR(cm, cpi->cyclic_refresh, - vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols)); - - // And a place holder structure is the coding context - // for use if we want to save and restore it - CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy, - vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); - - CHECK_MEM_ERROR(cm, cpi->nmvcosts[0], - vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0]))); - CHECK_MEM_ERROR(cm, cpi->nmvcosts[1], - vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1]))); - CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0], - vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0]))); - CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1], - vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1]))); - CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0], - vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0]))); - CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1], - vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1]))); - CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0], - vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0]))); - CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1], - vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1]))); - - for (i = 0; i < (sizeof(cpi->mbgraph_stats) / - sizeof(cpi->mbgraph_stats[0])); i++) { - CHECK_MEM_ERROR(cm, cpi->mbgraph_stats[i].mb_stats, - vpx_calloc(cm->MBs * - sizeof(*cpi->mbgraph_stats[i].mb_stats), 1)); - } - -#if CONFIG_FP_MB_STATS - cpi->use_fp_mb_stats = 0; - if (cpi->use_fp_mb_stats) { - // a place holder used to store the first pass mb stats in the first pass - CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf, - vpx_calloc(cm->MBs * sizeof(uint8_t), 1)); - } else { - cpi->twopass.frame_mb_stats_buf = NULL; - } -#endif - - cpi->refresh_alt_ref_frame = 0; - - // Note that at the moment multi_arf will not work with svc. - // For the current check in all the execution paths are defaulted to 0 - // pending further tuning and testing. The code is left in place here - // as a place holder in regard to the required paths. 
- cpi->multi_arf_last_grp_enabled = 0; - if (oxcf->pass == 2) { - if (cpi->use_svc) { - cpi->multi_arf_allowed = 0; - cpi->multi_arf_enabled = 0; - } else { - // Disable by default for now. - cpi->multi_arf_allowed = 0; - cpi->multi_arf_enabled = 0; - } - } else { - cpi->multi_arf_allowed = 0; - cpi->multi_arf_enabled = 0; - } - - cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; -#if CONFIG_INTERNAL_STATS - cpi->b_calculate_ssimg = 0; - - cpi->count = 0; - cpi->bytes = 0; - - if (cpi->b_calculate_psnr) { - cpi->total_y = 0.0; - cpi->total_u = 0.0; - cpi->total_v = 0.0; - cpi->total = 0.0; - cpi->total_sq_error = 0; - cpi->total_samples = 0; - - cpi->totalp_y = 0.0; - cpi->totalp_u = 0.0; - cpi->totalp_v = 0.0; - cpi->totalp = 0.0; - cpi->totalp_sq_error = 0; - cpi->totalp_samples = 0; - - cpi->tot_recode_hits = 0; - cpi->summed_quality = 0; - cpi->summed_weights = 0; - cpi->summedp_quality = 0; - cpi->summedp_weights = 0; - } - - if (cpi->b_calculate_ssimg) { - cpi->total_ssimg_y = 0; - cpi->total_ssimg_u = 0; - cpi->total_ssimg_v = 0; - cpi->total_ssimg_all = 0; - } - -#endif - - cpi->first_time_stamp_ever = INT64_MAX; - - cal_nmvjointsadcost(cpi->mb.nmvjointsadcost); - cpi->mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX]; - cpi->mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX]; - cpi->mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX]; - cpi->mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX]; - cal_nmvsadcosts(cpi->mb.nmvsadcost); - - cpi->mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX]; - cpi->mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX]; - cpi->mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX]; - cpi->mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX]; - cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp); - -#if CONFIG_VP9_TEMPORAL_DENOISING -#ifdef OUTPUT_YUV_DENOISED - yuv_denoised_file = fopen("denoised.yuv", "ab"); -#endif -#endif -#ifdef OUTPUT_YUV_REC - yuv_rec_file = fopen("rec.yuv", "wb"); -#endif - -#if 0 - framepsnr = fopen("framepsnr.stt", "a"); - kf_list = 
fopen("kf_list.stt", "w"); -#endif - - cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; - - if (oxcf->pass == 1) { - vp9_init_first_pass(cpi); - } else if (oxcf->pass == 2) { - const size_t packet_sz = sizeof(FIRSTPASS_STATS); - const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); - - if (cpi->svc.number_spatial_layers > 1 - || cpi->svc.number_temporal_layers > 1) { - FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf; - FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = {0}; - int i; - - for (i = 0; i < oxcf->ss_number_layers; ++i) { - FIRSTPASS_STATS *const last_packet_for_layer = - &stats[packets - oxcf->ss_number_layers + i]; - const int layer_id = (int)last_packet_for_layer->spatial_layer_id; - const int packets_in_layer = (int)last_packet_for_layer->count + 1; - if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id]; - - vpx_free(lc->rc_twopass_stats_in.buf); - - lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz; - CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf, - vpx_malloc(lc->rc_twopass_stats_in.sz)); - lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf; - lc->twopass.stats_in = lc->twopass.stats_in_start; - lc->twopass.stats_in_end = lc->twopass.stats_in_start - + packets_in_layer - 1; - stats_copy[layer_id] = lc->rc_twopass_stats_in.buf; - } - } - - for (i = 0; i < packets; ++i) { - const int layer_id = (int)stats[i].spatial_layer_id; - if (layer_id >= 0 && layer_id < oxcf->ss_number_layers - && stats_copy[layer_id] != NULL) { - *stats_copy[layer_id] = stats[i]; - ++stats_copy[layer_id]; - } - } - - vp9_init_second_pass_spatial_svc(cpi); - } else { -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - const size_t psz = cpi->common.MBs * sizeof(uint8_t); - const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz); - - cpi->twopass.firstpass_mb_stats.mb_stats_start = - oxcf->firstpass_mb_stats_in.buf; - cpi->twopass.firstpass_mb_stats.mb_stats_end = 
- cpi->twopass.firstpass_mb_stats.mb_stats_start + - (ps - 1) * cpi->common.MBs * sizeof(uint8_t); - } -#endif - - cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; - cpi->twopass.stats_in = cpi->twopass.stats_in_start; - cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; - - vp9_init_second_pass(cpi); - } - } - - vp9_set_speed_features(cpi); - - // Allocate memory to store variances for a frame. - CHECK_MEM_ERROR(cm, cpi->source_diff_var, - vpx_calloc(cm->MBs, sizeof(diff))); - cpi->source_var_thresh = 0; - cpi->frames_till_next_var_check = 0; - - // Default rd threshold factors for mode selection - for (i = 0; i < BLOCK_SIZES; ++i) { - for (j = 0; j < MAX_MODES; ++j) { - cpi->rd.thresh_freq_fact[i][j] = 32; - cpi->rd.mode_map[i][j] = j; - } - } - -#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF)\ - cpi->fn_ptr[BT].sdf = SDF; \ - cpi->fn_ptr[BT].sdaf = SDAF; \ - cpi->fn_ptr[BT].vf = VF; \ - cpi->fn_ptr[BT].svf = SVF; \ - cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx3f = SDX3F; \ - cpi->fn_ptr[BT].sdx8f = SDX8F; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; - - BFP(BLOCK_32X16, vp9_sad32x16, vp9_sad32x16_avg, - vp9_variance32x16, vp9_sub_pixel_variance32x16, - vp9_sub_pixel_avg_variance32x16, NULL, NULL, vp9_sad32x16x4d) - - BFP(BLOCK_16X32, vp9_sad16x32, vp9_sad16x32_avg, - vp9_variance16x32, vp9_sub_pixel_variance16x32, - vp9_sub_pixel_avg_variance16x32, NULL, NULL, vp9_sad16x32x4d) - - BFP(BLOCK_64X32, vp9_sad64x32, vp9_sad64x32_avg, - vp9_variance64x32, vp9_sub_pixel_variance64x32, - vp9_sub_pixel_avg_variance64x32, NULL, NULL, vp9_sad64x32x4d) - - BFP(BLOCK_32X64, vp9_sad32x64, vp9_sad32x64_avg, - vp9_variance32x64, vp9_sub_pixel_variance32x64, - vp9_sub_pixel_avg_variance32x64, NULL, NULL, vp9_sad32x64x4d) - - BFP(BLOCK_32X32, vp9_sad32x32, vp9_sad32x32_avg, - vp9_variance32x32, vp9_sub_pixel_variance32x32, - vp9_sub_pixel_avg_variance32x32, vp9_sad32x32x3, vp9_sad32x32x8, - vp9_sad32x32x4d) - - BFP(BLOCK_64X64, 
vp9_sad64x64, vp9_sad64x64_avg, - vp9_variance64x64, vp9_sub_pixel_variance64x64, - vp9_sub_pixel_avg_variance64x64, vp9_sad64x64x3, vp9_sad64x64x8, - vp9_sad64x64x4d) - - BFP(BLOCK_16X16, vp9_sad16x16, vp9_sad16x16_avg, - vp9_variance16x16, vp9_sub_pixel_variance16x16, - vp9_sub_pixel_avg_variance16x16, vp9_sad16x16x3, vp9_sad16x16x8, - vp9_sad16x16x4d) - - BFP(BLOCK_16X8, vp9_sad16x8, vp9_sad16x8_avg, - vp9_variance16x8, vp9_sub_pixel_variance16x8, - vp9_sub_pixel_avg_variance16x8, - vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d) - - BFP(BLOCK_8X16, vp9_sad8x16, vp9_sad8x16_avg, - vp9_variance8x16, vp9_sub_pixel_variance8x16, - vp9_sub_pixel_avg_variance8x16, - vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d) - - BFP(BLOCK_8X8, vp9_sad8x8, vp9_sad8x8_avg, - vp9_variance8x8, vp9_sub_pixel_variance8x8, - vp9_sub_pixel_avg_variance8x8, - vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d) - - BFP(BLOCK_8X4, vp9_sad8x4, vp9_sad8x4_avg, - vp9_variance8x4, vp9_sub_pixel_variance8x4, - vp9_sub_pixel_avg_variance8x4, NULL, vp9_sad8x4x8, vp9_sad8x4x4d) - - BFP(BLOCK_4X8, vp9_sad4x8, vp9_sad4x8_avg, - vp9_variance4x8, vp9_sub_pixel_variance4x8, - vp9_sub_pixel_avg_variance4x8, NULL, vp9_sad4x8x8, vp9_sad4x8x4d) - - BFP(BLOCK_4X4, vp9_sad4x4, vp9_sad4x4_avg, - vp9_variance4x4, vp9_sub_pixel_variance4x4, - vp9_sub_pixel_avg_variance4x4, - vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d) - - /* vp9_init_quantizer() is first called here. Add check in - * vp9_frame_init_quantizer() so that vp9_init_quantizer is only - * called later when needed. This will avoid unnecessary calls of - * vp9_init_quantizer() for every frame. 
- */ - vp9_init_quantizer(cpi); - - vp9_loop_filter_init(cm); - - cm->error.setjmp = 0; - - return cpi; -} - -void vp9_remove_compressor(VP9_COMP *cpi) { - unsigned int i; - - if (!cpi) - return; - - if (cpi && (cpi->common.current_video_frame > 0)) { -#if CONFIG_INTERNAL_STATS - - vp9_clear_system_state(); - - // printf("\n8x8-4x4:%d-%d\n", cpi->t8x8_count, cpi->t4x4_count); - if (cpi->oxcf.pass != 1) { - FILE *f = fopen("opsnr.stt", "a"); - double time_encoded = (cpi->last_end_time_stamp_seen - - cpi->first_time_stamp_ever) / 10000000.000; - double total_encode_time = (cpi->time_receive_data + - cpi->time_compress_data) / 1000.000; - double dr = (double)cpi->bytes * (double) 8 / (double)1000 - / time_encoded; - - if (cpi->b_calculate_psnr) { - const double total_psnr = - vpx_sse_to_psnr((double)cpi->total_samples, 255.0, - (double)cpi->total_sq_error); - const double totalp_psnr = - vpx_sse_to_psnr((double)cpi->totalp_samples, 255.0, - (double)cpi->totalp_sq_error); - const double total_ssim = 100 * pow(cpi->summed_quality / - cpi->summed_weights, 8.0); - const double totalp_ssim = 100 * pow(cpi->summedp_quality / - cpi->summedp_weights, 8.0); - - fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t" - "VPXSSIM\tVPSSIMP\t Time(ms)\n"); - fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f\n", - dr, cpi->total / cpi->count, total_psnr, - cpi->totalp / cpi->count, totalp_psnr, total_ssim, totalp_ssim, - total_encode_time); - } - - if (cpi->b_calculate_ssimg) { - fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t Time(ms)\n"); - fprintf(f, "%7.2f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr, - cpi->total_ssimg_y / cpi->count, - cpi->total_ssimg_u / cpi->count, - cpi->total_ssimg_v / cpi->count, - cpi->total_ssimg_all / cpi->count, total_encode_time); - } - - fclose(f); - } - -#endif - -#if 0 - { - printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000); - printf("\n_frames recive_data encod_mb_row compress_frame Total\n"); - 
printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame, - cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000, - cpi->time_compress_data / 1000, - (cpi->time_receive_data + cpi->time_compress_data) / 1000); - } -#endif - } - -#if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) { - vp9_denoiser_free(&(cpi->denoiser)); - } -#endif - - dealloc_compressor_data(cpi); - vpx_free(cpi->tok); - - for (i = 0; i < sizeof(cpi->mbgraph_stats) / - sizeof(cpi->mbgraph_stats[0]); ++i) { - vpx_free(cpi->mbgraph_stats[i].mb_stats); - } - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - vpx_free(cpi->twopass.frame_mb_stats_buf); - cpi->twopass.frame_mb_stats_buf = NULL; - } -#endif - - vp9_remove_common(&cpi->common); - vpx_free(cpi); - -#if CONFIG_VP9_TEMPORAL_DENOISING -#ifdef OUTPUT_YUV_DENOISED - fclose(yuv_denoised_file); -#endif -#endif -#ifdef OUTPUT_YUV_REC - fclose(yuv_rec_file); -#endif - -#if 0 - - if (keyfile) - fclose(keyfile); - - if (framepsnr) - fclose(framepsnr); - - if (kf_list) - fclose(kf_list); - -#endif -} - -static int64_t get_sse(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int width, int height) { - const int dw = width % 16; - const int dh = height % 16; - int64_t total_sse = 0; - unsigned int sse = 0; - int sum = 0; - int x, y; - - if (dw > 0) { - variance(&a[width - dw], a_stride, &b[width - dw], b_stride, - dw, height, &sse, &sum); - total_sse += sse; - } - - if (dh > 0) { - variance(&a[(height - dh) * a_stride], a_stride, - &b[(height - dh) * b_stride], b_stride, - width - dw, dh, &sse, &sum); - total_sse += sse; - } - - for (y = 0; y < height / 16; ++y) { - const uint8_t *pa = a; - const uint8_t *pb = b; - for (x = 0; x < width / 16; ++x) { - vp9_mse16x16(pa, a_stride, pb, b_stride, &sse); - total_sse += sse; - - pa += 16; - pb += 16; - } - - a += 16 * a_stride; - b += 16 * b_stride; - } - - return total_sse; -} - -#if CONFIG_VP9_HIGHBITDEPTH -static int64_t 
highbd_get_sse_shift(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int width, int height, - unsigned int input_shift) { - const uint16_t *a = CONVERT_TO_SHORTPTR(a8); - const uint16_t *b = CONVERT_TO_SHORTPTR(b8); - int64_t total_sse = 0; - int x, y; - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - int64_t diff; - diff = (a[x] >> input_shift) - (b[x] >> input_shift); - total_sse += diff * diff; - } - a += a_stride; - b += b_stride; - } - return total_sse; -} - -static int64_t highbd_get_sse(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int width, int height) { - int64_t total_sse = 0; - int x, y; - const int dw = width % 16; - const int dh = height % 16; - unsigned int sse = 0; - int sum = 0; - if (dw > 0) { - high_variance(&a[width - dw], a_stride, &b[width - dw], b_stride, - dw, height, &sse, &sum); - total_sse += sse; - } - if (dh > 0) { - high_variance(&a[(height - dh) * a_stride], a_stride, - &b[(height - dh) * b_stride], b_stride, - width - dw, dh, &sse, &sum); - total_sse += sse; - } - for (y = 0; y < height / 16; ++y) { - const uint8_t *pa = a; - const uint8_t *pb = b; - for (x = 0; x < width / 16; ++x) { - vp9_high_mse16x16(pa, a_stride, pb, b_stride, &sse); - total_sse += sse; - pa += 16; - pb += 16; - } - a += 16 * a_stride; - b += 16 * b_stride; - } - return total_sse; -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -typedef struct { - double psnr[4]; // total/y/u/v - uint64_t sse[4]; // total/y/u/v - uint32_t samples[4]; // total/y/u/v -} PSNR_STATS; - -static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, - PSNR_STATS *psnr) { - const int widths[3] = {a->y_width, a->uv_width, a->uv_width }; - const int heights[3] = {a->y_height, a->uv_height, a->uv_height}; - const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer }; - const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride}; - const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer }; - 
const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride}; - int i; - uint64_t total_sse = 0; - uint32_t total_samples = 0; - - for (i = 0; i < 3; ++i) { - const int w = widths[i]; - const int h = heights[i]; - const uint32_t samples = w * h; - const uint64_t sse = get_sse(a_planes[i], a_strides[i], - b_planes[i], b_strides[i], - w, h); - psnr->sse[1 + i] = sse; - psnr->samples[1 + i] = samples; - psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse); - - total_sse += sse; - total_samples += samples; - } - - psnr->sse[0] = total_sse; - psnr->samples[0] = total_samples; - psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, 255.0, - (double)total_sse); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a, - const YV12_BUFFER_CONFIG *b, - PSNR_STATS *psnr, - unsigned int bit_depth, - unsigned int in_bit_depth) { - const int widths[3] = {a->y_width, a->uv_width, a->uv_width }; - const int heights[3] = {a->y_height, a->uv_height, a->uv_height}; - const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer }; - const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride}; - const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer }; - const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride}; - int i; - uint64_t total_sse = 0; - uint32_t total_samples = 0; - const double peak = (double)((1 << in_bit_depth) - 1); - const unsigned int input_shift = bit_depth - in_bit_depth; - - for (i = 0; i < 3; ++i) { - const int w = widths[i]; - const int h = heights[i]; - const uint32_t samples = w * h; - uint64_t sse; - if (a->flags & YV12_FLAG_HIGHBITDEPTH) { - if (input_shift) { - sse = highbd_get_sse_shift(a_planes[i], a_strides[i], - b_planes[i], b_strides[i], w, h, - input_shift); - } else { - sse = highbd_get_sse(a_planes[i], a_strides[i], - b_planes[i], b_strides[i], w, h); - } - } else { - sse = get_sse(a_planes[i], a_strides[i], - b_planes[i], b_strides[i], - w, h); - } - 
psnr->sse[1 + i] = sse; - psnr->samples[1 + i] = samples; - psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse); - - total_sse += sse; - total_samples += samples; - } - - psnr->sse[0] = total_sse; - psnr->samples[0] = total_samples; - psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak, - (double)total_sse); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -static void generate_psnr_packet(VP9_COMP *cpi) { - struct vpx_codec_cx_pkt pkt; - int i; - PSNR_STATS psnr; -#if CONFIG_VP9_HIGHBITDEPTH - calc_highbd_psnr(cpi->Source, cpi->common.frame_to_show, &psnr, - cpi->mb.e_mbd.bd, cpi->oxcf.input_bit_depth); -#else - calc_psnr(cpi->Source, cpi->common.frame_to_show, &psnr); -#endif - - for (i = 0; i < 4; ++i) { - pkt.data.psnr.samples[i] = psnr.samples[i]; - pkt.data.psnr.sse[i] = psnr.sse[i]; - pkt.data.psnr.psnr[i] = psnr.psnr[i]; - } - pkt.kind = VPX_CODEC_PSNR_PKT; - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].psnr_pkt = pkt.data.psnr; - else - vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); -} - -int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) { - if (ref_frame_flags > 7) - return -1; - - cpi->ref_frame_flags = ref_frame_flags; - return 0; -} - -void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) { - cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0; - cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0; - cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0; - cpi->ext_refresh_frame_flags_pending = 1; -} - -static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(VP9_COMP *cpi, - VP9_REFFRAME ref_frame_flag) { - MV_REFERENCE_FRAME ref_frame = NONE; - if (ref_frame_flag == VP9_LAST_FLAG) - ref_frame = LAST_FRAME; - else if (ref_frame_flag == VP9_GOLD_FLAG) - ref_frame = GOLDEN_FRAME; - else if (ref_frame_flag == VP9_ALT_FLAG) - ref_frame = ALTREF_FRAME; - - return ref_frame == NONE ? 
NULL : get_ref_frame_buffer(cpi, ref_frame); -} - -int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { - YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag); - if (cfg) { - vp8_yv12_copy_frame(cfg, sd); - return 0; - } else { - return -1; - } -} - -int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { - YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag); - if (cfg) { - vp8_yv12_copy_frame(sd, cfg); - return 0; - } else { - return -1; - } -} - -int vp9_update_entropy(VP9_COMP * cpi, int update) { - cpi->ext_refresh_frame_context = update; - cpi->ext_refresh_frame_context_pending = 1; - return 0; -} - -#if CONFIG_VP9_TEMPORAL_DENOISING -#if defined(OUTPUT_YUV_DENOISED) -// The denoiser buffer is allocated as a YUV 440 buffer. This function writes it -// as YUV 420. We simply use the top-left pixels of the UV buffers, since we do -// not denoise the UV channels at this time. If ever we implement UV channel -// denoising we will have to modify this. 
-void vp9_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f) { - uint8_t *src = s->y_buffer; - int h = s->y_height; - - do { - fwrite(src, s->y_width, 1, f); - src += s->y_stride; - } while (--h); - - src = s->u_buffer; - h = s->uv_height / 2; - - do { - fwrite(src, s->uv_width / 2, 1, f); - src += s->uv_stride + s->uv_width / 2; - } while (--h); - - src = s->v_buffer; - h = s->uv_height / 2; - - do { - fwrite(src, s->uv_width / 2, 1, f); - src += s->uv_stride + s->uv_width / 2; - } while (--h); -} -#endif -#endif - -#ifdef OUTPUT_YUV_REC -void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { - YV12_BUFFER_CONFIG *s = cm->frame_to_show; - uint8_t *src = s->y_buffer; - int h = cm->height; - -#if CONFIG_VP9_HIGHBITDEPTH - if (s->flags & YV12_FLAG_HIGHBITDEPTH) { - uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer); - - do { - fwrite(src16, s->y_width, 2, yuv_rec_file); - src16 += s->y_stride; - } while (--h); - - src16 = CONVERT_TO_SHORTPTR(s->u_buffer); - h = s->uv_height; - - do { - fwrite(src16, s->uv_width, 2, yuv_rec_file); - src16 += s->uv_stride; - } while (--h); - - src16 = CONVERT_TO_SHORTPTR(s->v_buffer); - h = s->uv_height; - - do { - fwrite(src16, s->uv_width, 2, yuv_rec_file); - src16 += s->uv_stride; - } while (--h); - - fflush(yuv_rec_file); - return; - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - do { - fwrite(src, s->y_width, 1, yuv_rec_file); - src += s->y_stride; - } while (--h); - - src = s->u_buffer; - h = s->uv_height; - - do { - fwrite(src, s->uv_width, 1, yuv_rec_file); - src += s->uv_stride; - } while (--h); - - src = s->v_buffer; - h = s->uv_height; - - do { - fwrite(src, s->uv_width, 1, yuv_rec_file); - src += s->uv_stride; - } while (--h); - - fflush(yuv_rec_file); -} -#endif - -#if CONFIG_VP9_HIGHBITDEPTH -static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - int bd) { -#else -static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst) { -#endif // 
CONFIG_VP9_HIGHBITDEPTH - // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t - int i; - const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer}; - const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride}; - const int src_widths[3] = {src->y_crop_width, src->uv_crop_width, - src->uv_crop_width }; - const int src_heights[3] = {src->y_crop_height, src->uv_crop_height, - src->uv_crop_height}; - uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer}; - const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride}; - const int dst_widths[3] = {dst->y_crop_width, dst->uv_crop_width, - dst->uv_crop_width}; - const int dst_heights[3] = {dst->y_crop_height, dst->uv_crop_height, - dst->uv_crop_height}; - - for (i = 0; i < MAX_MB_PLANE; ++i) { -#if CONFIG_VP9_HIGHBITDEPTH - if (src->flags & YV12_FLAG_HIGHBITDEPTH) { - vp9_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i], - src_strides[i], dsts[i], dst_heights[i], - dst_widths[i], dst_strides[i], bd); - } else { - vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i], - dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]); - } -#else - vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i], - dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]); -#endif // CONFIG_VP9_HIGHBITDEPTH - } - vp9_extend_frame_borders(dst); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, int bd) { -#else -static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst) { -#endif // CONFIG_VP9_HIGHBITDEPTH - const int src_w = src->y_crop_width; - const int src_h = src->y_crop_height; - const int dst_w = dst->y_crop_width; - const int dst_h = dst->y_crop_height; - const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer}; - const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride}; - 
uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer}; - const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride}; - const InterpKernel *const kernel = vp9_get_interp_kernel(EIGHTTAP); - int x, y, i; - - for (y = 0; y < dst_h; y += 16) { - for (x = 0; x < dst_w; x += 16) { - for (i = 0; i < MAX_MB_PLANE; ++i) { - const int factor = (i == 0 || i == 3 ? 1 : 2); - const int x_q4 = x * (16 / factor) * src_w / dst_w; - const int y_q4 = y * (16 / factor) * src_h / dst_h; - const int src_stride = src_strides[i]; - const int dst_stride = dst_strides[i]; - const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h * - src_stride + (x / factor) * src_w / dst_w; - uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor); - -#if CONFIG_VP9_HIGHBITDEPTH - if (src->flags & YV12_FLAG_HIGHBITDEPTH) { - vp9_high_convolve8(src_ptr, src_stride, dst_ptr, dst_stride, - kernel[x_q4 & 0xf], 16 * src_w / dst_w, - kernel[y_q4 & 0xf], 16 * src_h / dst_h, - 16 / factor, 16 / factor, bd); - } else { - vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride, - kernel[x_q4 & 0xf], 16 * src_w / dst_w, - kernel[y_q4 & 0xf], 16 * src_h / dst_h, - 16 / factor, 16 / factor); - } -#else - vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride, - kernel[x_q4 & 0xf], 16 * src_w / dst_w, - kernel[y_q4 & 0xf], 16 * src_h / dst_h, - 16 / factor, 16 / factor); -#endif // CONFIG_VP9_HIGHBITDEPTH - } - } - } - - vp9_extend_frame_borders(dst); -} - -// Function to test for conditions that indicate we should loop -// back and recode a frame. -static int recode_loop_test(const VP9_COMP *cpi, - int high_limit, int low_limit, - int q, int maxq, int minq) { - const VP9_COMMON *const cm = &cpi->common; - const RATE_CONTROL *const rc = &cpi->rc; - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - int force_recode = 0; - - // Special case trap if maximum allowed frame size exceeded. 
- if (rc->projected_frame_size > rc->max_frame_bandwidth) { - force_recode = 1; - - // Is frame recode allowed. - // Yes if either recode mode 1 is selected or mode 2 is selected - // and the frame is a key frame, golden frame or alt_ref_frame - } else if ((cpi->sf.recode_loop == ALLOW_RECODE) || - ((cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF) && - (cm->frame_type == KEY_FRAME || - cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { - // General over and under shoot tests - if ((rc->projected_frame_size > high_limit && q < maxq) || - (rc->projected_frame_size < low_limit && q > minq)) { - force_recode = 1; - } else if (cpi->oxcf.rc_mode == VPX_CQ) { - // Deal with frame undershoot and whether or not we are - // below the automatically set cq level. - if (q > oxcf->cq_level && - rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) { - force_recode = 1; - } - } - } - return force_recode; -} - -void vp9_update_reference_frames(VP9_COMP *cpi) { - VP9_COMMON * const cm = &cpi->common; - - // At this point the new frame has been encoded. - // If any buffer copy / swapping is signaled it should be done here. - if (cm->frame_type == KEY_FRAME) { - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); - } else if (vp9_preserve_existing_gf(cpi)) { - // We have decided to preserve the previously existing golden frame as our - // new ARF frame. However, in the short term in function - // vp9_bitstream.c::get_refresh_mask() we left it in the GF slot and, if - // we're updating the GF with the current decoded frame, we save it to the - // ARF slot instead. - // We now have to update the ARF with the current frame and swap gld_fb_idx - // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF - // slot and, if we're updating the GF, the current frame becomes the new GF. 
- int tmp; - - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); - - tmp = cpi->alt_fb_idx; - cpi->alt_fb_idx = cpi->gld_fb_idx; - cpi->gld_fb_idx = tmp; - - if (is_two_pass_svc(cpi)) { - cpi->svc.layer_context[0].gold_ref_idx = cpi->gld_fb_idx; - cpi->svc.layer_context[0].alt_ref_idx = cpi->alt_fb_idx; - } - } else { /* For non key/golden frames */ - if (cpi->refresh_alt_ref_frame) { - int arf_idx = cpi->alt_fb_idx; - if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - arf_idx = gf_group->arf_update_idx[gf_group->index]; - } - - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[arf_idx], cm->new_fb_idx); - vpx_memcpy(cpi->interp_filter_selected[ALTREF_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); - } - - if (cpi->refresh_golden_frame) { - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); - if (!cpi->rc.is_src_frame_alt_ref) - vpx_memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); - else - vpx_memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], - cpi->interp_filter_selected[ALTREF_FRAME], - sizeof(cpi->interp_filter_selected[ALTREF_FRAME])); - } - } - - if (cpi->refresh_last_frame) { - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); - if (!cpi->rc.is_src_frame_alt_ref) - vpx_memcpy(cpi->interp_filter_selected[LAST_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); - } -#if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) { - vp9_denoiser_update_frame_info(&cpi->denoiser, - *cpi->Source, - cpi->common.frame_type, - cpi->refresh_alt_ref_frame, - cpi->refresh_golden_frame, - cpi->refresh_last_frame); - } -#endif -} - -static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { - MACROBLOCKD *xd = &cpi->mb.e_mbd; - struct loopfilter *lf = 
&cm->lf; - if (xd->lossless) { - lf->filter_level = 0; - } else { - struct vpx_usec_timer timer; - - vp9_clear_system_state(); - - vpx_usec_timer_start(&timer); - - vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick); - - vpx_usec_timer_mark(&timer); - cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); - } - - if (lf->filter_level > 0) { - vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); - } - - vp9_extend_frame_inner_borders(cm->frame_to_show); -} - -void vp9_scale_references(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - MV_REFERENCE_FRAME ref_frame; - const VP9_REFFRAME ref_mask[3] = {VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG}; - - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf; - - // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1). - if ((cpi->ref_frame_flags & ref_mask[ref_frame - 1]) && - (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height)) { - const int new_fb = get_free_fb(cm); - vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif // CONFIG_VP9_HIGHBITDEPTH - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL); -#if CONFIG_VP9_HIGHBITDEPTH - scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf, - (int)cm->bit_depth); -#else - scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf); -#endif // CONFIG_VP9_HIGHBITDEPTH - cpi->scaled_ref_idx[ref_frame - 1] = new_fb; - } else { - cpi->scaled_ref_idx[ref_frame - 1] = idx; - cm->frame_bufs[idx].ref_count++; - } - } -} - -static void release_scaled_references(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - int i; - - for (i = 0; i < 3; i++) - cm->frame_bufs[cpi->scaled_ref_idx[i]].ref_count--; -} - -static void full_to_model_count(unsigned int 
*model_count, - unsigned int *full_count) { - int n; - model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN]; - model_count[ONE_TOKEN] = full_count[ONE_TOKEN]; - model_count[TWO_TOKEN] = full_count[TWO_TOKEN]; - for (n = THREE_TOKEN; n < EOB_TOKEN; ++n) - model_count[TWO_TOKEN] += full_count[n]; - model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN]; -} - -static void full_to_model_counts(vp9_coeff_count_model *model_count, - vp9_coeff_count *full_count) { - int i, j, k, l; - - for (i = 0; i < PLANE_TYPES; ++i) - for (j = 0; j < REF_TYPES; ++j) - for (k = 0; k < COEF_BANDS; ++k) - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) - full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]); -} - -#if 0 && CONFIG_INTERNAL_STATS -static void output_frame_level_debug_stats(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w"); - int recon_err; - - vp9_clear_system_state(); - - recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); - - if (cpi->twopass.total_left_stats.coded_error != 0.0) - fprintf(f, "%10u %10d %10d %10d %10d" - "%10"PRId64" %10"PRId64" %10"PRId64" %10"PRId64" %10d " - "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" - "%6d %6d %5d %5d %5d " - "%10"PRId64" %10.3lf" - "%10lf %8u %10d %10d %10d\n", - cpi->common.current_video_frame, cpi->rc.this_frame_target, - cpi->rc.projected_frame_size, - cpi->rc.projected_frame_size / cpi->common.MBs, - (cpi->rc.projected_frame_size - cpi->rc.this_frame_target), - cpi->rc.vbr_bits_off_target, - cpi->rc.total_target_vs_actual, - (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target), - cpi->rc.total_actual_bits, cm->base_qindex, - vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth), - (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) / 4.0, - vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality, - cm->bit_depth), - cpi->rc.avg_q, - vp9_convert_qindex_to_q(cpi->oxcf.cq_level, cm->bit_depth), - cpi->refresh_last_frame, 
cpi->refresh_golden_frame, - cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost, - cpi->twopass.bits_left, - cpi->twopass.total_left_stats.coded_error, - cpi->twopass.bits_left / - (1 + cpi->twopass.total_left_stats.coded_error), - cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost, - cpi->twopass.kf_zeromotion_pct); - - fclose(f); - - if (0) { - FILE *const fmodes = fopen("Modes.stt", "a"); - int i; - - fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame, - cm->frame_type, cpi->refresh_golden_frame, - cpi->refresh_alt_ref_frame); - - for (i = 0; i < MAX_MODES; ++i) - fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]); - - fprintf(fmodes, "\n"); - - fclose(fmodes); - } -} -#endif - -static void encode_without_recode_loop(VP9_COMP *cpi, - int q) { - VP9_COMMON *const cm = &cpi->common; - vp9_clear_system_state(); - vp9_set_quantizer(cm, q); - setup_frame(cpi); - // Variance adaptive and in frame q adjustment experiments are mutually - // exclusive. - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - vp9_vaq_frame_setup(cpi); - } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - vp9_setup_in_frame_q_adj(cpi); - } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - vp9_cyclic_refresh_setup(cpi); - } - // transform / motion compensation build reconstruction frame - vp9_encode_frame(cpi); - - // Update the skip mb flag probabilities based on the distribution - // seen in the last encoder iteration. 
- // update_base_skip_probs(cpi); - vp9_clear_system_state(); -} - -static void encode_with_recode_loop(VP9_COMP *cpi, - size_t *size, - uint8_t *dest, - int q, - int bottom_index, - int top_index) { - VP9_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - int loop_count = 0; - int loop = 0; - int overshoot_seen = 0; - int undershoot_seen = 0; - int q_low = bottom_index, q_high = top_index; - int frame_over_shoot_limit; - int frame_under_shoot_limit; - - // Decide frame size bounds - vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target, - &frame_under_shoot_limit, - &frame_over_shoot_limit); - - do { - vp9_clear_system_state(); - - vp9_set_quantizer(cm, q); - - if (loop_count == 0) - setup_frame(cpi); - - // Variance adaptive and in frame q adjustment experiments are mutually - // exclusive. - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - vp9_vaq_frame_setup(cpi); - } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - vp9_setup_in_frame_q_adj(cpi); - } - - // transform / motion compensation build reconstruction frame - vp9_encode_frame(cpi); - - // Update the skip mb flag probabilities based on the distribution - // seen in the last encoder iteration. - // update_base_skip_probs(cpi); - - vp9_clear_system_state(); - - // Dummy pack of the bitstream using up to date stats to get an - // accurate estimate of output frame size to determine if we need - // to recode. 
- if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { - save_coding_context(cpi); - if (!cpi->sf.use_nonrd_pick_mode) - vp9_pack_bitstream(cpi, dest, size); - - rc->projected_frame_size = (int)(*size) << 3; - restore_coding_context(cpi); - - if (frame_over_shoot_limit == 0) - frame_over_shoot_limit = 1; - } - - if (cpi->oxcf.rc_mode == VPX_Q) { - loop = 0; - } else { - if ((cm->frame_type == KEY_FRAME) && - rc->this_key_frame_forced && - (rc->projected_frame_size < rc->max_frame_bandwidth)) { - int last_q = q; - int kf_err; - - int high_err_target = cpi->ambient_err; - int low_err_target = cpi->ambient_err >> 1; - -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - kf_err = vp9_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm), - cm->bit_depth); - } else { - kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); - } -#else - kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); -#endif // CONFIG_VP9_HIGHBITDEPTH - - // Prevent possible divide by zero error below for perfect KF - kf_err += !kf_err; - - // The key frame is not good enough or we can afford - // to make it better without undue risk of popping. - if ((kf_err > high_err_target && - rc->projected_frame_size <= frame_over_shoot_limit) || - (kf_err > low_err_target && - rc->projected_frame_size <= frame_under_shoot_limit)) { - // Lower q_high - q_high = q > q_low ? q - 1 : q_low; - - // Adjust Q - q = (q * high_err_target) / kf_err; - q = MIN(q, (q_high + q_low) >> 1); - } else if (kf_err < low_err_target && - rc->projected_frame_size >= frame_under_shoot_limit) { - // The key frame is much better than the previous frame - // Raise q_low - q_low = q < q_high ? 
q + 1 : q_high; - - // Adjust Q - q = (q * low_err_target) / kf_err; - q = MIN(q, (q_high + q_low + 1) >> 1); - } - - // Clamp Q to upper and lower limits: - q = clamp(q, q_low, q_high); - - loop = q != last_q; - } else if (recode_loop_test( - cpi, frame_over_shoot_limit, frame_under_shoot_limit, - q, MAX(q_high, top_index), bottom_index)) { - // Is the projected frame size out of range and are we allowed - // to attempt to recode. - int last_q = q; - int retries = 0; - - // Frame size out of permitted range: - // Update correction factor & compute new Q to try... - - // Frame is too large - if (rc->projected_frame_size > rc->this_frame_target) { - // Special case if the projected size is > the max allowed. - if (rc->projected_frame_size >= rc->max_frame_bandwidth) - q_high = rc->worst_quality; - - // Raise Qlow as to at least the current value - q_low = q < q_high ? q + 1 : q_high; - - if (undershoot_seen || loop_count > 1) { - // Update rate_correction_factor unless - vp9_rc_update_rate_correction_factors(cpi, 1); - - q = (q_high + q_low + 1) / 2; - } else { - // Update rate_correction_factor unless - vp9_rc_update_rate_correction_factors(cpi, 0); - - q = vp9_rc_regulate_q(cpi, rc->this_frame_target, - bottom_index, MAX(q_high, top_index)); - - while (q < q_low && retries < 10) { - vp9_rc_update_rate_correction_factors(cpi, 0); - q = vp9_rc_regulate_q(cpi, rc->this_frame_target, - bottom_index, MAX(q_high, top_index)); - retries++; - } - } - - overshoot_seen = 1; - } else { - // Frame is too small - q_high = q > q_low ? q - 1 : q_low; - - if (overshoot_seen || loop_count > 1) { - vp9_rc_update_rate_correction_factors(cpi, 1); - q = (q_high + q_low) / 2; - } else { - vp9_rc_update_rate_correction_factors(cpi, 0); - q = vp9_rc_regulate_q(cpi, rc->this_frame_target, - bottom_index, top_index); - // Special case reset for qlow for constrained quality. 
- // This should only trigger where there is very substantial - // undershoot on a frame and the auto cq level is above - // the user passsed in value. - if (cpi->oxcf.rc_mode == VPX_CQ && - q < q_low) { - q_low = q; - } - - while (q > q_high && retries < 10) { - vp9_rc_update_rate_correction_factors(cpi, 0); - q = vp9_rc_regulate_q(cpi, rc->this_frame_target, - bottom_index, top_index); - retries++; - } - } - - undershoot_seen = 1; - } - - // Clamp Q to upper and lower limits: - q = clamp(q, q_low, q_high); - - loop = q != last_q; - } else { - loop = 0; - } - } - - // Special case for overlay frame. - if (rc->is_src_frame_alt_ref && - rc->projected_frame_size < rc->max_frame_bandwidth) - loop = 0; - - if (loop) { - loop_count++; - -#if CONFIG_INTERNAL_STATS - cpi->tot_recode_hits++; -#endif - } - } while (loop); -} - -static int get_ref_frame_flags(const VP9_COMP *cpi) { - const int *const map = cpi->common.ref_frame_map; - const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx]; - const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx]; - const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx]; - int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; - - if (gold_is_last) - flags &= ~VP9_GOLD_FLAG; - - if (cpi->rc.frames_till_gf_update_due == INT_MAX && !is_two_pass_svc(cpi)) - flags &= ~VP9_GOLD_FLAG; - - if (alt_is_last) - flags &= ~VP9_ALT_FLAG; - - if (gold_is_alt) - flags &= ~VP9_ALT_FLAG; - - return flags; -} - -static void set_ext_overrides(VP9_COMP *cpi) { - // Overrides the defaults with the externally supplied values with - // vp9_update_reference() and vp9_update_entropy() calls - // Note: The overrides are valid only for the next frame passed - // to encode_frame_to_data_rate() function - if (cpi->ext_refresh_frame_context_pending) { - cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context; - cpi->ext_refresh_frame_context_pending = 0; - } - if (cpi->ext_refresh_frame_flags_pending) { - 
cpi->refresh_last_frame = cpi->ext_refresh_last_frame; - cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame; - cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame; - cpi->ext_refresh_frame_flags_pending = 0; - } -} - -YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, - YV12_BUFFER_CONFIG *unscaled, - YV12_BUFFER_CONFIG *scaled) { - if (cm->mi_cols * MI_SIZE != unscaled->y_width || - cm->mi_rows * MI_SIZE != unscaled->y_height) { -#if CONFIG_VP9_HIGHBITDEPTH - scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth); -#else - scale_and_extend_frame_nonnormative(unscaled, scaled); -#endif // CONFIG_VP9_HIGHBITDEPTH - return scaled; - } else { - return unscaled; - } -} - -static int is_skippable_frame(const VP9_COMP *cpi) { - // If the current frame does not have non-zero motion vector detected in the - // first pass, and so do its previous and forward frames, then this frame - // can be skipped for partition check, and the partition size is assigned - // according to the variance - const SVC *const svc = &cpi->svc; - const TWO_PASS *const twopass = is_two_pass_svc(cpi) ? 
- &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass; - - return (!frame_is_intra_only(&cpi->common) && - twopass->stats_in - 2 > twopass->stats_in_start && - twopass->stats_in < twopass->stats_in_end && - (twopass->stats_in - 1)->pcnt_inter - (twopass->stats_in - 1)->pcnt_motion - == 1 && - (twopass->stats_in - 2)->pcnt_inter - (twopass->stats_in - 2)->pcnt_motion - == 1 && - twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1); -} - -static void set_arf_sign_bias(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - int arf_sign_bias; - - if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - arf_sign_bias = cpi->rc.source_alt_ref_active && - (!cpi->refresh_alt_ref_frame || - (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)); - } else { - arf_sign_bias = - (cpi->rc.source_alt_ref_active && !cpi->refresh_alt_ref_frame); - } - cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias; -} - -static void set_mv_search_params(VP9_COMP *cpi) { - const VP9_COMMON *const cm = &cpi->common; - const unsigned int max_mv_def = MIN(cm->width, cm->height); - - // Default based on max resolution. - cpi->mv_step_param = vp9_init_search_range(max_mv_def); - - if (cpi->sf.mv.auto_mv_step_size) { - if (frame_is_intra_only(cm)) { - // Initialize max_mv_magnitude for use in the first INTER frame - // after a key/intra-only frame. - cpi->max_mv_magnitude = max_mv_def; - } else { - if (cm->show_frame) - // Allow mv_steps to correspond to twice the max mv magnitude found - // in the previous frame, capped by the default max_mv_magnitude based - // on resolution. 
- cpi->mv_step_param = - vp9_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude)); - cpi->max_mv_magnitude = 0; - } - } -} - - -int setup_interp_filter_search_mask(VP9_COMP *cpi) { - INTERP_FILTER ifilter; - int ref_total[MAX_REF_FRAMES] = {0}; - MV_REFERENCE_FRAME ref; - int mask = 0; - if (cpi->common.last_frame_type == KEY_FRAME || - cpi->refresh_alt_ref_frame) - return mask; - for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) - for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) - ref_total[ref] += cpi->interp_filter_selected[ref][ifilter]; - - for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) { - if ((ref_total[LAST_FRAME] && - cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) && - (ref_total[GOLDEN_FRAME] == 0 || - cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 - < ref_total[GOLDEN_FRAME]) && - (ref_total[ALTREF_FRAME] == 0 || - cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50 - < ref_total[ALTREF_FRAME])) - mask |= 1 << ifilter; - } - return mask; -} - -static void encode_frame_to_data_rate(VP9_COMP *cpi, - size_t *size, - uint8_t *dest, - unsigned int *frame_flags) { - VP9_COMMON *const cm = &cpi->common; - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - struct segmentation *const seg = &cm->seg; - TX_SIZE t; - int q; - int top_index; - int bottom_index; - - set_ext_overrides(cpi); - - cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source, - &cpi->scaled_source); - - if (cpi->unscaled_last_source != NULL) - cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source, - &cpi->scaled_last_source); - - vp9_scale_references(cpi); - - vp9_clear_system_state(); - - // Enable or disable mode based tweaking of the zbin. - // For 2 pass only used where GF/ARF prediction quality - // is above a threshold. - cpi->zbin_mode_boost = 0; - cpi->zbin_mode_boost_enabled = 0; - - // Set the arf sign bias for this frame. 
- set_arf_sign_bias(cpi); - - // Set default state for segment based loop filter update flags. - cm->lf.mode_ref_delta_update = 0; - - set_mv_search_params(cpi); - - if (cpi->oxcf.pass == 2 && - cpi->sf.adaptive_interp_filter_search) - cpi->sf.interp_filter_search_mask = - setup_interp_filter_search_mask(cpi); - - - // Set various flags etc to special state if it is a key frame. - if (frame_is_intra_only(cm)) { - // Reset the loop filter deltas and segmentation map. - vp9_reset_segment_features(&cm->seg); - - // If segmentation is enabled force a map update for key frames. - if (seg->enabled) { - seg->update_map = 1; - seg->update_data = 1; - } - - // The alternate reference frame cannot be active for a key frame. - cpi->rc.source_alt_ref_active = 0; - - cm->error_resilient_mode = oxcf->error_resilient_mode; - - // By default, encoder assumes decoder can use prev_mi. - if (cm->error_resilient_mode) { - cm->frame_parallel_decoding_mode = 1; - cm->reset_frame_context = 0; - cm->refresh_frame_context = 0; - } else if (cm->intra_only) { - cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode; - // Only reset the current context. - cm->reset_frame_context = 2; - } - } - if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) { - cm->frame_context_idx = - cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + - cpi->svc.temporal_layer_id; - - // The probs will be updated based on the frame type of its previous - // frame if frame_parallel_decoding_mode is 0. The type may vary for - // the frame after a key frame in base layer since we may drop enhancement - // layers. So set frame_parallel_decoding_mode to 1 in this case. 
- if (cpi->svc.number_temporal_layers == 1) { - if (cpi->svc.spatial_layer_id == 0 && - cpi->svc.layer_context[0].last_frame_type == KEY_FRAME) - cm->frame_parallel_decoding_mode = 1; - else - cm->frame_parallel_decoding_mode = 0; - } else if (cpi->svc.spatial_layer_id == 0) { - // Find the 2nd frame in temporal base layer and 1st frame in temporal - // enhancement layers from the key frame. - int i; - for (i = 0; i < cpi->svc.number_temporal_layers; ++i) { - if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) { - cm->frame_parallel_decoding_mode = 1; - break; - } - } - if (i == cpi->svc.number_temporal_layers) - cm->frame_parallel_decoding_mode = 0; - } - } - - // Configure experimental use of segmentation for enhanced coding of - // static regions if indicated. - // Only allowed in second pass of two pass (as requires lagged coding) - // and if the relevant speed feature flag is set. - if (oxcf->pass == 2 && cpi->sf.static_segmentation) - configure_static_seg_features(cpi); - - // Check if the current frame is skippable for the partition search in the - // second pass according to the first pass stats - if (oxcf->pass == 2 && - (!cpi->use_svc || is_two_pass_svc(cpi))) { - cpi->skippable_frame = is_skippable_frame(cpi); - } - - // For 1 pass CBR, check if we are dropping this frame. - // Never drop on key frame. 
- if (oxcf->pass == 0 && - oxcf->rc_mode == VPX_CBR && - cm->frame_type != KEY_FRAME) { - if (vp9_rc_drop_frame(cpi)) { - vp9_rc_postencode_update_drop_frame(cpi); - ++cm->current_video_frame; - return; - } - } - - vp9_clear_system_state(); - -#if CONFIG_VP9_POSTPROC - if (oxcf->noise_sensitivity > 0) { - int l = 0; - switch (oxcf->noise_sensitivity) { - case 1: - l = 20; - break; - case 2: - l = 40; - break; - case 3: - l = 60; - break; - case 4: - case 5: - l = 100; - break; - case 6: - l = 150; - break; - } - vp9_denoise(cpi->Source, cpi->Source, l); - } -#endif - -#if CONFIG_INTERNAL_STATS - { - int i; - for (i = 0; i < MAX_MODES; ++i) - cpi->mode_chosen_counts[i] = 0; - } -#endif - - vp9_set_speed_features(cpi); - - vp9_set_rd_speed_thresholds(cpi); - vp9_set_rd_speed_thresholds_sub8x8(cpi); - - // Decide q and q bounds. - q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index); - - if (!frame_is_intra_only(cm)) { - cm->interp_filter = cpi->sf.default_interp_filter; - /* TODO: Decide this more intelligently */ - vp9_set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH); - } - - if (cpi->sf.recode_loop == DISALLOW_RECODE) { - encode_without_recode_loop(cpi, q); - } else { - encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index); - } - -#if CONFIG_VP9_TEMPORAL_DENOISING -#ifdef OUTPUT_YUV_DENOISED - if (oxcf->noise_sensitivity > 0) { - vp9_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME], - yuv_denoised_file); - } -#endif -#endif - - - // Special case code to reduce pulsing when key frames are forced at a - // fixed interval. 
Note the reconstruction error if it is the frame before - // the force key frame - if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) { -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - cpi->ambient_err = vp9_highbd_get_y_sse(cpi->Source, - get_frame_new_buffer(cm), - cm->bit_depth); - } else { - cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); - } -#else - cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); -#endif // CONFIG_VP9_HIGHBITDEPTH - } - - // If the encoder forced a KEY_FRAME decision - if (cm->frame_type == KEY_FRAME) - cpi->refresh_last_frame = 1; - - cm->frame_to_show = get_frame_new_buffer(cm); - - // Pick the loop filter level for the frame. - loopfilter_frame(cpi, cm); - - // build the bitstream - vp9_pack_bitstream(cpi, dest, size); - - if (cm->seg.update_map) - update_reference_segmentation_map(cpi); - - release_scaled_references(cpi); - vp9_update_reference_frames(cpi); - - for (t = TX_4X4; t <= TX_32X32; t++) - full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]); - - if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) - vp9_adapt_coef_probs(cm); - - if (!frame_is_intra_only(cm)) { - if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { - vp9_adapt_mode_probs(cm); - vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); - } - } - - if (cpi->refresh_golden_frame == 1) - cpi->frame_flags |= FRAMEFLAGS_GOLDEN; - else - cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN; - - if (cpi->refresh_alt_ref_frame == 1) - cpi->frame_flags |= FRAMEFLAGS_ALTREF; - else - cpi->frame_flags &= ~FRAMEFLAGS_ALTREF; - - cpi->ref_frame_flags = get_ref_frame_flags(cpi); - - cm->last_frame_type = cm->frame_type; - vp9_rc_postencode_update(cpi, *size); - -#if 0 - output_frame_level_debug_stats(cpi); -#endif - - if (cm->frame_type == KEY_FRAME) { - // Tell the caller that the frame was coded as a key frame - *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY; - } else { - 
*frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY; - } - - // Clear the one shot update flags for segmentation map and mode/ref loop - // filter deltas. - cm->seg.update_map = 0; - cm->seg.update_data = 0; - cm->lf.mode_ref_delta_update = 0; - - // keep track of the last coded dimensions - cm->last_width = cm->width; - cm->last_height = cm->height; - - // reset to normal state now that we are done. - if (!cm->show_existing_frame) { - if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) - cm->last_show_frame = 0; - else - cm->last_show_frame = cm->show_frame; - } - - if (cm->show_frame) { - vp9_swap_mi_and_prev_mi(cm); - - // Don't increment frame counters if this was an altref buffer - // update not a real frame - ++cm->current_video_frame; - if (cpi->use_svc) - vp9_inc_frame_in_layer(cpi); - } - - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type = - cm->frame_type; -} - -static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, - unsigned int *frame_flags) { - vp9_rc_get_svc_params(cpi); - encode_frame_to_data_rate(cpi, size, dest, frame_flags); -} - -static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, - unsigned int *frame_flags) { - if (cpi->oxcf.rc_mode == VPX_CBR) { - vp9_rc_get_one_pass_cbr_params(cpi); - } else { - vp9_rc_get_one_pass_vbr_params(cpi); - } - encode_frame_to_data_rate(cpi, size, dest, frame_flags); -} - -static void Pass2Encode(VP9_COMP *cpi, size_t *size, - uint8_t *dest, unsigned int *frame_flags) { - cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; - encode_frame_to_data_rate(cpi, size, dest, frame_flags); - vp9_twopass_postencode_update(cpi); -} - -static void init_motion_estimation(VP9_COMP *cpi) { - int y_stride = cpi->scaled_source.y_stride; - - if (cpi->sf.mv.search_method == NSTEP) { - vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride); - } else if (cpi->sf.mv.search_method == DIAMOND) { - vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride); - } -} - 
-static void check_initial_width(VP9_COMP *cpi, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int subsampling_x, int subsampling_y) { - VP9_COMMON *const cm = &cpi->common; - - if (!cpi->initial_width) { - cm->subsampling_x = subsampling_x; - cm->subsampling_y = subsampling_y; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = use_highbitdepth; -#endif - - alloc_raw_frame_buffers(cpi); - alloc_ref_frame_buffers(cpi); - alloc_util_frame_buffers(cpi); - - init_motion_estimation(cpi); - - cpi->initial_width = cm->width; - cpi->initial_height = cm->height; - } -} - - -int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, - YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time) { - VP9_COMMON *cm = &cpi->common; - struct vpx_usec_timer timer; - int res = 0; - const int subsampling_x = sd->uv_width < sd->y_width; - const int subsampling_y = sd->uv_height < sd->y_height; -#if CONFIG_VP9_HIGHBITDEPTH - const int use_highbitdepth = sd->flags & YV12_FLAG_HIGHBITDEPTH; - check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); -#else - check_initial_width(cpi, subsampling_x, subsampling_y); -#endif // CONFIG_VP9_HIGHBITDEPTH - - vpx_usec_timer_start(&timer); - - if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags)) - res = -1; - vpx_usec_timer_mark(&timer); - cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); - - if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) && - (subsampling_x != 1 || subsampling_y != 1)) { - vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM, - "Non-4:2:0 color space requires profile 1 or 3"); - res = -1; - } - if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) && - (subsampling_x == 1 && subsampling_y == 1)) { - vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM, - "4:2:0 color space requires profile 0 or 2"); - res = -1; - } - - return res; -} - - -static int frame_is_reference(const VP9_COMP *cpi) { - const VP9_COMMON *cm = 
&cpi->common; - - return cm->frame_type == KEY_FRAME || - cpi->refresh_last_frame || - cpi->refresh_golden_frame || - cpi->refresh_alt_ref_frame || - cm->refresh_frame_context || - cm->lf.mode_ref_delta_update || - cm->seg.update_map || - cm->seg.update_data; -} - -void adjust_frame_rate(VP9_COMP *cpi, - const struct lookahead_entry *source) { - int64_t this_duration; - int step = 0; - - if (source->ts_start == cpi->first_time_stamp_ever) { - this_duration = source->ts_end - source->ts_start; - step = 1; - } else { - int64_t last_duration = cpi->last_end_time_stamp_seen - - cpi->last_time_stamp_seen; - - this_duration = source->ts_end - cpi->last_end_time_stamp_seen; - - // do a step update if the duration changes by 10% - if (last_duration) - step = (int)((this_duration - last_duration) * 10 / last_duration); - } - - if (this_duration) { - if (step) { - vp9_new_framerate(cpi, 10000000.0 / this_duration); - } else { - // Average this frame's rate into the last second's average - // frame rate. If we haven't seen 1 second yet, then average - // over the whole interval seen. - const double interval = MIN((double)(source->ts_end - - cpi->first_time_stamp_ever), 10000000.0); - double avg_duration = 10000000.0 / cpi->framerate; - avg_duration *= (interval - avg_duration + this_duration); - avg_duration /= interval; - - vp9_new_framerate(cpi, 10000000.0 / avg_duration); - } - } - cpi->last_time_stamp_seen = source->ts_start; - cpi->last_end_time_stamp_seen = source->ts_end; -} - -// Returns 0 if this is not an alt ref else the offset of the source frame -// used as the arf midpoint. 
-static int get_arf_src_index(VP9_COMP *cpi) { - RATE_CONTROL *const rc = &cpi->rc; - int arf_src_index = 0; - if (is_altref_enabled(cpi)) { - if (cpi->oxcf.pass == 2) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { - arf_src_index = gf_group->arf_src_offset[gf_group->index]; - } - } else if (rc->source_alt_ref_pending) { - arf_src_index = rc->frames_till_gf_update_due; - } - } - return arf_src_index; -} - -static void check_src_altref(VP9_COMP *cpi, - const struct lookahead_entry *source) { - RATE_CONTROL *const rc = &cpi->rc; - - if (cpi->oxcf.pass == 2) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - rc->is_src_frame_alt_ref = - (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE); - } else { - rc->is_src_frame_alt_ref = cpi->alt_ref_source && - (source == cpi->alt_ref_source); - } - - if (rc->is_src_frame_alt_ref) { - // Current frame is an ARF overlay frame. - cpi->alt_ref_source = NULL; - - // Don't refresh the last buffer for an ARF overlay frame. It will - // become the GF so preserve last as an alternative prediction option. 
- cpi->refresh_last_frame = 0; - } -} - -int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, - size_t *size, uint8_t *dest, - int64_t *time_stamp, int64_t *time_end, int flush) { - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; - RATE_CONTROL *const rc = &cpi->rc; - struct vpx_usec_timer cmptimer; - YV12_BUFFER_CONFIG *force_src_buffer = NULL; - struct lookahead_entry *last_source = NULL; - struct lookahead_entry *source = NULL; - MV_REFERENCE_FRAME ref_frame; - int arf_src_index; - - if (is_two_pass_svc(cpi)) { -#if CONFIG_SPATIAL_SVC - vp9_svc_start_frame(cpi); -#endif - if (oxcf->pass == 2) - vp9_restore_layer_context(cpi); - } - - vpx_usec_timer_start(&cmptimer); - - vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV); - - // Normal defaults - cm->reset_frame_context = 0; - cm->refresh_frame_context = 1; - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_alt_ref_frame = 0; - - // Should we encode an arf frame. - arf_src_index = get_arf_src_index(cpi); - if (arf_src_index) { - assert(arf_src_index <= rc->frames_to_key); - - if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) { - cpi->alt_ref_source = source; - -#if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0) { - int i; - // Reference a hidden frame from a lower layer - for (i = cpi->svc.spatial_layer_id - 1; i >= 0; --i) { - if (oxcf->ss_play_alternate[i]) { - cpi->gld_fb_idx = cpi->svc.layer_context[i].alt_ref_idx; - break; - } - } - } - cpi->svc.layer_context[cpi->svc.spatial_layer_id].has_alt_frame = 1; -#endif - - if (oxcf->arnr_max_frames > 0) { - // Produce the filtered ARF frame. 
- vp9_temporal_filter(cpi, arf_src_index); - vp9_extend_frame_borders(&cpi->alt_ref_buffer); - force_src_buffer = &cpi->alt_ref_buffer; - } - - cm->show_frame = 0; - cpi->refresh_alt_ref_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_last_frame = 0; - rc->is_src_frame_alt_ref = 0; - rc->source_alt_ref_pending = 0; - } else { - rc->source_alt_ref_pending = 0; - } - } - - if (!source) { - // Get last frame source. - if (cm->current_video_frame > 0) { - if ((last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL) - return -1; - } - - // Read in the source frame. -#if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi)) - source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush); - else -#endif - source = vp9_lookahead_pop(cpi->lookahead, flush); - if (source != NULL) { - cm->show_frame = 1; - cm->intra_only = 0; - - // Check to see if the frame should be encoded as an arf overlay. - check_src_altref(cpi, source); - } - } - - if (source) { - cpi->un_scaled_source = cpi->Source = force_src_buffer ? force_src_buffer - : &source->img; - - cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL; - - *time_stamp = source->ts_start; - *time_end = source->ts_end; - *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? 
FRAMEFLAGS_KEY : 0; - - } else { - *size = 0; - if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) { - vp9_end_first_pass(cpi); /* get last stats packet */ - cpi->twopass.first_pass_done = 1; - } - return -1; - } - - if (source->ts_start < cpi->first_time_stamp_ever) { - cpi->first_time_stamp_ever = source->ts_start; - cpi->last_end_time_stamp_seen = source->ts_start; - } - - // Clear down mmx registers - vp9_clear_system_state(); - - // adjust frame rates based on timestamps given - if (cm->show_frame) { - adjust_frame_rate(cpi, source); - } - - if (cpi->svc.number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) { - vp9_update_temporal_layer_framerate(cpi); - vp9_restore_layer_context(cpi); - } - - // start with a 0 size frame - *size = 0; - - /* find a free buffer for the new frame, releasing the reference previously - * held. - */ - cm->frame_bufs[cm->new_fb_idx].ref_count--; - cm->new_fb_idx = get_free_fb(cm); - - // For two pass encodes analyse the first pass stats and determine - // the bit allocation and other parameters for this frame / group of frames. - if ((oxcf->pass == 2) && (!cpi->use_svc || is_two_pass_svc(cpi))) { - vp9_rc_get_second_pass_params(cpi); - } - - if (!cpi->use_svc && cpi->multi_arf_allowed) { - if (cm->frame_type == KEY_FRAME) { - init_buffer_indices(cpi); - } else if (oxcf->pass == 2) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - cpi->alt_fb_idx = gf_group->arf_ref_idx[gf_group->index]; - } - } - - cpi->frame_flags = *frame_flags; - - if (oxcf->pass == 2 && - cm->current_video_frame == 0 && - oxcf->allow_spatial_resampling && - oxcf->rc_mode == VPX_VBR) { - // Internal scaling is triggered on the first frame. 
- vp9_set_size_literal(cpi, oxcf->scaled_frame_width, - oxcf->scaled_frame_height); - } - - // Reset the frame pointers to the current frame size - vp9_realloc_frame_buffer(get_frame_new_buffer(cm), - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL); - - alloc_util_frame_buffers(cpi); - init_motion_estimation(cpi); - - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf; - RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1]; - ref_buf->buf = buf; - ref_buf->idx = idx; -#if CONFIG_VP9_HIGHBITDEPTH - vp9_setup_scale_factors_for_frame(&ref_buf->sf, - buf->y_crop_width, buf->y_crop_height, - cm->width, cm->height, - (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? - 1 : 0); -#else - vp9_setup_scale_factors_for_frame(&ref_buf->sf, - buf->y_crop_width, buf->y_crop_height, - cm->width, cm->height); -#endif // CONFIG_VP9_HIGHBITDEPTH - if (vp9_is_scaled(&ref_buf->sf)) - vp9_extend_frame_borders(buf); - } - - set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); - - if (oxcf->aq_mode == VARIANCE_AQ) { - vp9_vaq_init(); - } - - if (oxcf->pass == 1 && - (!cpi->use_svc || is_two_pass_svc(cpi))) { - const int lossless = is_lossless_requested(oxcf); -#if CONFIG_VP9_HIGHBITDEPTH - if (cpi->oxcf.use_highbitdepth) - cpi->mb.fwd_txm4x4 = lossless ? vp9_high_fwht4x4 : vp9_high_fdct4x4; - else - cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4; - cpi->mb.high_itxm_add = lossless ? vp9_high_iwht4x4_add : - vp9_high_idct4x4_add; -#else - cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4; -#endif // CONFIG_VP9_HIGHBITDEPTH - cpi->mb.itxm_add = lossless ? 
vp9_iwht4x4_add : vp9_idct4x4_add; - vp9_first_pass(cpi, source); - } else if (oxcf->pass == 2 && - (!cpi->use_svc || is_two_pass_svc(cpi))) { - Pass2Encode(cpi, size, dest, frame_flags); - } else if (cpi->use_svc) { - SvcEncode(cpi, size, dest, frame_flags); - } else { - // One pass encode - Pass0Encode(cpi, size, dest, frame_flags); - } - - if (cm->refresh_frame_context) - cm->frame_contexts[cm->frame_context_idx] = cm->fc; - - // Frame was dropped, release scaled references. - if (*size == 0) { - release_scaled_references(cpi); - } - - if (*size > 0) { - cpi->droppable = !frame_is_reference(cpi); - } - - // Save layer specific state. - if ((cpi->svc.number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) || - ((cpi->svc.number_temporal_layers > 1 || - cpi->svc.number_spatial_layers > 1) && - oxcf->pass == 2)) { - vp9_save_layer_context(cpi); - } - - vpx_usec_timer_mark(&cmptimer); - cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer); - - if (cpi->b_calculate_psnr && oxcf->pass != 1 && cm->show_frame) - generate_psnr_packet(cpi); - -#if CONFIG_INTERNAL_STATS - - if (oxcf->pass != 1) { - cpi->bytes += (int)(*size); - - if (cm->show_frame) { - cpi->count++; - - if (cpi->b_calculate_psnr) { - YV12_BUFFER_CONFIG *orig = cpi->Source; - YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; - YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer; - PSNR_STATS psnr; -#if CONFIG_VP9_HIGHBITDEPTH - calc_highbd_psnr(orig, recon, &psnr, cpi->mb.e_mbd.bd, - cpi->oxcf.input_bit_depth); -#else - calc_psnr(orig, recon, &psnr); -#endif // CONFIG_VP9_HIGHBITDEPTH - - cpi->total += psnr.psnr[0]; - cpi->total_y += psnr.psnr[1]; - cpi->total_u += psnr.psnr[2]; - cpi->total_v += psnr.psnr[3]; - cpi->total_sq_error += psnr.sse[0]; - cpi->total_samples += psnr.samples[0]; - - { - PSNR_STATS psnr2; - double frame_ssim2 = 0, weight = 0; -#if CONFIG_VP9_POSTPROC - // TODO(agrange) Add resizing of post-proc buffer in here when the - // encoder is changed to use on-demand buffer 
allocation. - vp9_deblock(cm->frame_to_show, &cm->post_proc_buffer, - cm->lf.filter_level * 10 / 6); -#endif - vp9_clear_system_state(); - -#if CONFIG_VP9_HIGHBITDEPTH - calc_highbd_psnr(orig, recon, &psnr, cpi->mb.e_mbd.bd, - cpi->oxcf.input_bit_depth); -#else - calc_psnr(orig, pp, &psnr2); -#endif // CONFIG_VP9_HIGHBITDEPTH - - cpi->totalp += psnr2.psnr[0]; - cpi->totalp_y += psnr2.psnr[1]; - cpi->totalp_u += psnr2.psnr[2]; - cpi->totalp_v += psnr2.psnr[3]; - cpi->totalp_sq_error += psnr2.sse[0]; - cpi->totalp_samples += psnr2.samples[0]; - -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - frame_ssim2 = vp9_highbd_calc_ssim( - orig, recon, &weight, xd->bd, - xd->bd - cpi->oxcf.input_bit_depth); - } else { - frame_ssim2 = vp9_calc_ssim(orig, recon, 1, &weight); - } -#else - frame_ssim2 = vp9_calc_ssim(orig, recon, &weight); -#endif // CONFIG_VP9_HIGHBITDEPTH - - cpi->summed_quality += frame_ssim2 * weight; - cpi->summed_weights += weight; - -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - frame_ssim2 = vp9_highbd_calc_ssim( - orig, &cm->post_proc_buffer, &weight, - xd->bd, xd->bd - cpi->oxcf.input_bit_depth); - } else { - frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight); - } -#else - frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight); -#endif // CONFIG_VP9_HIGHBITDEPTH - - cpi->summedp_quality += frame_ssim2 * weight; - cpi->summedp_weights += weight; -#if 0 - { - FILE *f = fopen("q_used.stt", "a"); - fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n", - cpi->common.current_video_frame, y2, u2, v2, - frame_psnr2, frame_ssim2); - fclose(f); - } -#endif - } - } - - - if (cpi->b_calculate_ssimg) { - double y, u, v, frame_all; -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_high) { - frame_all = vp9_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y, - &u, &v, xd->bd, - xd->bd - cpi->oxcf.input_bit_depth); - } else { - frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, - &v); - } -#else - 
frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v); -#endif // CONFIG_VP9_HIGHBITDEPTH - cpi->total_ssimg_y += y; - cpi->total_ssimg_u += u; - cpi->total_ssimg_v += v; - cpi->total_ssimg_all += frame_all; - } - } - } - -#endif - - if (is_two_pass_svc(cpi) && cm->show_frame) { - ++cpi->svc.spatial_layer_to_encode; - if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers) - cpi->svc.spatial_layer_to_encode = 0; - } - return 0; -} - -int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, - vp9_ppflags_t *flags) { - VP9_COMMON *cm = &cpi->common; -#if !CONFIG_VP9_POSTPROC - (void)flags; -#endif - - if (!cm->show_frame) { - return -1; - } else { - int ret; -#if CONFIG_VP9_POSTPROC - ret = vp9_post_proc_frame(cm, dest, flags); -#else - if (cm->frame_to_show) { - *dest = *cm->frame_to_show; - dest->y_width = cm->width; - dest->y_height = cm->height; - dest->uv_width = cm->width >> cm->subsampling_x; - dest->uv_height = cm->height >> cm->subsampling_y; - ret = 0; - } else { - ret = -1; - } -#endif // !CONFIG_VP9_POSTPROC - vp9_clear_system_state(); - return ret; - } -} - -int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols) { - if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) { - const int mi_rows = cpi->common.mi_rows; - const int mi_cols = cpi->common.mi_cols; - if (map) { - int r, c; - for (r = 0; r < mi_rows; r++) { - for (c = 0; c < mi_cols; c++) { - cpi->segmentation_map[r * mi_cols + c] = - !map[(r >> 1) * cols + (c >> 1)]; - } - } - vp9_enable_segfeature(&cpi->common.seg, 1, SEG_LVL_SKIP); - vp9_enable_segmentation(&cpi->common.seg); - } else { - vp9_disable_segmentation(&cpi->common.seg); - } - return 0; - } else { - return -1; - } -} - -int vp9_set_internal_size(VP9_COMP *cpi, - VPX_SCALING horiz_mode, VPX_SCALING vert_mode) { - VP9_COMMON *cm = &cpi->common; - int hr = 0, hs = 0, vr = 0, vs = 0; - - if (horiz_mode > ONETWO || vert_mode > ONETWO) - return -1; - - 
Scale2Ratio(horiz_mode, &hr, &hs); - Scale2Ratio(vert_mode, &vr, &vs); - - // always go to the next whole number - cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs; - cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs; - assert(cm->width <= cpi->initial_width); - assert(cm->height <= cpi->initial_height); - - update_frame_size(cpi); - - return 0; -} - -int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, - unsigned int height) { - VP9_COMMON *cm = &cpi->common; -#if CONFIG_VP9_HIGHBITDEPTH - check_initial_width(cpi, 1, 1, cm->use_highbitdepth); -#else - check_initial_width(cpi, 1, 1); -#endif // CONFIG_VP9_HIGHBITDEPTH - - if (width) { - cm->width = width; - if (cm->width * 5 < cpi->initial_width) { - cm->width = cpi->initial_width / 5 + 1; - printf("Warning: Desired width too small, changed to %d\n", cm->width); - } - if (cm->width > cpi->initial_width) { - cm->width = cpi->initial_width; - printf("Warning: Desired width too large, changed to %d\n", cm->width); - } - } - - if (height) { - cm->height = height; - if (cm->height * 5 < cpi->initial_height) { - cm->height = cpi->initial_height / 5 + 1; - printf("Warning: Desired height too small, changed to %d\n", cm->height); - } - if (cm->height > cpi->initial_height) { - cm->height = cpi->initial_height; - printf("Warning: Desired height too large, changed to %d\n", cm->height); - } - } - assert(cm->width <= cpi->initial_width); - assert(cm->height <= cpi->initial_height); - - update_frame_size(cpi); - - return 0; -} - -void vp9_set_svc(VP9_COMP *cpi, int use_svc) { - cpi->use_svc = use_svc; - return; -} - -int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { - assert(a->y_crop_width == b->y_crop_width); - assert(a->y_crop_height == b->y_crop_height); - - return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, - a->y_crop_width, a->y_crop_height); -} - -#if CONFIG_VP9_HIGHBITDEPTH -int vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, - const YV12_BUFFER_CONFIG *b, - 
vpx_bit_depth_t bit_depth) { - unsigned int sse; - int sum; - assert(a->y_crop_width == b->y_crop_width); - assert(a->y_crop_height == b->y_crop_height); - assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0); - assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0); - switch (bit_depth) { - case VPX_BITS_8: - high_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, - a->y_crop_width, a->y_crop_height, &sse, &sum); - return (int) sse; - case VPX_BITS_10: - high_10_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, - a->y_crop_width, a->y_crop_height, &sse, &sum); - return (int) sse; - case VPX_BITS_12: - high_12_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, - a->y_crop_width, a->y_crop_height, &sse, &sum); - return (int) sse; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -int vp9_get_quantizer(VP9_COMP *cpi) { - return cpi->common.base_qindex; -} - -void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) { - if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | - VP8_EFLAG_NO_REF_ARF)) { - int ref = 7; - - if (flags & VP8_EFLAG_NO_REF_LAST) - ref ^= VP9_LAST_FLAG; - - if (flags & VP8_EFLAG_NO_REF_GF) - ref ^= VP9_GOLD_FLAG; - - if (flags & VP8_EFLAG_NO_REF_ARF) - ref ^= VP9_ALT_FLAG; - - vp9_use_as_reference(cpi, ref); - } - - if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF | - VP8_EFLAG_FORCE_ARF)) { - int upd = 7; - - if (flags & VP8_EFLAG_NO_UPD_LAST) - upd ^= VP9_LAST_FLAG; - - if (flags & VP8_EFLAG_NO_UPD_GF) - upd ^= VP9_GOLD_FLAG; - - if (flags & VP8_EFLAG_NO_UPD_ARF) - upd ^= VP9_ALT_FLAG; - - vp9_update_reference(cpi, upd); - } - - if (flags & VP8_EFLAG_NO_UPD_ENTROPY) { - vp9_update_entropy(cpi, 0); - } -} diff --git a/media/libvpx/vp9/encoder/vp9_encoder.h b/media/libvpx/vp9/encoder/vp9_encoder.h deleted file mode 100644 index 9bd16bc2753..00000000000 --- 
a/media/libvpx/vp9/encoder/vp9_encoder.h +++ /dev/null @@ -1,541 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_ENCODER_VP9_ENCODER_H_ -#define VP9_ENCODER_VP9_ENCODER_H_ - -#include - -#include "./vpx_config.h" -#include "vpx_ports/mem.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx/vp8cx.h" - -#include "vp9/common/vp9_ppflags.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_onyxc_int.h" - -#include "vp9/encoder/vp9_aq_cyclicrefresh.h" -#include "vp9/encoder/vp9_context_tree.h" -#include "vp9/encoder/vp9_encodemb.h" -#include "vp9/encoder/vp9_firstpass.h" -#include "vp9/encoder/vp9_lookahead.h" -#include "vp9/encoder/vp9_mbgraph.h" -#include "vp9/encoder/vp9_mcomp.h" -#include "vp9/encoder/vp9_quantize.h" -#include "vp9/encoder/vp9_ratectrl.h" -#include "vp9/encoder/vp9_rd.h" -#include "vp9/encoder/vp9_speed_features.h" -#include "vp9/encoder/vp9_svc_layercontext.h" -#include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_variance.h" -#if CONFIG_VP9_TEMPORAL_DENOISING -#include "vp9/encoder/vp9_denoiser.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#define DEFAULT_GF_INTERVAL 10 - -typedef struct { - int nmvjointcost[MV_JOINTS]; - int nmvcosts[2][MV_VALS]; - int nmvcosts_hp[2][MV_VALS]; - - vp9_prob segment_pred_probs[PREDICTION_PROBS]; - - unsigned char *last_frame_seg_map_copy; - - // 0 = Intra, Last, GF, ARF - signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; - // 0 = ZERO_MV, MV - signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; - - FRAME_CONTEXT fc; -} CODING_CONTEXT; - - 
-typedef enum { - // encode_breakout is disabled. - ENCODE_BREAKOUT_DISABLED = 0, - // encode_breakout is enabled. - ENCODE_BREAKOUT_ENABLED = 1, - // encode_breakout is enabled with small max_thresh limit. - ENCODE_BREAKOUT_LIMITED = 2 -} ENCODE_BREAKOUT_TYPE; - -typedef enum { - NORMAL = 0, - FOURFIVE = 1, - THREEFIVE = 2, - ONETWO = 3 -} VPX_SCALING; - -typedef enum { - // Good Quality Fast Encoding. The encoder balances quality with the amount of - // time it takes to encode the output. Speed setting controls how fast. - GOOD, - - // The encoder places priority on the quality of the output over encoding - // speed. The output is compressed at the highest possible quality. This - // option takes the longest amount of time to encode. Speed setting ignored. - BEST, - - // Realtime/Live Encoding. This mode is optimized for realtime encoding (for - // example, capturing a television signal or feed from a live camera). Speed - // setting controls how fast. - REALTIME -} MODE; - -typedef enum { - FRAMEFLAGS_KEY = 1 << 0, - FRAMEFLAGS_GOLDEN = 1 << 1, - FRAMEFLAGS_ALTREF = 1 << 2, -} FRAMETYPE_FLAGS; - -typedef enum { - NO_AQ = 0, - VARIANCE_AQ = 1, - COMPLEXITY_AQ = 2, - CYCLIC_REFRESH_AQ = 3, - AQ_MODE_COUNT // This should always be the last member of the enum -} AQ_MODE; - - -typedef struct VP9EncoderConfig { - BITSTREAM_PROFILE profile; - vpx_bit_depth_t bit_depth; // Codec bit-depth. - int width; // width of data passed to the compressor - int height; // height of data passed to the compressor - unsigned int input_bit_depth; // Input bit depth. 
- double init_framerate; // set to passed in framerate - int64_t target_bandwidth; // bandwidth to be used in kilobits per second - - int noise_sensitivity; // pre processing blur: recommendation 0 - int sharpness; // sharpening output: recommendation 0: - int speed; - unsigned int rc_max_intra_bitrate_pct; - - MODE mode; - int pass; - - // Key Framing Operations - int auto_key; // autodetect cut scenes and set the keyframes - int key_freq; // maximum distance to key frame. - - int lag_in_frames; // how many frames lag before we start encoding - - // ---------------------------------------------------------------- - // DATARATE CONTROL OPTIONS - - // vbr, cbr, constrained quality or constant quality - enum vpx_rc_mode rc_mode; - - // buffer targeting aggressiveness - int under_shoot_pct; - int over_shoot_pct; - - // buffering parameters - int64_t starting_buffer_level_ms; - int64_t optimal_buffer_level_ms; - int64_t maximum_buffer_size_ms; - - // Frame drop threshold. - int drop_frames_water_mark; - - // controlling quality - int fixed_q; - int worst_allowed_q; - int best_allowed_q; - int cq_level; - AQ_MODE aq_mode; // Adaptive Quantization mode - - // Internal frame size scaling. - int allow_spatial_resampling; - int scaled_frame_width; - int scaled_frame_height; - - // Enable feature to reduce the frame quantization every x frames. - int frame_periodic_boost; - - // two pass datarate control - int two_pass_vbrbias; // two pass datarate control tweaks - int two_pass_vbrmin_section; - int two_pass_vbrmax_section; - // END DATARATE CONTROL OPTIONS - // ---------------------------------------------------------------- - - // Spatial and temporal scalability. - int ss_number_layers; // Number of spatial layers. - int ts_number_layers; // Number of temporal layers. - // Bitrate allocation for spatial layers. 
- int ss_target_bitrate[VPX_SS_MAX_LAYERS]; - int ss_play_alternate[VPX_SS_MAX_LAYERS]; - // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. - int ts_target_bitrate[VPX_TS_MAX_LAYERS]; - int ts_rate_decimator[VPX_TS_MAX_LAYERS]; - - // these parameters aren't to be used in final build don't use!!! - int play_alternate; - - int encode_breakout; // early breakout : for video conf recommend 800 - - /* Bitfield defining the error resiliency features to enable. - * Can provide decodable frames after losses in previous - * frames and decodable partitions after losses in the same frame. - */ - unsigned int error_resilient_mode; - - /* Bitfield defining the parallel decoding mode where the - * decoding in successive frames may be conducted in parallel - * just by decoding the frame headers. - */ - unsigned int frame_parallel_decoding_mode; - - int arnr_max_frames; - int arnr_strength; - - int tile_columns; - int tile_rows; - - vpx_fixed_buf_t two_pass_stats_in; - struct vpx_codec_pkt_list *output_pkt_list; - -#if CONFIG_FP_MB_STATS - vpx_fixed_buf_t firstpass_mb_stats_in; -#endif - - vp8e_tuning tuning; - vp9e_tune_content content; -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth; -#endif -} VP9EncoderConfig; - -static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { - return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0; -} - -typedef struct VP9_COMP { - QUANTS quants; - MACROBLOCK mb; - VP9_COMMON common; - VP9EncoderConfig oxcf; - struct lookahead_ctx *lookahead; - struct lookahead_entry *alt_ref_source; - - YV12_BUFFER_CONFIG *Source; - YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames - YV12_BUFFER_CONFIG *un_scaled_source; - YV12_BUFFER_CONFIG scaled_source; - YV12_BUFFER_CONFIG *unscaled_last_source; - YV12_BUFFER_CONFIG scaled_last_source; - - int skippable_frame; - - int scaled_ref_idx[3]; - int lst_fb_idx; - int gld_fb_idx; - int alt_fb_idx; - - int refresh_last_frame; - int 
refresh_golden_frame; - int refresh_alt_ref_frame; - - int ext_refresh_frame_flags_pending; - int ext_refresh_last_frame; - int ext_refresh_golden_frame; - int ext_refresh_alt_ref_frame; - - int ext_refresh_frame_context_pending; - int ext_refresh_frame_context; - - YV12_BUFFER_CONFIG last_frame_uf; - - TOKENEXTRA *tok; - unsigned int tok_count[4][1 << 6]; - - // Ambient reconstruction err target for force key frames - int ambient_err; - - RD_OPT rd; - - CODING_CONTEXT coding_context; - - int *nmvcosts[2]; - int *nmvcosts_hp[2]; - int *nmvsadcosts[2]; - int *nmvsadcosts_hp[2]; - - int zbin_mode_boost; - int zbin_mode_boost_enabled; - - int64_t last_time_stamp_seen; - int64_t last_end_time_stamp_seen; - int64_t first_time_stamp_ever; - - RATE_CONTROL rc; - double framerate; - - vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; - int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]; - - struct vpx_codec_pkt_list *output_pkt_list; - - MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; - int mbgraph_n_frames; // number of frames filled in the above - int static_mb_pct; // % forced skip mbs by segmentation - int ref_frame_flags; - - SPEED_FEATURES sf; - - unsigned int max_mv_magnitude; - int mv_step_param; - - // Default value is 1. From first pass stats, encode_breakout may be disabled. - ENCODE_BREAKOUT_TYPE allow_encode_breakout; - - // Get threshold from external input. A suggested threshold is 800 for HD - // clips, and 300 for < HD clips. 
- int encode_breakout; - - unsigned char *segmentation_map; - - // segment threashold for encode breakout - int segment_encode_breakout[MAX_SEGMENTS]; - - unsigned char *complexity_map; - - CYCLIC_REFRESH *cyclic_refresh; - - fractional_mv_step_fp *find_fractional_mv_step; - vp9_full_search_fn_t full_search_sad; - vp9_refining_search_fn_t refining_search_sad; - vp9_diamond_search_fn_t diamond_search_sad; - vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES]; - uint64_t time_receive_data; - uint64_t time_compress_data; - uint64_t time_pick_lpf; - uint64_t time_encode_sb_row; - -#if CONFIG_FP_MB_STATS - int use_fp_mb_stats; -#endif - - TWO_PASS twopass; - - YV12_BUFFER_CONFIG alt_ref_buffer; - - -#if CONFIG_INTERNAL_STATS - unsigned int mode_chosen_counts[MAX_MODES]; - - int count; - double total_y; - double total_u; - double total_v; - double total; - uint64_t total_sq_error; - uint64_t total_samples; - - double totalp_y; - double totalp_u; - double totalp_v; - double totalp; - uint64_t totalp_sq_error; - uint64_t totalp_samples; - - int bytes; - double summed_quality; - double summed_weights; - double summedp_quality; - double summedp_weights; - unsigned int tot_recode_hits; - - - double total_ssimg_y; - double total_ssimg_u; - double total_ssimg_v; - double total_ssimg_all; - - int b_calculate_ssimg; -#endif - int b_calculate_psnr; - - int droppable; - - int initial_width; - int initial_height; - - int use_svc; - - SVC svc; - - // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type. - diff *source_diff_var; - // The threshold used in SOURCE_VAR_BASED_PARTITION search type. 
- unsigned int source_var_thresh; - int frames_till_next_var_check; - - int frame_flags; - - search_site_config ss_cfg; - - int mbmode_cost[INTRA_MODES]; - unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES]; - int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES]; - int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; - int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; - - PICK_MODE_CONTEXT *leaf_tree; - PC_TREE *pc_tree; - PC_TREE *pc_root; - int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; - - int multi_arf_allowed; - int multi_arf_enabled; - int multi_arf_last_grp_enabled; - -#if CONFIG_VP9_TEMPORAL_DENOISING - VP9_DENOISER denoiser; -#endif -} VP9_COMP; - -void vp9_initialize_enc(); - -struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf); -void vp9_remove_compressor(VP9_COMP *cpi); - -void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf); - - // receive a frames worth of data. caller can assume that a copy of this - // frame is made and not just a copy of the pointer.. 
-int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, - YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time_stamp); - -int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, - size_t *size, uint8_t *dest, - int64_t *time_stamp, int64_t *time_end, int flush); - -int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, - vp9_ppflags_t *flags); - -int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags); - -void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags); - -int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - -int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - -int vp9_update_entropy(VP9_COMP *cpi, int update); - -int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols); - -int vp9_set_internal_size(VP9_COMP *cpi, - VPX_SCALING horiz_mode, VPX_SCALING vert_mode); - -int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, - unsigned int height); - -void vp9_set_svc(VP9_COMP *cpi, int use_svc); - -int vp9_get_quantizer(struct VP9_COMP *cpi); - -static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { - if (ref_frame == LAST_FRAME) { - return cpi->lst_fb_idx; - } else if (ref_frame == GOLDEN_FRAME) { - return cpi->gld_fb_idx; - } else { - return cpi->alt_fb_idx; - } -} - -static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( - VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { - VP9_COMMON * const cm = &cpi->common; - return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] - .buf; -} - -static INLINE int get_token_alloc(int mb_rows, int mb_cols) { - // TODO(JBB): double check we can't exceed this token count if we have a - // 32x32 transform crossing a boundary at a multiple of 16. - // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full - // resolution. 
We assume up to 1 token per pixel, and then allow - // a head room of 4. - return mb_rows * mb_cols * (16 * 16 * 3 + 4); -} - -int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); -#if CONFIG_VP9_HIGHBITDEPTH -int vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, - const YV12_BUFFER_CONFIG *b, - vpx_bit_depth_t bit_depth); -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp9_alloc_compressor_data(VP9_COMP *cpi); - -void vp9_scale_references(VP9_COMP *cpi); - -void vp9_update_reference_frames(VP9_COMP *cpi); - -void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv); - -YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, - YV12_BUFFER_CONFIG *unscaled, - YV12_BUFFER_CONFIG *scaled); - -void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); - -static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) { - return cpi->use_svc && - (cpi->svc.number_temporal_layers > 1 || - cpi->svc.number_spatial_layers > 1) && - (cpi->oxcf.pass == 1 || cpi->oxcf.pass == 2); -} - -static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { - return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 && - (cpi->oxcf.play_alternate && - (!is_two_pass_svc(cpi) || - cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id])); -} - -static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, - MV_REFERENCE_FRAME ref0, - MV_REFERENCE_FRAME ref1) { - xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME - : 0]; - xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME - : 0]; -} - -static INLINE int get_chessboard_index(const int frame_index) { - return frame_index & 0x1; -} - -static INLINE int *cond_sad_list(const struct VP9_COMP *cpi, int *sad_list) { - return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? 
sad_list : NULL; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_ENCODER_H_ diff --git a/media/libvpx/vp9/encoder/vp9_extend.c b/media/libvpx/vp9/encoder/vp9_extend.c deleted file mode 100644 index e8517c88925..00000000000 --- a/media/libvpx/vp9/encoder/vp9_extend.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_mem/vpx_mem.h" - -#include "vp9/common/vp9_common.h" -#include "vp9/encoder/vp9_extend.h" - -static void copy_and_extend_plane(const uint8_t *src, int src_pitch, - uint8_t *dst, int dst_pitch, - int w, int h, - int extend_top, int extend_left, - int extend_bottom, int extend_right) { - int i, linesize; - - // copy the left and right most columns out - const uint8_t *src_ptr1 = src; - const uint8_t *src_ptr2 = src + w - 1; - uint8_t *dst_ptr1 = dst - extend_left; - uint8_t *dst_ptr2 = dst + w; - - for (i = 0; i < h; i++) { - vpx_memset(dst_ptr1, src_ptr1[0], extend_left); - vpx_memcpy(dst_ptr1 + extend_left, src_ptr1, w); - vpx_memset(dst_ptr2, src_ptr2[0], extend_right); - src_ptr1 += src_pitch; - src_ptr2 += src_pitch; - dst_ptr1 += dst_pitch; - dst_ptr2 += dst_pitch; - } - - // Now copy the top and bottom lines into each line of the respective - // borders - src_ptr1 = dst - extend_left; - src_ptr2 = dst + dst_pitch * (h - 1) - extend_left; - dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left; - dst_ptr2 = dst + dst_pitch * (h) - extend_left; - linesize = extend_left + extend_right + w; - - for (i = 0; i < extend_top; i++) { - vpx_memcpy(dst_ptr1, src_ptr1, linesize); - dst_ptr1 += dst_pitch; - } - - for (i = 0; 
i < extend_bottom; i++) { - vpx_memcpy(dst_ptr2, src_ptr2, linesize); - dst_ptr2 += dst_pitch; - } -} - -void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst) { - // Extend src frame in buffer - // Altref filtering assumes 16 pixel extension - const int et_y = 16; - const int el_y = 16; - // Motion estimation may use src block variance with the block size up - // to 64x64, so the right and bottom need to be extended to 64 multiple - // or up to 16, whichever is greater. - const int eb_y = MAX(ALIGN_POWER_OF_TWO(src->y_width, 6) - src->y_width, - 16); - const int er_y = MAX(ALIGN_POWER_OF_TWO(src->y_height, 6) - src->y_height, - 16); - const int uv_width_subsampling = (src->uv_width != src->y_width); - const int uv_height_subsampling = (src->uv_height != src->y_height); - const int et_uv = et_y >> uv_height_subsampling; - const int el_uv = el_y >> uv_width_subsampling; - const int eb_uv = eb_y >> uv_height_subsampling; - const int er_uv = er_y >> uv_width_subsampling; - - copy_and_extend_plane(src->y_buffer, src->y_stride, - dst->y_buffer, dst->y_stride, - src->y_width, src->y_height, - et_y, el_y, eb_y, er_y); - - copy_and_extend_plane(src->u_buffer, src->uv_stride, - dst->u_buffer, dst->uv_stride, - src->uv_width, src->uv_height, - et_uv, el_uv, eb_uv, er_uv); - - copy_and_extend_plane(src->v_buffer, src->uv_stride, - dst->v_buffer, dst->uv_stride, - src->uv_width, src->uv_height, - et_uv, el_uv, eb_uv, er_uv); -} - -void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - int srcy, int srcx, - int srch, int srcw) { - // If the side is not touching the bounder then don't extend. - const int et_y = srcy ? 0 : dst->border; - const int el_y = srcx ? 0 : dst->border; - const int eb_y = srcy + srch != src->y_height ? 0 : - dst->border + dst->y_height - src->y_height; - const int er_y = srcx + srcw != src->y_width ? 
0 : - dst->border + dst->y_width - src->y_width; - const int src_y_offset = srcy * src->y_stride + srcx; - const int dst_y_offset = srcy * dst->y_stride + srcx; - - const int et_uv = ROUND_POWER_OF_TWO(et_y, 1); - const int el_uv = ROUND_POWER_OF_TWO(el_y, 1); - const int eb_uv = ROUND_POWER_OF_TWO(eb_y, 1); - const int er_uv = ROUND_POWER_OF_TWO(er_y, 1); - const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); - const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); - const int srch_uv = ROUND_POWER_OF_TWO(srch, 1); - const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1); - - copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride, - dst->y_buffer + dst_y_offset, dst->y_stride, - srcw, srch, - et_y, el_y, eb_y, er_y); - - copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride, - dst->u_buffer + dst_uv_offset, dst->uv_stride, - srcw_uv, srch_uv, - et_uv, el_uv, eb_uv, er_uv); - - copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride, - dst->v_buffer + dst_uv_offset, dst->uv_stride, - srcw_uv, srch_uv, - et_uv, el_uv, eb_uv, er_uv); -} diff --git a/media/libvpx/vp9/encoder/vp9_extend.h b/media/libvpx/vp9/encoder/vp9_extend.h deleted file mode 100644 index 058fe09cf98..00000000000 --- a/media/libvpx/vp9/encoder/vp9_extend.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_ENCODER_VP9_EXTEND_H_ -#define VP9_ENCODER_VP9_EXTEND_H_ - -#include "vpx_scale/yv12config.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst); - -void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - int srcy, int srcx, - int srch, int srcw); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_EXTEND_H_ diff --git a/media/libvpx/vp9/encoder/vp9_pickmode.c b/media/libvpx/vp9/encoder/vp9_pickmode.c deleted file mode 100644 index a788c1d8e3f..00000000000 --- a/media/libvpx/vp9/encoder/vp9_pickmode.c +++ /dev/null @@ -1,764 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include -#include -#include - -#include "./vp9_rtcd.h" - -#include "vpx_mem/vpx_mem.h" - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_mvref_common.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_reconintra.h" - -#include "vp9/encoder/vp9_encoder.h" -#include "vp9/encoder/vp9_pickmode.h" -#include "vp9/encoder/vp9_ratectrl.h" -#include "vp9/encoder/vp9_rd.h" - -typedef struct { - uint8_t *data; - int stride; - int in_use; -} PRED_BUFFER; - -static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd, - const TileInfo *const tile, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, - int mi_row, int mi_col) { - const int *ref_sign_bias = cm->ref_frame_sign_bias; - int i, refmv_count = 0; - - const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; - - int different_ref_found = 0; - int context_counter = 0; - int const_motion = 0; - - // Blank the reference vector list - vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); - - // The nearest 2 blocks are treated differently - // if the size < 8x8 we get the mv from the bmi substructure, - // and we also need to keep a mode count. - for (i = 0; i < 2; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride].src_mi; - const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; - // Keep counts for entropy encoding. - context_counter += mode_2_counter[candidate->mode]; - different_ref_found = 1; - - if (candidate->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1)); - } - } - - const_motion = 1; - - // Check the rest of the neighbors in much the same way - // as before except we don't need to keep track of sub blocks or - // mode counts. 
- for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride].src_mi->mbmi; - different_ref_found = 1; - - if (candidate->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(candidate->mv[0]); - } - } - - // Since we couldn't find 2 mvs from the same reference frame - // go back through the neighbors and find motion vectors from - // different reference frames. - if (different_ref_found && !refmv_count) { - for (i = 0; i < MVREF_NEIGHBOURS; ++i) { - const POSITION *mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row - * xd->mi_stride].src_mi->mbmi; - - // If the candidate is INTRA we don't want to consider its mv. - IF_DIFF_REF_FRAME_ADD_MV(candidate); - } - } - } - - Done: - - mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter]; - - // Clamp vectors - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) - clamp_mv_ref(&mv_ref_list[i].as_mv, xd); - - return const_motion; -} - -static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int mi_row, int mi_col, - int_mv *tmp_mv, int *rate_mv, - int64_t best_rd_sofar) { - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; - struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; - const int step_param = cpi->sf.mv.fullpel_search_step_param; - const int sadpb = x->sadperbit16; - MV mvp_full; - const int ref = mbmi->ref_frame[0]; - const MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; - int dis; - int rate_mode; - const int tmp_col_min = x->mv_col_min; - const int tmp_col_max = x->mv_col_max; - const int tmp_row_min = x->mv_row_min; - const int tmp_row_max = x->mv_row_max; - int rv = 0; - int sad_list[5]; - const YV12_BUFFER_CONFIG *scaled_ref_frame = 
vp9_get_scaled_ref_frame(cpi, - ref); - if (cpi->common.show_frame && - (x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[LAST_FRAME]) - return rv; - - if (scaled_ref_frame) { - int i; - // Swap out the reference frame for a version that's been scaled to - // match the resolution of the current frame, allowing the existing - // motion search code to be used without additional modifications. - for (i = 0; i < MAX_MB_PLANE; i++) - backup_yv12[i] = xd->plane[i].pre[0]; - vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); - } - vp9_set_mv_search_range(x, &ref_mv); - - assert(x->mv_best_ref_index[ref] <= 2); - if (x->mv_best_ref_index[ref] < 2) - mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; - else - mvp_full = x->pred_mv[ref]; - - mvp_full.col >>= 3; - mvp_full.row >>= 3; - - vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, - cond_sad_list(cpi, sad_list), - &ref_mv, &tmp_mv->as_mv, INT_MAX, 0); - - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; - - // calculate the bit cost on motion vector - mvp_full.row = tmp_mv->as_mv.row * 8; - mvp_full.col = tmp_mv->as_mv.col * 8; - - *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - - rate_mode = cpi->inter_mode_cost[mbmi->mode_context[ref]] - [INTER_OFFSET(NEWMV)]; - rv = !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) > - best_rd_sofar); - - if (rv) { - cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv, - cpi->common.allow_high_precision_mv, - x->errorperbit, - &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_sad_list(cpi, sad_list), - x->nmvjointcost, x->mvcost, - &dis, &x->pred_sse[ref], NULL, 0, 0); - x->pred_mv[ref] = tmp_mv->as_mv; - } - - if (scaled_ref_frame) { - int i; - for (i = 0; i < MAX_MB_PLANE; i++) - xd->plane[i].pre[0] = backup_yv12[i]; - } - return rv; -} - - -static void 
model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, - MACROBLOCK *x, MACROBLOCKD *xd, - int *out_rate_sum, int64_t *out_dist_sum, - unsigned int *var_y, unsigned int *sse_y) { - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. - unsigned int sse; - int rate; - int64_t dist; - struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &xd->plane[0]; - const uint32_t dc_quant = pd->dequant[0]; - const uint32_t ac_quant = pd->dequant[1]; - unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, &sse); - *var_y = var; - *sse_y = sse; - - if (sse < dc_quant * dc_quant >> 6) - x->skip_txfm[0] = 1; - else if (var < ac_quant * ac_quant >> 6) - x->skip_txfm[0] = 2; - else - x->skip_txfm[0] = 0; - - if (cpi->common.tx_mode == TX_MODE_SELECT) { - if (sse > (var << 2)) - xd->mi[0].src_mi->mbmi.tx_size = - MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - else - xd->mi[0].src_mi->mbmi.tx_size = TX_8X8; - } else { - xd->mi[0].src_mi->mbmi.tx_size = - MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - } - - vp9_model_rd_from_var_lapndz(sse - var, 1 << num_pels_log2_lookup[bsize], - dc_quant >> 3, &rate, &dist); - *out_rate_sum = rate >> 1; - *out_dist_sum = dist << 3; - - vp9_model_rd_from_var_lapndz(var, 1 << num_pels_log2_lookup[bsize], - ac_quant >> 3, &rate, &dist); - *out_rate_sum += rate; - *out_dist_sum += dist << 4; -} - -static int get_pred_buffer(PRED_BUFFER *p, int len) { - int i; - - for (i = 0; i < len; i++) { - if (!p[i].in_use) { - p[i].in_use = 1; - return i; - } - } - return -1; -} - -static void free_pred_buffer(PRED_BUFFER *p) { - if (p != NULL) - p->in_use = 0; -} - -static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int mi_row, int mi_col, - 
MV_REFERENCE_FRAME ref_frame, - PREDICTION_MODE this_mode, - unsigned int var_y, unsigned int sse_y, - struct buf_2d yv12_mb[][MAX_MB_PLANE], - int *rate, int64_t *dist) { - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; - - const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); - unsigned int var = var_y, sse = sse_y; - // Skipping threshold for ac. - unsigned int thresh_ac; - // Skipping threshold for dc. - unsigned int thresh_dc; - if (x->encode_breakout > 0) { - // Set a maximum for threshold to avoid big PSNR loss in low bit rate - // case. Use extreme low threshold for static frames to limit - // skipping. - const unsigned int max_thresh = 36000; - // The encode_breakout input - const unsigned int min_thresh = - MIN(((unsigned int)x->encode_breakout << 4), max_thresh); - - // Calculate threshold according to dequant value. - thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; - thresh_ac = clamp(thresh_ac, min_thresh, max_thresh); - - // Adjust ac threshold according to partition size. - thresh_ac >>= - 8 - (b_width_log2(bsize) + b_height_log2(bsize)); - - thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); - } else { - thresh_ac = 0; - thresh_dc = 0; - } - - // Y skipping condition checking for ac and dc. 
- if (var <= thresh_ac && (sse - var) <= thresh_dc) { - unsigned int sse_u, sse_v; - unsigned int var_u, var_v; - - // Skip UV prediction unless breakout is zero (lossless) to save - // computation with low impact on the result - if (x->encode_breakout == 0) { - xd->plane[1].pre[0] = yv12_mb[ref_frame][1]; - xd->plane[2].pre[0] = yv12_mb[ref_frame][2]; - vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize); - } - - var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, - x->plane[1].src.stride, - xd->plane[1].dst.buf, - xd->plane[1].dst.stride, &sse_u); - - // U skipping condition checking - if ((var_u * 4 <= thresh_ac) && (sse_u - var_u <= thresh_dc)) { - var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, - x->plane[2].src.stride, - xd->plane[2].dst.buf, - xd->plane[2].dst.stride, &sse_v); - - // V skipping condition checking - if ((var_v * 4 <= thresh_ac) && (sse_v - var_v <= thresh_dc)) { - x->skip = 1; - - // The cost of skip bit needs to be added. - *rate = cpi->inter_mode_cost[mbmi->mode_context[ref_frame]] - [INTER_OFFSET(this_mode)]; - - // More on this part of rate - // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); - - // Scaling factor for SSE from spatial domain to frequency - // domain is 16. Adjust distortion accordingly. - // TODO(yunqingwang): In this function, only y-plane dist is - // calculated. 
- *dist = (sse << 4); // + ((sse_u + sse_v) << 4); - - // *disable_skip = 1; - } - } - } -} - -struct estimate_block_intra_args { - VP9_COMP *cpi; - MACROBLOCK *x; - PREDICTION_MODE mode; - int rate; - int64_t dist; -}; - -static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - struct estimate_block_intra_args* const args = arg; - VP9_COMP *const cpi = args->cpi; - MACROBLOCK *const x = args->x; - MACROBLOCKD *const xd = &x->e_mbd; - struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size]; - uint8_t *const src_buf_base = p->src.buf; - uint8_t *const dst_buf_base = pd->dst.buf; - const int src_stride = p->src.stride; - const int dst_stride = pd->dst.stride; - int i, j; - int rate; - int64_t dist; - unsigned int var_y, sse_y; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - assert(plane == 0); - (void) plane; - - p->src.buf = &src_buf_base[4 * (j * src_stride + i)]; - pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)]; - // Use source buffer as an approximation for the fully reconstructed buffer. - vp9_predict_intra_block(xd, block >> (2 * tx_size), - b_width_log2(plane_bsize), - tx_size, args->mode, - p->src.buf, src_stride, - pd->dst.buf, dst_stride, - i, j, 0); - // This procedure assumes zero offset from p->src.buf and pd->dst.buf. - model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y); - p->src.buf = src_buf_base; - pd->dst.buf = dst_buf_base; - args->rate += rate; - args->dist += dist; -} - -static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][INTER_MODES] = { - {THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV}, - {THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG}, - {THR_NEARESTA, THR_NEARA, THR_ZEROA, THR_NEWA}, -}; - -// TODO(jingning) placeholder for inter-frame non-RD mode decision. -// this needs various further optimizations. to be continued.. 
-void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, - int mi_row, int mi_col, - int *returnrate, - int64_t *returndistortion, - BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; - struct macroblockd_plane *const pd = &xd->plane[0]; - PREDICTION_MODE best_mode = ZEROMV; - MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; - TX_SIZE best_tx_size = MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cm->tx_mode]); - INTERP_FILTER best_pred_filter = EIGHTTAP; - int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - struct buf_2d yv12_mb[4][MAX_MB_PLANE]; - static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, - VP9_ALT_FLAG }; - int64_t best_rd = INT64_MAX; - int64_t this_rd = INT64_MAX; - uint8_t skip_txfm = 0; - int rate = INT_MAX; - int64_t dist = INT64_MAX; - // var_y and sse_y are saved to be used in skipping checking - unsigned int var_y = UINT_MAX; - unsigned int sse_y = UINT_MAX; - - const int intra_cost_penalty = - 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth); - const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv, - intra_cost_penalty, 0); - const int intra_mode_cost = 50; - - const int8_t segment_id = mbmi->segment_id; - const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize]; - const int *const rd_thresh_freq_fact = cpi->rd.thresh_freq_fact[bsize]; - INTERP_FILTER filter_ref = cm->interp_filter; - const int bsl = mi_width_log2(bsize); - const int pred_filter_search = cm->interp_filter == SWITCHABLE ? 
- (((mi_row + mi_col) >> bsl) + - get_chessboard_index(cm->current_video_frame)) & 0x1 : 0; - int const_motion[MAX_REF_FRAMES] = { 0 }; - const int bh = num_4x4_blocks_high_lookup[bsize] << 2; - const int bw = num_4x4_blocks_wide_lookup[bsize] << 2; - // For speed 6, the result of interp filter is reused later in actual encoding - // process. - // tmp[3] points to dst buffer, and the other 3 point to allocated buffers. - PRED_BUFFER tmp[4]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64); - struct buf_2d orig_dst = pd->dst; - PRED_BUFFER *best_pred = NULL; - PRED_BUFFER *this_mode_pred = NULL; - - if (cpi->sf.reuse_inter_pred_sby) { - int i; - for (i = 0; i < 3; i++) { - tmp[i].data = &pred_buf[bw * bh * i]; - tmp[i].stride = bw; - tmp[i].in_use = 0; - } - tmp[3].data = pd->dst.buf; - tmp[3].stride = pd->dst.stride; - tmp[3].in_use = 0; - } - - x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; - x->skip = 0; - - // initialize mode decisions - *returnrate = INT_MAX; - *returndistortion = INT64_MAX; - vpx_memset(mbmi, 0, sizeof(MB_MODE_INFO)); - mbmi->sb_type = bsize; - mbmi->ref_frame[0] = NONE; - mbmi->ref_frame[1] = NONE; - mbmi->tx_size = MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cm->tx_mode]); - mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? 
- EIGHTTAP : cm->interp_filter; - mbmi->segment_id = segment_id; - - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - PREDICTION_MODE this_mode; - x->pred_mv_sad[ref_frame] = INT_MAX; - frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; - frame_mv[ZEROMV][ref_frame].as_int = 0; - - if (xd->up_available) - filter_ref = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter; - else if (xd->left_available) - filter_ref = xd->mi[-1].src_mi->mbmi.interp_filter; - - if (cpi->ref_frame_flags & flag_list[ref_frame]) { - const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); - int_mv *const candidates = mbmi->ref_mvs[ref_frame]; - const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; - vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, - sf, sf); - - if (!cm->error_resilient_mode) - vp9_find_mv_refs(cm, xd, tile, xd->mi[0].src_mi, ref_frame, - candidates, mi_row, mi_col); - else - const_motion[ref_frame] = mv_refs_rt(cm, xd, tile, xd->mi[0].src_mi, - ref_frame, candidates, - mi_row, mi_col); - - vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, - &frame_mv[NEARESTMV][ref_frame], - &frame_mv[NEARMV][ref_frame]); - - if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8) - vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, - ref_frame, bsize); - } else { - continue; - } - - // Select prediction reference frames. 
- xd->plane[0].pre[0] = yv12_mb[ref_frame][0]; - - clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd); - clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd); - - mbmi->ref_frame[0] = ref_frame; - - for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { - int rate_mv = 0; - int mode_rd_thresh; - - if (const_motion[ref_frame] && - (this_mode == NEARMV || this_mode == ZEROMV)) - continue; - - if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) - continue; - - mode_rd_thresh = - rd_threshes[mode_idx[ref_frame - - LAST_FRAME][INTER_OFFSET(this_mode)]]; - if (rd_less_than_thresh(best_rd, mode_rd_thresh, - rd_thresh_freq_fact[this_mode])) - continue; - - if (this_mode == NEWMV) { - if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize])) - continue; - if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], - &rate_mv, best_rd)) - continue; - } - - if (this_mode != NEARESTMV && - frame_mv[this_mode][ref_frame].as_int == - frame_mv[NEARESTMV][ref_frame].as_int) - continue; - - mbmi->mode = this_mode; - mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; - - // Search for the best prediction filter type, when the resulting - // motion vector is at sub-pixel accuracy level for luma component, i.e., - // the last three bits are all zeros. 
- if (cpi->sf.reuse_inter_pred_sby) { - if (this_mode == NEARESTMV) { - this_mode_pred = &tmp[3]; - } else { - this_mode_pred = &tmp[get_pred_buffer(tmp, 3)]; - pd->dst.buf = this_mode_pred->data; - pd->dst.stride = bw; - } - } - - if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && - pred_filter_search && - ((mbmi->mv[0].as_mv.row & 0x07) != 0 || - (mbmi->mv[0].as_mv.col & 0x07) != 0)) { - int pf_rate[3]; - int64_t pf_dist[3]; - unsigned int pf_var[3]; - unsigned int pf_sse[3]; - TX_SIZE pf_tx_size[3]; - int64_t best_cost = INT64_MAX; - INTERP_FILTER best_filter = SWITCHABLE, filter; - PRED_BUFFER *current_pred = this_mode_pred; - - for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) { - int64_t cost; - mbmi->interp_filter = filter; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], - &pf_dist[filter], &pf_var[filter], &pf_sse[filter]); - cost = RDCOST(x->rdmult, x->rddiv, - vp9_get_switchable_rate(cpi) + pf_rate[filter], - pf_dist[filter]); - pf_tx_size[filter] = mbmi->tx_size; - if (cost < best_cost) { - best_filter = filter; - best_cost = cost; - skip_txfm = x->skip_txfm[0]; - - if (cpi->sf.reuse_inter_pred_sby) { - if (this_mode_pred != current_pred) { - free_pred_buffer(this_mode_pred); - this_mode_pred = current_pred; - } - - if (filter < EIGHTTAP_SHARP) { - current_pred = &tmp[get_pred_buffer(tmp, 3)]; - pd->dst.buf = current_pred->data; - pd->dst.stride = bw; - } - } - } - } - - if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred) - free_pred_buffer(current_pred); - - mbmi->interp_filter = best_filter; - mbmi->tx_size = pf_tx_size[mbmi->interp_filter]; - rate = pf_rate[mbmi->interp_filter]; - dist = pf_dist[mbmi->interp_filter]; - var_y = pf_var[mbmi->interp_filter]; - sse_y = pf_sse[mbmi->interp_filter]; - x->skip_txfm[0] = skip_txfm; - } else { - mbmi->interp_filter = (filter_ref == SWITCHABLE) ? 
EIGHTTAP: filter_ref; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y); - } - - rate += rate_mv; - rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]] - [INTER_OFFSET(this_mode)]; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); - - // Skipping checking: test to see if this block can be reconstructed by - // prediction only. - if (cpi->allow_encode_breakout) { - encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, - this_mode, var_y, sse_y, yv12_mb, &rate, &dist); - if (x->skip) { - rate += rate_mv; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); - } - } - -#if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) { - vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx); - } -#else - (void)ctx; -#endif - - if (this_rd < best_rd || x->skip) { - best_rd = this_rd; - *returnrate = rate; - *returndistortion = dist; - best_mode = this_mode; - best_pred_filter = mbmi->interp_filter; - best_tx_size = mbmi->tx_size; - best_ref_frame = ref_frame; - skip_txfm = x->skip_txfm[0]; - - if (cpi->sf.reuse_inter_pred_sby) { - free_pred_buffer(best_pred); - - best_pred = this_mode_pred; - } - } else { - if (cpi->sf.reuse_inter_pred_sby) - free_pred_buffer(this_mode_pred); - } - - if (x->skip) - break; - } - // If the current reference frame is valid and we found a usable mode, - // we are done. - if (best_rd < INT64_MAX) - break; - } - - // If best prediction is not in dst buf, then copy the prediction block from - // temp buf to dst buf. 
- if (best_pred != NULL && cpi->sf.reuse_inter_pred_sby && - best_pred->data != orig_dst.buf) { - pd->dst = orig_dst; - vp9_convolve_copy(best_pred->data, bw, pd->dst.buf, pd->dst.stride, NULL, 0, - NULL, 0, bw, bh); - } - - mbmi->mode = best_mode; - mbmi->interp_filter = best_pred_filter; - mbmi->tx_size = best_tx_size; - mbmi->ref_frame[0] = best_ref_frame; - mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; - xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; - x->skip_txfm[0] = skip_txfm; - - // Perform intra prediction search, if the best SAD is above a certain - // threshold. - if (!x->skip && best_rd > inter_mode_thresh && - bsize <= cpi->sf.max_intra_bsize) { - PREDICTION_MODE this_mode; - struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 }; - const TX_SIZE intra_tx_size = - MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - - if (cpi->sf.reuse_inter_pred_sby) { - pd->dst.buf = tmp[0].data; - pd->dst.stride = bw; - } - - for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { - const TX_SIZE saved_tx_size = mbmi->tx_size; - args.mode = this_mode; - args.rate = 0; - args.dist = 0; - mbmi->tx_size = intra_tx_size; - vp9_foreach_transformed_block_in_plane(xd, bsize, 0, - estimate_block_intra, &args); - mbmi->tx_size = saved_tx_size; - rate = args.rate; - dist = args.dist; - rate += cpi->mbmode_cost[this_mode]; - rate += intra_cost_penalty; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); - - if (this_rd + intra_mode_cost < best_rd) { - best_rd = this_rd; - *returnrate = rate; - *returndistortion = dist; - mbmi->mode = this_mode; - mbmi->tx_size = intra_tx_size; - mbmi->ref_frame[0] = INTRA_FRAME; - mbmi->uv_mode = this_mode; - mbmi->mv[0].as_int = INVALID_MV; - } else { - x->skip_txfm[0] = skip_txfm; - } - } - if (cpi->sf.reuse_inter_pred_sby) - pd->dst = orig_dst; - } -} diff --git a/media/libvpx/vp9/encoder/vp9_pickmode.h b/media/libvpx/vp9/encoder/vp9_pickmode.h deleted 
file mode 100644 index 97aeca76a7d..00000000000 --- a/media/libvpx/vp9/encoder/vp9_pickmode.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_ENCODER_VP9_PICKMODE_H_ -#define VP9_ENCODER_VP9_PICKMODE_H_ - -#include "vp9/encoder/vp9_encoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - const struct TileInfo *const tile, - int mi_row, int mi_col, - int *returnrate, - int64_t *returndistortion, - BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_PICKMODE_H_ diff --git a/media/libvpx/vp9/encoder/vp9_rd.c b/media/libvpx/vp9/encoder/vp9_rd.c deleted file mode 100644 index 17369d4c739..00000000000 --- a/media/libvpx/vp9/encoder/vp9_rd.c +++ /dev/null @@ -1,600 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include -#include - -#include "./vp9_rtcd.h" - -#include "vpx_mem/vpx_mem.h" - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_mvref_common.h" -#include "vp9/common/vp9_pred_common.h" -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_reconintra.h" -#include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_systemdependent.h" - -#include "vp9/encoder/vp9_cost.h" -#include "vp9/encoder/vp9_encodemb.h" -#include "vp9/encoder/vp9_encodemv.h" -#include "vp9/encoder/vp9_encoder.h" -#include "vp9/encoder/vp9_mcomp.h" -#include "vp9/encoder/vp9_quantize.h" -#include "vp9/encoder/vp9_ratectrl.h" -#include "vp9/encoder/vp9_rd.h" -#include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_variance.h" - -#define RD_THRESH_POW 1.25 -#define RD_MULT_EPB_RATIO 64 - -// Factor to weigh the rate for switchable interp filters. -#define SWITCHABLE_INTERP_RATE_FACTOR 1 - -// The baseline rd thresholds for breaking out of the rd loop for -// certain modes are assumed to be based on 8x8 blocks. -// This table is used to correct for block size. -// The factors here are << 2 (2 = x0.5, 32 = x8 etc). 
-static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = { - 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32 -}; - -static void fill_mode_costs(VP9_COMP *cpi) { - const FRAME_CONTEXT *const fc = &cpi->common.fc; - int i, j; - - for (i = 0; i < INTRA_MODES; ++i) - for (j = 0; j < INTRA_MODES; ++j) - vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], - vp9_intra_mode_tree); - - vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); - vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME], - vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree); - vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME], - fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree); - - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - vp9_cost_tokens(cpi->switchable_interp_costs[i], - fc->switchable_interp_prob[i], vp9_switchable_interp_tree); -} - -static void fill_token_costs(vp9_coeff_cost *c, - vp9_coeff_probs_model (*p)[PLANE_TYPES]) { - int i, j, k, l; - TX_SIZE t; - for (t = TX_4X4; t <= TX_32X32; ++t) - for (i = 0; i < PLANE_TYPES; ++i) - for (j = 0; j < REF_TYPES; ++j) - for (k = 0; k < COEF_BANDS; ++k) - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - vp9_prob probs[ENTROPY_NODES]; - vp9_model_to_full_probs(p[t][i][j][k][l], probs); - vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, - vp9_coef_tree); - vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs, - vp9_coef_tree); - assert(c[t][i][j][k][0][l][EOB_TOKEN] == - c[t][i][j][k][1][l][EOB_TOKEN]); - } -} - -// Values are now correlated to quantizer. 
-static int sad_per_bit16lut_8[QINDEX_RANGE]; -static int sad_per_bit4lut_8[QINDEX_RANGE]; - -#if CONFIG_VP9_HIGHBITDEPTH -static int sad_per_bit16lut_10[QINDEX_RANGE]; -static int sad_per_bit4lut_10[QINDEX_RANGE]; -static int sad_per_bit16lut_12[QINDEX_RANGE]; -static int sad_per_bit4lut_12[QINDEX_RANGE]; -#endif - -static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range, - vpx_bit_depth_t bit_depth) { - int i; - // Initialize the sad lut tables using a formulaic calculation for now. - // This is to make it easier to resolve the impact of experimental changes - // to the quantizer tables. - for (i = 0; i < range; i++) { - const double q = vp9_convert_qindex_to_q(i, bit_depth); - bit16lut[i] = (int)(0.0418 * q + 2.4107); - bit4lut[i] = (int)(0.063 * q + 2.742); - } -} - -void vp9_init_me_luts() { - init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE, - VPX_BITS_8); -#if CONFIG_VP9_HIGHBITDEPTH - init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE, - VPX_BITS_10); - init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE, - VPX_BITS_12); -#endif -} - -static const int rd_boost_factor[16] = { - 64, 32, 32, 32, 24, 16, 12, 12, - 8, 8, 4, 4, 2, 2, 1, 0 -}; -static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { - 128, 144, 128, 128, 144 -}; - -int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { - const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth); -#if CONFIG_VP9_HIGHBITDEPTH - int rdmult = 0; - switch (cpi->common.bit_depth) { - case VPX_BITS_8: - rdmult = 88 * q * q / 24; - break; - case VPX_BITS_10: - rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); - break; - case VPX_BITS_12: - rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); - break; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; - } -#else - int rdmult = 88 * q * q / 24; -#endif - if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { - const GF_GROUP *const 
gf_group = &cpi->twopass.gf_group; - const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index]; - const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100)); - - rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7; - rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7); - } - return rdmult; -} - -static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) { - double q; -#if CONFIG_VP9_HIGHBITDEPTH - switch (bit_depth) { - case VPX_BITS_8: - q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; - break; - case VPX_BITS_10: - q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; - break; - case VPX_BITS_12: - q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; - break; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; - } -#else - (void) bit_depth; - q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; -#endif - // TODO(debargha): Adjust the function below. - return MAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8); -} - -void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { -#if CONFIG_VP9_HIGHBITDEPTH - switch (cpi->common.bit_depth) { - case VPX_BITS_8: - cpi->mb.sadperbit16 = sad_per_bit16lut_8[qindex]; - cpi->mb.sadperbit4 = sad_per_bit4lut_8[qindex]; - break; - case VPX_BITS_10: - cpi->mb.sadperbit16 = sad_per_bit16lut_10[qindex]; - cpi->mb.sadperbit4 = sad_per_bit4lut_10[qindex]; - break; - case VPX_BITS_12: - cpi->mb.sadperbit16 = sad_per_bit16lut_12[qindex]; - cpi->mb.sadperbit4 = sad_per_bit4lut_12[qindex]; - break; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - } -#else - cpi->mb.sadperbit16 = sad_per_bit16lut_8[qindex]; - cpi->mb.sadperbit4 = sad_per_bit4lut_8[qindex]; -#endif -} - -static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) { - int i, bsize, segment_id; - - for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { - const int qindex = - clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) + - 
cm->y_dc_delta_q, 0, MAXQ); - const int q = compute_rd_thresh_factor(qindex, cm->bit_depth); - - for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { - // Threshold here seems unnecessarily harsh but fine given actual - // range of values used for cpi->sf.thresh_mult[]. - const int t = q * rd_thresh_block_size_factor[bsize]; - const int thresh_max = INT_MAX / t; - - if (bsize >= BLOCK_8X8) { - for (i = 0; i < MAX_MODES; ++i) - rd->threshes[segment_id][bsize][i] = - rd->thresh_mult[i] < thresh_max - ? rd->thresh_mult[i] * t / 4 - : INT_MAX; - } else { - for (i = 0; i < MAX_REFS; ++i) - rd->threshes[segment_id][bsize][i] = - rd->thresh_mult_sub8x8[i] < thresh_max - ? rd->thresh_mult_sub8x8[i] * t / 4 - : INT_MAX; - } - } - } -} - -void vp9_initialize_rd_consts(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; - RD_OPT *const rd = &cpi->rd; - int i; - - vp9_clear_system_state(); - - rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128). - rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); - - x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO; - x->errorperbit += (x->errorperbit == 0); - - x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && - cm->frame_type != KEY_FRAME) ? 0 : 1; - - set_block_thresholds(cm, rd); - - if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) { - fill_token_costs(x->token_costs, cm->fc.coef_probs); - - for (i = 0; i < PARTITION_CONTEXTS; ++i) - vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i), - vp9_partition_tree); - } - - if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || - cm->frame_type == KEY_FRAME) { - fill_mode_costs(cpi); - - if (!frame_is_intra_only(cm)) { - vp9_build_nmv_cost_table(x->nmvjointcost, - cm->allow_high_precision_mv ? 
x->nmvcost_hp - : x->nmvcost, - &cm->fc.nmvc, cm->allow_high_precision_mv); - - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - vp9_cost_tokens((int *)cpi->inter_mode_cost[i], - cm->fc.inter_mode_probs[i], vp9_inter_mode_tree); - } - } -} - -static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { - // NOTE: The tables below must be of the same size. - - // The functions described below are sampled at the four most significant - // bits of x^2 + 8 / 256. - - // Normalized rate: - // This table models the rate for a Laplacian source with given variance - // when quantized with a uniform quantizer with given stepsize. The - // closed form expression is: - // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)], - // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance), - // and H(x) is the binary entropy function. - static const int rate_tab_q10[] = { - 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, - 4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811, - 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, - 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, - 2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130, - 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, - 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, - 1159, 1086, 1021, 963, 911, 864, 821, 781, - 745, 680, 623, 574, 530, 490, 455, 424, - 395, 345, 304, 269, 239, 213, 190, 171, - 154, 126, 104, 87, 73, 61, 52, 44, - 38, 28, 21, 16, 12, 10, 8, 6, - 5, 3, 2, 1, 1, 1, 0, 0, - }; - // Normalized distortion: - // This table models the normalized distortion for a Laplacian source - // with given variance when quantized with a uniform quantizer - // with given stepsize. The closed form expression is: - // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)) - // where x = qpstep / sqrt(variance). - // Note the actual distortion is Dn * variance. 
- static const int dist_tab_q10[] = { - 0, 0, 1, 1, 1, 2, 2, 2, - 3, 3, 4, 5, 5, 6, 7, 7, - 8, 9, 11, 12, 13, 15, 16, 17, - 18, 21, 24, 26, 29, 31, 34, 36, - 39, 44, 49, 54, 59, 64, 69, 73, - 78, 88, 97, 106, 115, 124, 133, 142, - 151, 167, 184, 200, 215, 231, 245, 260, - 274, 301, 327, 351, 375, 397, 418, 439, - 458, 495, 528, 559, 587, 613, 637, 659, - 680, 717, 749, 777, 801, 823, 842, 859, - 874, 899, 919, 936, 949, 960, 969, 977, - 983, 994, 1001, 1006, 1010, 1013, 1015, 1017, - 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024, - }; - static const int xsq_iq_q10[] = { - 0, 4, 8, 12, 16, 20, 24, 28, - 32, 40, 48, 56, 64, 72, 80, 88, - 96, 112, 128, 144, 160, 176, 192, 208, - 224, 256, 288, 320, 352, 384, 416, 448, - 480, 544, 608, 672, 736, 800, 864, 928, - 992, 1120, 1248, 1376, 1504, 1632, 1760, 1888, - 2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808, - 4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648, - 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328, - 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, - 32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408, - 65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848, - 131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728, - }; - const int tmp = (xsq_q10 >> 2) + 8; - const int k = get_msb(tmp) - 3; - const int xq = (k << 3) + ((tmp >> k) & 0x7); - const int one_q10 = 1 << 10; - const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k); - const int b_q10 = one_q10 - a_q10; - *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10; - *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; -} - -void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, - unsigned int qstep, int *rate, - int64_t *dist) { - // This function models the rate and distortion for a Laplacian - // source with given variance when quantized with a uniform quantizer - // with given stepsize. 
The closed form expressions are in: - // Hang and Chen, "Source Model for transform video coder and its - // application - Part I: Fundamental Theory", IEEE Trans. Circ. - // Sys. for Video Tech., April 1997. - if (var == 0) { - *rate = 0; - *dist = 0; - } else { - int d_q10, r_q10; - static const uint32_t MAX_XSQ_Q10 = 245727; - const uint64_t xsq_q10_64 = - ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var; - const int xsq_q10 = (int)MIN(xsq_q10_64, MAX_XSQ_Q10); - model_rd_norm(xsq_q10, &r_q10, &d_q10); - *rate = (n * r_q10 + 2) >> 2; - *dist = (var * (int64_t)d_q10 + 512) >> 10; - } -} - -void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, - const struct macroblockd_plane *pd, - ENTROPY_CONTEXT t_above[16], - ENTROPY_CONTEXT t_left[16]) { - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; - const ENTROPY_CONTEXT *const above = pd->above_context; - const ENTROPY_CONTEXT *const left = pd->left_context; - - int i; - switch (tx_size) { - case TX_4X4: - vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); - vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h); - break; - case TX_8X8: - for (i = 0; i < num_4x4_w; i += 2) - t_above[i] = !!*(const uint16_t *)&above[i]; - for (i = 0; i < num_4x4_h; i += 2) - t_left[i] = !!*(const uint16_t *)&left[i]; - break; - case TX_16X16: - for (i = 0; i < num_4x4_w; i += 4) - t_above[i] = !!*(const uint32_t *)&above[i]; - for (i = 0; i < num_4x4_h; i += 4) - t_left[i] = !!*(const uint32_t *)&left[i]; - break; - case TX_32X32: - for (i = 0; i < num_4x4_w; i += 8) - t_above[i] = !!*(const uint64_t *)&above[i]; - for (i = 0; i < num_4x4_h; i += 8) - t_left[i] = !!*(const uint64_t *)&left[i]; - break; - default: - assert(0 && "Invalid transform size."); - break; - } -} - -void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, - uint8_t *ref_y_buffer, int 
ref_y_stride, - int ref_frame, BLOCK_SIZE block_size) { - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; - int i; - int zero_seen = 0; - int best_index = 0; - int best_sad = INT_MAX; - int this_sad = INT_MAX; - int max_mv = 0; - uint8_t *src_y_ptr = x->plane[0].src.buf; - uint8_t *ref_y_ptr; - const int num_mv_refs = MAX_MV_REF_CANDIDATES + - (cpi->sf.adaptive_motion_search && - block_size < cpi->sf.max_partition_size); - - MV pred_mv[3]; - pred_mv[0] = mbmi->ref_mvs[ref_frame][0].as_mv; - pred_mv[1] = mbmi->ref_mvs[ref_frame][1].as_mv; - pred_mv[2] = x->pred_mv[ref_frame]; - - // Get the sad for each candidate reference mv. - for (i = 0; i < num_mv_refs; ++i) { - const MV *this_mv = &pred_mv[i]; - - max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3); - if (is_zero_mv(this_mv) && zero_seen) - continue; - - zero_seen |= is_zero_mv(this_mv); - - ref_y_ptr = - &ref_y_buffer[ref_y_stride * (this_mv->row >> 3) + (this_mv->col >> 3)]; - - // Find sad for current vector. - this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, - ref_y_ptr, ref_y_stride); - - // Note if it is the best so far. - if (this_sad < best_sad) { - best_sad = this_sad; - best_index = i; - } - } - - // Note the index of the mv that worked best in the reference list. - x->mv_best_ref_index[ref_frame] = best_index; - x->max_mv_context[ref_frame] = max_mv; - x->pred_mv_sad[ref_frame] = best_sad; -} - -void vp9_setup_pred_block(const MACROBLOCKD *xd, - struct buf_2d dst[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col, - const struct scale_factors *scale, - const struct scale_factors *scale_uv) { - int i; - - dst[0].buf = src->y_buffer; - dst[0].stride = src->y_stride; - dst[1].buf = src->u_buffer; - dst[2].buf = src->v_buffer; - dst[1].stride = dst[2].stride = src->uv_stride; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col, - i ? 
scale_uv : scale, - xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); - } -} - -const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, - int ref_frame) { - const VP9_COMMON *const cm = &cpi->common; - const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1]; - return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; -} - -int vp9_get_switchable_rate(const VP9_COMP *cpi) { - const MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; - const int ctx = vp9_get_pred_context_switchable_interp(xd); - return SWITCHABLE_INTERP_RATE_FACTOR * - cpi->switchable_interp_costs[ctx][mbmi->interp_filter]; -} - -void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) { - int i; - RD_OPT *const rd = &cpi->rd; - SPEED_FEATURES *const sf = &cpi->sf; - - // Set baseline threshold values. - for (i = 0; i < MAX_MODES; ++i) - rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0; - - if (sf->adaptive_rd_thresh) { - rd->thresh_mult[THR_NEARESTMV] = 300; - rd->thresh_mult[THR_NEARESTG] = 300; - rd->thresh_mult[THR_NEARESTA] = 300; - } else { - rd->thresh_mult[THR_NEARESTMV] = 0; - rd->thresh_mult[THR_NEARESTG] = 0; - rd->thresh_mult[THR_NEARESTA] = 0; - } - - rd->thresh_mult[THR_DC] += 1000; - - rd->thresh_mult[THR_NEWMV] += 1000; - rd->thresh_mult[THR_NEWA] += 1000; - rd->thresh_mult[THR_NEWG] += 1000; - - // Adjust threshold only in real time mode, which only uses last - // reference frame. 
- rd->thresh_mult[THR_NEWMV] += sf->elevate_newmv_thresh; - - rd->thresh_mult[THR_NEARMV] += 1000; - rd->thresh_mult[THR_NEARA] += 1000; - rd->thresh_mult[THR_COMP_NEARESTLA] += 1000; - rd->thresh_mult[THR_COMP_NEARESTGA] += 1000; - - rd->thresh_mult[THR_TM] += 1000; - - rd->thresh_mult[THR_COMP_NEARLA] += 1500; - rd->thresh_mult[THR_COMP_NEWLA] += 2000; - rd->thresh_mult[THR_NEARG] += 1000; - rd->thresh_mult[THR_COMP_NEARGA] += 1500; - rd->thresh_mult[THR_COMP_NEWGA] += 2000; - - rd->thresh_mult[THR_ZEROMV] += 2000; - rd->thresh_mult[THR_ZEROG] += 2000; - rd->thresh_mult[THR_ZEROA] += 2000; - rd->thresh_mult[THR_COMP_ZEROLA] += 2500; - rd->thresh_mult[THR_COMP_ZEROGA] += 2500; - - rd->thresh_mult[THR_H_PRED] += 2000; - rd->thresh_mult[THR_V_PRED] += 2000; - rd->thresh_mult[THR_D45_PRED ] += 2500; - rd->thresh_mult[THR_D135_PRED] += 2500; - rd->thresh_mult[THR_D117_PRED] += 2500; - rd->thresh_mult[THR_D153_PRED] += 2500; - rd->thresh_mult[THR_D207_PRED] += 2500; - rd->thresh_mult[THR_D63_PRED] += 2500; -} - -void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { - const SPEED_FEATURES *const sf = &cpi->sf; - RD_OPT *const rd = &cpi->rd; - int i; - - for (i = 0; i < MAX_REFS; ++i) - rd->thresh_mult_sub8x8[i] = cpi->oxcf.mode == BEST ? -500 : 0; - - rd->thresh_mult_sub8x8[THR_LAST] += 2500; - rd->thresh_mult_sub8x8[THR_GOLD] += 2500; - rd->thresh_mult_sub8x8[THR_ALTR] += 2500; - rd->thresh_mult_sub8x8[THR_INTRA] += 2500; - rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500; - rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500; - - // Check for masked out split cases. - for (i = 0; i < MAX_REFS; ++i) - if (sf->disable_split_mask & (1 << i)) - rd->thresh_mult_sub8x8[i] = INT_MAX; -} diff --git a/media/libvpx/vp9/encoder/vp9_rd.h b/media/libvpx/vp9/encoder/vp9_rd.h deleted file mode 100644 index 5dcb2f8d75a..00000000000 --- a/media/libvpx/vp9/encoder/vp9_rd.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_ENCODER_VP9_RD_H_ -#define VP9_ENCODER_VP9_RD_H_ - -#include - -#include "vp9/common/vp9_blockd.h" - -#include "vp9/encoder/vp9_block.h" -#include "vp9/encoder/vp9_context_tree.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define RDDIV_BITS 7 - -#define RDCOST(RM, DM, R, D) \ - (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM)) -#define QIDX_SKIP_THRESH 115 - -#define MV_COST_WEIGHT 108 -#define MV_COST_WEIGHT_SUB 120 - -#define INVALID_MV 0x80008000 - -#define MAX_MODES 30 -#define MAX_REFS 6 - -// This enumerator type needs to be kept aligned with the mode order in -// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code. -typedef enum { - THR_NEARESTMV, - THR_NEARESTA, - THR_NEARESTG, - - THR_DC, - - THR_NEWMV, - THR_NEWA, - THR_NEWG, - - THR_NEARMV, - THR_NEARA, - THR_NEARG, - - THR_ZEROMV, - THR_ZEROG, - THR_ZEROA, - - THR_COMP_NEARESTLA, - THR_COMP_NEARESTGA, - - THR_TM, - - THR_COMP_NEARLA, - THR_COMP_NEWLA, - THR_COMP_NEARGA, - THR_COMP_NEWGA, - - THR_COMP_ZEROLA, - THR_COMP_ZEROGA, - - THR_H_PRED, - THR_V_PRED, - THR_D135_PRED, - THR_D207_PRED, - THR_D153_PRED, - THR_D63_PRED, - THR_D117_PRED, - THR_D45_PRED, -} THR_MODES; - -typedef enum { - THR_LAST, - THR_GOLD, - THR_ALTR, - THR_COMP_LA, - THR_COMP_GA, - THR_INTRA, -} THR_MODES_SUB8X8; - -typedef struct RD_OPT { - // Thresh_mult is used to set a threshold for the rd score. A higher value - // means that we will accept the best mode so far more often. This number - // is used in combination with the current block size, and thresh_freq_fact - // to pick a threshold. 
- int thresh_mult[MAX_MODES]; - int thresh_mult_sub8x8[MAX_REFS]; - - int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; - int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; - - int mode_map[BLOCK_SIZES][MAX_MODES]; - - int64_t comp_pred_diff[REFERENCE_MODES]; - int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; - int64_t tx_select_diff[TX_MODES]; - // TODO(agrange): can this overflow? - int tx_select_threshes[MAX_REF_FRAMES][TX_MODES]; - - int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; - int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; - int64_t mask_filter; - - int RDMULT; - int RDDIV; -} RD_OPT; - -struct TileInfo; -struct VP9_COMP; -struct macroblock; - -int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex); - -void vp9_initialize_rd_consts(struct VP9_COMP *cpi); - -void vp9_initialize_me_consts(struct VP9_COMP *cpi, int qindex); - -void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, - unsigned int qstep, int *rate, - int64_t *dist); - -int vp9_get_switchable_rate(const struct VP9_COMP *cpi); - -const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi, - int ref_frame); - -void vp9_init_me_luts(); - -void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, - const struct macroblockd_plane *pd, - ENTROPY_CONTEXT t_above[16], - ENTROPY_CONTEXT t_left[16]); - -void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi); - -void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi); - -static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, - int thresh_fact) { - return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; -} - -void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x, - uint8_t *ref_y_buffer, int ref_y_stride, - int ref_frame, BLOCK_SIZE block_size); - -void vp9_setup_pred_block(const MACROBLOCKD *xd, - struct buf_2d dst[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, - int mi_row, int 
mi_col, - const struct scale_factors *scale, - const struct scale_factors *scale_uv); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_RD_H_ diff --git a/media/libvpx/vp9/encoder/vp9_resize.c b/media/libvpx/vp9/encoder/vp9_resize.c deleted file mode 100644 index 4a8a5215628..00000000000 --- a/media/libvpx/vp9/encoder/vp9_resize.c +++ /dev/null @@ -1,920 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include -#include -#include -#include -#include - -#include "vp9/common/vp9_common.h" -#include "vp9/encoder/vp9_resize.h" - -#define FILTER_BITS 7 - -#define INTERP_TAPS 8 -#define SUBPEL_BITS 5 -#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1) -#define INTERP_PRECISION_BITS 32 - -typedef int16_t interp_kernel[INTERP_TAPS]; - -// Filters for interpolation (0.5-band) - note this also filters integer pels. 
-const interp_kernel vp9_filteredinterp_filters500[(1 << SUBPEL_BITS)] = { - {-3, 0, 35, 64, 35, 0, -3, 0}, - {-3, -1, 34, 64, 36, 1, -3, 0}, - {-3, -1, 32, 64, 38, 1, -3, 0}, - {-2, -2, 31, 63, 39, 2, -3, 0}, - {-2, -2, 29, 63, 41, 2, -3, 0}, - {-2, -2, 28, 63, 42, 3, -4, 0}, - {-2, -3, 27, 63, 43, 4, -4, 0}, - {-2, -3, 25, 62, 45, 5, -4, 0}, - {-2, -3, 24, 62, 46, 5, -4, 0}, - {-2, -3, 23, 61, 47, 6, -4, 0}, - {-2, -3, 21, 60, 49, 7, -4, 0}, - {-1, -4, 20, 60, 50, 8, -4, -1}, - {-1, -4, 19, 59, 51, 9, -4, -1}, - {-1, -4, 17, 58, 52, 10, -4, 0}, - {-1, -4, 16, 57, 53, 12, -4, -1}, - {-1, -4, 15, 56, 54, 13, -4, -1}, - {-1, -4, 14, 55, 55, 14, -4, -1}, - {-1, -4, 13, 54, 56, 15, -4, -1}, - {-1, -4, 12, 53, 57, 16, -4, -1}, - {0, -4, 10, 52, 58, 17, -4, -1}, - {-1, -4, 9, 51, 59, 19, -4, -1}, - {-1, -4, 8, 50, 60, 20, -4, -1}, - {0, -4, 7, 49, 60, 21, -3, -2}, - {0, -4, 6, 47, 61, 23, -3, -2}, - {0, -4, 5, 46, 62, 24, -3, -2}, - {0, -4, 5, 45, 62, 25, -3, -2}, - {0, -4, 4, 43, 63, 27, -3, -2}, - {0, -4, 3, 42, 63, 28, -2, -2}, - {0, -3, 2, 41, 63, 29, -2, -2}, - {0, -3, 2, 39, 63, 31, -2, -2}, - {0, -3, 1, 38, 64, 32, -1, -3}, - {0, -3, 1, 36, 64, 34, -1, -3} -}; - -// Filters for interpolation (0.625-band) - note this also filters integer pels. 
-const interp_kernel vp9_filteredinterp_filters625[(1 << SUBPEL_BITS)] = { - {-1, -8, 33, 80, 33, -8, -1, 0}, - {-1, -8, 30, 80, 35, -8, -1, 1}, - {-1, -8, 28, 80, 37, -7, -2, 1}, - {0, -8, 26, 79, 39, -7, -2, 1}, - {0, -8, 24, 79, 41, -7, -2, 1}, - {0, -8, 22, 78, 43, -6, -2, 1}, - {0, -8, 20, 78, 45, -5, -3, 1}, - {0, -8, 18, 77, 48, -5, -3, 1}, - {0, -8, 16, 76, 50, -4, -3, 1}, - {0, -8, 15, 75, 52, -3, -4, 1}, - {0, -7, 13, 74, 54, -3, -4, 1}, - {0, -7, 11, 73, 56, -2, -4, 1}, - {0, -7, 10, 71, 58, -1, -4, 1}, - {1, -7, 8, 70, 60, 0, -5, 1}, - {1, -6, 6, 68, 62, 1, -5, 1}, - {1, -6, 5, 67, 63, 2, -5, 1}, - {1, -6, 4, 65, 65, 4, -6, 1}, - {1, -5, 2, 63, 67, 5, -6, 1}, - {1, -5, 1, 62, 68, 6, -6, 1}, - {1, -5, 0, 60, 70, 8, -7, 1}, - {1, -4, -1, 58, 71, 10, -7, 0}, - {1, -4, -2, 56, 73, 11, -7, 0}, - {1, -4, -3, 54, 74, 13, -7, 0}, - {1, -4, -3, 52, 75, 15, -8, 0}, - {1, -3, -4, 50, 76, 16, -8, 0}, - {1, -3, -5, 48, 77, 18, -8, 0}, - {1, -3, -5, 45, 78, 20, -8, 0}, - {1, -2, -6, 43, 78, 22, -8, 0}, - {1, -2, -7, 41, 79, 24, -8, 0}, - {1, -2, -7, 39, 79, 26, -8, 0}, - {1, -2, -7, 37, 80, 28, -8, -1}, - {1, -1, -8, 35, 80, 30, -8, -1}, -}; - -// Filters for interpolation (0.75-band) - note this also filters integer pels. 
-const interp_kernel vp9_filteredinterp_filters750[(1 << SUBPEL_BITS)] = { - {2, -11, 25, 96, 25, -11, 2, 0}, - {2, -11, 22, 96, 28, -11, 2, 0}, - {2, -10, 19, 95, 31, -11, 2, 0}, - {2, -10, 17, 95, 34, -12, 2, 0}, - {2, -9, 14, 94, 37, -12, 2, 0}, - {2, -8, 12, 93, 40, -12, 1, 0}, - {2, -8, 9, 92, 43, -12, 1, 1}, - {2, -7, 7, 91, 46, -12, 1, 0}, - {2, -7, 5, 90, 49, -12, 1, 0}, - {2, -6, 3, 88, 52, -12, 0, 1}, - {2, -5, 1, 86, 55, -12, 0, 1}, - {2, -5, -1, 84, 58, -11, 0, 1}, - {2, -4, -2, 82, 61, -11, -1, 1}, - {2, -4, -4, 80, 64, -10, -1, 1}, - {1, -3, -5, 77, 67, -9, -1, 1}, - {1, -3, -6, 75, 70, -8, -2, 1}, - {1, -2, -7, 72, 72, -7, -2, 1}, - {1, -2, -8, 70, 75, -6, -3, 1}, - {1, -1, -9, 67, 77, -5, -3, 1}, - {1, -1, -10, 64, 80, -4, -4, 2}, - {1, -1, -11, 61, 82, -2, -4, 2}, - {1, 0, -11, 58, 84, -1, -5, 2}, - {1, 0, -12, 55, 86, 1, -5, 2}, - {1, 0, -12, 52, 88, 3, -6, 2}, - {0, 1, -12, 49, 90, 5, -7, 2}, - {0, 1, -12, 46, 91, 7, -7, 2}, - {1, 1, -12, 43, 92, 9, -8, 2}, - {0, 1, -12, 40, 93, 12, -8, 2}, - {0, 2, -12, 37, 94, 14, -9, 2}, - {0, 2, -12, 34, 95, 17, -10, 2}, - {0, 2, -11, 31, 95, 19, -10, 2}, - {0, 2, -11, 28, 96, 22, -11, 2} -}; - -// Filters for interpolation (0.875-band) - note this also filters integer pels. 
-const interp_kernel vp9_filteredinterp_filters875[(1 << SUBPEL_BITS)] = { - {3, -8, 13, 112, 13, -8, 3, 0}, - {3, -7, 10, 112, 17, -9, 3, -1}, - {2, -6, 7, 111, 21, -9, 3, -1}, - {2, -5, 4, 111, 24, -10, 3, -1}, - {2, -4, 1, 110, 28, -11, 3, -1}, - {1, -3, -1, 108, 32, -12, 4, -1}, - {1, -2, -3, 106, 36, -13, 4, -1}, - {1, -1, -6, 105, 40, -14, 4, -1}, - {1, -1, -7, 102, 44, -14, 4, -1}, - {1, 0, -9, 100, 48, -15, 4, -1}, - {1, 1, -11, 97, 53, -16, 4, -1}, - {0, 1, -12, 95, 57, -16, 4, -1}, - {0, 2, -13, 91, 61, -16, 4, -1}, - {0, 2, -14, 88, 65, -16, 4, -1}, - {0, 3, -15, 84, 69, -17, 4, 0}, - {0, 3, -16, 81, 73, -16, 3, 0}, - {0, 3, -16, 77, 77, -16, 3, 0}, - {0, 3, -16, 73, 81, -16, 3, 0}, - {0, 4, -17, 69, 84, -15, 3, 0}, - {-1, 4, -16, 65, 88, -14, 2, 0}, - {-1, 4, -16, 61, 91, -13, 2, 0}, - {-1, 4, -16, 57, 95, -12, 1, 0}, - {-1, 4, -16, 53, 97, -11, 1, 1}, - {-1, 4, -15, 48, 100, -9, 0, 1}, - {-1, 4, -14, 44, 102, -7, -1, 1}, - {-1, 4, -14, 40, 105, -6, -1, 1}, - {-1, 4, -13, 36, 106, -3, -2, 1}, - {-1, 4, -12, 32, 108, -1, -3, 1}, - {-1, 3, -11, 28, 110, 1, -4, 2}, - {-1, 3, -10, 24, 111, 4, -5, 2}, - {-1, 3, -9, 21, 111, 7, -6, 2}, - {-1, 3, -9, 17, 112, 10, -7, 3} -}; - -// Filters for interpolation (full-band) - no filtering for integer pixels -const interp_kernel vp9_filteredinterp_filters1000[(1 << SUBPEL_BITS)] = { - {0, 0, 0, 128, 0, 0, 0, 0}, - {0, 1, -3, 128, 3, -1, 0, 0}, - {-1, 2, -6, 127, 7, -2, 1, 0}, - {-1, 3, -9, 126, 12, -4, 1, 0}, - {-1, 4, -12, 125, 16, -5, 1, 0}, - {-1, 4, -14, 123, 20, -6, 2, 0}, - {-1, 5, -15, 120, 25, -8, 2, 0}, - {-1, 5, -17, 118, 30, -9, 3, -1}, - {-1, 6, -18, 114, 35, -10, 3, -1}, - {-1, 6, -19, 111, 41, -12, 3, -1}, - {-1, 6, -20, 107, 46, -13, 4, -1}, - {-1, 6, -21, 103, 52, -14, 4, -1}, - {-1, 6, -21, 99, 57, -16, 5, -1}, - {-1, 6, -21, 94, 63, -17, 5, -1}, - {-1, 6, -20, 89, 68, -18, 5, -1}, - {-1, 6, -20, 84, 73, -19, 6, -1}, - {-1, 6, -20, 79, 79, -20, 6, -1}, - {-1, 6, -19, 73, 84, -20, 6, -1}, - {-1, 5, 
-18, 68, 89, -20, 6, -1}, - {-1, 5, -17, 63, 94, -21, 6, -1}, - {-1, 5, -16, 57, 99, -21, 6, -1}, - {-1, 4, -14, 52, 103, -21, 6, -1}, - {-1, 4, -13, 46, 107, -20, 6, -1}, - {-1, 3, -12, 41, 111, -19, 6, -1}, - {-1, 3, -10, 35, 114, -18, 6, -1}, - {-1, 3, -9, 30, 118, -17, 5, -1}, - {0, 2, -8, 25, 120, -15, 5, -1}, - {0, 2, -6, 20, 123, -14, 4, -1}, - {0, 1, -5, 16, 125, -12, 4, -1}, - {0, 1, -4, 12, 126, -9, 3, -1}, - {0, 1, -2, 7, 127, -6, 2, -1}, - {0, 0, -1, 3, 128, -3, 1, 0} -}; - -// Filters for factor of 2 downsampling. -static const int16_t vp9_down2_symeven_half_filter[] = {56, 12, -3, -1}; -static const int16_t vp9_down2_symodd_half_filter[] = {64, 35, 0, -3}; - -static const interp_kernel *choose_interp_filter(int inlength, int outlength) { - int outlength16 = outlength * 16; - if (outlength16 >= inlength * 16) - return vp9_filteredinterp_filters1000; - else if (outlength16 >= inlength * 13) - return vp9_filteredinterp_filters875; - else if (outlength16 >= inlength * 11) - return vp9_filteredinterp_filters750; - else if (outlength16 >= inlength * 9) - return vp9_filteredinterp_filters625; - else - return vp9_filteredinterp_filters500; -} - -static void interpolate(const uint8_t *const input, int inlength, - uint8_t *output, int outlength) { - const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) / - outlength; - const int64_t offset = inlength > outlength ? 
- (((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength : - -(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength; - uint8_t *optr = output; - int x, x1, x2, sum, k, int_pel, sub_pel; - int64_t y; - - const interp_kernel *interp_filters = - choose_interp_filter(inlength, outlength); - - x = 0; - y = offset; - while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) { - x++; - y += delta; - } - x1 = x; - x = outlength - 1; - y = delta * x + offset; - while ((y >> INTERP_PRECISION_BITS) + - (int64_t)(INTERP_TAPS / 2) >= inlength) { - x--; - y -= delta; - } - x2 = x; - if (x1 > x2) { - for (x = 0, y = offset; x < outlength; ++x, y += delta) { - const int16_t *filter; - int_pel = y >> INTERP_PRECISION_BITS; - sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = interp_filters[sub_pel]; - sum = 0; - for (k = 0; k < INTERP_TAPS; ++k) { - const int pk = int_pel - INTERP_TAPS / 2 + 1 + k; - sum += filter[k] * input[(pk < 0 ? 0 : - (pk >= inlength ? inlength - 1 : pk))]; - } - *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - } - } else { - // Initial part. - for (x = 0, y = offset; x < x1; ++x, y += delta) { - const int16_t *filter; - int_pel = y >> INTERP_PRECISION_BITS; - sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = interp_filters[sub_pel]; - sum = 0; - for (k = 0; k < INTERP_TAPS; ++k) - sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ? - 0 : - int_pel - INTERP_TAPS / 2 + 1 + k)]; - *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - } - // Middle part. 
- for (; x <= x2; ++x, y += delta) { - const int16_t *filter; - int_pel = y >> INTERP_PRECISION_BITS; - sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = interp_filters[sub_pel]; - sum = 0; - for (k = 0; k < INTERP_TAPS; ++k) - sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k]; - *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - } - // End part. - for (; x < outlength; ++x, y += delta) { - const int16_t *filter; - int_pel = y >> INTERP_PRECISION_BITS; - sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = interp_filters[sub_pel]; - sum = 0; - for (k = 0; k < INTERP_TAPS; ++k) - sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= - inlength ? inlength - 1 : - int_pel - INTERP_TAPS / 2 + 1 + k)]; - *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - } - } -} - -static void down2_symeven(const uint8_t *const input, int length, - uint8_t *output) { - // Actual filter len = 2 * filter_len_half. - const int16_t *filter = vp9_down2_symeven_half_filter; - const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2; - int i, j; - uint8_t *optr = output; - int l1 = filter_len_half; - int l2 = (length - filter_len_half); - l1 += (l1 & 1); - l2 += (l2 & 1); - if (l1 > l2) { - // Short input length. - for (i = 0; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[(i - j < 0 ? 0 : i - j)] + - input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - } else { - // Initial part. - for (i = 0; i < l1; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - // Middle part. 
- for (; i < l2; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[i - j] + input[i + 1 + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - // End part. - for (; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[i - j] + - input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - } -} - -static void down2_symodd(const uint8_t *const input, int length, - uint8_t *output) { - // Actual filter len = 2 * filter_len_half - 1. - const int16_t *filter = vp9_down2_symodd_half_filter; - const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2; - int i, j; - uint8_t *optr = output; - int l1 = filter_len_half - 1; - int l2 = (length - filter_len_half + 1); - l1 += (l1 & 1); - l2 += (l2 & 1); - if (l1 > l2) { - // Short input length. - for (i = 0; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[(i - j < 0 ? 0 : i - j)] + - input[(i + j >= length ? length - 1 : i + j)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - } else { - // Initial part. - for (i = 0; i < l1; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - // Middle part. - for (; i < l2; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[i - j] + input[i + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - // End part. 
- for (; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - } -} - -static int get_down2_length(int length, int steps) { - int s; - for (s = 0; s < steps; ++s) - length = (length + 1) >> 1; - return length; -} - -int get_down2_steps(int in_length, int out_length) { - int steps = 0; - int proj_in_length; - while ((proj_in_length = get_down2_length(in_length, 1)) >= out_length) { - ++steps; - in_length = proj_in_length; - } - return steps; -} - -static void resize_multistep(const uint8_t *const input, - int length, - uint8_t *output, - int olength, - uint8_t *buf) { - int steps; - if (length == olength) { - memcpy(output, input, sizeof(uint8_t) * length); - return; - } - steps = get_down2_steps(length, olength); - - if (steps > 0) { - int s; - uint8_t *out = NULL; - uint8_t *tmpbuf = NULL; - uint8_t *otmp, *otmp2; - int filteredlength = length; - if (!tmpbuf) { - tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * length); - otmp = tmpbuf; - } else { - otmp = buf; - } - otmp2 = otmp + get_down2_length(length, 1); - for (s = 0; s < steps; ++s) { - const int proj_filteredlength = get_down2_length(filteredlength, 1); - const uint8_t *const in = (s == 0 ? input : out); - if (s == steps - 1 && proj_filteredlength == olength) - out = output; - else - out = (s & 1 ? 
otmp2 : otmp); - if (filteredlength & 1) - down2_symodd(in, filteredlength, out); - else - down2_symeven(in, filteredlength, out); - filteredlength = proj_filteredlength; - } - if (filteredlength != olength) { - interpolate(out, filteredlength, output, olength); - } - if (tmpbuf) - free(tmpbuf); - } else { - interpolate(input, length, output, olength); - } -} - -static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) { - int i; - uint8_t *iptr = img; - uint8_t *aptr = arr; - for (i = 0; i < len; ++i, iptr += stride) { - *aptr++ = *iptr; - } -} - -static void fill_arr_to_col(uint8_t *img, int stride, int len, uint8_t *arr) { - int i; - uint8_t *iptr = img; - uint8_t *aptr = arr; - for (i = 0; i < len; ++i, iptr += stride) { - *iptr = *aptr++; - } -} - -void vp9_resize_plane(const uint8_t *const input, - int height, - int width, - int in_stride, - uint8_t *output, - int height2, - int width2, - int out_stride) { - int i; - uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height); - uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * - (width < height ? height : width)); - uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * (height + height2)); - for (i = 0; i < height; ++i) - resize_multistep(input + in_stride * i, width, - intbuf + width2 * i, width2, tmpbuf); - for (i = 0; i < width2; ++i) { - fill_col_to_arr(intbuf + i, width2, height, arrbuf); - resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf); - fill_arr_to_col(output + i, out_stride, height2, arrbuf + height); - } - free(intbuf); - free(tmpbuf); - free(arrbuf); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void highbd_interpolate(const uint16_t *const input, int inlength, - uint16_t *output, int outlength, int bd) { - const int64_t delta = - (((uint64_t)inlength << 32) + outlength / 2) / outlength; - const int64_t offset = inlength > outlength ? 
- (((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength : - -(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength; - uint16_t *optr = output; - int x, x1, x2, sum, k, int_pel, sub_pel; - int64_t y; - - const interp_kernel *interp_filters = - choose_interp_filter(inlength, outlength); - - x = 0; - y = offset; - while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) { - x++; - y += delta; - } - x1 = x; - x = outlength - 1; - y = delta * x + offset; - while ((y >> INTERP_PRECISION_BITS) + - (int64_t)(INTERP_TAPS / 2) >= inlength) { - x--; - y -= delta; - } - x2 = x; - if (x1 > x2) { - for (x = 0, y = offset; x < outlength; ++x, y += delta) { - const int16_t *filter; - int_pel = y >> INTERP_PRECISION_BITS; - sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = interp_filters[sub_pel]; - sum = 0; - for (k = 0; k < INTERP_TAPS; ++k) { - const int pk = int_pel - INTERP_TAPS / 2 + 1 + k; - sum += filter[k] * - input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))]; - } - *optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - } - } else { - // Initial part. - for (x = 0, y = offset; x < x1; ++x, y += delta) { - const int16_t *filter; - int_pel = y >> INTERP_PRECISION_BITS; - sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = interp_filters[sub_pel]; - sum = 0; - for (k = 0; k < INTERP_TAPS; ++k) - sum += filter[k] * - input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ? - 0 : int_pel - INTERP_TAPS / 2 + 1 + k)]; - *optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - } - // Middle part. 
- for (; x <= x2; ++x, y += delta) { - const int16_t *filter; - int_pel = y >> INTERP_PRECISION_BITS; - sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = interp_filters[sub_pel]; - sum = 0; - for (k = 0; k < INTERP_TAPS; ++k) - sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k]; - *optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - } - // End part. - for (; x < outlength; ++x, y += delta) { - const int16_t *filter; - int_pel = y >> INTERP_PRECISION_BITS; - sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = interp_filters[sub_pel]; - sum = 0; - for (k = 0; k < INTERP_TAPS; ++k) - sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= - inlength ? inlength - 1 : - int_pel - INTERP_TAPS / 2 + 1 + k)]; - *optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - } - } -} - -static void highbd_down2_symeven(const uint16_t *const input, int length, - uint16_t *output, int bd) { - // Actual filter len = 2 * filter_len_half. - static const int16_t *filter = vp9_down2_symeven_half_filter; - const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2; - int i, j; - uint16_t *optr = output; - int l1 = filter_len_half; - int l2 = (length - filter_len_half); - l1 += (l1 & 1); - l2 += (l2 & 1); - if (l1 > l2) { - // Short input length. - for (i = 0; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[(i - j < 0 ? 0 : i - j)] + - input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_high(sum, bd); - } - } else { - // Initial part. - for (i = 0; i < l1; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_high(sum, bd); - } - // Middle part. 
- for (; i < l2; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[i - j] + input[i + 1 + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_high(sum, bd); - } - // End part. - for (; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[i - j] + - input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_high(sum, bd); - } - } -} - -static void highbd_down2_symodd(const uint16_t *const input, int length, - uint16_t *output, int bd) { - // Actual filter len = 2 * filter_len_half - 1. - static const int16_t *filter = vp9_down2_symodd_half_filter; - const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2; - int i, j; - uint16_t *optr = output; - int l1 = filter_len_half - 1; - int l2 = (length - filter_len_half + 1); - l1 += (l1 & 1); - l2 += (l2 & 1); - if (l1 > l2) { - // Short input length. - for (i = 0; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[(i - j < 0 ? 0 : i - j)] + - input[(i + j >= length ? length - 1 : i + j)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_high(sum, bd); - } - } else { - // Initial part. - for (i = 0; i < l1; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_high(sum, bd); - } - // Middle part. - for (; i < l2; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[i - j] + input[i + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_high(sum, bd); - } - // End part. 
- for (; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_high(sum, bd); - } - } -} - -static void highbd_resize_multistep(const uint16_t *const input, - int length, - uint16_t *output, - int olength, - uint16_t *buf, - int bd) { - int steps; - if (length == olength) { - memcpy(output, input, sizeof(uint16_t) * length); - return; - } - steps = get_down2_steps(length, olength); - - if (steps > 0) { - int s; - uint16_t *out = NULL; - uint16_t *tmpbuf = NULL; - uint16_t *otmp, *otmp2; - int filteredlength = length; - if (!tmpbuf) { - tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * length); - otmp = tmpbuf; - } else { - otmp = buf; - } - otmp2 = otmp + get_down2_length(length, 1); - for (s = 0; s < steps; ++s) { - const int proj_filteredlength = get_down2_length(filteredlength, 1); - const uint16_t *const in = (s == 0 ? input : out); - if (s == steps - 1 && proj_filteredlength == olength) - out = output; - else - out = (s & 1 ? 
otmp2 : otmp); - if (filteredlength & 1) - highbd_down2_symodd(in, filteredlength, out, bd); - else - highbd_down2_symeven(in, filteredlength, out, bd); - filteredlength = proj_filteredlength; - } - if (filteredlength != olength) { - highbd_interpolate(out, filteredlength, output, olength, bd); - } - if (tmpbuf) - free(tmpbuf); - } else { - highbd_interpolate(input, length, output, olength, bd); - } -} - -static void highbd_fill_col_to_arr(uint16_t *img, int stride, int len, - uint16_t *arr) { - int i; - uint16_t *iptr = img; - uint16_t *aptr = arr; - for (i = 0; i < len; ++i, iptr += stride) { - *aptr++ = *iptr; - } -} - -static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len, - uint16_t *arr) { - int i; - uint16_t *iptr = img; - uint16_t *aptr = arr; - for (i = 0; i < len; ++i, iptr += stride) { - *iptr = *aptr++; - } -} - -void vp9_highbd_resize_plane(const uint8_t *const input, - int height, - int width, - int in_stride, - uint8_t *output, - int height2, - int width2, - int out_stride, - int bd) { - int i; - uint16_t *intbuf = (uint16_t *)malloc(sizeof(uint16_t) * width2 * height); - uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * - (width < height ? 
height : width)); - uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * (height + height2)); - for (i = 0; i < height; ++i) { - highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width, - intbuf + width2 * i, width2, tmpbuf, bd); - } - for (i = 0; i < width2; ++i) { - highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf); - highbd_resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf, - bd); - highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2, - arrbuf + height); - } - free(intbuf); - free(tmpbuf); - free(arrbuf); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp9_resize_frame420(const uint8_t *const y, - int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, - int height, int width, - uint8_t *oy, int oy_stride, - uint8_t *ou, uint8_t *ov, int ouv_stride, - int oheight, int owidth) { - vp9_resize_plane(y, height, width, y_stride, - oy, oheight, owidth, oy_stride); - vp9_resize_plane(u, height / 2, width / 2, uv_stride, - ou, oheight / 2, owidth / 2, ouv_stride); - vp9_resize_plane(v, height / 2, width / 2, uv_stride, - ov, oheight / 2, owidth / 2, ouv_stride); -} - -void vp9_resize_frame422(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, - int height, int width, - uint8_t *oy, int oy_stride, - uint8_t *ou, uint8_t *ov, int ouv_stride, - int oheight, int owidth) { - vp9_resize_plane(y, height, width, y_stride, - oy, oheight, owidth, oy_stride); - vp9_resize_plane(u, height, width / 2, uv_stride, - ou, oheight, owidth / 2, ouv_stride); - vp9_resize_plane(v, height, width / 2, uv_stride, - ov, oheight, owidth / 2, ouv_stride); -} - -void vp9_resize_frame444(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, - int height, int width, - uint8_t *oy, int oy_stride, - uint8_t *ou, uint8_t *ov, int ouv_stride, - int oheight, int owidth) { - vp9_resize_plane(y, height, width, 
y_stride, - oy, oheight, owidth, oy_stride); - vp9_resize_plane(u, height, width, uv_stride, - ou, oheight, owidth, ouv_stride); - vp9_resize_plane(v, height, width, uv_stride, - ov, oheight, owidth, ouv_stride); -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_resize_frame420(const uint8_t *const y, - int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, - int height, int width, - uint8_t *oy, int oy_stride, - uint8_t *ou, uint8_t *ov, int ouv_stride, - int oheight, int owidth, int bd) { - vp9_highbd_resize_plane(y, height, width, y_stride, - oy, oheight, owidth, oy_stride, bd); - vp9_highbd_resize_plane(u, height / 2, width / 2, uv_stride, - ou, oheight / 2, owidth / 2, ouv_stride, bd); - vp9_highbd_resize_plane(v, height / 2, width / 2, uv_stride, - ov, oheight / 2, owidth / 2, ouv_stride, bd); -} - -void vp9_highbd_resize_frame422(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, - int height, int width, - uint8_t *oy, int oy_stride, - uint8_t *ou, uint8_t *ov, int ouv_stride, - int oheight, int owidth, int bd) { - vp9_highbd_resize_plane(y, height, width, y_stride, - oy, oheight, owidth, oy_stride, bd); - vp9_highbd_resize_plane(u, height, width / 2, uv_stride, - ou, oheight, owidth / 2, ouv_stride, bd); - vp9_highbd_resize_plane(v, height, width / 2, uv_stride, - ov, oheight, owidth / 2, ouv_stride, bd); -} - -void vp9_highbd_resize_frame444(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, - int height, int width, - uint8_t *oy, int oy_stride, - uint8_t *ou, uint8_t *ov, int ouv_stride, - int oheight, int owidth, int bd) { - vp9_highbd_resize_plane(y, height, width, y_stride, - oy, oheight, owidth, oy_stride, bd); - vp9_highbd_resize_plane(u, height, width, uv_stride, - ou, oheight, owidth, ouv_stride, bd); - vp9_highbd_resize_plane(v, height, width, uv_stride, - ov, oheight, owidth, ouv_stride, bd); -} -#endif // 
CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/vp9/encoder/vp9_resize.h b/media/libvpx/vp9/encoder/vp9_resize.h deleted file mode 100644 index 067af53f993..00000000000 --- a/media/libvpx/vp9/encoder/vp9_resize.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_ENCODER_VP9_RESIZE_H_ -#define VP9_ENCODER_VP9_RESIZE_H_ - -#include -#include "vpx/vpx_integer.h" - -void vp9_resize_plane(const uint8_t *const input, - int height, - int width, - int in_stride, - uint8_t *output, - int height2, - int width2, - int out_stride); -void vp9_resize_frame420(const uint8_t *const y, - int y_stride, - const uint8_t *const u, - const uint8_t *const v, - int uv_stride, - int height, - int width, - uint8_t *oy, - int oy_stride, - uint8_t *ou, - uint8_t *ov, - int ouv_stride, - int oheight, - int owidth); -void vp9_resize_frame422(const uint8_t *const y, - int y_stride, - const uint8_t *const u, - const uint8_t *const v, - int uv_stride, - int height, - int width, - uint8_t *oy, - int oy_stride, - uint8_t *ou, - uint8_t *ov, - int ouv_stride, - int oheight, - int owidth); -void vp9_resize_frame444(const uint8_t *const y, - int y_stride, - const uint8_t *const u, - const uint8_t *const v, - int uv_stride, - int height, - int width, - uint8_t *oy, - int oy_stride, - uint8_t *ou, - uint8_t *ov, - int ouv_stride, - int oheight, - int owidth); - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_resize_plane(const uint8_t *const input, - int height, - int width, - int in_stride, - uint8_t *output, - int height2, - int width2, - int out_stride, - int bd); -void 
vp9_highbd_resize_frame420(const uint8_t *const y, - int y_stride, - const uint8_t *const u, - const uint8_t *const v, - int uv_stride, - int height, - int width, - uint8_t *oy, - int oy_stride, - uint8_t *ou, - uint8_t *ov, - int ouv_stride, - int oheight, - int owidth, - int bd); -void vp9_highbd_resize_frame422(const uint8_t *const y, - int y_stride, - const uint8_t *const u, - const uint8_t *const v, - int uv_stride, - int height, - int width, - uint8_t *oy, - int oy_stride, - uint8_t *ou, - uint8_t *ov, - int ouv_stride, - int oheight, - int owidth, - int bd); -void vp9_highbd_resize_frame444(const uint8_t *const y, - int y_stride, - const uint8_t *const u, - const uint8_t *const v, - int uv_stride, - int height, - int width, - uint8_t *oy, - int oy_stride, - uint8_t *ou, - uint8_t *ov, - int ouv_stride, - int oheight, - int owidth, - int bd); -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // VP9_ENCODER_VP9_RESIZE_H_ diff --git a/media/libvpx/vp9/encoder/vp9_sad.c b/media/libvpx/vp9/encoder/vp9_sad.c deleted file mode 100644 index cee6ce140e6..00000000000 --- a/media/libvpx/vp9/encoder/vp9_sad.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include - -#include "./vp9_rtcd.h" -#include "./vpx_config.h" - -#include "vpx/vpx_integer.h" -#if CONFIG_VP9_HIGHBITDEPTH -#include "vp9/common/vp9_common.h" -#endif -#include "vp9/encoder/vp9_variance.h" - -static INLINE unsigned int sad(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int width, int height) { - int y, x; - unsigned int sad = 0; - - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) - sad += abs(a[x] - b[x]); - - a += a_stride; - b += b_stride; - } - - return sad; -} - -#define sadMxN(m, n) \ -unsigned int vp9_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride) { \ - return sad(src, src_stride, ref, ref_stride, m, n); \ -} \ -unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred) { \ - uint8_t comp_pred[m * n]; \ - vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ - return sad(src, src_stride, comp_pred, m, m, n); \ -} - -#define sadMxNxK(m, n, k) \ -void vp9_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sads) { \ - int i; \ - for (i = 0; i < k; ++i) \ - sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride); \ -} - -#define sadMxNx4D(m, n) \ -void vp9_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ - const uint8_t *const refs[], int ref_stride, \ - unsigned int *sads) { \ - int i; \ - for (i = 0; i < 4; ++i) \ - sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride); \ -} - -// 64x64 -sadMxN(64, 64) -sadMxNxK(64, 64, 3) -sadMxNxK(64, 64, 8) -sadMxNx4D(64, 64) - -// 64x32 -sadMxN(64, 32) -sadMxNx4D(64, 32) - -// 32x64 -sadMxN(32, 64) -sadMxNx4D(32, 64) - -// 32x32 -sadMxN(32, 32) -sadMxNxK(32, 32, 3) -sadMxNxK(32, 32, 8) -sadMxNx4D(32, 32) - -// 32x16 -sadMxN(32, 16) -sadMxNx4D(32, 16) - -// 16x32 -sadMxN(16, 32) -sadMxNx4D(16, 32) - -// 16x16 -sadMxN(16, 
16) -sadMxNxK(16, 16, 3) -sadMxNxK(16, 16, 8) -sadMxNx4D(16, 16) - -// 16x8 -sadMxN(16, 8) -sadMxNxK(16, 8, 3) -sadMxNxK(16, 8, 8) -sadMxNx4D(16, 8) - -// 8x16 -sadMxN(8, 16) -sadMxNxK(8, 16, 3) -sadMxNxK(8, 16, 8) -sadMxNx4D(8, 16) - -// 8x8 -sadMxN(8, 8) -sadMxNxK(8, 8, 3) -sadMxNxK(8, 8, 8) -sadMxNx4D(8, 8) - -// 8x4 -sadMxN(8, 4) -sadMxNxK(8, 4, 8) -sadMxNx4D(8, 4) - -// 4x8 -sadMxN(4, 8) -sadMxNxK(4, 8, 8) -sadMxNx4D(4, 8) - -// 4x4 -sadMxN(4, 4) -sadMxNxK(4, 4, 3) -sadMxNxK(4, 4, 8) -sadMxNx4D(4, 4) - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE unsigned int high_sad(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int width, int height) { - int y, x; - unsigned int sad = 0; - const uint16_t *a = CONVERT_TO_SHORTPTR(a8); - const uint16_t *b = CONVERT_TO_SHORTPTR(b8); - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) - sad += abs(a[x] - b[x]); - - a += a_stride; - b += b_stride; - } - return sad; -} - -static INLINE unsigned int high_sadb(const uint8_t *a8, int a_stride, - const uint16_t *b, int b_stride, - int width, int height) { - int y, x; - unsigned int sad = 0; - const uint16_t *a = CONVERT_TO_SHORTPTR(a8); - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) - sad += abs(a[x] - b[x]); - - a += a_stride; - b += b_stride; - } - return sad; -} - -#define high_sadMxN(m, n) \ -unsigned int vp9_high_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride) { \ - return high_sad(src, src_stride, ref, ref_stride, m, n); \ -} \ -unsigned int vp9_high_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred) { \ - uint16_t comp_pred[m * n]; \ - vp9_high_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ - return high_sadb(src, src_stride, comp_pred, m, m, n); \ -} - -#define high_sadMxNxK(m, n, k) \ -void vp9_high_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int 
ref_stride, \ - unsigned int *sads) { \ - int i; \ - for (i = 0; i < k; ++i) \ - sads[i] = vp9_high_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride); \ -} - -#define high_sadMxNx4D(m, n) \ -void vp9_high_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ - const uint8_t *const refs[], \ - int ref_stride, unsigned int *sads) { \ - int i; \ - for (i = 0; i < 4; ++i) \ - sads[i] = vp9_high_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride); \ -} - -// 64x64 -high_sadMxN(64, 64) -high_sadMxNxK(64, 64, 3) -high_sadMxNxK(64, 64, 8) -high_sadMxNx4D(64, 64) - -// 64x32 -high_sadMxN(64, 32) -high_sadMxNx4D(64, 32) - -// 32x64 -high_sadMxN(32, 64) -high_sadMxNx4D(32, 64) - -// 32x32 -high_sadMxN(32, 32) -high_sadMxNxK(32, 32, 3) -high_sadMxNxK(32, 32, 8) -high_sadMxNx4D(32, 32) - -// 32x16 -high_sadMxN(32, 16) -high_sadMxNx4D(32, 16) - -// 16x32 -high_sadMxN(16, 32) -high_sadMxNx4D(16, 32) - -// 16x16 -high_sadMxN(16, 16) -high_sadMxNxK(16, 16, 3) -high_sadMxNxK(16, 16, 8) -high_sadMxNx4D(16, 16) - -// 16x8 -high_sadMxN(16, 8) -high_sadMxNxK(16, 8, 3) -high_sadMxNxK(16, 8, 8) -high_sadMxNx4D(16, 8) - -// 8x16 -high_sadMxN(8, 16) -high_sadMxNxK(8, 16, 3) -high_sadMxNxK(8, 16, 8) -high_sadMxNx4D(8, 16) - -// 8x8 -high_sadMxN(8, 8) -high_sadMxNxK(8, 8, 3) -high_sadMxNxK(8, 8, 8) -high_sadMxNx4D(8, 8) - -// 8x4 -high_sadMxN(8, 4) -high_sadMxNxK(8, 4, 8) -high_sadMxNx4D(8, 4) - -// 4x8 -high_sadMxN(4, 8) -high_sadMxNxK(4, 8, 8) -high_sadMxNx4D(4, 8) - -// 4x4 -high_sadMxN(4, 4) -high_sadMxNxK(4, 4, 3) -high_sadMxNxK(4, 4, 8) -high_sadMxNx4D(4, 4) - -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/vp9/encoder/vp9_speed_features.c b/media/libvpx/vp9/encoder/vp9_speed_features.c deleted file mode 100644 index 92e31497e3c..00000000000 --- a/media/libvpx/vp9/encoder/vp9_speed_features.c +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp9/encoder/vp9_encoder.h" -#include "vp9/encoder/vp9_speed_features.h" - -// Intra only frames, golden frames (except alt ref overlays) and -// alt ref frames tend to be coded at a higher than ambient quality -static int frame_is_boosted(const VP9_COMP *cpi) { - return frame_is_intra_only(&cpi->common) || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) || - vp9_is_upper_layer_key_frame(cpi); -} - - -static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, - SPEED_FEATURES *sf, int speed) { - const int boosted = frame_is_boosted(cpi); - - sf->adaptive_rd_thresh = 1; - sf->allow_skip_recode = 1; - - if (speed >= 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT - : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->mv.auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->mv.subpel_iters_per_step = 1; - sf->mode_skip_start = 10; - sf->adaptive_pred_interp_filter = 1; - - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - - sf->tx_size_search_breakout = 1; - - if (MIN(cm->width, cm->height) >= 720) - sf->partition_search_breakout_dist_thr = (1 << 23); - else - sf->partition_search_breakout_dist_thr = (1 << 21); - sf->partition_search_breakout_rate_thr = 500; - } - - if (speed >= 2) { - sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD - : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) { - sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT - : DISABLE_ALL_INTER_SPLIT; - sf->adaptive_pred_interp_filter = 0; - } else { - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - } - - sf->reference_masking = 1; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX; - - if (MIN(cm->width, cm->height) >= 720) - sf->partition_search_breakout_dist_thr = (1 << 24); - else - sf->partition_search_breakout_dist_thr = (1 << 22); - sf->partition_search_breakout_rate_thr = 700; - } - - if (speed >= 3) { - sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD - : USE_LARGESTALL; - if (MIN(cm->width, cm->height) >= 720) { - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->schedule_mode_search = cm->base_qindex < 220 ? 
1 : 0; - sf->partition_search_breakout_dist_thr = (1 << 25); - } else { - sf->max_intra_bsize = BLOCK_32X32; - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - sf->schedule_mode_search = cm->base_qindex < 175 ? 1 : 0; - sf->partition_search_breakout_dist_thr = (1 << 23); - } - sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED; - sf->adaptive_pred_interp_filter = 0; - sf->adaptive_mode_search = 1; - sf->cb_partition_search = !boosted; - sf->cb_pred_filter_search = 1; - sf->alt_ref_search_fp = 1; - sf->recode_loop = ALLOW_RECODE_KFMAXBW; - sf->adaptive_rd_thresh = 3; - sf->mode_skip_start = 6; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; - sf->adaptive_interp_filter_search = 1; - sf->partition_search_breakout_rate_thr = 1000; - } - - if (speed >= 4) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->mv.search_method = BIGDIA; - sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED; - sf->adaptive_rd_thresh = 4; - sf->mode_search_skip_flags |= FLAG_EARLY_TERMINATE; - sf->disable_filter_search_var_thresh = 200; - sf->use_lp32x32fdct = 1; - sf->use_fast_coef_updates = ONE_LOOP_REDUCED; - sf->use_fast_coef_costing = 1; - sf->motion_field_mode_search = !boosted; - - if (MIN(cm->width, cm->height) >= 720) - sf->partition_search_breakout_dist_thr = (1 << 26); - else - sf->partition_search_breakout_dist_thr = (1 << 24); - sf->partition_search_breakout_rate_thr = 1500; - } - - if (speed >= 5) { - int i; - - sf->partition_search_type = FIXED_PARTITION; - sf->optimize_coefficients = 0; - sf->mv.search_method = HEX; - sf->disable_filter_search_var_thresh = 500; - for (i = 0; i < TX_SIZES; ++i) { - sf->intra_y_mode_mask[i] = INTRA_DC; - sf->intra_uv_mode_mask[i] = INTRA_DC; - } - cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; - } - if (speed >= 6) { - sf->mv.reduce_first_step_size = 1; - } -} - -static void set_rt_speed_feature(VP9_COMP *cpi, 
SPEED_FEATURES *sf, - int speed, vp9e_tune_content content) { - VP9_COMMON *const cm = &cpi->common; - const int is_keyframe = cm->frame_type == KEY_FRAME; - const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key; - sf->static_segmentation = 0; - sf->adaptive_rd_thresh = 1; - sf->use_fast_coef_costing = 1; - - if (speed >= 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD - : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT - : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->mv.auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - - if (speed >= 2) { - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT - : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - - if (speed >= 3) { - sf->use_square_partition_only = 1; - sf->disable_filter_search_var_thresh = 100; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->constrain_copy_partition = 1; - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->mv.subpel_iters_per_step = 1; - sf->use_fast_coef_updates = ONE_LOOP_REDUCED; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - sf->allow_skip_recode = 0; - sf->optimize_coefficients = 0; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->lpf_pick = LPF_PICK_FROM_Q; - } - - if (speed >= 4) { - int i; - sf->last_partitioning_redo_frequency = 4; - sf->adaptive_rd_thresh = 5; - sf->use_fast_coef_costing = 0; - sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; - sf->adjust_partitioning_from_last_frame = - cm->last_frame_type != cm->frame_type || (0 == - (frames_since_key + 1) % sf->last_partitioning_redo_frequency); - sf->mv.subpel_force_stop = 1; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[i] = INTRA_DC; - } - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; - sf->frame_parameter_update = 0; - sf->mv.search_method = FAST_HEX; - 
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW; - sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST; - sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST; - sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST; - sf->max_intra_bsize = BLOCK_32X32; - sf->allow_skip_recode = 1; - } - - if (speed >= 5) { - sf->use_quant_fp = !is_keyframe; - sf->auto_min_max_partition_size = is_keyframe ? RELAXED_NEIGHBORING_MIN_MAX - : STRICT_NEIGHBORING_MIN_MAX; - sf->max_partition_size = BLOCK_32X32; - sf->min_partition_size = BLOCK_8X8; - sf->partition_check = - (frames_since_key % sf->last_partitioning_redo_frequency == 1); - sf->force_frame_boost = is_keyframe || - (frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1); - sf->max_delta_qindex = is_keyframe ? 20 : 15; - sf->partition_search_type = REFERENCE_PARTITION; - sf->use_nonrd_pick_mode = 1; - sf->allow_skip_recode = 0; - } - - if (speed >= 6) { - if (content == VP9E_CONTENT_SCREEN) { - int i; - // Allow fancy modes at all sizes since SOURCE_VAR_BASED_PARTITION is used - for (i = 0; i < BLOCK_SIZES; ++i) - sf->inter_mode_mask[i] = INTER_ALL; - } - - // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION. - sf->partition_search_type = SOURCE_VAR_BASED_PARTITION; - sf->search_type_check_frequency = 50; - - sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8; - - // This feature is only enabled when partition search is disabled. - sf->reuse_inter_pred_sby = 1; - - // Increase mode checking threshold for NEWMV. - sf->elevate_newmv_thresh = 2000; - - sf->mv.reduce_first_step_size = 1; - } - - if (speed >= 7) { - sf->mv.search_method = FAST_DIAMOND; - sf->mv.fullpel_search_step_param = 10; - sf->lpf_pick = LPF_PICK_MINIMAL_LPF; - sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ? 
- 800 : 300; - sf->elevate_newmv_thresh = 2500; - } - - if (speed >= 12) { - sf->elevate_newmv_thresh = 4000; - sf->mv.subpel_force_stop = 2; - } - - if (speed >= 13) { - int i; - sf->max_intra_bsize = BLOCK_32X32; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->inter_mode_mask[i] = INTER_NEAREST; - } -} - -void vp9_set_speed_features(VP9_COMP *cpi) { - SPEED_FEATURES *const sf = &cpi->sf; - VP9_COMMON *const cm = &cpi->common; - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - int i; - - // best quality defaults - sf->frame_parameter_update = 1; - sf->mv.search_method = NSTEP; - sf->recode_loop = ALLOW_RECODE; - sf->mv.subpel_search_method = SUBPEL_TREE; - sf->mv.subpel_iters_per_step = 2; - sf->mv.subpel_force_stop = 0; - sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf); - sf->mv.reduce_first_step_size = 0; - sf->mv.auto_mv_step_size = 0; - sf->mv.fullpel_search_step_param = 6; - sf->comp_inter_joint_search_thresh = BLOCK_4X4; - sf->adaptive_rd_thresh = 0; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; - sf->tx_size_search_method = USE_FULL_RD; - sf->use_lp32x32fdct = 0; - sf->adaptive_motion_search = 0; - sf->adaptive_pred_interp_filter = 0; - sf->adaptive_mode_search = 0; - sf->cb_pred_filter_search = 0; - sf->cb_partition_search = 0; - sf->motion_field_mode_search = 0; - sf->alt_ref_search_fp = 0; - sf->use_quant_fp = 0; - sf->reference_masking = 0; - sf->partition_search_type = SEARCH_PARTITION; - sf->less_rectangular_check = 0; - sf->use_square_partition_only = 0; - sf->auto_min_max_partition_size = NOT_IN_USE; - sf->max_partition_size = BLOCK_64X64; - sf->min_partition_size = BLOCK_4X4; - sf->adjust_partitioning_from_last_frame = 0; - sf->last_partitioning_redo_frequency = 4; - sf->constrain_copy_partition = 0; - sf->disable_split_mask = 0; - sf->mode_search_skip_flags = 0; - sf->force_frame_boost = 0; - sf->max_delta_qindex = 0; - sf->disable_filter_search_var_thresh = 0; - sf->adaptive_interp_filter_search = 0; - - for (i = 0; i < 
TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_ALL; - sf->intra_uv_mode_mask[i] = INTRA_ALL; - } - sf->use_rd_breakout = 0; - sf->skip_encode_sb = 0; - sf->use_uv_intra_rd_estimate = 0; - sf->allow_skip_recode = 0; - sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; - sf->use_fast_coef_updates = TWO_LOOP; - sf->use_fast_coef_costing = 0; - sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set - sf->schedule_mode_search = 0; - sf->use_nonrd_pick_mode = 0; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->inter_mode_mask[i] = INTER_ALL; - sf->max_intra_bsize = BLOCK_64X64; - sf->reuse_inter_pred_sby = 0; - // This setting only takes effect when partition_search_type is set - // to FIXED_PARTITION. - sf->always_this_block_size = BLOCK_16X16; - sf->search_type_check_frequency = 50; - sf->encode_breakout_thresh = 0; - sf->elevate_newmv_thresh = 0; - // Recode loop tolerence %. - sf->recode_tolerance = 25; - sf->default_interp_filter = SWITCHABLE; - sf->tx_size_search_breakout = 0; - sf->partition_search_breakout_dist_thr = 0; - sf->partition_search_breakout_rate_thr = 0; - - if (oxcf->mode == REALTIME) - set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content); - else if (oxcf->mode == GOOD) - set_good_speed_feature(cpi, cm, sf, oxcf->speed); - - cpi->full_search_sad = vp9_full_search_sad; - cpi->diamond_search_sad = oxcf->mode == BEST ? vp9_full_range_search - : vp9_diamond_search_sad; - cpi->refining_search_sad = vp9_refining_search_sad; - - - // Slow quant, dct and trellis not worthwhile for first pass - // so make sure they are always turned off. - if (oxcf->pass == 1) - sf->optimize_coefficients = 0; - - // No recode for 1 pass. 
- if (oxcf->pass == 0) { - sf->recode_loop = DISALLOW_RECODE; - sf->optimize_coefficients = 0; - } - - if (sf->mv.subpel_search_method == SUBPEL_TREE) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; - } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned; - } - - cpi->mb.optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1; - - if (sf->disable_split_mask == DISABLE_ALL_SPLIT) - sf->adaptive_pred_interp_filter = 0; - - if (!cpi->oxcf.frame_periodic_boost) { - sf->max_delta_qindex = 0; - } - - if (cpi->encode_breakout && oxcf->mode == REALTIME && - sf->encode_breakout_thresh > cpi->encode_breakout) - cpi->encode_breakout = sf->encode_breakout_thresh; -} diff --git a/media/libvpx/vp9/encoder/vp9_speed_features.h b/media/libvpx/vp9/encoder/vp9_speed_features.h deleted file mode 100644 index ed840086330..00000000000 --- a/media/libvpx/vp9/encoder/vp9_speed_features.h +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_ENCODER_VP9_SPEED_FEATURES_H_ -#define VP9_ENCODER_VP9_SPEED_FEATURES_H_ - -#include "vp9/common/vp9_enums.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - INTRA_ALL = (1 << DC_PRED) | - (1 << V_PRED) | (1 << H_PRED) | - (1 << D45_PRED) | (1 << D135_PRED) | - (1 << D117_PRED) | (1 << D153_PRED) | - (1 << D207_PRED) | (1 << D63_PRED) | - (1 << TM_PRED), - INTRA_DC = (1 << DC_PRED), - INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED), - INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED), - INTRA_DC_TM_H_V = (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | - (1 << H_PRED) -}; - -enum { - INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV), - INTER_NEAREST = (1 << NEARESTMV), - INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV), - INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV), -}; - -enum { - DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | - (1 << THR_COMP_LA) | - (1 << THR_ALTR) | - (1 << THR_GOLD) | - (1 << THR_LAST), - - DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT, - - DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA), - - LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | - (1 << THR_COMP_LA) | - (1 << THR_ALTR) | - (1 << THR_GOLD) -}; - -typedef enum { - DIAMOND = 0, - NSTEP = 1, - HEX = 2, - BIGDIA = 3, - SQUARE = 4, - FAST_HEX = 5, - FAST_DIAMOND = 6 -} SEARCH_METHODS; - -typedef enum { - // No recode. - DISALLOW_RECODE = 0, - // Allow recode for KF and exceeding maximum frame bandwidth. - ALLOW_RECODE_KFMAXBW = 1, - // Allow recode only for KF/ARF/GF frames. - ALLOW_RECODE_KFARFGF = 2, - // Allow recode for all frames based on bitrate constraints. 
- ALLOW_RECODE = 3, -} RECODE_LOOP_TYPE; - -typedef enum { - SUBPEL_TREE = 0, - SUBPEL_TREE_PRUNED = 1, - // Other methods to come -} SUBPEL_SEARCH_METHODS; - -typedef enum { - NO_MOTION_THRESHOLD = 0, - LOW_MOTION_THRESHOLD = 7 -} MOTION_THRESHOLD; - -typedef enum { - LAST_FRAME_PARTITION_OFF = 0, - LAST_FRAME_PARTITION_LOW_MOTION = 1, - LAST_FRAME_PARTITION_ALL = 2 -} LAST_FRAME_PARTITION_METHOD; - -typedef enum { - USE_FULL_RD = 0, - USE_LARGESTALL, - USE_TX_8X8 -} TX_SIZE_SEARCH_METHOD; - -typedef enum { - NOT_IN_USE = 0, - RELAXED_NEIGHBORING_MIN_MAX = 1, - CONSTRAIN_NEIGHBORING_MIN_MAX = 2, - STRICT_NEIGHBORING_MIN_MAX = 3 -} AUTO_MIN_MAX_MODE; - -typedef enum { - // Try the full image with different values. - LPF_PICK_FROM_FULL_IMAGE, - // Try a small portion of the image with different values. - LPF_PICK_FROM_SUBIMAGE, - // Estimate the level based on quantizer and frame type - LPF_PICK_FROM_Q, - // Pick 0 to disable LPF if LPF was enabled last frame - LPF_PICK_MINIMAL_LPF -} LPF_PICK_METHOD; - -typedef enum { - // Terminate search early based on distortion so far compared to - // qp step, distortion in the neighborhood of the frame, etc. - FLAG_EARLY_TERMINATE = 1 << 0, - - // Skips comp inter modes if the best so far is an intra mode. - FLAG_SKIP_COMP_BESTINTRA = 1 << 1, - - // Skips oblique intra modes if the best so far is an inter mode. - FLAG_SKIP_INTRA_BESTINTER = 1 << 3, - - // Skips oblique intra modes at angles 27, 63, 117, 153 if the best - // intra so far is not one of the neighboring directions. 
- FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, - - // Skips intra modes other than DC_PRED if the source variance is small - FLAG_SKIP_INTRA_LOWVAR = 1 << 5, -} MODE_SEARCH_SKIP_LOGIC; - -typedef enum { - FLAG_SKIP_EIGHTTAP = 1 << EIGHTTAP, - FLAG_SKIP_EIGHTTAP_SMOOTH = 1 << EIGHTTAP_SMOOTH, - FLAG_SKIP_EIGHTTAP_SHARP = 1 << EIGHTTAP_SHARP, -} INTERP_FILTER_MASK; - -typedef enum { - // Search partitions using RD/NONRD criterion - SEARCH_PARTITION = 0, - - // Always use a fixed size partition - FIXED_PARTITION = 1, - - // Use a fixed size partition in every 64X64 SB, where the size is - // determined based on source variance - VAR_BASED_FIXED_PARTITION = 2, - - REFERENCE_PARTITION = 3, - - // Use an arbitrary partitioning scheme based on source variance within - // a 64X64 SB - VAR_BASED_PARTITION, - - // Use non-fixed partitions based on source variance - SOURCE_VAR_BASED_PARTITION -} PARTITION_SEARCH_TYPE; - -typedef enum { - // Does a dry run to see if any of the contexts need to be updated or not, - // before the final run. - TWO_LOOP = 0, - - // No dry run conducted. - ONE_LOOP = 1, - - // No dry run, also only half the coef contexts and bands are updated. - // The rest are not updated at all. - ONE_LOOP_REDUCED = 2 -} FAST_COEFF_UPDATE; - -typedef struct MV_SPEED_FEATURES { - // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). - SEARCH_METHODS search_method; - - // This parameter controls which step in the n-step process we start at. - // It's changed adaptively based on circumstances. - int reduce_first_step_size; - - // If this is set to 1, we limit the motion search range to 2 times the - // largest motion vector found in the last frame. - int auto_mv_step_size; - - // Subpel_search_method can only be subpel_tree which does a subpixel - // logarithmic search that keeps stepping at 1/2 pixel units until - // you stop getting a gain, and then goes on to 1/4 and repeats - // the same process. Along the way it skips many diagonals. 
- SUBPEL_SEARCH_METHODS subpel_search_method; - - // Maximum number of steps in logarithmic subpel search before giving up. - int subpel_iters_per_step; - - // Control when to stop subpel search - int subpel_force_stop; - - // This variable sets the step_param used in full pel motion search. - int fullpel_search_step_param; -} MV_SPEED_FEATURES; - -typedef struct SPEED_FEATURES { - MV_SPEED_FEATURES mv; - - // Frame level coding parameter update - int frame_parameter_update; - - RECODE_LOOP_TYPE recode_loop; - - // Trellis (dynamic programming) optimization of quantized values (+1, 0). - int optimize_coefficients; - - // Always set to 0. If on it enables 0 cost background transmission - // (except for the initial transmission of the segmentation). The feature is - // disabled because the addition of very large block sizes make the - // backgrounds very to cheap to encode, and the segmentation we have - // adds overhead. - int static_segmentation; - - // If 1 we iterate finding a best reference for 2 ref frames together - via - // a log search that iterates 4 times (check around mv for last for best - // error of combined predictor then check around mv for alt). If 0 we - // we just use the best motion vector found for each frame by itself. - BLOCK_SIZE comp_inter_joint_search_thresh; - - // This variable is used to cap the maximum number of times we skip testing a - // mode to be evaluated. A high value means we will be faster. - int adaptive_rd_thresh; - - // Enables skipping the reconstruction step (idct, recon) in the - // intermediate steps assuming the last frame didn't have too many intra - // blocks and the q is less than a threshold. - int skip_encode_sb; - int skip_encode_frame; - // Speed feature to allow or disallow skipping of recode at block - // level within a frame. - int allow_skip_recode; - - // This variable allows us to reuse the last frames partition choices - // (64x64 v 32x32 etc) for this frame. 
It can be set to only use the last - // frame as a starting point in low motion scenes or always use it. If set - // we use last partitioning_redo frequency to determine how often to redo - // the partitioning from scratch. Adjust_partitioning_from_last_frame - // enables us to adjust up or down one partitioning from the last frames - // partitioning. - LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; - - // The threshold is to determine how slow the motino is, it is used when - // use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION - MOTION_THRESHOLD lf_motion_threshold; - - // Determine which method we use to determine transform size. We can choose - // between options like full rd, largest for prediction size, largest - // for intra and model coefs for the rest. - TX_SIZE_SEARCH_METHOD tx_size_search_method; - - // Low precision 32x32 fdct keeps everything in 16 bits and thus is less - // precise but significantly faster than the non lp version. - int use_lp32x32fdct; - - // TODO(JBB): remove this as its no longer used. - - // After looking at the first set of modes (set by index here), skip - // checking modes for reference frames that don't match the reference frame - // of the best so far. - int mode_skip_start; - - // TODO(JBB): Remove this. - int reference_masking; - - PARTITION_SEARCH_TYPE partition_search_type; - - // Used if partition_search_type = FIXED_SIZE_PARTITION - BLOCK_SIZE always_this_block_size; - - // Skip rectangular partition test when partition type none gives better - // rd than partition type split. - int less_rectangular_check; - - // Disable testing non square partitions. (eg 16x32) - int use_square_partition_only; - - // Sets min and max partition sizes for this 64x64 region based on the - // same 64x64 in last encoded frame, and the left and above neighbor. 
- AUTO_MIN_MAX_MODE auto_min_max_partition_size; - - // Min and max partition size we enable (block_size) as per auto - // min max, but also used by adjust partitioning, and pick_partitioning. - BLOCK_SIZE min_partition_size; - BLOCK_SIZE max_partition_size; - - // Whether or not we allow partitions one smaller or one greater than the last - // frame's partitioning. Only used if use_lastframe_partitioning is set. - int adjust_partitioning_from_last_frame; - - // How frequently we re do the partitioning from scratch. Only used if - // use_lastframe_partitioning is set. - int last_partitioning_redo_frequency; - - // This enables constrained copy partitioning, which, given an input block - // size bsize, will copy previous partition for partitions less than bsize, - // otherwise bsize partition is used. bsize is currently set to 16x16. - // Used for the case where motion is detected in superblock. - int constrain_copy_partition; - - // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable - // it always, to allow it for only Last frame and Intra, disable it for all - // inter modes or to enable it always. - int disable_split_mask; - - // TODO(jingning): combine the related motion search speed features - // This allows us to use motion search at other sizes as a starting - // point for this motion search and limits the search range around it. - int adaptive_motion_search; - - int schedule_mode_search; - - // Allows sub 8x8 modes to use the prediction filter that was determined - // best for 8x8 mode. If set to 0 we always re check all the filters for - // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter - // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. 
- int adaptive_pred_interp_filter; - - // Adaptive prediction mode search - int adaptive_mode_search; - - // Chessboard pattern prediction filter type search - int cb_pred_filter_search; - - int cb_partition_search; - - int motion_field_mode_search; - - int alt_ref_search_fp; - - // Fast quantization process path - int use_quant_fp; - - // Search through variable block partition types in non-RD mode decision - // encoding process for RTC. - int partition_check; - - // Use finer quantizer in every other few frames that run variable block - // partition type search. - int force_frame_boost; - - // Maximally allowed base quantization index fluctuation. - int max_delta_qindex; - - // Implements various heuristics to skip searching modes - // The heuristics selected are based on flags - // defined in the MODE_SEARCH_SKIP_HEURISTICS enum - unsigned int mode_search_skip_flags; - - // A source variance threshold below which filter search is disabled - // Choose a very large value (UINT_MAX) to use 8-tap always - unsigned int disable_filter_search_var_thresh; - - // These bit masks allow you to enable or disable intra modes for each - // transform size separately. - int intra_y_mode_mask[TX_SIZES]; - int intra_uv_mode_mask[TX_SIZES]; - - // This variable enables an early break out of mode testing if the model for - // rd built from the prediction signal indicates a value that's much - // higher than the best rd we've seen so far. - int use_rd_breakout; - - // This enables us to use an estimate for intra rd based on dc mode rather - // than choosing an actual uv mode in the stage of encoding before the actual - // final encode. - int use_uv_intra_rd_estimate; - - // This feature controls how the loop filter level is determined. - LPF_PICK_METHOD lpf_pick; - - // This feature limits the number of coefficients updates we actually do - // by only looking at counts from 1/2 the bands. 
- FAST_COEFF_UPDATE use_fast_coef_updates; - - // This flag controls the use of non-RD mode decision. - int use_nonrd_pick_mode; - - // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV - // modes are used in order from LSB to MSB for each BLOCK_SIZE. - int inter_mode_mask[BLOCK_SIZES]; - - // This feature controls whether we do the expensive context update and - // calculation in the rd coefficient costing loop. - int use_fast_coef_costing; - - // This feature controls the tolerence vs target used in deciding whether to - // recode a frame. It has no meaning if recode is disabled. - int recode_tolerance; - - // This variable controls the maximum block size where intra blocks can be - // used in inter frames. - // TODO(aconverse): Fold this into one of the other many mode skips - BLOCK_SIZE max_intra_bsize; - - // The frequency that we check if SOURCE_VAR_BASED_PARTITION or - // FIXED_PARTITION search type should be used. - int search_type_check_frequency; - - // When partition is pre-set, the inter prediction result from pick_inter_mode - // can be reused in final block encoding process. It is enabled only for real- - // time mode speed 6. - int reuse_inter_pred_sby; - - // This variable sets the encode_breakout threshold. Currently, it is only - // enabled in real time mode. - int encode_breakout_thresh; - - // In real time encoding, increase the threshold for NEWMV. - int elevate_newmv_thresh; - - // default interp filter choice - INTERP_FILTER default_interp_filter; - - // Early termination in transform size search, which only applies while - // tx_size_search_method is USE_FULL_RD. - int tx_size_search_breakout; - - // adaptive interp_filter search to allow skip of certain filter types. - int adaptive_interp_filter_search; - - // mask for skip evaluation of certain interp_filter type. - INTERP_FILTER_MASK interp_filter_search_mask; - - // Partition search early breakout thresholds. 
- int64_t partition_search_breakout_dist_thr; - int partition_search_breakout_rate_thr; -} SPEED_FEATURES; - -struct VP9_COMP; - -void vp9_set_speed_features(struct VP9_COMP *cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_ - diff --git a/media/libvpx/vp9/encoder/vp9_ssim.h b/media/libvpx/vp9/encoder/vp9_ssim.h deleted file mode 100644 index 28baa4b596c..00000000000 --- a/media/libvpx/vp9/encoder/vp9_ssim.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_ENCODER_VP9_SSIM_H_ -#define VP9_ENCODER_VP9_SSIM_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "vpx_scale/yv12config.h" - -double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, - double *weight); - -double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, - double *ssim_y, double *ssim_u, double *ssim_v); - -#if CONFIG_VP9_HIGHBITDEPTH -double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - double *weight, - unsigned int bd, - unsigned int shift); - -double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - double *ssim_y, - double *ssim_u, - double *ssim_v, - unsigned int bps, - unsigned int shift); -#endif // CONFIG_VP9_HIGHBITDEPTH - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_SSIM_H_ diff --git a/media/libvpx/vp9/encoder/vp9_svc_layercontext.c b/media/libvpx/vp9/encoder/vp9_svc_layercontext.c deleted file mode 100644 index eed681c968a..00000000000 --- a/media/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ /dev/null @@ 
-1,345 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp9/encoder/vp9_encoder.h" -#include "vp9/encoder/vp9_svc_layercontext.h" -#include "vp9/encoder/vp9_extend.h" - -void vp9_init_layer_context(VP9_COMP *const cpi) { - SVC *const svc = &cpi->svc; - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - int layer; - int layer_end; - int alt_ref_idx = svc->number_spatial_layers; - - svc->spatial_layer_id = 0; - svc->temporal_layer_id = 0; - - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - layer_end = svc->number_temporal_layers; - } else { - layer_end = svc->number_spatial_layers; - } - - for (layer = 0; layer < layer_end; ++layer) { - LAYER_CONTEXT *const lc = &svc->layer_context[layer]; - RATE_CONTROL *const lrc = &lc->rc; - int i; - lc->current_video_frame_in_layer = 0; - lc->layer_size = 0; - lc->frames_from_key_frame = 0; - lc->last_frame_type = FRAME_TYPES; - lrc->ni_av_qi = oxcf->worst_allowed_q; - lrc->total_actual_bits = 0; - lrc->total_target_vs_actual = 0; - lrc->ni_tot_qi = 0; - lrc->tot_q = 0.0; - lrc->avg_q = 0.0; - lrc->ni_frames = 0; - lrc->decimation_count = 0; - lrc->decimation_factor = 0; - - for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { - lrc->rate_correction_factors[i] = 1.0; - } - - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; - lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; - lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; - } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; - lrc->last_q[KEY_FRAME] = 
oxcf->best_allowed_q; - lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; - lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + - oxcf->best_allowed_q) / 2; - lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + - oxcf->best_allowed_q) / 2; - if (oxcf->ss_play_alternate[layer]) - lc->alt_ref_idx = alt_ref_idx++; - else - lc->alt_ref_idx = -1; - lc->gold_ref_idx = -1; - } - - lrc->buffer_level = oxcf->starting_buffer_level_ms * - lc->target_bandwidth / 1000; - lrc->bits_off_target = lrc->buffer_level; - } - - // Still have extra buffer for base layer golden frame - if (!(svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) - && alt_ref_idx < REF_FRAMES) - svc->layer_context[0].gold_ref_idx = alt_ref_idx; -} - -// Update the layer context from a change_config() call. -void vp9_update_layer_context_change_config(VP9_COMP *const cpi, - const int target_bandwidth) { - SVC *const svc = &cpi->svc; - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - const RATE_CONTROL *const rc = &cpi->rc; - int layer; - int layer_end; - float bitrate_alloc = 1.0; - - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - layer_end = svc->number_temporal_layers; - } else { - layer_end = svc->number_spatial_layers; - } - - for (layer = 0; layer < layer_end; ++layer) { - LAYER_CONTEXT *const lc = &svc->layer_context[layer]; - RATE_CONTROL *const lrc = &lc->rc; - - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; - } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; - } - bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; - // Update buffer-related quantities. 
- lrc->starting_buffer_level = - (int64_t)(rc->starting_buffer_level * bitrate_alloc); - lrc->optimal_buffer_level = - (int64_t)(rc->optimal_buffer_level * bitrate_alloc); - lrc->maximum_buffer_size = - (int64_t)(rc->maximum_buffer_size * bitrate_alloc); - lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); - lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); - // Update framerate-related quantities. - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; - } else { - lc->framerate = cpi->framerate; - } - lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); - lrc->max_frame_bandwidth = rc->max_frame_bandwidth; - // Update qp-related quantities. - lrc->worst_quality = rc->worst_quality; - lrc->best_quality = rc->best_quality; - } -} - -static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) { - return (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? - &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : - &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; -} - -void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { - SVC *const svc = &cpi->svc; - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = get_layer_context(cpi); - RATE_CONTROL *const lrc = &lc->rc; - const int layer = svc->temporal_layer_id; - - lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; - lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); - lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; - // Update the average layer frame size (non-cumulative per-frame-bw). 
- if (layer == 0) { - lc->avg_frame_size = lrc->avg_frame_bandwidth; - } else { - const double prev_layer_framerate = - cpi->framerate / oxcf->ts_rate_decimator[layer - 1]; - const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1]; - lc->avg_frame_size = - (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / - (lc->framerate - prev_layer_framerate)); - } -} - -void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = get_layer_context(cpi); - RATE_CONTROL *const lrc = &lc->rc; - - lc->framerate = framerate; - lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); - lrc->min_frame_bandwidth = (int)(lrc->avg_frame_bandwidth * - oxcf->two_pass_vbrmin_section / 100); - lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth * - oxcf->two_pass_vbrmax_section) / 100); - vp9_rc_set_gf_max_interval(cpi, lrc); -} - -void vp9_restore_layer_context(VP9_COMP *const cpi) { - LAYER_CONTEXT *const lc = get_layer_context(cpi); - const int old_frame_since_key = cpi->rc.frames_since_key; - const int old_frame_to_key = cpi->rc.frames_to_key; - - cpi->rc = lc->rc; - cpi->twopass = lc->twopass; - cpi->oxcf.target_bandwidth = lc->target_bandwidth; - cpi->alt_ref_source = lc->alt_ref_source; - // Reset the frames_since_key and frames_to_key counters to their values - // before the layer restore. Keep these defined for the stream (not layer). 
- if (cpi->svc.number_temporal_layers > 1) { - cpi->rc.frames_since_key = old_frame_since_key; - cpi->rc.frames_to_key = old_frame_to_key; - } -} - -void vp9_save_layer_context(VP9_COMP *const cpi) { - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = get_layer_context(cpi); - - lc->rc = cpi->rc; - lc->twopass = cpi->twopass; - lc->target_bandwidth = (int)oxcf->target_bandwidth; - lc->alt_ref_source = cpi->alt_ref_source; -} - -void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { - SVC *const svc = &cpi->svc; - int i; - - for (i = 0; i < svc->number_spatial_layers; ++i) { - TWO_PASS *const twopass = &svc->layer_context[i].twopass; - - svc->spatial_layer_id = i; - vp9_init_second_pass(cpi); - - twopass->total_stats.spatial_layer_id = i; - twopass->total_left_stats.spatial_layer_id = i; - } - svc->spatial_layer_id = 0; -} - -void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { - LAYER_CONTEXT *const lc = - (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? 
- &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : - &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; - ++lc->current_video_frame_in_layer; - ++lc->frames_from_key_frame; -} - -int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) { - return is_two_pass_svc(cpi) && - cpi->svc.spatial_layer_id > 0 && - cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame; -} - -#if CONFIG_SPATIAL_SVC -static void get_layer_resolution(const int width_org, const int height_org, - const int num, const int den, - int *width_out, int *height_out) { - int w, h; - - if (width_out == NULL || height_out == NULL || den == 0) - return; - - w = width_org * num / den; - h = height_org * num / den; - - // make height and width even to make chrome player happy - w += w % 2; - h += h % 2; - - *width_out = w; - *height_out = h; -} - -int vp9_svc_start_frame(VP9_COMP *const cpi) { - int width = 0, height = 0; - LAYER_CONTEXT *lc; - int count = 1 << (cpi->svc.number_temporal_layers - 1); - - cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; - lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; - - cpi->svc.temporal_layer_id = 0; - while ((lc->current_video_frame_in_layer % count) != 0) { - ++cpi->svc.temporal_layer_id; - count >>= 1; - } - - cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; - - cpi->lst_fb_idx = cpi->svc.spatial_layer_id; - - if (cpi->svc.spatial_layer_id == 0) - cpi->gld_fb_idx = (lc->gold_ref_idx >= 0) ? 
- lc->gold_ref_idx : cpi->lst_fb_idx; - else - cpi->gld_fb_idx = cpi->svc.spatial_layer_id - 1; - - if (lc->current_video_frame_in_layer == 0) { - if (cpi->svc.spatial_layer_id >= 2) { - cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2; - } else { - cpi->alt_fb_idx = cpi->lst_fb_idx; - cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_ALT_FLAG); - } - } else { - if (cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]) { - cpi->alt_fb_idx = lc->alt_ref_idx; - if (!lc->has_alt_frame) - cpi->ref_frame_flags &= (~VP9_ALT_FLAG); - } else { - // Find a proper alt_fb_idx for layers that don't have alt ref frame - if (cpi->svc.spatial_layer_id == 0) { - cpi->alt_fb_idx = cpi->lst_fb_idx; - } else { - LAYER_CONTEXT *lc_lower = - &cpi->svc.layer_context[cpi->svc.spatial_layer_id - 1]; - - if (cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id - 1] && - lc_lower->alt_ref_source != NULL) - cpi->alt_fb_idx = lc_lower->alt_ref_idx; - else if (cpi->svc.spatial_layer_id >= 2) - cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2; - else - cpi->alt_fb_idx = cpi->lst_fb_idx; - } - } - } - - get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height, - lc->scaling_factor_num, lc->scaling_factor_den, - &width, &height); - if (vp9_set_size_literal(cpi, width, height) != 0) - return VPX_CODEC_INVALID_PARAM; - - cpi->oxcf.worst_allowed_q = vp9_quantizer_to_qindex(lc->max_q); - cpi->oxcf.best_allowed_q = vp9_quantizer_to_qindex(lc->min_q); - - vp9_change_config(cpi, &cpi->oxcf); - - vp9_set_high_precision_mv(cpi, 1); - - cpi->alt_ref_source = get_layer_context(cpi)->alt_ref_source; - - return 0; -} - -struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, - struct lookahead_ctx *ctx, - int drain) { - struct lookahead_entry *buf = NULL; - - if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { - buf = vp9_lookahead_peek(ctx, 0); - if (buf != NULL) { - // Only remove the buffer when pop the highest layer. 
- if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) { - vp9_lookahead_pop(ctx, drain); - } - } - } - - return buf; -} -#endif diff --git a/media/libvpx/vp9/encoder/vp9_svc_layercontext.h b/media/libvpx/vp9/encoder/vp9_svc_layercontext.h deleted file mode 100644 index 47a5456b619..00000000000 --- a/media/libvpx/vp9/encoder/vp9_svc_layercontext.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_ -#define VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_ - -#include "vpx/vpx_encoder.h" - -#include "vp9/encoder/vp9_ratectrl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - RATE_CONTROL rc; - int target_bandwidth; - double framerate; - int avg_frame_size; - int max_q; - int min_q; - int scaling_factor_num; - int scaling_factor_den; - TWO_PASS twopass; - vpx_fixed_buf_t rc_twopass_stats_in; - unsigned int current_video_frame_in_layer; - int is_key_frame; - int frames_from_key_frame; - FRAME_TYPE last_frame_type; - struct lookahead_entry *alt_ref_source; - int alt_ref_idx; - int gold_ref_idx; - int has_alt_frame; - size_t layer_size; - struct vpx_psnr_pkt psnr_pkt; -} LAYER_CONTEXT; - -typedef struct { - int spatial_layer_id; - int temporal_layer_id; - int number_spatial_layers; - int number_temporal_layers; - - int spatial_layer_to_encode; - - // Store scaled source frames to be used for temporal filter to generate - // a alt ref frame. - YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS]; - - // Layer context used for rate control in one pass temporal CBR mode or - // two pass spatial mode. 
Defined for temporal or spatial layers for now. - // Does not support temporal combined with spatial RC. - LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)]; -} SVC; - -struct VP9_COMP; - -// Initialize layer context data from init_config(). -void vp9_init_layer_context(struct VP9_COMP *const cpi); - -// Update the layer context from a change_config() call. -void vp9_update_layer_context_change_config(struct VP9_COMP *const cpi, - const int target_bandwidth); - -// Prior to encoding the frame, update framerate-related quantities -// for the current temporal layer. -void vp9_update_temporal_layer_framerate(struct VP9_COMP *const cpi); - -// Update framerate-related quantities for the current spatial layer. -void vp9_update_spatial_layer_framerate(struct VP9_COMP *const cpi, - double framerate); - -// Prior to encoding the frame, set the layer context, for the current layer -// to be encoded, to the cpi struct. -void vp9_restore_layer_context(struct VP9_COMP *const cpi); - -// Save the layer context after encoding the frame. -void vp9_save_layer_context(struct VP9_COMP *const cpi); - -// Initialize second pass rc for spatial svc. 
-void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); - -// Increment number of video frames in layer -void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi); - -// Check if current layer is key frame in spatial upper layer -int vp9_is_upper_layer_key_frame(const struct VP9_COMP *const cpi); - -// Get the next source buffer to encode -struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi, - struct lookahead_ctx *ctx, - int drain); - -// Start a frame and initialize svc parameters -int vp9_svc_start_frame(struct VP9_COMP *const cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_SVC_LAYERCONTEXT_ diff --git a/media/libvpx/vp9/encoder/vp9_variance.c b/media/libvpx/vp9/encoder/vp9_variance.c deleted file mode 100644 index c97f93fdaa9..00000000000 --- a/media/libvpx/vp9/encoder/vp9_variance.c +++ /dev/null @@ -1,641 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vp9_rtcd.h" - -#include "vpx_ports/mem.h" -#include "vpx/vpx_integer.h" - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_filter.h" - -#include "vp9/encoder/vp9_variance.h" - -void variance(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int w, int h, unsigned int *sse, int *sum) { - int i, j; - - *sum = 0; - *sse = 0; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const int diff = a[j] - b[j]; - *sum += diff; - *sse += diff * diff; - } - - a += a_stride; - b += b_stride; - } -} - -// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal -// or vertical direction to produce the filtered output block. 
Used to implement -// first-pass of 2-D separable filter. -// -// Produces int32_t output to retain precision for next pass. Two filter taps -// should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is -// applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It -// defines the offset required to move from one input to the next. -static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, - uint16_t *output_ptr, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const int16_t *vp9_filter) { - unsigned int i, j; - - for (i = 0; i < output_height; i++) { - for (j = 0; j < output_width; j++) { - output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + - (int)src_ptr[pixel_step] * vp9_filter[1], - FILTER_BITS); - - src_ptr++; - } - - // Next row... - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal -// or vertical direction to produce the filtered output block. Used to implement -// second-pass of 2-D separable filter. -// -// Requires 32-bit input as produced by filter_block2d_bil_first_pass. Two -// filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the -// filter is applied horizontally (pixel_step=1) or vertically (pixel_step= -// stride). It defines the offset required to move from one input to the next. 
-static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, - uint8_t *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const int16_t *vp9_filter) { - unsigned int i, j; - - for (i = 0; i < output_height; i++) { - for (j = 0; j < output_width; j++) { - output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + - (int)src_ptr[pixel_step] * vp9_filter[1], - FILTER_BITS); - src_ptr++; - } - - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { - unsigned int i, sum = 0; - - for (i = 0; i < 256; ++i) { - sum += src_ptr[i] * src_ptr[i]; - } - - return sum; -} - -#define VAR(W, H) \ -unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} - -#define SUBPIX_VAR(W, H) \ -unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, \ - int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, \ - unsigned int *sse) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint8_t temp2[H * W]; \ -\ - var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ - BILINEAR_FILTERS_2TAP(xoffset)); \ - var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ -\ - return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ -} - -#define SUBPIX_AVG_VAR(W, H) \ -unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, \ - int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, \ - unsigned int *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint8_t temp2[H * W]; \ - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, H * W); \ -\ - 
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ - BILINEAR_FILTERS_2TAP(xoffset)); \ - var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ -\ - vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ -\ - return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ -} - -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); -} - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); -} - -unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum); - return *sse; -} - -unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum); - return *sse; -} - -unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum); - return *sse; -} - -unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum); - return *sse; -} - -VAR(4, 4) -SUBPIX_VAR(4, 4) -SUBPIX_AVG_VAR(4, 4) - -VAR(4, 8) -SUBPIX_VAR(4, 8) -SUBPIX_AVG_VAR(4, 8) - -VAR(8, 4) -SUBPIX_VAR(8, 4) -SUBPIX_AVG_VAR(8, 4) - -VAR(8, 8) -SUBPIX_VAR(8, 8) -SUBPIX_AVG_VAR(8, 8) - -VAR(8, 16) -SUBPIX_VAR(8, 16) -SUBPIX_AVG_VAR(8, 16) - -VAR(16, 8) -SUBPIX_VAR(16, 8) -SUBPIX_AVG_VAR(16, 8) - 
-VAR(16, 16) -SUBPIX_VAR(16, 16) -SUBPIX_AVG_VAR(16, 16) - -VAR(16, 32) -SUBPIX_VAR(16, 32) -SUBPIX_AVG_VAR(16, 32) - -VAR(32, 16) -SUBPIX_VAR(32, 16) -SUBPIX_AVG_VAR(32, 16) - -VAR(32, 32) -SUBPIX_VAR(32, 32) -SUBPIX_AVG_VAR(32, 32) - -VAR(32, 64) -SUBPIX_VAR(32, 64) -SUBPIX_AVG_VAR(32, 64) - -VAR(64, 32) -SUBPIX_VAR(64, 32) -SUBPIX_AVG_VAR(64, 32) - -VAR(64, 64) -SUBPIX_VAR(64, 64) -SUBPIX_AVG_VAR(64, 64) - -void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int ref_stride) { - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - const int tmp = pred[j] + ref[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void high_variance64(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, uint64_t *sse, - uint64_t *sum) { - int i, j; - - uint16_t *a = CONVERT_TO_SHORTPTR(a8); - uint16_t *b = CONVERT_TO_SHORTPTR(b8); - *sum = 0; - *sse = 0; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const int diff = a[j] - b[j]; - *sum += diff; - *sse += diff * diff; - } - a += a_stride; - b += b_stride; - } -} - -void high_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - high_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sse = sse_long; - *sum = sum_long; -} - -void high_10_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - high_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sum = ROUND_POWER_OF_TWO(sum_long, 2); - *sse = ROUND_POWER_OF_TWO(sse_long, 4); -} - -void high_12_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, 
int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - high_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sum = ROUND_POWER_OF_TWO(sum_long, 4); - *sse = ROUND_POWER_OF_TWO(sse_long, 8); -} - -static void high_var_filter_block2d_bil_first_pass( - const uint8_t *src_ptr8, - uint16_t *output_ptr, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const int16_t *vp9_filter) { - unsigned int i, j; - uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); - for (i = 0; i < output_height; i++) { - for (j = 0; j < output_width; j++) { - output_ptr[j] = - ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + - (int)src_ptr[pixel_step] * vp9_filter[1], - FILTER_BITS); - - src_ptr++; - } - - // Next row... - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -static void high_var_filter_block2d_bil_second_pass( - const uint16_t *src_ptr, - uint16_t *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const int16_t *vp9_filter) { - unsigned int i, j; - - for (i = 0; i < output_height; i++) { - for (j = 0; j < output_width; j++) { - output_ptr[j] = - ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + - (int)src_ptr[pixel_step] * vp9_filter[1], - FILTER_BITS); - src_ptr++; - } - - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -#define HIGH_VAR(W, H) \ -unsigned int vp9_high_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - high_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} \ -\ -unsigned int vp9_high_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - high_10_variance(a, 
a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} \ -\ -unsigned int vp9_high_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - high_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} - -#define HIGH_SUBPIX_VAR(W, H) \ -unsigned int vp9_high_sub_pixel_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, \ - int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, \ - unsigned int *sse) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ -\ - high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ - high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ -\ - return vp9_high_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ - dst_stride, sse); \ -} \ -\ -unsigned int vp9_high_10_sub_pixel_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, \ - int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, \ - unsigned int *sse) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ -\ - high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ - high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ -\ - return vp9_high_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ - dst_stride, sse); \ -} \ -\ -unsigned int vp9_high_12_sub_pixel_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, \ - int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, \ - unsigned int *sse) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ -\ - high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ - 
high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ -\ - return vp9_high_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ - dst_stride, sse); \ -} - -#define HIGH_SUBPIX_AVG_VAR(W, H) \ -unsigned int vp9_high_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, \ - int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, \ - unsigned int *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \ -\ - high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ - high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ -\ - vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \ - W); \ -\ - return vp9_high_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ - dst_stride, sse); \ -} \ -\ -unsigned int vp9_high_10_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, \ - int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, \ - unsigned int *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \ -\ - high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ - high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ -\ - vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \ - W); \ -\ - return vp9_high_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ - dst_stride, sse); \ -} \ -\ -unsigned int vp9_high_12_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, \ - int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, \ - unsigned int 
*sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \ -\ - high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ - high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ -\ - vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \ - W); \ -\ - return vp9_high_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ - dst_stride, sse); \ -} - -#define HIGH_GET_VAR(S) \ -void vp9_high_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - high_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} \ -\ -void vp9_high_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - high_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} \ -\ -void vp9_high_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - high_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} - -#define HIGH_MSE(W, H) \ -unsigned int vp9_high_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - high_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} \ -\ -unsigned int vp9_high_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - high_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} \ -\ -unsigned int vp9_high_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - 
high_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} - -HIGH_GET_VAR(8) -HIGH_GET_VAR(16) - -HIGH_MSE(16, 16) -HIGH_MSE(16, 8) -HIGH_MSE(8, 16) -HIGH_MSE(8, 8) - -HIGH_VAR(4, 4) -HIGH_SUBPIX_VAR(4, 4) -HIGH_SUBPIX_AVG_VAR(4, 4) - -HIGH_VAR(4, 8) -HIGH_SUBPIX_VAR(4, 8) -HIGH_SUBPIX_AVG_VAR(4, 8) - -HIGH_VAR(8, 4) -HIGH_SUBPIX_VAR(8, 4) -HIGH_SUBPIX_AVG_VAR(8, 4) - -HIGH_VAR(8, 8) -HIGH_SUBPIX_VAR(8, 8) -HIGH_SUBPIX_AVG_VAR(8, 8) - -HIGH_VAR(8, 16) -HIGH_SUBPIX_VAR(8, 16) -HIGH_SUBPIX_AVG_VAR(8, 16) - -HIGH_VAR(16, 8) -HIGH_SUBPIX_VAR(16, 8) -HIGH_SUBPIX_AVG_VAR(16, 8) - -HIGH_VAR(16, 16) -HIGH_SUBPIX_VAR(16, 16) -HIGH_SUBPIX_AVG_VAR(16, 16) - -HIGH_VAR(16, 32) -HIGH_SUBPIX_VAR(16, 32) -HIGH_SUBPIX_AVG_VAR(16, 32) - -HIGH_VAR(32, 16) -HIGH_SUBPIX_VAR(32, 16) -HIGH_SUBPIX_AVG_VAR(32, 16) - -HIGH_VAR(32, 32) -HIGH_SUBPIX_VAR(32, 32) -HIGH_SUBPIX_AVG_VAR(32, 32) - -HIGH_VAR(32, 64) -HIGH_SUBPIX_VAR(32, 64) -HIGH_SUBPIX_AVG_VAR(32, 64) - -HIGH_VAR(64, 32) -HIGH_SUBPIX_VAR(64, 32) -HIGH_SUBPIX_AVG_VAR(64, 32) - -HIGH_VAR(64, 64) -HIGH_SUBPIX_VAR(64, 64) -HIGH_SUBPIX_AVG_VAR(64, 64) - -void vp9_high_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, - int width, int height, const uint8_t *ref8, - int ref_stride) { - int i, j; - uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - const int tmp = pred[j] + ref[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/vp9/encoder/vp9_write_bit_buffer.c b/media/libvpx/vp9/encoder/vp9_write_bit_buffer.c deleted file mode 100644 index 6d55e84e869..00000000000 --- a/media/libvpx/vp9/encoder/vp9_write_bit_buffer.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "vp9/encoder/vp9_write_bit_buffer.h" - -size_t vp9_wb_bytes_written(const struct vp9_write_bit_buffer *wb) { - return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); -} - -void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { - const int off = (int)wb->bit_offset; - const int p = off / CHAR_BIT; - const int q = CHAR_BIT - 1 - off % CHAR_BIT; - if (q == CHAR_BIT -1) { - wb->bit_buffer[p] = bit << q; - } else { - wb->bit_buffer[p] &= ~(1 << q); - wb->bit_buffer[p] |= bit << q; - } - wb->bit_offset = off + 1; -} - -void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits) { - int bit; - for (bit = bits - 1; bit >= 0; bit--) - vp9_wb_write_bit(wb, (data >> bit) & 1); -} diff --git a/media/libvpx/vp9/encoder/vp9_writer.c b/media/libvpx/vp9/encoder/vp9_writer.c deleted file mode 100644 index ff461f218a4..00000000000 --- a/media/libvpx/vp9/encoder/vp9_writer.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include "vp9/encoder/vp9_writer.h" -#include "vp9/common/vp9_entropy.h" - -void vp9_start_encode(vp9_writer *br, uint8_t *source) { - br->lowvalue = 0; - br->range = 255; - br->count = -24; - br->buffer = source; - br->pos = 0; - vp9_write_bit(br, 0); -} - -void vp9_stop_encode(vp9_writer *br) { - int i; - - for (i = 0; i < 32; i++) - vp9_write_bit(br, 0); - - // Ensure there's no ambigous collision with any index marker bytes - if ((br->buffer[br->pos - 1] & 0xe0) == 0xc0) - br->buffer[br->pos++] = 0; -} - diff --git a/media/libvpx/vp9/encoder/vp9_writer.h b/media/libvpx/vp9/encoder/vp9_writer.h deleted file mode 100644 index 9d161f95cf6..00000000000 --- a/media/libvpx/vp9/encoder/vp9_writer.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_ENCODER_VP9_WRITER_H_ -#define VP9_ENCODER_VP9_WRITER_H_ - -#include "vpx_ports/mem.h" - -#include "vp9/common/vp9_prob.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - unsigned int lowvalue; - unsigned int range; - int count; - unsigned int pos; - uint8_t *buffer; -} vp9_writer; - -void vp9_start_encode(vp9_writer *bc, uint8_t *buffer); -void vp9_stop_encode(vp9_writer *bc); - -static INLINE void vp9_write(vp9_writer *br, int bit, int probability) { - unsigned int split; - int count = br->count; - unsigned int range = br->range; - unsigned int lowvalue = br->lowvalue; - register unsigned int shift; - - split = 1 + (((range - 1) * probability) >> 8); - - range = split; - - if (bit) { - lowvalue += split; - range = br->range - split; - } - - shift = vp9_norm[range]; - - range <<= shift; - count += shift; - - if (count >= 0) { - int offset = shift - count; - - if ((lowvalue << (offset - 1)) & 0x80000000) { - int x = br->pos - 1; - - while (x >= 0 && br->buffer[x] == 0xff) { - br->buffer[x] = 0; - x--; - } - - br->buffer[x] += 1; - } - - br->buffer[br->pos++] = (lowvalue >> (24 - offset)); - lowvalue <<= offset; - shift = count; - lowvalue &= 0xffffff; - count -= 8; - } - - lowvalue <<= shift; - br->count = count; - br->lowvalue = lowvalue; - br->range = range; -} - -static INLINE void vp9_write_bit(vp9_writer *w, int bit) { - vp9_write(w, bit, 128); // vp9_prob_half -} - -static INLINE void vp9_write_literal(vp9_writer *w, int data, int bits) { - int bit; - - for (bit = bits - 1; bit >= 0; bit--) - vp9_write_bit(w, 1 & (data >> bit)); -} - -#define vp9_write_prob(w, v) vp9_write_literal((w), (v), 8) - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_WRITER_H_ diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c deleted file mode 100644 index 9ea22fed2b7..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c +++ /dev/null @@ -1,2710 
+0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include // AVX2 -#include "vp9/common/vp9_idct.h" // for cospi constants -#include "vpx_ports/mem.h" - -#define pair256_set_epi16(a, b) \ - _mm256_set_epi16(b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a) - -#define pair256_set_epi32(a, b) \ - _mm256_set_epi32(b, a, b, a, b, a, b, a) - - - - -#if FDCT32x32_HIGH_PRECISION -static INLINE __m256i k_madd_epi32_avx2(__m256i a, __m256i b) { - __m256i buf0, buf1; - buf0 = _mm256_mul_epu32(a, b); - a = _mm256_srli_epi64(a, 32); - b = _mm256_srli_epi64(b, 32); - buf1 = _mm256_mul_epu32(a, b); - return _mm256_add_epi64(buf0, buf1); -} - -static INLINE __m256i k_packs_epi64_avx2(__m256i a, __m256i b) { - __m256i buf0 = _mm256_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0)); - __m256i buf1 = _mm256_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0)); - return _mm256_unpacklo_epi64(buf0, buf1); -} -#endif - -void FDCT32x32_2D_AVX2(const int16_t *input, - int16_t *output_org, int stride) { - // Calculate pre-multiplied strides - const int str1 = stride; - const int str2 = 2 * stride; - const int str3 = 2 * stride + str1; - // We need an intermediate buffer between passes. - DECLARE_ALIGNED(32, int16_t, intermediate[32 * 32]); - // Constants - // When we use them, in one case, they are all the same. In all others - // it's a pair of them that we need to repeat four times. This is done - // by constructing the 32 bit constant corresponding to that pair. 
- const __m256i k__cospi_p16_p16 = _mm256_set1_epi16(+cospi_16_64); - const __m256i k__cospi_p16_m16 = pair256_set_epi16(+cospi_16_64, -cospi_16_64); - const __m256i k__cospi_m08_p24 = pair256_set_epi16(-cospi_8_64, cospi_24_64); - const __m256i k__cospi_m24_m08 = pair256_set_epi16(-cospi_24_64, -cospi_8_64); - const __m256i k__cospi_p24_p08 = pair256_set_epi16(+cospi_24_64, cospi_8_64); - const __m256i k__cospi_p12_p20 = pair256_set_epi16(+cospi_12_64, cospi_20_64); - const __m256i k__cospi_m20_p12 = pair256_set_epi16(-cospi_20_64, cospi_12_64); - const __m256i k__cospi_m04_p28 = pair256_set_epi16(-cospi_4_64, cospi_28_64); - const __m256i k__cospi_p28_p04 = pair256_set_epi16(+cospi_28_64, cospi_4_64); - const __m256i k__cospi_m28_m04 = pair256_set_epi16(-cospi_28_64, -cospi_4_64); - const __m256i k__cospi_m12_m20 = pair256_set_epi16(-cospi_12_64, -cospi_20_64); - const __m256i k__cospi_p30_p02 = pair256_set_epi16(+cospi_30_64, cospi_2_64); - const __m256i k__cospi_p14_p18 = pair256_set_epi16(+cospi_14_64, cospi_18_64); - const __m256i k__cospi_p22_p10 = pair256_set_epi16(+cospi_22_64, cospi_10_64); - const __m256i k__cospi_p06_p26 = pair256_set_epi16(+cospi_6_64, cospi_26_64); - const __m256i k__cospi_m26_p06 = pair256_set_epi16(-cospi_26_64, cospi_6_64); - const __m256i k__cospi_m10_p22 = pair256_set_epi16(-cospi_10_64, cospi_22_64); - const __m256i k__cospi_m18_p14 = pair256_set_epi16(-cospi_18_64, cospi_14_64); - const __m256i k__cospi_m02_p30 = pair256_set_epi16(-cospi_2_64, cospi_30_64); - const __m256i k__cospi_p31_p01 = pair256_set_epi16(+cospi_31_64, cospi_1_64); - const __m256i k__cospi_p15_p17 = pair256_set_epi16(+cospi_15_64, cospi_17_64); - const __m256i k__cospi_p23_p09 = pair256_set_epi16(+cospi_23_64, cospi_9_64); - const __m256i k__cospi_p07_p25 = pair256_set_epi16(+cospi_7_64, cospi_25_64); - const __m256i k__cospi_m25_p07 = pair256_set_epi16(-cospi_25_64, cospi_7_64); - const __m256i k__cospi_m09_p23 = pair256_set_epi16(-cospi_9_64, 
cospi_23_64); - const __m256i k__cospi_m17_p15 = pair256_set_epi16(-cospi_17_64, cospi_15_64); - const __m256i k__cospi_m01_p31 = pair256_set_epi16(-cospi_1_64, cospi_31_64); - const __m256i k__cospi_p27_p05 = pair256_set_epi16(+cospi_27_64, cospi_5_64); - const __m256i k__cospi_p11_p21 = pair256_set_epi16(+cospi_11_64, cospi_21_64); - const __m256i k__cospi_p19_p13 = pair256_set_epi16(+cospi_19_64, cospi_13_64); - const __m256i k__cospi_p03_p29 = pair256_set_epi16(+cospi_3_64, cospi_29_64); - const __m256i k__cospi_m29_p03 = pair256_set_epi16(-cospi_29_64, cospi_3_64); - const __m256i k__cospi_m13_p19 = pair256_set_epi16(-cospi_13_64, cospi_19_64); - const __m256i k__cospi_m21_p11 = pair256_set_epi16(-cospi_21_64, cospi_11_64); - const __m256i k__cospi_m05_p27 = pair256_set_epi16(-cospi_5_64, cospi_27_64); - const __m256i k__DCT_CONST_ROUNDING = _mm256_set1_epi32(DCT_CONST_ROUNDING); - const __m256i kZero = _mm256_set1_epi16(0); - const __m256i kOne = _mm256_set1_epi16(1); - // Do the two transform/transpose passes - int pass; - for (pass = 0; pass < 2; ++pass) { - // We process sixteen columns (transposed rows in second pass) at a time. - int column_start; - for (column_start = 0; column_start < 32; column_start += 16) { - __m256i step1[32]; - __m256i step2[32]; - __m256i step3[32]; - __m256i out[32]; - // Stage 1 - // Note: even though all the loads below are aligned, using the aligned - // intrinsic make the code slightly slower. - if (0 == pass) { - const int16_t *in = &input[column_start]; - // step1[i] = (in[ 0 * stride] + in[(32 - 1) * stride]) << 2; - // Note: the next four blocks could be in a loop. That would help the - // instruction cache but is actually slower. 
- { - const int16_t *ina = in + 0 * str1; - const int16_t *inb = in + 31 * str1; - __m256i *step1a = &step1[ 0]; - __m256i *step1b = &step1[31]; - const __m256i ina0 = _mm256_loadu_si256((const __m256i *)(ina)); - const __m256i ina1 = _mm256_loadu_si256((const __m256i *)(ina + str1)); - const __m256i ina2 = _mm256_loadu_si256((const __m256i *)(ina + str2)); - const __m256i ina3 = _mm256_loadu_si256((const __m256i *)(ina + str3)); - const __m256i inb3 = _mm256_loadu_si256((const __m256i *)(inb - str3)); - const __m256i inb2 = _mm256_loadu_si256((const __m256i *)(inb - str2)); - const __m256i inb1 = _mm256_loadu_si256((const __m256i *)(inb - str1)); - const __m256i inb0 = _mm256_loadu_si256((const __m256i *)(inb)); - step1a[ 0] = _mm256_add_epi16(ina0, inb0); - step1a[ 1] = _mm256_add_epi16(ina1, inb1); - step1a[ 2] = _mm256_add_epi16(ina2, inb2); - step1a[ 3] = _mm256_add_epi16(ina3, inb3); - step1b[-3] = _mm256_sub_epi16(ina3, inb3); - step1b[-2] = _mm256_sub_epi16(ina2, inb2); - step1b[-1] = _mm256_sub_epi16(ina1, inb1); - step1b[-0] = _mm256_sub_epi16(ina0, inb0); - step1a[ 0] = _mm256_slli_epi16(step1a[ 0], 2); - step1a[ 1] = _mm256_slli_epi16(step1a[ 1], 2); - step1a[ 2] = _mm256_slli_epi16(step1a[ 2], 2); - step1a[ 3] = _mm256_slli_epi16(step1a[ 3], 2); - step1b[-3] = _mm256_slli_epi16(step1b[-3], 2); - step1b[-2] = _mm256_slli_epi16(step1b[-2], 2); - step1b[-1] = _mm256_slli_epi16(step1b[-1], 2); - step1b[-0] = _mm256_slli_epi16(step1b[-0], 2); - } - { - const int16_t *ina = in + 4 * str1; - const int16_t *inb = in + 27 * str1; - __m256i *step1a = &step1[ 4]; - __m256i *step1b = &step1[27]; - const __m256i ina0 = _mm256_loadu_si256((const __m256i *)(ina)); - const __m256i ina1 = _mm256_loadu_si256((const __m256i *)(ina + str1)); - const __m256i ina2 = _mm256_loadu_si256((const __m256i *)(ina + str2)); - const __m256i ina3 = _mm256_loadu_si256((const __m256i *)(ina + str3)); - const __m256i inb3 = _mm256_loadu_si256((const __m256i *)(inb - str3)); - const 
__m256i inb2 = _mm256_loadu_si256((const __m256i *)(inb - str2)); - const __m256i inb1 = _mm256_loadu_si256((const __m256i *)(inb - str1)); - const __m256i inb0 = _mm256_loadu_si256((const __m256i *)(inb)); - step1a[ 0] = _mm256_add_epi16(ina0, inb0); - step1a[ 1] = _mm256_add_epi16(ina1, inb1); - step1a[ 2] = _mm256_add_epi16(ina2, inb2); - step1a[ 3] = _mm256_add_epi16(ina3, inb3); - step1b[-3] = _mm256_sub_epi16(ina3, inb3); - step1b[-2] = _mm256_sub_epi16(ina2, inb2); - step1b[-1] = _mm256_sub_epi16(ina1, inb1); - step1b[-0] = _mm256_sub_epi16(ina0, inb0); - step1a[ 0] = _mm256_slli_epi16(step1a[ 0], 2); - step1a[ 1] = _mm256_slli_epi16(step1a[ 1], 2); - step1a[ 2] = _mm256_slli_epi16(step1a[ 2], 2); - step1a[ 3] = _mm256_slli_epi16(step1a[ 3], 2); - step1b[-3] = _mm256_slli_epi16(step1b[-3], 2); - step1b[-2] = _mm256_slli_epi16(step1b[-2], 2); - step1b[-1] = _mm256_slli_epi16(step1b[-1], 2); - step1b[-0] = _mm256_slli_epi16(step1b[-0], 2); - } - { - const int16_t *ina = in + 8 * str1; - const int16_t *inb = in + 23 * str1; - __m256i *step1a = &step1[ 8]; - __m256i *step1b = &step1[23]; - const __m256i ina0 = _mm256_loadu_si256((const __m256i *)(ina)); - const __m256i ina1 = _mm256_loadu_si256((const __m256i *)(ina + str1)); - const __m256i ina2 = _mm256_loadu_si256((const __m256i *)(ina + str2)); - const __m256i ina3 = _mm256_loadu_si256((const __m256i *)(ina + str3)); - const __m256i inb3 = _mm256_loadu_si256((const __m256i *)(inb - str3)); - const __m256i inb2 = _mm256_loadu_si256((const __m256i *)(inb - str2)); - const __m256i inb1 = _mm256_loadu_si256((const __m256i *)(inb - str1)); - const __m256i inb0 = _mm256_loadu_si256((const __m256i *)(inb)); - step1a[ 0] = _mm256_add_epi16(ina0, inb0); - step1a[ 1] = _mm256_add_epi16(ina1, inb1); - step1a[ 2] = _mm256_add_epi16(ina2, inb2); - step1a[ 3] = _mm256_add_epi16(ina3, inb3); - step1b[-3] = _mm256_sub_epi16(ina3, inb3); - step1b[-2] = _mm256_sub_epi16(ina2, inb2); - step1b[-1] = _mm256_sub_epi16(ina1, 
inb1); - step1b[-0] = _mm256_sub_epi16(ina0, inb0); - step1a[ 0] = _mm256_slli_epi16(step1a[ 0], 2); - step1a[ 1] = _mm256_slli_epi16(step1a[ 1], 2); - step1a[ 2] = _mm256_slli_epi16(step1a[ 2], 2); - step1a[ 3] = _mm256_slli_epi16(step1a[ 3], 2); - step1b[-3] = _mm256_slli_epi16(step1b[-3], 2); - step1b[-2] = _mm256_slli_epi16(step1b[-2], 2); - step1b[-1] = _mm256_slli_epi16(step1b[-1], 2); - step1b[-0] = _mm256_slli_epi16(step1b[-0], 2); - } - { - const int16_t *ina = in + 12 * str1; - const int16_t *inb = in + 19 * str1; - __m256i *step1a = &step1[12]; - __m256i *step1b = &step1[19]; - const __m256i ina0 = _mm256_loadu_si256((const __m256i *)(ina)); - const __m256i ina1 = _mm256_loadu_si256((const __m256i *)(ina + str1)); - const __m256i ina2 = _mm256_loadu_si256((const __m256i *)(ina + str2)); - const __m256i ina3 = _mm256_loadu_si256((const __m256i *)(ina + str3)); - const __m256i inb3 = _mm256_loadu_si256((const __m256i *)(inb - str3)); - const __m256i inb2 = _mm256_loadu_si256((const __m256i *)(inb - str2)); - const __m256i inb1 = _mm256_loadu_si256((const __m256i *)(inb - str1)); - const __m256i inb0 = _mm256_loadu_si256((const __m256i *)(inb)); - step1a[ 0] = _mm256_add_epi16(ina0, inb0); - step1a[ 1] = _mm256_add_epi16(ina1, inb1); - step1a[ 2] = _mm256_add_epi16(ina2, inb2); - step1a[ 3] = _mm256_add_epi16(ina3, inb3); - step1b[-3] = _mm256_sub_epi16(ina3, inb3); - step1b[-2] = _mm256_sub_epi16(ina2, inb2); - step1b[-1] = _mm256_sub_epi16(ina1, inb1); - step1b[-0] = _mm256_sub_epi16(ina0, inb0); - step1a[ 0] = _mm256_slli_epi16(step1a[ 0], 2); - step1a[ 1] = _mm256_slli_epi16(step1a[ 1], 2); - step1a[ 2] = _mm256_slli_epi16(step1a[ 2], 2); - step1a[ 3] = _mm256_slli_epi16(step1a[ 3], 2); - step1b[-3] = _mm256_slli_epi16(step1b[-3], 2); - step1b[-2] = _mm256_slli_epi16(step1b[-2], 2); - step1b[-1] = _mm256_slli_epi16(step1b[-1], 2); - step1b[-0] = _mm256_slli_epi16(step1b[-0], 2); - } - } else { - int16_t *in = &intermediate[column_start]; - // step1[i] = 
in[ 0 * 32] + in[(32 - 1) * 32]; - // Note: using the same approach as above to have common offset is - // counter-productive as all offsets can be calculated at compile - // time. - // Note: the next four blocks could be in a loop. That would help the - // instruction cache but is actually slower. - { - __m256i in00 = _mm256_loadu_si256((const __m256i *)(in + 0 * 32)); - __m256i in01 = _mm256_loadu_si256((const __m256i *)(in + 1 * 32)); - __m256i in02 = _mm256_loadu_si256((const __m256i *)(in + 2 * 32)); - __m256i in03 = _mm256_loadu_si256((const __m256i *)(in + 3 * 32)); - __m256i in28 = _mm256_loadu_si256((const __m256i *)(in + 28 * 32)); - __m256i in29 = _mm256_loadu_si256((const __m256i *)(in + 29 * 32)); - __m256i in30 = _mm256_loadu_si256((const __m256i *)(in + 30 * 32)); - __m256i in31 = _mm256_loadu_si256((const __m256i *)(in + 31 * 32)); - step1[ 0] = _mm256_add_epi16(in00, in31); - step1[ 1] = _mm256_add_epi16(in01, in30); - step1[ 2] = _mm256_add_epi16(in02, in29); - step1[ 3] = _mm256_add_epi16(in03, in28); - step1[28] = _mm256_sub_epi16(in03, in28); - step1[29] = _mm256_sub_epi16(in02, in29); - step1[30] = _mm256_sub_epi16(in01, in30); - step1[31] = _mm256_sub_epi16(in00, in31); - } - { - __m256i in04 = _mm256_loadu_si256((const __m256i *)(in + 4 * 32)); - __m256i in05 = _mm256_loadu_si256((const __m256i *)(in + 5 * 32)); - __m256i in06 = _mm256_loadu_si256((const __m256i *)(in + 6 * 32)); - __m256i in07 = _mm256_loadu_si256((const __m256i *)(in + 7 * 32)); - __m256i in24 = _mm256_loadu_si256((const __m256i *)(in + 24 * 32)); - __m256i in25 = _mm256_loadu_si256((const __m256i *)(in + 25 * 32)); - __m256i in26 = _mm256_loadu_si256((const __m256i *)(in + 26 * 32)); - __m256i in27 = _mm256_loadu_si256((const __m256i *)(in + 27 * 32)); - step1[ 4] = _mm256_add_epi16(in04, in27); - step1[ 5] = _mm256_add_epi16(in05, in26); - step1[ 6] = _mm256_add_epi16(in06, in25); - step1[ 7] = _mm256_add_epi16(in07, in24); - step1[24] = _mm256_sub_epi16(in07, in24); - 
step1[25] = _mm256_sub_epi16(in06, in25); - step1[26] = _mm256_sub_epi16(in05, in26); - step1[27] = _mm256_sub_epi16(in04, in27); - } - { - __m256i in08 = _mm256_loadu_si256((const __m256i *)(in + 8 * 32)); - __m256i in09 = _mm256_loadu_si256((const __m256i *)(in + 9 * 32)); - __m256i in10 = _mm256_loadu_si256((const __m256i *)(in + 10 * 32)); - __m256i in11 = _mm256_loadu_si256((const __m256i *)(in + 11 * 32)); - __m256i in20 = _mm256_loadu_si256((const __m256i *)(in + 20 * 32)); - __m256i in21 = _mm256_loadu_si256((const __m256i *)(in + 21 * 32)); - __m256i in22 = _mm256_loadu_si256((const __m256i *)(in + 22 * 32)); - __m256i in23 = _mm256_loadu_si256((const __m256i *)(in + 23 * 32)); - step1[ 8] = _mm256_add_epi16(in08, in23); - step1[ 9] = _mm256_add_epi16(in09, in22); - step1[10] = _mm256_add_epi16(in10, in21); - step1[11] = _mm256_add_epi16(in11, in20); - step1[20] = _mm256_sub_epi16(in11, in20); - step1[21] = _mm256_sub_epi16(in10, in21); - step1[22] = _mm256_sub_epi16(in09, in22); - step1[23] = _mm256_sub_epi16(in08, in23); - } - { - __m256i in12 = _mm256_loadu_si256((const __m256i *)(in + 12 * 32)); - __m256i in13 = _mm256_loadu_si256((const __m256i *)(in + 13 * 32)); - __m256i in14 = _mm256_loadu_si256((const __m256i *)(in + 14 * 32)); - __m256i in15 = _mm256_loadu_si256((const __m256i *)(in + 15 * 32)); - __m256i in16 = _mm256_loadu_si256((const __m256i *)(in + 16 * 32)); - __m256i in17 = _mm256_loadu_si256((const __m256i *)(in + 17 * 32)); - __m256i in18 = _mm256_loadu_si256((const __m256i *)(in + 18 * 32)); - __m256i in19 = _mm256_loadu_si256((const __m256i *)(in + 19 * 32)); - step1[12] = _mm256_add_epi16(in12, in19); - step1[13] = _mm256_add_epi16(in13, in18); - step1[14] = _mm256_add_epi16(in14, in17); - step1[15] = _mm256_add_epi16(in15, in16); - step1[16] = _mm256_sub_epi16(in15, in16); - step1[17] = _mm256_sub_epi16(in14, in17); - step1[18] = _mm256_sub_epi16(in13, in18); - step1[19] = _mm256_sub_epi16(in12, in19); - } - } - // Stage 2 - { - 
step2[ 0] = _mm256_add_epi16(step1[0], step1[15]); - step2[ 1] = _mm256_add_epi16(step1[1], step1[14]); - step2[ 2] = _mm256_add_epi16(step1[2], step1[13]); - step2[ 3] = _mm256_add_epi16(step1[3], step1[12]); - step2[ 4] = _mm256_add_epi16(step1[4], step1[11]); - step2[ 5] = _mm256_add_epi16(step1[5], step1[10]); - step2[ 6] = _mm256_add_epi16(step1[6], step1[ 9]); - step2[ 7] = _mm256_add_epi16(step1[7], step1[ 8]); - step2[ 8] = _mm256_sub_epi16(step1[7], step1[ 8]); - step2[ 9] = _mm256_sub_epi16(step1[6], step1[ 9]); - step2[10] = _mm256_sub_epi16(step1[5], step1[10]); - step2[11] = _mm256_sub_epi16(step1[4], step1[11]); - step2[12] = _mm256_sub_epi16(step1[3], step1[12]); - step2[13] = _mm256_sub_epi16(step1[2], step1[13]); - step2[14] = _mm256_sub_epi16(step1[1], step1[14]); - step2[15] = _mm256_sub_epi16(step1[0], step1[15]); - } - { - const __m256i s2_20_0 = _mm256_unpacklo_epi16(step1[27], step1[20]); - const __m256i s2_20_1 = _mm256_unpackhi_epi16(step1[27], step1[20]); - const __m256i s2_21_0 = _mm256_unpacklo_epi16(step1[26], step1[21]); - const __m256i s2_21_1 = _mm256_unpackhi_epi16(step1[26], step1[21]); - const __m256i s2_22_0 = _mm256_unpacklo_epi16(step1[25], step1[22]); - const __m256i s2_22_1 = _mm256_unpackhi_epi16(step1[25], step1[22]); - const __m256i s2_23_0 = _mm256_unpacklo_epi16(step1[24], step1[23]); - const __m256i s2_23_1 = _mm256_unpackhi_epi16(step1[24], step1[23]); - const __m256i s2_20_2 = _mm256_madd_epi16(s2_20_0, k__cospi_p16_m16); - const __m256i s2_20_3 = _mm256_madd_epi16(s2_20_1, k__cospi_p16_m16); - const __m256i s2_21_2 = _mm256_madd_epi16(s2_21_0, k__cospi_p16_m16); - const __m256i s2_21_3 = _mm256_madd_epi16(s2_21_1, k__cospi_p16_m16); - const __m256i s2_22_2 = _mm256_madd_epi16(s2_22_0, k__cospi_p16_m16); - const __m256i s2_22_3 = _mm256_madd_epi16(s2_22_1, k__cospi_p16_m16); - const __m256i s2_23_2 = _mm256_madd_epi16(s2_23_0, k__cospi_p16_m16); - const __m256i s2_23_3 = _mm256_madd_epi16(s2_23_1, k__cospi_p16_m16); - 
const __m256i s2_24_2 = _mm256_madd_epi16(s2_23_0, k__cospi_p16_p16); - const __m256i s2_24_3 = _mm256_madd_epi16(s2_23_1, k__cospi_p16_p16); - const __m256i s2_25_2 = _mm256_madd_epi16(s2_22_0, k__cospi_p16_p16); - const __m256i s2_25_3 = _mm256_madd_epi16(s2_22_1, k__cospi_p16_p16); - const __m256i s2_26_2 = _mm256_madd_epi16(s2_21_0, k__cospi_p16_p16); - const __m256i s2_26_3 = _mm256_madd_epi16(s2_21_1, k__cospi_p16_p16); - const __m256i s2_27_2 = _mm256_madd_epi16(s2_20_0, k__cospi_p16_p16); - const __m256i s2_27_3 = _mm256_madd_epi16(s2_20_1, k__cospi_p16_p16); - // dct_const_round_shift - const __m256i s2_20_4 = _mm256_add_epi32(s2_20_2, k__DCT_CONST_ROUNDING); - const __m256i s2_20_5 = _mm256_add_epi32(s2_20_3, k__DCT_CONST_ROUNDING); - const __m256i s2_21_4 = _mm256_add_epi32(s2_21_2, k__DCT_CONST_ROUNDING); - const __m256i s2_21_5 = _mm256_add_epi32(s2_21_3, k__DCT_CONST_ROUNDING); - const __m256i s2_22_4 = _mm256_add_epi32(s2_22_2, k__DCT_CONST_ROUNDING); - const __m256i s2_22_5 = _mm256_add_epi32(s2_22_3, k__DCT_CONST_ROUNDING); - const __m256i s2_23_4 = _mm256_add_epi32(s2_23_2, k__DCT_CONST_ROUNDING); - const __m256i s2_23_5 = _mm256_add_epi32(s2_23_3, k__DCT_CONST_ROUNDING); - const __m256i s2_24_4 = _mm256_add_epi32(s2_24_2, k__DCT_CONST_ROUNDING); - const __m256i s2_24_5 = _mm256_add_epi32(s2_24_3, k__DCT_CONST_ROUNDING); - const __m256i s2_25_4 = _mm256_add_epi32(s2_25_2, k__DCT_CONST_ROUNDING); - const __m256i s2_25_5 = _mm256_add_epi32(s2_25_3, k__DCT_CONST_ROUNDING); - const __m256i s2_26_4 = _mm256_add_epi32(s2_26_2, k__DCT_CONST_ROUNDING); - const __m256i s2_26_5 = _mm256_add_epi32(s2_26_3, k__DCT_CONST_ROUNDING); - const __m256i s2_27_4 = _mm256_add_epi32(s2_27_2, k__DCT_CONST_ROUNDING); - const __m256i s2_27_5 = _mm256_add_epi32(s2_27_3, k__DCT_CONST_ROUNDING); - const __m256i s2_20_6 = _mm256_srai_epi32(s2_20_4, DCT_CONST_BITS); - const __m256i s2_20_7 = _mm256_srai_epi32(s2_20_5, DCT_CONST_BITS); - const __m256i s2_21_6 = 
_mm256_srai_epi32(s2_21_4, DCT_CONST_BITS); - const __m256i s2_21_7 = _mm256_srai_epi32(s2_21_5, DCT_CONST_BITS); - const __m256i s2_22_6 = _mm256_srai_epi32(s2_22_4, DCT_CONST_BITS); - const __m256i s2_22_7 = _mm256_srai_epi32(s2_22_5, DCT_CONST_BITS); - const __m256i s2_23_6 = _mm256_srai_epi32(s2_23_4, DCT_CONST_BITS); - const __m256i s2_23_7 = _mm256_srai_epi32(s2_23_5, DCT_CONST_BITS); - const __m256i s2_24_6 = _mm256_srai_epi32(s2_24_4, DCT_CONST_BITS); - const __m256i s2_24_7 = _mm256_srai_epi32(s2_24_5, DCT_CONST_BITS); - const __m256i s2_25_6 = _mm256_srai_epi32(s2_25_4, DCT_CONST_BITS); - const __m256i s2_25_7 = _mm256_srai_epi32(s2_25_5, DCT_CONST_BITS); - const __m256i s2_26_6 = _mm256_srai_epi32(s2_26_4, DCT_CONST_BITS); - const __m256i s2_26_7 = _mm256_srai_epi32(s2_26_5, DCT_CONST_BITS); - const __m256i s2_27_6 = _mm256_srai_epi32(s2_27_4, DCT_CONST_BITS); - const __m256i s2_27_7 = _mm256_srai_epi32(s2_27_5, DCT_CONST_BITS); - // Combine - step2[20] = _mm256_packs_epi32(s2_20_6, s2_20_7); - step2[21] = _mm256_packs_epi32(s2_21_6, s2_21_7); - step2[22] = _mm256_packs_epi32(s2_22_6, s2_22_7); - step2[23] = _mm256_packs_epi32(s2_23_6, s2_23_7); - step2[24] = _mm256_packs_epi32(s2_24_6, s2_24_7); - step2[25] = _mm256_packs_epi32(s2_25_6, s2_25_7); - step2[26] = _mm256_packs_epi32(s2_26_6, s2_26_7); - step2[27] = _mm256_packs_epi32(s2_27_6, s2_27_7); - } - -#if !FDCT32x32_HIGH_PRECISION - // dump the magnitude by half, hence the intermediate values are within - // the range of 16 bits. 
- if (1 == pass) { - __m256i s3_00_0 = _mm256_cmpgt_epi16(kZero,step2[ 0]); - __m256i s3_01_0 = _mm256_cmpgt_epi16(kZero,step2[ 1]); - __m256i s3_02_0 = _mm256_cmpgt_epi16(kZero,step2[ 2]); - __m256i s3_03_0 = _mm256_cmpgt_epi16(kZero,step2[ 3]); - __m256i s3_04_0 = _mm256_cmpgt_epi16(kZero,step2[ 4]); - __m256i s3_05_0 = _mm256_cmpgt_epi16(kZero,step2[ 5]); - __m256i s3_06_0 = _mm256_cmpgt_epi16(kZero,step2[ 6]); - __m256i s3_07_0 = _mm256_cmpgt_epi16(kZero,step2[ 7]); - __m256i s2_08_0 = _mm256_cmpgt_epi16(kZero,step2[ 8]); - __m256i s2_09_0 = _mm256_cmpgt_epi16(kZero,step2[ 9]); - __m256i s3_10_0 = _mm256_cmpgt_epi16(kZero,step2[10]); - __m256i s3_11_0 = _mm256_cmpgt_epi16(kZero,step2[11]); - __m256i s3_12_0 = _mm256_cmpgt_epi16(kZero,step2[12]); - __m256i s3_13_0 = _mm256_cmpgt_epi16(kZero,step2[13]); - __m256i s2_14_0 = _mm256_cmpgt_epi16(kZero,step2[14]); - __m256i s2_15_0 = _mm256_cmpgt_epi16(kZero,step2[15]); - __m256i s3_16_0 = _mm256_cmpgt_epi16(kZero,step1[16]); - __m256i s3_17_0 = _mm256_cmpgt_epi16(kZero,step1[17]); - __m256i s3_18_0 = _mm256_cmpgt_epi16(kZero,step1[18]); - __m256i s3_19_0 = _mm256_cmpgt_epi16(kZero,step1[19]); - __m256i s3_20_0 = _mm256_cmpgt_epi16(kZero,step2[20]); - __m256i s3_21_0 = _mm256_cmpgt_epi16(kZero,step2[21]); - __m256i s3_22_0 = _mm256_cmpgt_epi16(kZero,step2[22]); - __m256i s3_23_0 = _mm256_cmpgt_epi16(kZero,step2[23]); - __m256i s3_24_0 = _mm256_cmpgt_epi16(kZero,step2[24]); - __m256i s3_25_0 = _mm256_cmpgt_epi16(kZero,step2[25]); - __m256i s3_26_0 = _mm256_cmpgt_epi16(kZero,step2[26]); - __m256i s3_27_0 = _mm256_cmpgt_epi16(kZero,step2[27]); - __m256i s3_28_0 = _mm256_cmpgt_epi16(kZero,step1[28]); - __m256i s3_29_0 = _mm256_cmpgt_epi16(kZero,step1[29]); - __m256i s3_30_0 = _mm256_cmpgt_epi16(kZero,step1[30]); - __m256i s3_31_0 = _mm256_cmpgt_epi16(kZero,step1[31]); - - step2[ 0] = _mm256_sub_epi16(step2[ 0], s3_00_0); - step2[ 1] = _mm256_sub_epi16(step2[ 1], s3_01_0); - step2[ 2] = _mm256_sub_epi16(step2[ 2], 
s3_02_0); - step2[ 3] = _mm256_sub_epi16(step2[ 3], s3_03_0); - step2[ 4] = _mm256_sub_epi16(step2[ 4], s3_04_0); - step2[ 5] = _mm256_sub_epi16(step2[ 5], s3_05_0); - step2[ 6] = _mm256_sub_epi16(step2[ 6], s3_06_0); - step2[ 7] = _mm256_sub_epi16(step2[ 7], s3_07_0); - step2[ 8] = _mm256_sub_epi16(step2[ 8], s2_08_0); - step2[ 9] = _mm256_sub_epi16(step2[ 9], s2_09_0); - step2[10] = _mm256_sub_epi16(step2[10], s3_10_0); - step2[11] = _mm256_sub_epi16(step2[11], s3_11_0); - step2[12] = _mm256_sub_epi16(step2[12], s3_12_0); - step2[13] = _mm256_sub_epi16(step2[13], s3_13_0); - step2[14] = _mm256_sub_epi16(step2[14], s2_14_0); - step2[15] = _mm256_sub_epi16(step2[15], s2_15_0); - step1[16] = _mm256_sub_epi16(step1[16], s3_16_0); - step1[17] = _mm256_sub_epi16(step1[17], s3_17_0); - step1[18] = _mm256_sub_epi16(step1[18], s3_18_0); - step1[19] = _mm256_sub_epi16(step1[19], s3_19_0); - step2[20] = _mm256_sub_epi16(step2[20], s3_20_0); - step2[21] = _mm256_sub_epi16(step2[21], s3_21_0); - step2[22] = _mm256_sub_epi16(step2[22], s3_22_0); - step2[23] = _mm256_sub_epi16(step2[23], s3_23_0); - step2[24] = _mm256_sub_epi16(step2[24], s3_24_0); - step2[25] = _mm256_sub_epi16(step2[25], s3_25_0); - step2[26] = _mm256_sub_epi16(step2[26], s3_26_0); - step2[27] = _mm256_sub_epi16(step2[27], s3_27_0); - step1[28] = _mm256_sub_epi16(step1[28], s3_28_0); - step1[29] = _mm256_sub_epi16(step1[29], s3_29_0); - step1[30] = _mm256_sub_epi16(step1[30], s3_30_0); - step1[31] = _mm256_sub_epi16(step1[31], s3_31_0); - - step2[ 0] = _mm256_add_epi16(step2[ 0], kOne); - step2[ 1] = _mm256_add_epi16(step2[ 1], kOne); - step2[ 2] = _mm256_add_epi16(step2[ 2], kOne); - step2[ 3] = _mm256_add_epi16(step2[ 3], kOne); - step2[ 4] = _mm256_add_epi16(step2[ 4], kOne); - step2[ 5] = _mm256_add_epi16(step2[ 5], kOne); - step2[ 6] = _mm256_add_epi16(step2[ 6], kOne); - step2[ 7] = _mm256_add_epi16(step2[ 7], kOne); - step2[ 8] = _mm256_add_epi16(step2[ 8], kOne); - step2[ 9] = _mm256_add_epi16(step2[ 
9], kOne); - step2[10] = _mm256_add_epi16(step2[10], kOne); - step2[11] = _mm256_add_epi16(step2[11], kOne); - step2[12] = _mm256_add_epi16(step2[12], kOne); - step2[13] = _mm256_add_epi16(step2[13], kOne); - step2[14] = _mm256_add_epi16(step2[14], kOne); - step2[15] = _mm256_add_epi16(step2[15], kOne); - step1[16] = _mm256_add_epi16(step1[16], kOne); - step1[17] = _mm256_add_epi16(step1[17], kOne); - step1[18] = _mm256_add_epi16(step1[18], kOne); - step1[19] = _mm256_add_epi16(step1[19], kOne); - step2[20] = _mm256_add_epi16(step2[20], kOne); - step2[21] = _mm256_add_epi16(step2[21], kOne); - step2[22] = _mm256_add_epi16(step2[22], kOne); - step2[23] = _mm256_add_epi16(step2[23], kOne); - step2[24] = _mm256_add_epi16(step2[24], kOne); - step2[25] = _mm256_add_epi16(step2[25], kOne); - step2[26] = _mm256_add_epi16(step2[26], kOne); - step2[27] = _mm256_add_epi16(step2[27], kOne); - step1[28] = _mm256_add_epi16(step1[28], kOne); - step1[29] = _mm256_add_epi16(step1[29], kOne); - step1[30] = _mm256_add_epi16(step1[30], kOne); - step1[31] = _mm256_add_epi16(step1[31], kOne); - - step2[ 0] = _mm256_srai_epi16(step2[ 0], 2); - step2[ 1] = _mm256_srai_epi16(step2[ 1], 2); - step2[ 2] = _mm256_srai_epi16(step2[ 2], 2); - step2[ 3] = _mm256_srai_epi16(step2[ 3], 2); - step2[ 4] = _mm256_srai_epi16(step2[ 4], 2); - step2[ 5] = _mm256_srai_epi16(step2[ 5], 2); - step2[ 6] = _mm256_srai_epi16(step2[ 6], 2); - step2[ 7] = _mm256_srai_epi16(step2[ 7], 2); - step2[ 8] = _mm256_srai_epi16(step2[ 8], 2); - step2[ 9] = _mm256_srai_epi16(step2[ 9], 2); - step2[10] = _mm256_srai_epi16(step2[10], 2); - step2[11] = _mm256_srai_epi16(step2[11], 2); - step2[12] = _mm256_srai_epi16(step2[12], 2); - step2[13] = _mm256_srai_epi16(step2[13], 2); - step2[14] = _mm256_srai_epi16(step2[14], 2); - step2[15] = _mm256_srai_epi16(step2[15], 2); - step1[16] = _mm256_srai_epi16(step1[16], 2); - step1[17] = _mm256_srai_epi16(step1[17], 2); - step1[18] = _mm256_srai_epi16(step1[18], 2); - step1[19] = 
_mm256_srai_epi16(step1[19], 2); - step2[20] = _mm256_srai_epi16(step2[20], 2); - step2[21] = _mm256_srai_epi16(step2[21], 2); - step2[22] = _mm256_srai_epi16(step2[22], 2); - step2[23] = _mm256_srai_epi16(step2[23], 2); - step2[24] = _mm256_srai_epi16(step2[24], 2); - step2[25] = _mm256_srai_epi16(step2[25], 2); - step2[26] = _mm256_srai_epi16(step2[26], 2); - step2[27] = _mm256_srai_epi16(step2[27], 2); - step1[28] = _mm256_srai_epi16(step1[28], 2); - step1[29] = _mm256_srai_epi16(step1[29], 2); - step1[30] = _mm256_srai_epi16(step1[30], 2); - step1[31] = _mm256_srai_epi16(step1[31], 2); - } -#endif - -#if FDCT32x32_HIGH_PRECISION - if (pass == 0) { -#endif - // Stage 3 - { - step3[0] = _mm256_add_epi16(step2[(8 - 1)], step2[0]); - step3[1] = _mm256_add_epi16(step2[(8 - 2)], step2[1]); - step3[2] = _mm256_add_epi16(step2[(8 - 3)], step2[2]); - step3[3] = _mm256_add_epi16(step2[(8 - 4)], step2[3]); - step3[4] = _mm256_sub_epi16(step2[(8 - 5)], step2[4]); - step3[5] = _mm256_sub_epi16(step2[(8 - 6)], step2[5]); - step3[6] = _mm256_sub_epi16(step2[(8 - 7)], step2[6]); - step3[7] = _mm256_sub_epi16(step2[(8 - 8)], step2[7]); - } - { - const __m256i s3_10_0 = _mm256_unpacklo_epi16(step2[13], step2[10]); - const __m256i s3_10_1 = _mm256_unpackhi_epi16(step2[13], step2[10]); - const __m256i s3_11_0 = _mm256_unpacklo_epi16(step2[12], step2[11]); - const __m256i s3_11_1 = _mm256_unpackhi_epi16(step2[12], step2[11]); - const __m256i s3_10_2 = _mm256_madd_epi16(s3_10_0, k__cospi_p16_m16); - const __m256i s3_10_3 = _mm256_madd_epi16(s3_10_1, k__cospi_p16_m16); - const __m256i s3_11_2 = _mm256_madd_epi16(s3_11_0, k__cospi_p16_m16); - const __m256i s3_11_3 = _mm256_madd_epi16(s3_11_1, k__cospi_p16_m16); - const __m256i s3_12_2 = _mm256_madd_epi16(s3_11_0, k__cospi_p16_p16); - const __m256i s3_12_3 = _mm256_madd_epi16(s3_11_1, k__cospi_p16_p16); - const __m256i s3_13_2 = _mm256_madd_epi16(s3_10_0, k__cospi_p16_p16); - const __m256i s3_13_3 = _mm256_madd_epi16(s3_10_1, 
k__cospi_p16_p16); - // dct_const_round_shift - const __m256i s3_10_4 = _mm256_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING); - const __m256i s3_10_5 = _mm256_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING); - const __m256i s3_11_4 = _mm256_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING); - const __m256i s3_11_5 = _mm256_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING); - const __m256i s3_12_4 = _mm256_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING); - const __m256i s3_12_5 = _mm256_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING); - const __m256i s3_13_4 = _mm256_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING); - const __m256i s3_13_5 = _mm256_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING); - const __m256i s3_10_6 = _mm256_srai_epi32(s3_10_4, DCT_CONST_BITS); - const __m256i s3_10_7 = _mm256_srai_epi32(s3_10_5, DCT_CONST_BITS); - const __m256i s3_11_6 = _mm256_srai_epi32(s3_11_4, DCT_CONST_BITS); - const __m256i s3_11_7 = _mm256_srai_epi32(s3_11_5, DCT_CONST_BITS); - const __m256i s3_12_6 = _mm256_srai_epi32(s3_12_4, DCT_CONST_BITS); - const __m256i s3_12_7 = _mm256_srai_epi32(s3_12_5, DCT_CONST_BITS); - const __m256i s3_13_6 = _mm256_srai_epi32(s3_13_4, DCT_CONST_BITS); - const __m256i s3_13_7 = _mm256_srai_epi32(s3_13_5, DCT_CONST_BITS); - // Combine - step3[10] = _mm256_packs_epi32(s3_10_6, s3_10_7); - step3[11] = _mm256_packs_epi32(s3_11_6, s3_11_7); - step3[12] = _mm256_packs_epi32(s3_12_6, s3_12_7); - step3[13] = _mm256_packs_epi32(s3_13_6, s3_13_7); - } - { - step3[16] = _mm256_add_epi16(step2[23], step1[16]); - step3[17] = _mm256_add_epi16(step2[22], step1[17]); - step3[18] = _mm256_add_epi16(step2[21], step1[18]); - step3[19] = _mm256_add_epi16(step2[20], step1[19]); - step3[20] = _mm256_sub_epi16(step1[19], step2[20]); - step3[21] = _mm256_sub_epi16(step1[18], step2[21]); - step3[22] = _mm256_sub_epi16(step1[17], step2[22]); - step3[23] = _mm256_sub_epi16(step1[16], step2[23]); - step3[24] = _mm256_sub_epi16(step1[31], step2[24]); - step3[25] = _mm256_sub_epi16(step1[30], step2[25]); - step3[26] = 
_mm256_sub_epi16(step1[29], step2[26]); - step3[27] = _mm256_sub_epi16(step1[28], step2[27]); - step3[28] = _mm256_add_epi16(step2[27], step1[28]); - step3[29] = _mm256_add_epi16(step2[26], step1[29]); - step3[30] = _mm256_add_epi16(step2[25], step1[30]); - step3[31] = _mm256_add_epi16(step2[24], step1[31]); - } - - // Stage 4 - { - step1[ 0] = _mm256_add_epi16(step3[ 3], step3[ 0]); - step1[ 1] = _mm256_add_epi16(step3[ 2], step3[ 1]); - step1[ 2] = _mm256_sub_epi16(step3[ 1], step3[ 2]); - step1[ 3] = _mm256_sub_epi16(step3[ 0], step3[ 3]); - step1[ 8] = _mm256_add_epi16(step3[11], step2[ 8]); - step1[ 9] = _mm256_add_epi16(step3[10], step2[ 9]); - step1[10] = _mm256_sub_epi16(step2[ 9], step3[10]); - step1[11] = _mm256_sub_epi16(step2[ 8], step3[11]); - step1[12] = _mm256_sub_epi16(step2[15], step3[12]); - step1[13] = _mm256_sub_epi16(step2[14], step3[13]); - step1[14] = _mm256_add_epi16(step3[13], step2[14]); - step1[15] = _mm256_add_epi16(step3[12], step2[15]); - } - { - const __m256i s1_05_0 = _mm256_unpacklo_epi16(step3[6], step3[5]); - const __m256i s1_05_1 = _mm256_unpackhi_epi16(step3[6], step3[5]); - const __m256i s1_05_2 = _mm256_madd_epi16(s1_05_0, k__cospi_p16_m16); - const __m256i s1_05_3 = _mm256_madd_epi16(s1_05_1, k__cospi_p16_m16); - const __m256i s1_06_2 = _mm256_madd_epi16(s1_05_0, k__cospi_p16_p16); - const __m256i s1_06_3 = _mm256_madd_epi16(s1_05_1, k__cospi_p16_p16); - // dct_const_round_shift - const __m256i s1_05_4 = _mm256_add_epi32(s1_05_2, k__DCT_CONST_ROUNDING); - const __m256i s1_05_5 = _mm256_add_epi32(s1_05_3, k__DCT_CONST_ROUNDING); - const __m256i s1_06_4 = _mm256_add_epi32(s1_06_2, k__DCT_CONST_ROUNDING); - const __m256i s1_06_5 = _mm256_add_epi32(s1_06_3, k__DCT_CONST_ROUNDING); - const __m256i s1_05_6 = _mm256_srai_epi32(s1_05_4, DCT_CONST_BITS); - const __m256i s1_05_7 = _mm256_srai_epi32(s1_05_5, DCT_CONST_BITS); - const __m256i s1_06_6 = _mm256_srai_epi32(s1_06_4, DCT_CONST_BITS); - const __m256i s1_06_7 = 
_mm256_srai_epi32(s1_06_5, DCT_CONST_BITS); - // Combine - step1[5] = _mm256_packs_epi32(s1_05_6, s1_05_7); - step1[6] = _mm256_packs_epi32(s1_06_6, s1_06_7); - } - { - const __m256i s1_18_0 = _mm256_unpacklo_epi16(step3[18], step3[29]); - const __m256i s1_18_1 = _mm256_unpackhi_epi16(step3[18], step3[29]); - const __m256i s1_19_0 = _mm256_unpacklo_epi16(step3[19], step3[28]); - const __m256i s1_19_1 = _mm256_unpackhi_epi16(step3[19], step3[28]); - const __m256i s1_20_0 = _mm256_unpacklo_epi16(step3[20], step3[27]); - const __m256i s1_20_1 = _mm256_unpackhi_epi16(step3[20], step3[27]); - const __m256i s1_21_0 = _mm256_unpacklo_epi16(step3[21], step3[26]); - const __m256i s1_21_1 = _mm256_unpackhi_epi16(step3[21], step3[26]); - const __m256i s1_18_2 = _mm256_madd_epi16(s1_18_0, k__cospi_m08_p24); - const __m256i s1_18_3 = _mm256_madd_epi16(s1_18_1, k__cospi_m08_p24); - const __m256i s1_19_2 = _mm256_madd_epi16(s1_19_0, k__cospi_m08_p24); - const __m256i s1_19_3 = _mm256_madd_epi16(s1_19_1, k__cospi_m08_p24); - const __m256i s1_20_2 = _mm256_madd_epi16(s1_20_0, k__cospi_m24_m08); - const __m256i s1_20_3 = _mm256_madd_epi16(s1_20_1, k__cospi_m24_m08); - const __m256i s1_21_2 = _mm256_madd_epi16(s1_21_0, k__cospi_m24_m08); - const __m256i s1_21_3 = _mm256_madd_epi16(s1_21_1, k__cospi_m24_m08); - const __m256i s1_26_2 = _mm256_madd_epi16(s1_21_0, k__cospi_m08_p24); - const __m256i s1_26_3 = _mm256_madd_epi16(s1_21_1, k__cospi_m08_p24); - const __m256i s1_27_2 = _mm256_madd_epi16(s1_20_0, k__cospi_m08_p24); - const __m256i s1_27_3 = _mm256_madd_epi16(s1_20_1, k__cospi_m08_p24); - const __m256i s1_28_2 = _mm256_madd_epi16(s1_19_0, k__cospi_p24_p08); - const __m256i s1_28_3 = _mm256_madd_epi16(s1_19_1, k__cospi_p24_p08); - const __m256i s1_29_2 = _mm256_madd_epi16(s1_18_0, k__cospi_p24_p08); - const __m256i s1_29_3 = _mm256_madd_epi16(s1_18_1, k__cospi_p24_p08); - // dct_const_round_shift - const __m256i s1_18_4 = _mm256_add_epi32(s1_18_2, k__DCT_CONST_ROUNDING); - const 
__m256i s1_18_5 = _mm256_add_epi32(s1_18_3, k__DCT_CONST_ROUNDING); - const __m256i s1_19_4 = _mm256_add_epi32(s1_19_2, k__DCT_CONST_ROUNDING); - const __m256i s1_19_5 = _mm256_add_epi32(s1_19_3, k__DCT_CONST_ROUNDING); - const __m256i s1_20_4 = _mm256_add_epi32(s1_20_2, k__DCT_CONST_ROUNDING); - const __m256i s1_20_5 = _mm256_add_epi32(s1_20_3, k__DCT_CONST_ROUNDING); - const __m256i s1_21_4 = _mm256_add_epi32(s1_21_2, k__DCT_CONST_ROUNDING); - const __m256i s1_21_5 = _mm256_add_epi32(s1_21_3, k__DCT_CONST_ROUNDING); - const __m256i s1_26_4 = _mm256_add_epi32(s1_26_2, k__DCT_CONST_ROUNDING); - const __m256i s1_26_5 = _mm256_add_epi32(s1_26_3, k__DCT_CONST_ROUNDING); - const __m256i s1_27_4 = _mm256_add_epi32(s1_27_2, k__DCT_CONST_ROUNDING); - const __m256i s1_27_5 = _mm256_add_epi32(s1_27_3, k__DCT_CONST_ROUNDING); - const __m256i s1_28_4 = _mm256_add_epi32(s1_28_2, k__DCT_CONST_ROUNDING); - const __m256i s1_28_5 = _mm256_add_epi32(s1_28_3, k__DCT_CONST_ROUNDING); - const __m256i s1_29_4 = _mm256_add_epi32(s1_29_2, k__DCT_CONST_ROUNDING); - const __m256i s1_29_5 = _mm256_add_epi32(s1_29_3, k__DCT_CONST_ROUNDING); - const __m256i s1_18_6 = _mm256_srai_epi32(s1_18_4, DCT_CONST_BITS); - const __m256i s1_18_7 = _mm256_srai_epi32(s1_18_5, DCT_CONST_BITS); - const __m256i s1_19_6 = _mm256_srai_epi32(s1_19_4, DCT_CONST_BITS); - const __m256i s1_19_7 = _mm256_srai_epi32(s1_19_5, DCT_CONST_BITS); - const __m256i s1_20_6 = _mm256_srai_epi32(s1_20_4, DCT_CONST_BITS); - const __m256i s1_20_7 = _mm256_srai_epi32(s1_20_5, DCT_CONST_BITS); - const __m256i s1_21_6 = _mm256_srai_epi32(s1_21_4, DCT_CONST_BITS); - const __m256i s1_21_7 = _mm256_srai_epi32(s1_21_5, DCT_CONST_BITS); - const __m256i s1_26_6 = _mm256_srai_epi32(s1_26_4, DCT_CONST_BITS); - const __m256i s1_26_7 = _mm256_srai_epi32(s1_26_5, DCT_CONST_BITS); - const __m256i s1_27_6 = _mm256_srai_epi32(s1_27_4, DCT_CONST_BITS); - const __m256i s1_27_7 = _mm256_srai_epi32(s1_27_5, DCT_CONST_BITS); - const __m256i s1_28_6 = 
_mm256_srai_epi32(s1_28_4, DCT_CONST_BITS); - const __m256i s1_28_7 = _mm256_srai_epi32(s1_28_5, DCT_CONST_BITS); - const __m256i s1_29_6 = _mm256_srai_epi32(s1_29_4, DCT_CONST_BITS); - const __m256i s1_29_7 = _mm256_srai_epi32(s1_29_5, DCT_CONST_BITS); - // Combine - step1[18] = _mm256_packs_epi32(s1_18_6, s1_18_7); - step1[19] = _mm256_packs_epi32(s1_19_6, s1_19_7); - step1[20] = _mm256_packs_epi32(s1_20_6, s1_20_7); - step1[21] = _mm256_packs_epi32(s1_21_6, s1_21_7); - step1[26] = _mm256_packs_epi32(s1_26_6, s1_26_7); - step1[27] = _mm256_packs_epi32(s1_27_6, s1_27_7); - step1[28] = _mm256_packs_epi32(s1_28_6, s1_28_7); - step1[29] = _mm256_packs_epi32(s1_29_6, s1_29_7); - } - // Stage 5 - { - step2[4] = _mm256_add_epi16(step1[5], step3[4]); - step2[5] = _mm256_sub_epi16(step3[4], step1[5]); - step2[6] = _mm256_sub_epi16(step3[7], step1[6]); - step2[7] = _mm256_add_epi16(step1[6], step3[7]); - } - { - const __m256i out_00_0 = _mm256_unpacklo_epi16(step1[0], step1[1]); - const __m256i out_00_1 = _mm256_unpackhi_epi16(step1[0], step1[1]); - const __m256i out_08_0 = _mm256_unpacklo_epi16(step1[2], step1[3]); - const __m256i out_08_1 = _mm256_unpackhi_epi16(step1[2], step1[3]); - const __m256i out_00_2 = _mm256_madd_epi16(out_00_0, k__cospi_p16_p16); - const __m256i out_00_3 = _mm256_madd_epi16(out_00_1, k__cospi_p16_p16); - const __m256i out_16_2 = _mm256_madd_epi16(out_00_0, k__cospi_p16_m16); - const __m256i out_16_3 = _mm256_madd_epi16(out_00_1, k__cospi_p16_m16); - const __m256i out_08_2 = _mm256_madd_epi16(out_08_0, k__cospi_p24_p08); - const __m256i out_08_3 = _mm256_madd_epi16(out_08_1, k__cospi_p24_p08); - const __m256i out_24_2 = _mm256_madd_epi16(out_08_0, k__cospi_m08_p24); - const __m256i out_24_3 = _mm256_madd_epi16(out_08_1, k__cospi_m08_p24); - // dct_const_round_shift - const __m256i out_00_4 = _mm256_add_epi32(out_00_2, k__DCT_CONST_ROUNDING); - const __m256i out_00_5 = _mm256_add_epi32(out_00_3, k__DCT_CONST_ROUNDING); - const __m256i out_16_4 = 
_mm256_add_epi32(out_16_2, k__DCT_CONST_ROUNDING); - const __m256i out_16_5 = _mm256_add_epi32(out_16_3, k__DCT_CONST_ROUNDING); - const __m256i out_08_4 = _mm256_add_epi32(out_08_2, k__DCT_CONST_ROUNDING); - const __m256i out_08_5 = _mm256_add_epi32(out_08_3, k__DCT_CONST_ROUNDING); - const __m256i out_24_4 = _mm256_add_epi32(out_24_2, k__DCT_CONST_ROUNDING); - const __m256i out_24_5 = _mm256_add_epi32(out_24_3, k__DCT_CONST_ROUNDING); - const __m256i out_00_6 = _mm256_srai_epi32(out_00_4, DCT_CONST_BITS); - const __m256i out_00_7 = _mm256_srai_epi32(out_00_5, DCT_CONST_BITS); - const __m256i out_16_6 = _mm256_srai_epi32(out_16_4, DCT_CONST_BITS); - const __m256i out_16_7 = _mm256_srai_epi32(out_16_5, DCT_CONST_BITS); - const __m256i out_08_6 = _mm256_srai_epi32(out_08_4, DCT_CONST_BITS); - const __m256i out_08_7 = _mm256_srai_epi32(out_08_5, DCT_CONST_BITS); - const __m256i out_24_6 = _mm256_srai_epi32(out_24_4, DCT_CONST_BITS); - const __m256i out_24_7 = _mm256_srai_epi32(out_24_5, DCT_CONST_BITS); - // Combine - out[ 0] = _mm256_packs_epi32(out_00_6, out_00_7); - out[16] = _mm256_packs_epi32(out_16_6, out_16_7); - out[ 8] = _mm256_packs_epi32(out_08_6, out_08_7); - out[24] = _mm256_packs_epi32(out_24_6, out_24_7); - } - { - const __m256i s2_09_0 = _mm256_unpacklo_epi16(step1[ 9], step1[14]); - const __m256i s2_09_1 = _mm256_unpackhi_epi16(step1[ 9], step1[14]); - const __m256i s2_10_0 = _mm256_unpacklo_epi16(step1[10], step1[13]); - const __m256i s2_10_1 = _mm256_unpackhi_epi16(step1[10], step1[13]); - const __m256i s2_09_2 = _mm256_madd_epi16(s2_09_0, k__cospi_m08_p24); - const __m256i s2_09_3 = _mm256_madd_epi16(s2_09_1, k__cospi_m08_p24); - const __m256i s2_10_2 = _mm256_madd_epi16(s2_10_0, k__cospi_m24_m08); - const __m256i s2_10_3 = _mm256_madd_epi16(s2_10_1, k__cospi_m24_m08); - const __m256i s2_13_2 = _mm256_madd_epi16(s2_10_0, k__cospi_m08_p24); - const __m256i s2_13_3 = _mm256_madd_epi16(s2_10_1, k__cospi_m08_p24); - const __m256i s2_14_2 = 
_mm256_madd_epi16(s2_09_0, k__cospi_p24_p08); - const __m256i s2_14_3 = _mm256_madd_epi16(s2_09_1, k__cospi_p24_p08); - // dct_const_round_shift - const __m256i s2_09_4 = _mm256_add_epi32(s2_09_2, k__DCT_CONST_ROUNDING); - const __m256i s2_09_5 = _mm256_add_epi32(s2_09_3, k__DCT_CONST_ROUNDING); - const __m256i s2_10_4 = _mm256_add_epi32(s2_10_2, k__DCT_CONST_ROUNDING); - const __m256i s2_10_5 = _mm256_add_epi32(s2_10_3, k__DCT_CONST_ROUNDING); - const __m256i s2_13_4 = _mm256_add_epi32(s2_13_2, k__DCT_CONST_ROUNDING); - const __m256i s2_13_5 = _mm256_add_epi32(s2_13_3, k__DCT_CONST_ROUNDING); - const __m256i s2_14_4 = _mm256_add_epi32(s2_14_2, k__DCT_CONST_ROUNDING); - const __m256i s2_14_5 = _mm256_add_epi32(s2_14_3, k__DCT_CONST_ROUNDING); - const __m256i s2_09_6 = _mm256_srai_epi32(s2_09_4, DCT_CONST_BITS); - const __m256i s2_09_7 = _mm256_srai_epi32(s2_09_5, DCT_CONST_BITS); - const __m256i s2_10_6 = _mm256_srai_epi32(s2_10_4, DCT_CONST_BITS); - const __m256i s2_10_7 = _mm256_srai_epi32(s2_10_5, DCT_CONST_BITS); - const __m256i s2_13_6 = _mm256_srai_epi32(s2_13_4, DCT_CONST_BITS); - const __m256i s2_13_7 = _mm256_srai_epi32(s2_13_5, DCT_CONST_BITS); - const __m256i s2_14_6 = _mm256_srai_epi32(s2_14_4, DCT_CONST_BITS); - const __m256i s2_14_7 = _mm256_srai_epi32(s2_14_5, DCT_CONST_BITS); - // Combine - step2[ 9] = _mm256_packs_epi32(s2_09_6, s2_09_7); - step2[10] = _mm256_packs_epi32(s2_10_6, s2_10_7); - step2[13] = _mm256_packs_epi32(s2_13_6, s2_13_7); - step2[14] = _mm256_packs_epi32(s2_14_6, s2_14_7); - } - { - step2[16] = _mm256_add_epi16(step1[19], step3[16]); - step2[17] = _mm256_add_epi16(step1[18], step3[17]); - step2[18] = _mm256_sub_epi16(step3[17], step1[18]); - step2[19] = _mm256_sub_epi16(step3[16], step1[19]); - step2[20] = _mm256_sub_epi16(step3[23], step1[20]); - step2[21] = _mm256_sub_epi16(step3[22], step1[21]); - step2[22] = _mm256_add_epi16(step1[21], step3[22]); - step2[23] = _mm256_add_epi16(step1[20], step3[23]); - step2[24] = 
_mm256_add_epi16(step1[27], step3[24]); - step2[25] = _mm256_add_epi16(step1[26], step3[25]); - step2[26] = _mm256_sub_epi16(step3[25], step1[26]); - step2[27] = _mm256_sub_epi16(step3[24], step1[27]); - step2[28] = _mm256_sub_epi16(step3[31], step1[28]); - step2[29] = _mm256_sub_epi16(step3[30], step1[29]); - step2[30] = _mm256_add_epi16(step1[29], step3[30]); - step2[31] = _mm256_add_epi16(step1[28], step3[31]); - } - // Stage 6 - { - const __m256i out_04_0 = _mm256_unpacklo_epi16(step2[4], step2[7]); - const __m256i out_04_1 = _mm256_unpackhi_epi16(step2[4], step2[7]); - const __m256i out_20_0 = _mm256_unpacklo_epi16(step2[5], step2[6]); - const __m256i out_20_1 = _mm256_unpackhi_epi16(step2[5], step2[6]); - const __m256i out_12_0 = _mm256_unpacklo_epi16(step2[5], step2[6]); - const __m256i out_12_1 = _mm256_unpackhi_epi16(step2[5], step2[6]); - const __m256i out_28_0 = _mm256_unpacklo_epi16(step2[4], step2[7]); - const __m256i out_28_1 = _mm256_unpackhi_epi16(step2[4], step2[7]); - const __m256i out_04_2 = _mm256_madd_epi16(out_04_0, k__cospi_p28_p04); - const __m256i out_04_3 = _mm256_madd_epi16(out_04_1, k__cospi_p28_p04); - const __m256i out_20_2 = _mm256_madd_epi16(out_20_0, k__cospi_p12_p20); - const __m256i out_20_3 = _mm256_madd_epi16(out_20_1, k__cospi_p12_p20); - const __m256i out_12_2 = _mm256_madd_epi16(out_12_0, k__cospi_m20_p12); - const __m256i out_12_3 = _mm256_madd_epi16(out_12_1, k__cospi_m20_p12); - const __m256i out_28_2 = _mm256_madd_epi16(out_28_0, k__cospi_m04_p28); - const __m256i out_28_3 = _mm256_madd_epi16(out_28_1, k__cospi_m04_p28); - // dct_const_round_shift - const __m256i out_04_4 = _mm256_add_epi32(out_04_2, k__DCT_CONST_ROUNDING); - const __m256i out_04_5 = _mm256_add_epi32(out_04_3, k__DCT_CONST_ROUNDING); - const __m256i out_20_4 = _mm256_add_epi32(out_20_2, k__DCT_CONST_ROUNDING); - const __m256i out_20_5 = _mm256_add_epi32(out_20_3, k__DCT_CONST_ROUNDING); - const __m256i out_12_4 = _mm256_add_epi32(out_12_2, 
k__DCT_CONST_ROUNDING); - const __m256i out_12_5 = _mm256_add_epi32(out_12_3, k__DCT_CONST_ROUNDING); - const __m256i out_28_4 = _mm256_add_epi32(out_28_2, k__DCT_CONST_ROUNDING); - const __m256i out_28_5 = _mm256_add_epi32(out_28_3, k__DCT_CONST_ROUNDING); - const __m256i out_04_6 = _mm256_srai_epi32(out_04_4, DCT_CONST_BITS); - const __m256i out_04_7 = _mm256_srai_epi32(out_04_5, DCT_CONST_BITS); - const __m256i out_20_6 = _mm256_srai_epi32(out_20_4, DCT_CONST_BITS); - const __m256i out_20_7 = _mm256_srai_epi32(out_20_5, DCT_CONST_BITS); - const __m256i out_12_6 = _mm256_srai_epi32(out_12_4, DCT_CONST_BITS); - const __m256i out_12_7 = _mm256_srai_epi32(out_12_5, DCT_CONST_BITS); - const __m256i out_28_6 = _mm256_srai_epi32(out_28_4, DCT_CONST_BITS); - const __m256i out_28_7 = _mm256_srai_epi32(out_28_5, DCT_CONST_BITS); - // Combine - out[ 4] = _mm256_packs_epi32(out_04_6, out_04_7); - out[20] = _mm256_packs_epi32(out_20_6, out_20_7); - out[12] = _mm256_packs_epi32(out_12_6, out_12_7); - out[28] = _mm256_packs_epi32(out_28_6, out_28_7); - } - { - step3[ 8] = _mm256_add_epi16(step2[ 9], step1[ 8]); - step3[ 9] = _mm256_sub_epi16(step1[ 8], step2[ 9]); - step3[10] = _mm256_sub_epi16(step1[11], step2[10]); - step3[11] = _mm256_add_epi16(step2[10], step1[11]); - step3[12] = _mm256_add_epi16(step2[13], step1[12]); - step3[13] = _mm256_sub_epi16(step1[12], step2[13]); - step3[14] = _mm256_sub_epi16(step1[15], step2[14]); - step3[15] = _mm256_add_epi16(step2[14], step1[15]); - } - { - const __m256i s3_17_0 = _mm256_unpacklo_epi16(step2[17], step2[30]); - const __m256i s3_17_1 = _mm256_unpackhi_epi16(step2[17], step2[30]); - const __m256i s3_18_0 = _mm256_unpacklo_epi16(step2[18], step2[29]); - const __m256i s3_18_1 = _mm256_unpackhi_epi16(step2[18], step2[29]); - const __m256i s3_21_0 = _mm256_unpacklo_epi16(step2[21], step2[26]); - const __m256i s3_21_1 = _mm256_unpackhi_epi16(step2[21], step2[26]); - const __m256i s3_22_0 = _mm256_unpacklo_epi16(step2[22], step2[25]); 
- const __m256i s3_22_1 = _mm256_unpackhi_epi16(step2[22], step2[25]); - const __m256i s3_17_2 = _mm256_madd_epi16(s3_17_0, k__cospi_m04_p28); - const __m256i s3_17_3 = _mm256_madd_epi16(s3_17_1, k__cospi_m04_p28); - const __m256i s3_18_2 = _mm256_madd_epi16(s3_18_0, k__cospi_m28_m04); - const __m256i s3_18_3 = _mm256_madd_epi16(s3_18_1, k__cospi_m28_m04); - const __m256i s3_21_2 = _mm256_madd_epi16(s3_21_0, k__cospi_m20_p12); - const __m256i s3_21_3 = _mm256_madd_epi16(s3_21_1, k__cospi_m20_p12); - const __m256i s3_22_2 = _mm256_madd_epi16(s3_22_0, k__cospi_m12_m20); - const __m256i s3_22_3 = _mm256_madd_epi16(s3_22_1, k__cospi_m12_m20); - const __m256i s3_25_2 = _mm256_madd_epi16(s3_22_0, k__cospi_m20_p12); - const __m256i s3_25_3 = _mm256_madd_epi16(s3_22_1, k__cospi_m20_p12); - const __m256i s3_26_2 = _mm256_madd_epi16(s3_21_0, k__cospi_p12_p20); - const __m256i s3_26_3 = _mm256_madd_epi16(s3_21_1, k__cospi_p12_p20); - const __m256i s3_29_2 = _mm256_madd_epi16(s3_18_0, k__cospi_m04_p28); - const __m256i s3_29_3 = _mm256_madd_epi16(s3_18_1, k__cospi_m04_p28); - const __m256i s3_30_2 = _mm256_madd_epi16(s3_17_0, k__cospi_p28_p04); - const __m256i s3_30_3 = _mm256_madd_epi16(s3_17_1, k__cospi_p28_p04); - // dct_const_round_shift - const __m256i s3_17_4 = _mm256_add_epi32(s3_17_2, k__DCT_CONST_ROUNDING); - const __m256i s3_17_5 = _mm256_add_epi32(s3_17_3, k__DCT_CONST_ROUNDING); - const __m256i s3_18_4 = _mm256_add_epi32(s3_18_2, k__DCT_CONST_ROUNDING); - const __m256i s3_18_5 = _mm256_add_epi32(s3_18_3, k__DCT_CONST_ROUNDING); - const __m256i s3_21_4 = _mm256_add_epi32(s3_21_2, k__DCT_CONST_ROUNDING); - const __m256i s3_21_5 = _mm256_add_epi32(s3_21_3, k__DCT_CONST_ROUNDING); - const __m256i s3_22_4 = _mm256_add_epi32(s3_22_2, k__DCT_CONST_ROUNDING); - const __m256i s3_22_5 = _mm256_add_epi32(s3_22_3, k__DCT_CONST_ROUNDING); - const __m256i s3_17_6 = _mm256_srai_epi32(s3_17_4, DCT_CONST_BITS); - const __m256i s3_17_7 = _mm256_srai_epi32(s3_17_5, DCT_CONST_BITS); - 
const __m256i s3_18_6 = _mm256_srai_epi32(s3_18_4, DCT_CONST_BITS); - const __m256i s3_18_7 = _mm256_srai_epi32(s3_18_5, DCT_CONST_BITS); - const __m256i s3_21_6 = _mm256_srai_epi32(s3_21_4, DCT_CONST_BITS); - const __m256i s3_21_7 = _mm256_srai_epi32(s3_21_5, DCT_CONST_BITS); - const __m256i s3_22_6 = _mm256_srai_epi32(s3_22_4, DCT_CONST_BITS); - const __m256i s3_22_7 = _mm256_srai_epi32(s3_22_5, DCT_CONST_BITS); - const __m256i s3_25_4 = _mm256_add_epi32(s3_25_2, k__DCT_CONST_ROUNDING); - const __m256i s3_25_5 = _mm256_add_epi32(s3_25_3, k__DCT_CONST_ROUNDING); - const __m256i s3_26_4 = _mm256_add_epi32(s3_26_2, k__DCT_CONST_ROUNDING); - const __m256i s3_26_5 = _mm256_add_epi32(s3_26_3, k__DCT_CONST_ROUNDING); - const __m256i s3_29_4 = _mm256_add_epi32(s3_29_2, k__DCT_CONST_ROUNDING); - const __m256i s3_29_5 = _mm256_add_epi32(s3_29_3, k__DCT_CONST_ROUNDING); - const __m256i s3_30_4 = _mm256_add_epi32(s3_30_2, k__DCT_CONST_ROUNDING); - const __m256i s3_30_5 = _mm256_add_epi32(s3_30_3, k__DCT_CONST_ROUNDING); - const __m256i s3_25_6 = _mm256_srai_epi32(s3_25_4, DCT_CONST_BITS); - const __m256i s3_25_7 = _mm256_srai_epi32(s3_25_5, DCT_CONST_BITS); - const __m256i s3_26_6 = _mm256_srai_epi32(s3_26_4, DCT_CONST_BITS); - const __m256i s3_26_7 = _mm256_srai_epi32(s3_26_5, DCT_CONST_BITS); - const __m256i s3_29_6 = _mm256_srai_epi32(s3_29_4, DCT_CONST_BITS); - const __m256i s3_29_7 = _mm256_srai_epi32(s3_29_5, DCT_CONST_BITS); - const __m256i s3_30_6 = _mm256_srai_epi32(s3_30_4, DCT_CONST_BITS); - const __m256i s3_30_7 = _mm256_srai_epi32(s3_30_5, DCT_CONST_BITS); - // Combine - step3[17] = _mm256_packs_epi32(s3_17_6, s3_17_7); - step3[18] = _mm256_packs_epi32(s3_18_6, s3_18_7); - step3[21] = _mm256_packs_epi32(s3_21_6, s3_21_7); - step3[22] = _mm256_packs_epi32(s3_22_6, s3_22_7); - // Combine - step3[25] = _mm256_packs_epi32(s3_25_6, s3_25_7); - step3[26] = _mm256_packs_epi32(s3_26_6, s3_26_7); - step3[29] = _mm256_packs_epi32(s3_29_6, s3_29_7); - step3[30] = 
_mm256_packs_epi32(s3_30_6, s3_30_7); - } - // Stage 7 - { - const __m256i out_02_0 = _mm256_unpacklo_epi16(step3[ 8], step3[15]); - const __m256i out_02_1 = _mm256_unpackhi_epi16(step3[ 8], step3[15]); - const __m256i out_18_0 = _mm256_unpacklo_epi16(step3[ 9], step3[14]); - const __m256i out_18_1 = _mm256_unpackhi_epi16(step3[ 9], step3[14]); - const __m256i out_10_0 = _mm256_unpacklo_epi16(step3[10], step3[13]); - const __m256i out_10_1 = _mm256_unpackhi_epi16(step3[10], step3[13]); - const __m256i out_26_0 = _mm256_unpacklo_epi16(step3[11], step3[12]); - const __m256i out_26_1 = _mm256_unpackhi_epi16(step3[11], step3[12]); - const __m256i out_02_2 = _mm256_madd_epi16(out_02_0, k__cospi_p30_p02); - const __m256i out_02_3 = _mm256_madd_epi16(out_02_1, k__cospi_p30_p02); - const __m256i out_18_2 = _mm256_madd_epi16(out_18_0, k__cospi_p14_p18); - const __m256i out_18_3 = _mm256_madd_epi16(out_18_1, k__cospi_p14_p18); - const __m256i out_10_2 = _mm256_madd_epi16(out_10_0, k__cospi_p22_p10); - const __m256i out_10_3 = _mm256_madd_epi16(out_10_1, k__cospi_p22_p10); - const __m256i out_26_2 = _mm256_madd_epi16(out_26_0, k__cospi_p06_p26); - const __m256i out_26_3 = _mm256_madd_epi16(out_26_1, k__cospi_p06_p26); - const __m256i out_06_2 = _mm256_madd_epi16(out_26_0, k__cospi_m26_p06); - const __m256i out_06_3 = _mm256_madd_epi16(out_26_1, k__cospi_m26_p06); - const __m256i out_22_2 = _mm256_madd_epi16(out_10_0, k__cospi_m10_p22); - const __m256i out_22_3 = _mm256_madd_epi16(out_10_1, k__cospi_m10_p22); - const __m256i out_14_2 = _mm256_madd_epi16(out_18_0, k__cospi_m18_p14); - const __m256i out_14_3 = _mm256_madd_epi16(out_18_1, k__cospi_m18_p14); - const __m256i out_30_2 = _mm256_madd_epi16(out_02_0, k__cospi_m02_p30); - const __m256i out_30_3 = _mm256_madd_epi16(out_02_1, k__cospi_m02_p30); - // dct_const_round_shift - const __m256i out_02_4 = _mm256_add_epi32(out_02_2, k__DCT_CONST_ROUNDING); - const __m256i out_02_5 = _mm256_add_epi32(out_02_3, 
k__DCT_CONST_ROUNDING); - const __m256i out_18_4 = _mm256_add_epi32(out_18_2, k__DCT_CONST_ROUNDING); - const __m256i out_18_5 = _mm256_add_epi32(out_18_3, k__DCT_CONST_ROUNDING); - const __m256i out_10_4 = _mm256_add_epi32(out_10_2, k__DCT_CONST_ROUNDING); - const __m256i out_10_5 = _mm256_add_epi32(out_10_3, k__DCT_CONST_ROUNDING); - const __m256i out_26_4 = _mm256_add_epi32(out_26_2, k__DCT_CONST_ROUNDING); - const __m256i out_26_5 = _mm256_add_epi32(out_26_3, k__DCT_CONST_ROUNDING); - const __m256i out_06_4 = _mm256_add_epi32(out_06_2, k__DCT_CONST_ROUNDING); - const __m256i out_06_5 = _mm256_add_epi32(out_06_3, k__DCT_CONST_ROUNDING); - const __m256i out_22_4 = _mm256_add_epi32(out_22_2, k__DCT_CONST_ROUNDING); - const __m256i out_22_5 = _mm256_add_epi32(out_22_3, k__DCT_CONST_ROUNDING); - const __m256i out_14_4 = _mm256_add_epi32(out_14_2, k__DCT_CONST_ROUNDING); - const __m256i out_14_5 = _mm256_add_epi32(out_14_3, k__DCT_CONST_ROUNDING); - const __m256i out_30_4 = _mm256_add_epi32(out_30_2, k__DCT_CONST_ROUNDING); - const __m256i out_30_5 = _mm256_add_epi32(out_30_3, k__DCT_CONST_ROUNDING); - const __m256i out_02_6 = _mm256_srai_epi32(out_02_4, DCT_CONST_BITS); - const __m256i out_02_7 = _mm256_srai_epi32(out_02_5, DCT_CONST_BITS); - const __m256i out_18_6 = _mm256_srai_epi32(out_18_4, DCT_CONST_BITS); - const __m256i out_18_7 = _mm256_srai_epi32(out_18_5, DCT_CONST_BITS); - const __m256i out_10_6 = _mm256_srai_epi32(out_10_4, DCT_CONST_BITS); - const __m256i out_10_7 = _mm256_srai_epi32(out_10_5, DCT_CONST_BITS); - const __m256i out_26_6 = _mm256_srai_epi32(out_26_4, DCT_CONST_BITS); - const __m256i out_26_7 = _mm256_srai_epi32(out_26_5, DCT_CONST_BITS); - const __m256i out_06_6 = _mm256_srai_epi32(out_06_4, DCT_CONST_BITS); - const __m256i out_06_7 = _mm256_srai_epi32(out_06_5, DCT_CONST_BITS); - const __m256i out_22_6 = _mm256_srai_epi32(out_22_4, DCT_CONST_BITS); - const __m256i out_22_7 = _mm256_srai_epi32(out_22_5, DCT_CONST_BITS); - const __m256i 
out_14_6 = _mm256_srai_epi32(out_14_4, DCT_CONST_BITS); - const __m256i out_14_7 = _mm256_srai_epi32(out_14_5, DCT_CONST_BITS); - const __m256i out_30_6 = _mm256_srai_epi32(out_30_4, DCT_CONST_BITS); - const __m256i out_30_7 = _mm256_srai_epi32(out_30_5, DCT_CONST_BITS); - // Combine - out[ 2] = _mm256_packs_epi32(out_02_6, out_02_7); - out[18] = _mm256_packs_epi32(out_18_6, out_18_7); - out[10] = _mm256_packs_epi32(out_10_6, out_10_7); - out[26] = _mm256_packs_epi32(out_26_6, out_26_7); - out[ 6] = _mm256_packs_epi32(out_06_6, out_06_7); - out[22] = _mm256_packs_epi32(out_22_6, out_22_7); - out[14] = _mm256_packs_epi32(out_14_6, out_14_7); - out[30] = _mm256_packs_epi32(out_30_6, out_30_7); - } - { - step1[16] = _mm256_add_epi16(step3[17], step2[16]); - step1[17] = _mm256_sub_epi16(step2[16], step3[17]); - step1[18] = _mm256_sub_epi16(step2[19], step3[18]); - step1[19] = _mm256_add_epi16(step3[18], step2[19]); - step1[20] = _mm256_add_epi16(step3[21], step2[20]); - step1[21] = _mm256_sub_epi16(step2[20], step3[21]); - step1[22] = _mm256_sub_epi16(step2[23], step3[22]); - step1[23] = _mm256_add_epi16(step3[22], step2[23]); - step1[24] = _mm256_add_epi16(step3[25], step2[24]); - step1[25] = _mm256_sub_epi16(step2[24], step3[25]); - step1[26] = _mm256_sub_epi16(step2[27], step3[26]); - step1[27] = _mm256_add_epi16(step3[26], step2[27]); - step1[28] = _mm256_add_epi16(step3[29], step2[28]); - step1[29] = _mm256_sub_epi16(step2[28], step3[29]); - step1[30] = _mm256_sub_epi16(step2[31], step3[30]); - step1[31] = _mm256_add_epi16(step3[30], step2[31]); - } - // Final stage --- outputs indices are bit-reversed. 
- { - const __m256i out_01_0 = _mm256_unpacklo_epi16(step1[16], step1[31]); - const __m256i out_01_1 = _mm256_unpackhi_epi16(step1[16], step1[31]); - const __m256i out_17_0 = _mm256_unpacklo_epi16(step1[17], step1[30]); - const __m256i out_17_1 = _mm256_unpackhi_epi16(step1[17], step1[30]); - const __m256i out_09_0 = _mm256_unpacklo_epi16(step1[18], step1[29]); - const __m256i out_09_1 = _mm256_unpackhi_epi16(step1[18], step1[29]); - const __m256i out_25_0 = _mm256_unpacklo_epi16(step1[19], step1[28]); - const __m256i out_25_1 = _mm256_unpackhi_epi16(step1[19], step1[28]); - const __m256i out_01_2 = _mm256_madd_epi16(out_01_0, k__cospi_p31_p01); - const __m256i out_01_3 = _mm256_madd_epi16(out_01_1, k__cospi_p31_p01); - const __m256i out_17_2 = _mm256_madd_epi16(out_17_0, k__cospi_p15_p17); - const __m256i out_17_3 = _mm256_madd_epi16(out_17_1, k__cospi_p15_p17); - const __m256i out_09_2 = _mm256_madd_epi16(out_09_0, k__cospi_p23_p09); - const __m256i out_09_3 = _mm256_madd_epi16(out_09_1, k__cospi_p23_p09); - const __m256i out_25_2 = _mm256_madd_epi16(out_25_0, k__cospi_p07_p25); - const __m256i out_25_3 = _mm256_madd_epi16(out_25_1, k__cospi_p07_p25); - const __m256i out_07_2 = _mm256_madd_epi16(out_25_0, k__cospi_m25_p07); - const __m256i out_07_3 = _mm256_madd_epi16(out_25_1, k__cospi_m25_p07); - const __m256i out_23_2 = _mm256_madd_epi16(out_09_0, k__cospi_m09_p23); - const __m256i out_23_3 = _mm256_madd_epi16(out_09_1, k__cospi_m09_p23); - const __m256i out_15_2 = _mm256_madd_epi16(out_17_0, k__cospi_m17_p15); - const __m256i out_15_3 = _mm256_madd_epi16(out_17_1, k__cospi_m17_p15); - const __m256i out_31_2 = _mm256_madd_epi16(out_01_0, k__cospi_m01_p31); - const __m256i out_31_3 = _mm256_madd_epi16(out_01_1, k__cospi_m01_p31); - // dct_const_round_shift - const __m256i out_01_4 = _mm256_add_epi32(out_01_2, k__DCT_CONST_ROUNDING); - const __m256i out_01_5 = _mm256_add_epi32(out_01_3, k__DCT_CONST_ROUNDING); - const __m256i out_17_4 = 
_mm256_add_epi32(out_17_2, k__DCT_CONST_ROUNDING); - const __m256i out_17_5 = _mm256_add_epi32(out_17_3, k__DCT_CONST_ROUNDING); - const __m256i out_09_4 = _mm256_add_epi32(out_09_2, k__DCT_CONST_ROUNDING); - const __m256i out_09_5 = _mm256_add_epi32(out_09_3, k__DCT_CONST_ROUNDING); - const __m256i out_25_4 = _mm256_add_epi32(out_25_2, k__DCT_CONST_ROUNDING); - const __m256i out_25_5 = _mm256_add_epi32(out_25_3, k__DCT_CONST_ROUNDING); - const __m256i out_07_4 = _mm256_add_epi32(out_07_2, k__DCT_CONST_ROUNDING); - const __m256i out_07_5 = _mm256_add_epi32(out_07_3, k__DCT_CONST_ROUNDING); - const __m256i out_23_4 = _mm256_add_epi32(out_23_2, k__DCT_CONST_ROUNDING); - const __m256i out_23_5 = _mm256_add_epi32(out_23_3, k__DCT_CONST_ROUNDING); - const __m256i out_15_4 = _mm256_add_epi32(out_15_2, k__DCT_CONST_ROUNDING); - const __m256i out_15_5 = _mm256_add_epi32(out_15_3, k__DCT_CONST_ROUNDING); - const __m256i out_31_4 = _mm256_add_epi32(out_31_2, k__DCT_CONST_ROUNDING); - const __m256i out_31_5 = _mm256_add_epi32(out_31_3, k__DCT_CONST_ROUNDING); - const __m256i out_01_6 = _mm256_srai_epi32(out_01_4, DCT_CONST_BITS); - const __m256i out_01_7 = _mm256_srai_epi32(out_01_5, DCT_CONST_BITS); - const __m256i out_17_6 = _mm256_srai_epi32(out_17_4, DCT_CONST_BITS); - const __m256i out_17_7 = _mm256_srai_epi32(out_17_5, DCT_CONST_BITS); - const __m256i out_09_6 = _mm256_srai_epi32(out_09_4, DCT_CONST_BITS); - const __m256i out_09_7 = _mm256_srai_epi32(out_09_5, DCT_CONST_BITS); - const __m256i out_25_6 = _mm256_srai_epi32(out_25_4, DCT_CONST_BITS); - const __m256i out_25_7 = _mm256_srai_epi32(out_25_5, DCT_CONST_BITS); - const __m256i out_07_6 = _mm256_srai_epi32(out_07_4, DCT_CONST_BITS); - const __m256i out_07_7 = _mm256_srai_epi32(out_07_5, DCT_CONST_BITS); - const __m256i out_23_6 = _mm256_srai_epi32(out_23_4, DCT_CONST_BITS); - const __m256i out_23_7 = _mm256_srai_epi32(out_23_5, DCT_CONST_BITS); - const __m256i out_15_6 = _mm256_srai_epi32(out_15_4, 
DCT_CONST_BITS); - const __m256i out_15_7 = _mm256_srai_epi32(out_15_5, DCT_CONST_BITS); - const __m256i out_31_6 = _mm256_srai_epi32(out_31_4, DCT_CONST_BITS); - const __m256i out_31_7 = _mm256_srai_epi32(out_31_5, DCT_CONST_BITS); - // Combine - out[ 1] = _mm256_packs_epi32(out_01_6, out_01_7); - out[17] = _mm256_packs_epi32(out_17_6, out_17_7); - out[ 9] = _mm256_packs_epi32(out_09_6, out_09_7); - out[25] = _mm256_packs_epi32(out_25_6, out_25_7); - out[ 7] = _mm256_packs_epi32(out_07_6, out_07_7); - out[23] = _mm256_packs_epi32(out_23_6, out_23_7); - out[15] = _mm256_packs_epi32(out_15_6, out_15_7); - out[31] = _mm256_packs_epi32(out_31_6, out_31_7); - } - { - const __m256i out_05_0 = _mm256_unpacklo_epi16(step1[20], step1[27]); - const __m256i out_05_1 = _mm256_unpackhi_epi16(step1[20], step1[27]); - const __m256i out_21_0 = _mm256_unpacklo_epi16(step1[21], step1[26]); - const __m256i out_21_1 = _mm256_unpackhi_epi16(step1[21], step1[26]); - const __m256i out_13_0 = _mm256_unpacklo_epi16(step1[22], step1[25]); - const __m256i out_13_1 = _mm256_unpackhi_epi16(step1[22], step1[25]); - const __m256i out_29_0 = _mm256_unpacklo_epi16(step1[23], step1[24]); - const __m256i out_29_1 = _mm256_unpackhi_epi16(step1[23], step1[24]); - const __m256i out_05_2 = _mm256_madd_epi16(out_05_0, k__cospi_p27_p05); - const __m256i out_05_3 = _mm256_madd_epi16(out_05_1, k__cospi_p27_p05); - const __m256i out_21_2 = _mm256_madd_epi16(out_21_0, k__cospi_p11_p21); - const __m256i out_21_3 = _mm256_madd_epi16(out_21_1, k__cospi_p11_p21); - const __m256i out_13_2 = _mm256_madd_epi16(out_13_0, k__cospi_p19_p13); - const __m256i out_13_3 = _mm256_madd_epi16(out_13_1, k__cospi_p19_p13); - const __m256i out_29_2 = _mm256_madd_epi16(out_29_0, k__cospi_p03_p29); - const __m256i out_29_3 = _mm256_madd_epi16(out_29_1, k__cospi_p03_p29); - const __m256i out_03_2 = _mm256_madd_epi16(out_29_0, k__cospi_m29_p03); - const __m256i out_03_3 = _mm256_madd_epi16(out_29_1, k__cospi_m29_p03); - const 
__m256i out_19_2 = _mm256_madd_epi16(out_13_0, k__cospi_m13_p19); - const __m256i out_19_3 = _mm256_madd_epi16(out_13_1, k__cospi_m13_p19); - const __m256i out_11_2 = _mm256_madd_epi16(out_21_0, k__cospi_m21_p11); - const __m256i out_11_3 = _mm256_madd_epi16(out_21_1, k__cospi_m21_p11); - const __m256i out_27_2 = _mm256_madd_epi16(out_05_0, k__cospi_m05_p27); - const __m256i out_27_3 = _mm256_madd_epi16(out_05_1, k__cospi_m05_p27); - // dct_const_round_shift - const __m256i out_05_4 = _mm256_add_epi32(out_05_2, k__DCT_CONST_ROUNDING); - const __m256i out_05_5 = _mm256_add_epi32(out_05_3, k__DCT_CONST_ROUNDING); - const __m256i out_21_4 = _mm256_add_epi32(out_21_2, k__DCT_CONST_ROUNDING); - const __m256i out_21_5 = _mm256_add_epi32(out_21_3, k__DCT_CONST_ROUNDING); - const __m256i out_13_4 = _mm256_add_epi32(out_13_2, k__DCT_CONST_ROUNDING); - const __m256i out_13_5 = _mm256_add_epi32(out_13_3, k__DCT_CONST_ROUNDING); - const __m256i out_29_4 = _mm256_add_epi32(out_29_2, k__DCT_CONST_ROUNDING); - const __m256i out_29_5 = _mm256_add_epi32(out_29_3, k__DCT_CONST_ROUNDING); - const __m256i out_03_4 = _mm256_add_epi32(out_03_2, k__DCT_CONST_ROUNDING); - const __m256i out_03_5 = _mm256_add_epi32(out_03_3, k__DCT_CONST_ROUNDING); - const __m256i out_19_4 = _mm256_add_epi32(out_19_2, k__DCT_CONST_ROUNDING); - const __m256i out_19_5 = _mm256_add_epi32(out_19_3, k__DCT_CONST_ROUNDING); - const __m256i out_11_4 = _mm256_add_epi32(out_11_2, k__DCT_CONST_ROUNDING); - const __m256i out_11_5 = _mm256_add_epi32(out_11_3, k__DCT_CONST_ROUNDING); - const __m256i out_27_4 = _mm256_add_epi32(out_27_2, k__DCT_CONST_ROUNDING); - const __m256i out_27_5 = _mm256_add_epi32(out_27_3, k__DCT_CONST_ROUNDING); - const __m256i out_05_6 = _mm256_srai_epi32(out_05_4, DCT_CONST_BITS); - const __m256i out_05_7 = _mm256_srai_epi32(out_05_5, DCT_CONST_BITS); - const __m256i out_21_6 = _mm256_srai_epi32(out_21_4, DCT_CONST_BITS); - const __m256i out_21_7 = _mm256_srai_epi32(out_21_5, DCT_CONST_BITS); 
- const __m256i out_13_6 = _mm256_srai_epi32(out_13_4, DCT_CONST_BITS); - const __m256i out_13_7 = _mm256_srai_epi32(out_13_5, DCT_CONST_BITS); - const __m256i out_29_6 = _mm256_srai_epi32(out_29_4, DCT_CONST_BITS); - const __m256i out_29_7 = _mm256_srai_epi32(out_29_5, DCT_CONST_BITS); - const __m256i out_03_6 = _mm256_srai_epi32(out_03_4, DCT_CONST_BITS); - const __m256i out_03_7 = _mm256_srai_epi32(out_03_5, DCT_CONST_BITS); - const __m256i out_19_6 = _mm256_srai_epi32(out_19_4, DCT_CONST_BITS); - const __m256i out_19_7 = _mm256_srai_epi32(out_19_5, DCT_CONST_BITS); - const __m256i out_11_6 = _mm256_srai_epi32(out_11_4, DCT_CONST_BITS); - const __m256i out_11_7 = _mm256_srai_epi32(out_11_5, DCT_CONST_BITS); - const __m256i out_27_6 = _mm256_srai_epi32(out_27_4, DCT_CONST_BITS); - const __m256i out_27_7 = _mm256_srai_epi32(out_27_5, DCT_CONST_BITS); - // Combine - out[ 5] = _mm256_packs_epi32(out_05_6, out_05_7); - out[21] = _mm256_packs_epi32(out_21_6, out_21_7); - out[13] = _mm256_packs_epi32(out_13_6, out_13_7); - out[29] = _mm256_packs_epi32(out_29_6, out_29_7); - out[ 3] = _mm256_packs_epi32(out_03_6, out_03_7); - out[19] = _mm256_packs_epi32(out_19_6, out_19_7); - out[11] = _mm256_packs_epi32(out_11_6, out_11_7); - out[27] = _mm256_packs_epi32(out_27_6, out_27_7); - } -#if FDCT32x32_HIGH_PRECISION - } else { - __m256i lstep1[64], lstep2[64], lstep3[64]; - __m256i u[32], v[32], sign[16]; - const __m256i K32One = _mm256_set_epi32(1, 1, 1, 1, 1, 1, 1, 1); - // start using 32-bit operations - // stage 3 - { - // expanding to 32-bit length priori to addition operations - lstep2[ 0] = _mm256_unpacklo_epi16(step2[ 0], kZero); - lstep2[ 1] = _mm256_unpackhi_epi16(step2[ 0], kZero); - lstep2[ 2] = _mm256_unpacklo_epi16(step2[ 1], kZero); - lstep2[ 3] = _mm256_unpackhi_epi16(step2[ 1], kZero); - lstep2[ 4] = _mm256_unpacklo_epi16(step2[ 2], kZero); - lstep2[ 5] = _mm256_unpackhi_epi16(step2[ 2], kZero); - lstep2[ 6] = _mm256_unpacklo_epi16(step2[ 3], kZero); - 
lstep2[ 7] = _mm256_unpackhi_epi16(step2[ 3], kZero); - lstep2[ 8] = _mm256_unpacklo_epi16(step2[ 4], kZero); - lstep2[ 9] = _mm256_unpackhi_epi16(step2[ 4], kZero); - lstep2[10] = _mm256_unpacklo_epi16(step2[ 5], kZero); - lstep2[11] = _mm256_unpackhi_epi16(step2[ 5], kZero); - lstep2[12] = _mm256_unpacklo_epi16(step2[ 6], kZero); - lstep2[13] = _mm256_unpackhi_epi16(step2[ 6], kZero); - lstep2[14] = _mm256_unpacklo_epi16(step2[ 7], kZero); - lstep2[15] = _mm256_unpackhi_epi16(step2[ 7], kZero); - lstep2[ 0] = _mm256_madd_epi16(lstep2[ 0], kOne); - lstep2[ 1] = _mm256_madd_epi16(lstep2[ 1], kOne); - lstep2[ 2] = _mm256_madd_epi16(lstep2[ 2], kOne); - lstep2[ 3] = _mm256_madd_epi16(lstep2[ 3], kOne); - lstep2[ 4] = _mm256_madd_epi16(lstep2[ 4], kOne); - lstep2[ 5] = _mm256_madd_epi16(lstep2[ 5], kOne); - lstep2[ 6] = _mm256_madd_epi16(lstep2[ 6], kOne); - lstep2[ 7] = _mm256_madd_epi16(lstep2[ 7], kOne); - lstep2[ 8] = _mm256_madd_epi16(lstep2[ 8], kOne); - lstep2[ 9] = _mm256_madd_epi16(lstep2[ 9], kOne); - lstep2[10] = _mm256_madd_epi16(lstep2[10], kOne); - lstep2[11] = _mm256_madd_epi16(lstep2[11], kOne); - lstep2[12] = _mm256_madd_epi16(lstep2[12], kOne); - lstep2[13] = _mm256_madd_epi16(lstep2[13], kOne); - lstep2[14] = _mm256_madd_epi16(lstep2[14], kOne); - lstep2[15] = _mm256_madd_epi16(lstep2[15], kOne); - - lstep3[ 0] = _mm256_add_epi32(lstep2[14], lstep2[ 0]); - lstep3[ 1] = _mm256_add_epi32(lstep2[15], lstep2[ 1]); - lstep3[ 2] = _mm256_add_epi32(lstep2[12], lstep2[ 2]); - lstep3[ 3] = _mm256_add_epi32(lstep2[13], lstep2[ 3]); - lstep3[ 4] = _mm256_add_epi32(lstep2[10], lstep2[ 4]); - lstep3[ 5] = _mm256_add_epi32(lstep2[11], lstep2[ 5]); - lstep3[ 6] = _mm256_add_epi32(lstep2[ 8], lstep2[ 6]); - lstep3[ 7] = _mm256_add_epi32(lstep2[ 9], lstep2[ 7]); - lstep3[ 8] = _mm256_sub_epi32(lstep2[ 6], lstep2[ 8]); - lstep3[ 9] = _mm256_sub_epi32(lstep2[ 7], lstep2[ 9]); - lstep3[10] = _mm256_sub_epi32(lstep2[ 4], lstep2[10]); - lstep3[11] = 
_mm256_sub_epi32(lstep2[ 5], lstep2[11]); - lstep3[12] = _mm256_sub_epi32(lstep2[ 2], lstep2[12]); - lstep3[13] = _mm256_sub_epi32(lstep2[ 3], lstep2[13]); - lstep3[14] = _mm256_sub_epi32(lstep2[ 0], lstep2[14]); - lstep3[15] = _mm256_sub_epi32(lstep2[ 1], lstep2[15]); - } - { - const __m256i s3_10_0 = _mm256_unpacklo_epi16(step2[13], step2[10]); - const __m256i s3_10_1 = _mm256_unpackhi_epi16(step2[13], step2[10]); - const __m256i s3_11_0 = _mm256_unpacklo_epi16(step2[12], step2[11]); - const __m256i s3_11_1 = _mm256_unpackhi_epi16(step2[12], step2[11]); - const __m256i s3_10_2 = _mm256_madd_epi16(s3_10_0, k__cospi_p16_m16); - const __m256i s3_10_3 = _mm256_madd_epi16(s3_10_1, k__cospi_p16_m16); - const __m256i s3_11_2 = _mm256_madd_epi16(s3_11_0, k__cospi_p16_m16); - const __m256i s3_11_3 = _mm256_madd_epi16(s3_11_1, k__cospi_p16_m16); - const __m256i s3_12_2 = _mm256_madd_epi16(s3_11_0, k__cospi_p16_p16); - const __m256i s3_12_3 = _mm256_madd_epi16(s3_11_1, k__cospi_p16_p16); - const __m256i s3_13_2 = _mm256_madd_epi16(s3_10_0, k__cospi_p16_p16); - const __m256i s3_13_3 = _mm256_madd_epi16(s3_10_1, k__cospi_p16_p16); - // dct_const_round_shift - const __m256i s3_10_4 = _mm256_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING); - const __m256i s3_10_5 = _mm256_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING); - const __m256i s3_11_4 = _mm256_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING); - const __m256i s3_11_5 = _mm256_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING); - const __m256i s3_12_4 = _mm256_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING); - const __m256i s3_12_5 = _mm256_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING); - const __m256i s3_13_4 = _mm256_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING); - const __m256i s3_13_5 = _mm256_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING); - lstep3[20] = _mm256_srai_epi32(s3_10_4, DCT_CONST_BITS); - lstep3[21] = _mm256_srai_epi32(s3_10_5, DCT_CONST_BITS); - lstep3[22] = _mm256_srai_epi32(s3_11_4, DCT_CONST_BITS); - lstep3[23] = _mm256_srai_epi32(s3_11_5, 
DCT_CONST_BITS); - lstep3[24] = _mm256_srai_epi32(s3_12_4, DCT_CONST_BITS); - lstep3[25] = _mm256_srai_epi32(s3_12_5, DCT_CONST_BITS); - lstep3[26] = _mm256_srai_epi32(s3_13_4, DCT_CONST_BITS); - lstep3[27] = _mm256_srai_epi32(s3_13_5, DCT_CONST_BITS); - } - { - lstep2[40] = _mm256_unpacklo_epi16(step2[20], kZero); - lstep2[41] = _mm256_unpackhi_epi16(step2[20], kZero); - lstep2[42] = _mm256_unpacklo_epi16(step2[21], kZero); - lstep2[43] = _mm256_unpackhi_epi16(step2[21], kZero); - lstep2[44] = _mm256_unpacklo_epi16(step2[22], kZero); - lstep2[45] = _mm256_unpackhi_epi16(step2[22], kZero); - lstep2[46] = _mm256_unpacklo_epi16(step2[23], kZero); - lstep2[47] = _mm256_unpackhi_epi16(step2[23], kZero); - lstep2[48] = _mm256_unpacklo_epi16(step2[24], kZero); - lstep2[49] = _mm256_unpackhi_epi16(step2[24], kZero); - lstep2[50] = _mm256_unpacklo_epi16(step2[25], kZero); - lstep2[51] = _mm256_unpackhi_epi16(step2[25], kZero); - lstep2[52] = _mm256_unpacklo_epi16(step2[26], kZero); - lstep2[53] = _mm256_unpackhi_epi16(step2[26], kZero); - lstep2[54] = _mm256_unpacklo_epi16(step2[27], kZero); - lstep2[55] = _mm256_unpackhi_epi16(step2[27], kZero); - lstep2[40] = _mm256_madd_epi16(lstep2[40], kOne); - lstep2[41] = _mm256_madd_epi16(lstep2[41], kOne); - lstep2[42] = _mm256_madd_epi16(lstep2[42], kOne); - lstep2[43] = _mm256_madd_epi16(lstep2[43], kOne); - lstep2[44] = _mm256_madd_epi16(lstep2[44], kOne); - lstep2[45] = _mm256_madd_epi16(lstep2[45], kOne); - lstep2[46] = _mm256_madd_epi16(lstep2[46], kOne); - lstep2[47] = _mm256_madd_epi16(lstep2[47], kOne); - lstep2[48] = _mm256_madd_epi16(lstep2[48], kOne); - lstep2[49] = _mm256_madd_epi16(lstep2[49], kOne); - lstep2[50] = _mm256_madd_epi16(lstep2[50], kOne); - lstep2[51] = _mm256_madd_epi16(lstep2[51], kOne); - lstep2[52] = _mm256_madd_epi16(lstep2[52], kOne); - lstep2[53] = _mm256_madd_epi16(lstep2[53], kOne); - lstep2[54] = _mm256_madd_epi16(lstep2[54], kOne); - lstep2[55] = _mm256_madd_epi16(lstep2[55], kOne); - - 
lstep1[32] = _mm256_unpacklo_epi16(step1[16], kZero); - lstep1[33] = _mm256_unpackhi_epi16(step1[16], kZero); - lstep1[34] = _mm256_unpacklo_epi16(step1[17], kZero); - lstep1[35] = _mm256_unpackhi_epi16(step1[17], kZero); - lstep1[36] = _mm256_unpacklo_epi16(step1[18], kZero); - lstep1[37] = _mm256_unpackhi_epi16(step1[18], kZero); - lstep1[38] = _mm256_unpacklo_epi16(step1[19], kZero); - lstep1[39] = _mm256_unpackhi_epi16(step1[19], kZero); - lstep1[56] = _mm256_unpacklo_epi16(step1[28], kZero); - lstep1[57] = _mm256_unpackhi_epi16(step1[28], kZero); - lstep1[58] = _mm256_unpacklo_epi16(step1[29], kZero); - lstep1[59] = _mm256_unpackhi_epi16(step1[29], kZero); - lstep1[60] = _mm256_unpacklo_epi16(step1[30], kZero); - lstep1[61] = _mm256_unpackhi_epi16(step1[30], kZero); - lstep1[62] = _mm256_unpacklo_epi16(step1[31], kZero); - lstep1[63] = _mm256_unpackhi_epi16(step1[31], kZero); - lstep1[32] = _mm256_madd_epi16(lstep1[32], kOne); - lstep1[33] = _mm256_madd_epi16(lstep1[33], kOne); - lstep1[34] = _mm256_madd_epi16(lstep1[34], kOne); - lstep1[35] = _mm256_madd_epi16(lstep1[35], kOne); - lstep1[36] = _mm256_madd_epi16(lstep1[36], kOne); - lstep1[37] = _mm256_madd_epi16(lstep1[37], kOne); - lstep1[38] = _mm256_madd_epi16(lstep1[38], kOne); - lstep1[39] = _mm256_madd_epi16(lstep1[39], kOne); - lstep1[56] = _mm256_madd_epi16(lstep1[56], kOne); - lstep1[57] = _mm256_madd_epi16(lstep1[57], kOne); - lstep1[58] = _mm256_madd_epi16(lstep1[58], kOne); - lstep1[59] = _mm256_madd_epi16(lstep1[59], kOne); - lstep1[60] = _mm256_madd_epi16(lstep1[60], kOne); - lstep1[61] = _mm256_madd_epi16(lstep1[61], kOne); - lstep1[62] = _mm256_madd_epi16(lstep1[62], kOne); - lstep1[63] = _mm256_madd_epi16(lstep1[63], kOne); - - lstep3[32] = _mm256_add_epi32(lstep2[46], lstep1[32]); - lstep3[33] = _mm256_add_epi32(lstep2[47], lstep1[33]); - - lstep3[34] = _mm256_add_epi32(lstep2[44], lstep1[34]); - lstep3[35] = _mm256_add_epi32(lstep2[45], lstep1[35]); - lstep3[36] = 
_mm256_add_epi32(lstep2[42], lstep1[36]); - lstep3[37] = _mm256_add_epi32(lstep2[43], lstep1[37]); - lstep3[38] = _mm256_add_epi32(lstep2[40], lstep1[38]); - lstep3[39] = _mm256_add_epi32(lstep2[41], lstep1[39]); - lstep3[40] = _mm256_sub_epi32(lstep1[38], lstep2[40]); - lstep3[41] = _mm256_sub_epi32(lstep1[39], lstep2[41]); - lstep3[42] = _mm256_sub_epi32(lstep1[36], lstep2[42]); - lstep3[43] = _mm256_sub_epi32(lstep1[37], lstep2[43]); - lstep3[44] = _mm256_sub_epi32(lstep1[34], lstep2[44]); - lstep3[45] = _mm256_sub_epi32(lstep1[35], lstep2[45]); - lstep3[46] = _mm256_sub_epi32(lstep1[32], lstep2[46]); - lstep3[47] = _mm256_sub_epi32(lstep1[33], lstep2[47]); - lstep3[48] = _mm256_sub_epi32(lstep1[62], lstep2[48]); - lstep3[49] = _mm256_sub_epi32(lstep1[63], lstep2[49]); - lstep3[50] = _mm256_sub_epi32(lstep1[60], lstep2[50]); - lstep3[51] = _mm256_sub_epi32(lstep1[61], lstep2[51]); - lstep3[52] = _mm256_sub_epi32(lstep1[58], lstep2[52]); - lstep3[53] = _mm256_sub_epi32(lstep1[59], lstep2[53]); - lstep3[54] = _mm256_sub_epi32(lstep1[56], lstep2[54]); - lstep3[55] = _mm256_sub_epi32(lstep1[57], lstep2[55]); - lstep3[56] = _mm256_add_epi32(lstep2[54], lstep1[56]); - lstep3[57] = _mm256_add_epi32(lstep2[55], lstep1[57]); - lstep3[58] = _mm256_add_epi32(lstep2[52], lstep1[58]); - lstep3[59] = _mm256_add_epi32(lstep2[53], lstep1[59]); - lstep3[60] = _mm256_add_epi32(lstep2[50], lstep1[60]); - lstep3[61] = _mm256_add_epi32(lstep2[51], lstep1[61]); - lstep3[62] = _mm256_add_epi32(lstep2[48], lstep1[62]); - lstep3[63] = _mm256_add_epi32(lstep2[49], lstep1[63]); - } - - // stage 4 - { - // expanding to 32-bit length priori to addition operations - lstep2[16] = _mm256_unpacklo_epi16(step2[ 8], kZero); - lstep2[17] = _mm256_unpackhi_epi16(step2[ 8], kZero); - lstep2[18] = _mm256_unpacklo_epi16(step2[ 9], kZero); - lstep2[19] = _mm256_unpackhi_epi16(step2[ 9], kZero); - lstep2[28] = _mm256_unpacklo_epi16(step2[14], kZero); - lstep2[29] = _mm256_unpackhi_epi16(step2[14], 
kZero); - lstep2[30] = _mm256_unpacklo_epi16(step2[15], kZero); - lstep2[31] = _mm256_unpackhi_epi16(step2[15], kZero); - lstep2[16] = _mm256_madd_epi16(lstep2[16], kOne); - lstep2[17] = _mm256_madd_epi16(lstep2[17], kOne); - lstep2[18] = _mm256_madd_epi16(lstep2[18], kOne); - lstep2[19] = _mm256_madd_epi16(lstep2[19], kOne); - lstep2[28] = _mm256_madd_epi16(lstep2[28], kOne); - lstep2[29] = _mm256_madd_epi16(lstep2[29], kOne); - lstep2[30] = _mm256_madd_epi16(lstep2[30], kOne); - lstep2[31] = _mm256_madd_epi16(lstep2[31], kOne); - - lstep1[ 0] = _mm256_add_epi32(lstep3[ 6], lstep3[ 0]); - lstep1[ 1] = _mm256_add_epi32(lstep3[ 7], lstep3[ 1]); - lstep1[ 2] = _mm256_add_epi32(lstep3[ 4], lstep3[ 2]); - lstep1[ 3] = _mm256_add_epi32(lstep3[ 5], lstep3[ 3]); - lstep1[ 4] = _mm256_sub_epi32(lstep3[ 2], lstep3[ 4]); - lstep1[ 5] = _mm256_sub_epi32(lstep3[ 3], lstep3[ 5]); - lstep1[ 6] = _mm256_sub_epi32(lstep3[ 0], lstep3[ 6]); - lstep1[ 7] = _mm256_sub_epi32(lstep3[ 1], lstep3[ 7]); - lstep1[16] = _mm256_add_epi32(lstep3[22], lstep2[16]); - lstep1[17] = _mm256_add_epi32(lstep3[23], lstep2[17]); - lstep1[18] = _mm256_add_epi32(lstep3[20], lstep2[18]); - lstep1[19] = _mm256_add_epi32(lstep3[21], lstep2[19]); - lstep1[20] = _mm256_sub_epi32(lstep2[18], lstep3[20]); - lstep1[21] = _mm256_sub_epi32(lstep2[19], lstep3[21]); - lstep1[22] = _mm256_sub_epi32(lstep2[16], lstep3[22]); - lstep1[23] = _mm256_sub_epi32(lstep2[17], lstep3[23]); - lstep1[24] = _mm256_sub_epi32(lstep2[30], lstep3[24]); - lstep1[25] = _mm256_sub_epi32(lstep2[31], lstep3[25]); - lstep1[26] = _mm256_sub_epi32(lstep2[28], lstep3[26]); - lstep1[27] = _mm256_sub_epi32(lstep2[29], lstep3[27]); - lstep1[28] = _mm256_add_epi32(lstep3[26], lstep2[28]); - lstep1[29] = _mm256_add_epi32(lstep3[27], lstep2[29]); - lstep1[30] = _mm256_add_epi32(lstep3[24], lstep2[30]); - lstep1[31] = _mm256_add_epi32(lstep3[25], lstep2[31]); - } - { - // to be continued... 
- // - const __m256i k32_p16_p16 = pair256_set_epi32(cospi_16_64, cospi_16_64); - const __m256i k32_p16_m16 = pair256_set_epi32(cospi_16_64, -cospi_16_64); - - u[0] = _mm256_unpacklo_epi32(lstep3[12], lstep3[10]); - u[1] = _mm256_unpackhi_epi32(lstep3[12], lstep3[10]); - u[2] = _mm256_unpacklo_epi32(lstep3[13], lstep3[11]); - u[3] = _mm256_unpackhi_epi32(lstep3[13], lstep3[11]); - - // TODO(jingning): manually inline k_madd_epi32_avx2_ to further hide - // instruction latency. - v[ 0] = k_madd_epi32_avx2(u[0], k32_p16_m16); - v[ 1] = k_madd_epi32_avx2(u[1], k32_p16_m16); - v[ 2] = k_madd_epi32_avx2(u[2], k32_p16_m16); - v[ 3] = k_madd_epi32_avx2(u[3], k32_p16_m16); - v[ 4] = k_madd_epi32_avx2(u[0], k32_p16_p16); - v[ 5] = k_madd_epi32_avx2(u[1], k32_p16_p16); - v[ 6] = k_madd_epi32_avx2(u[2], k32_p16_p16); - v[ 7] = k_madd_epi32_avx2(u[3], k32_p16_p16); - - u[0] = k_packs_epi64_avx2(v[0], v[1]); - u[1] = k_packs_epi64_avx2(v[2], v[3]); - u[2] = k_packs_epi64_avx2(v[4], v[5]); - u[3] = k_packs_epi64_avx2(v[6], v[7]); - - v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); - - lstep1[10] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); - lstep1[11] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); - lstep1[12] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); - lstep1[13] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); - } - { - const __m256i k32_m08_p24 = pair256_set_epi32(-cospi_8_64, cospi_24_64); - const __m256i k32_m24_m08 = pair256_set_epi32(-cospi_24_64, -cospi_8_64); - const __m256i k32_p24_p08 = pair256_set_epi32(cospi_24_64, cospi_8_64); - - u[ 0] = _mm256_unpacklo_epi32(lstep3[36], lstep3[58]); - u[ 1] = _mm256_unpackhi_epi32(lstep3[36], lstep3[58]); - u[ 2] = _mm256_unpacklo_epi32(lstep3[37], lstep3[59]); - u[ 3] = _mm256_unpackhi_epi32(lstep3[37], lstep3[59]); - u[ 4] = _mm256_unpacklo_epi32(lstep3[38], 
lstep3[56]); - u[ 5] = _mm256_unpackhi_epi32(lstep3[38], lstep3[56]); - u[ 6] = _mm256_unpacklo_epi32(lstep3[39], lstep3[57]); - u[ 7] = _mm256_unpackhi_epi32(lstep3[39], lstep3[57]); - u[ 8] = _mm256_unpacklo_epi32(lstep3[40], lstep3[54]); - u[ 9] = _mm256_unpackhi_epi32(lstep3[40], lstep3[54]); - u[10] = _mm256_unpacklo_epi32(lstep3[41], lstep3[55]); - u[11] = _mm256_unpackhi_epi32(lstep3[41], lstep3[55]); - u[12] = _mm256_unpacklo_epi32(lstep3[42], lstep3[52]); - u[13] = _mm256_unpackhi_epi32(lstep3[42], lstep3[52]); - u[14] = _mm256_unpacklo_epi32(lstep3[43], lstep3[53]); - u[15] = _mm256_unpackhi_epi32(lstep3[43], lstep3[53]); - - v[ 0] = k_madd_epi32_avx2(u[ 0], k32_m08_p24); - v[ 1] = k_madd_epi32_avx2(u[ 1], k32_m08_p24); - v[ 2] = k_madd_epi32_avx2(u[ 2], k32_m08_p24); - v[ 3] = k_madd_epi32_avx2(u[ 3], k32_m08_p24); - v[ 4] = k_madd_epi32_avx2(u[ 4], k32_m08_p24); - v[ 5] = k_madd_epi32_avx2(u[ 5], k32_m08_p24); - v[ 6] = k_madd_epi32_avx2(u[ 6], k32_m08_p24); - v[ 7] = k_madd_epi32_avx2(u[ 7], k32_m08_p24); - v[ 8] = k_madd_epi32_avx2(u[ 8], k32_m24_m08); - v[ 9] = k_madd_epi32_avx2(u[ 9], k32_m24_m08); - v[10] = k_madd_epi32_avx2(u[10], k32_m24_m08); - v[11] = k_madd_epi32_avx2(u[11], k32_m24_m08); - v[12] = k_madd_epi32_avx2(u[12], k32_m24_m08); - v[13] = k_madd_epi32_avx2(u[13], k32_m24_m08); - v[14] = k_madd_epi32_avx2(u[14], k32_m24_m08); - v[15] = k_madd_epi32_avx2(u[15], k32_m24_m08); - v[16] = k_madd_epi32_avx2(u[12], k32_m08_p24); - v[17] = k_madd_epi32_avx2(u[13], k32_m08_p24); - v[18] = k_madd_epi32_avx2(u[14], k32_m08_p24); - v[19] = k_madd_epi32_avx2(u[15], k32_m08_p24); - v[20] = k_madd_epi32_avx2(u[ 8], k32_m08_p24); - v[21] = k_madd_epi32_avx2(u[ 9], k32_m08_p24); - v[22] = k_madd_epi32_avx2(u[10], k32_m08_p24); - v[23] = k_madd_epi32_avx2(u[11], k32_m08_p24); - v[24] = k_madd_epi32_avx2(u[ 4], k32_p24_p08); - v[25] = k_madd_epi32_avx2(u[ 5], k32_p24_p08); - v[26] = k_madd_epi32_avx2(u[ 6], k32_p24_p08); - v[27] = k_madd_epi32_avx2(u[ 7], 
k32_p24_p08); - v[28] = k_madd_epi32_avx2(u[ 0], k32_p24_p08); - v[29] = k_madd_epi32_avx2(u[ 1], k32_p24_p08); - v[30] = k_madd_epi32_avx2(u[ 2], k32_p24_p08); - v[31] = k_madd_epi32_avx2(u[ 3], k32_p24_p08); - - u[ 0] = k_packs_epi64_avx2(v[ 0], v[ 1]); - u[ 1] = k_packs_epi64_avx2(v[ 2], v[ 3]); - u[ 2] = k_packs_epi64_avx2(v[ 4], v[ 5]); - u[ 3] = k_packs_epi64_avx2(v[ 6], v[ 7]); - u[ 4] = k_packs_epi64_avx2(v[ 8], v[ 9]); - u[ 5] = k_packs_epi64_avx2(v[10], v[11]); - u[ 6] = k_packs_epi64_avx2(v[12], v[13]); - u[ 7] = k_packs_epi64_avx2(v[14], v[15]); - u[ 8] = k_packs_epi64_avx2(v[16], v[17]); - u[ 9] = k_packs_epi64_avx2(v[18], v[19]); - u[10] = k_packs_epi64_avx2(v[20], v[21]); - u[11] = k_packs_epi64_avx2(v[22], v[23]); - u[12] = k_packs_epi64_avx2(v[24], v[25]); - u[13] = k_packs_epi64_avx2(v[26], v[27]); - u[14] = k_packs_epi64_avx2(v[28], v[29]); - u[15] = k_packs_epi64_avx2(v[30], v[31]); - - v[ 0] = _mm256_add_epi32(u[ 0], k__DCT_CONST_ROUNDING); - v[ 1] = _mm256_add_epi32(u[ 1], k__DCT_CONST_ROUNDING); - v[ 2] = _mm256_add_epi32(u[ 2], k__DCT_CONST_ROUNDING); - v[ 3] = _mm256_add_epi32(u[ 3], k__DCT_CONST_ROUNDING); - v[ 4] = _mm256_add_epi32(u[ 4], k__DCT_CONST_ROUNDING); - v[ 5] = _mm256_add_epi32(u[ 5], k__DCT_CONST_ROUNDING); - v[ 6] = _mm256_add_epi32(u[ 6], k__DCT_CONST_ROUNDING); - v[ 7] = _mm256_add_epi32(u[ 7], k__DCT_CONST_ROUNDING); - v[ 8] = _mm256_add_epi32(u[ 8], k__DCT_CONST_ROUNDING); - v[ 9] = _mm256_add_epi32(u[ 9], k__DCT_CONST_ROUNDING); - v[10] = _mm256_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = _mm256_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm256_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm256_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm256_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm256_add_epi32(u[15], k__DCT_CONST_ROUNDING); - - lstep1[36] = _mm256_srai_epi32(v[ 0], DCT_CONST_BITS); - lstep1[37] = _mm256_srai_epi32(v[ 1], DCT_CONST_BITS); - lstep1[38] = _mm256_srai_epi32(v[ 
2], DCT_CONST_BITS); - lstep1[39] = _mm256_srai_epi32(v[ 3], DCT_CONST_BITS); - lstep1[40] = _mm256_srai_epi32(v[ 4], DCT_CONST_BITS); - lstep1[41] = _mm256_srai_epi32(v[ 5], DCT_CONST_BITS); - lstep1[42] = _mm256_srai_epi32(v[ 6], DCT_CONST_BITS); - lstep1[43] = _mm256_srai_epi32(v[ 7], DCT_CONST_BITS); - lstep1[52] = _mm256_srai_epi32(v[ 8], DCT_CONST_BITS); - lstep1[53] = _mm256_srai_epi32(v[ 9], DCT_CONST_BITS); - lstep1[54] = _mm256_srai_epi32(v[10], DCT_CONST_BITS); - lstep1[55] = _mm256_srai_epi32(v[11], DCT_CONST_BITS); - lstep1[56] = _mm256_srai_epi32(v[12], DCT_CONST_BITS); - lstep1[57] = _mm256_srai_epi32(v[13], DCT_CONST_BITS); - lstep1[58] = _mm256_srai_epi32(v[14], DCT_CONST_BITS); - lstep1[59] = _mm256_srai_epi32(v[15], DCT_CONST_BITS); - } - // stage 5 - { - lstep2[ 8] = _mm256_add_epi32(lstep1[10], lstep3[ 8]); - lstep2[ 9] = _mm256_add_epi32(lstep1[11], lstep3[ 9]); - lstep2[10] = _mm256_sub_epi32(lstep3[ 8], lstep1[10]); - lstep2[11] = _mm256_sub_epi32(lstep3[ 9], lstep1[11]); - lstep2[12] = _mm256_sub_epi32(lstep3[14], lstep1[12]); - lstep2[13] = _mm256_sub_epi32(lstep3[15], lstep1[13]); - lstep2[14] = _mm256_add_epi32(lstep1[12], lstep3[14]); - lstep2[15] = _mm256_add_epi32(lstep1[13], lstep3[15]); - } - { - const __m256i k32_p16_p16 = pair256_set_epi32(cospi_16_64, cospi_16_64); - const __m256i k32_p16_m16 = pair256_set_epi32(cospi_16_64, -cospi_16_64); - const __m256i k32_p24_p08 = pair256_set_epi32(cospi_24_64, cospi_8_64); - const __m256i k32_m08_p24 = pair256_set_epi32(-cospi_8_64, cospi_24_64); - - u[0] = _mm256_unpacklo_epi32(lstep1[0], lstep1[2]); - u[1] = _mm256_unpackhi_epi32(lstep1[0], lstep1[2]); - u[2] = _mm256_unpacklo_epi32(lstep1[1], lstep1[3]); - u[3] = _mm256_unpackhi_epi32(lstep1[1], lstep1[3]); - u[4] = _mm256_unpacklo_epi32(lstep1[4], lstep1[6]); - u[5] = _mm256_unpackhi_epi32(lstep1[4], lstep1[6]); - u[6] = _mm256_unpacklo_epi32(lstep1[5], lstep1[7]); - u[7] = _mm256_unpackhi_epi32(lstep1[5], lstep1[7]); - - // 
TODO(jingning): manually inline k_madd_epi32_avx2_ to further hide - // instruction latency. - v[ 0] = k_madd_epi32_avx2(u[0], k32_p16_p16); - v[ 1] = k_madd_epi32_avx2(u[1], k32_p16_p16); - v[ 2] = k_madd_epi32_avx2(u[2], k32_p16_p16); - v[ 3] = k_madd_epi32_avx2(u[3], k32_p16_p16); - v[ 4] = k_madd_epi32_avx2(u[0], k32_p16_m16); - v[ 5] = k_madd_epi32_avx2(u[1], k32_p16_m16); - v[ 6] = k_madd_epi32_avx2(u[2], k32_p16_m16); - v[ 7] = k_madd_epi32_avx2(u[3], k32_p16_m16); - v[ 8] = k_madd_epi32_avx2(u[4], k32_p24_p08); - v[ 9] = k_madd_epi32_avx2(u[5], k32_p24_p08); - v[10] = k_madd_epi32_avx2(u[6], k32_p24_p08); - v[11] = k_madd_epi32_avx2(u[7], k32_p24_p08); - v[12] = k_madd_epi32_avx2(u[4], k32_m08_p24); - v[13] = k_madd_epi32_avx2(u[5], k32_m08_p24); - v[14] = k_madd_epi32_avx2(u[6], k32_m08_p24); - v[15] = k_madd_epi32_avx2(u[7], k32_m08_p24); - - u[0] = k_packs_epi64_avx2(v[0], v[1]); - u[1] = k_packs_epi64_avx2(v[2], v[3]); - u[2] = k_packs_epi64_avx2(v[4], v[5]); - u[3] = k_packs_epi64_avx2(v[6], v[7]); - u[4] = k_packs_epi64_avx2(v[8], v[9]); - u[5] = k_packs_epi64_avx2(v[10], v[11]); - u[6] = k_packs_epi64_avx2(v[12], v[13]); - u[7] = k_packs_epi64_avx2(v[14], v[15]); - - v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); - v[4] = _mm256_add_epi32(u[4], k__DCT_CONST_ROUNDING); - v[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); - v[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); - v[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); - - u[0] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); - u[4] = _mm256_srai_epi32(v[4], DCT_CONST_BITS); - u[5] = _mm256_srai_epi32(v[5], DCT_CONST_BITS); - u[6] = _mm256_srai_epi32(v[6], DCT_CONST_BITS); 
- u[7] = _mm256_srai_epi32(v[7], DCT_CONST_BITS); - - sign[0] = _mm256_cmpgt_epi32(kZero,u[0]); - sign[1] = _mm256_cmpgt_epi32(kZero,u[1]); - sign[2] = _mm256_cmpgt_epi32(kZero,u[2]); - sign[3] = _mm256_cmpgt_epi32(kZero,u[3]); - sign[4] = _mm256_cmpgt_epi32(kZero,u[4]); - sign[5] = _mm256_cmpgt_epi32(kZero,u[5]); - sign[6] = _mm256_cmpgt_epi32(kZero,u[6]); - sign[7] = _mm256_cmpgt_epi32(kZero,u[7]); - - u[0] = _mm256_sub_epi32(u[0], sign[0]); - u[1] = _mm256_sub_epi32(u[1], sign[1]); - u[2] = _mm256_sub_epi32(u[2], sign[2]); - u[3] = _mm256_sub_epi32(u[3], sign[3]); - u[4] = _mm256_sub_epi32(u[4], sign[4]); - u[5] = _mm256_sub_epi32(u[5], sign[5]); - u[6] = _mm256_sub_epi32(u[6], sign[6]); - u[7] = _mm256_sub_epi32(u[7], sign[7]); - - u[0] = _mm256_add_epi32(u[0], K32One); - u[1] = _mm256_add_epi32(u[1], K32One); - u[2] = _mm256_add_epi32(u[2], K32One); - u[3] = _mm256_add_epi32(u[3], K32One); - u[4] = _mm256_add_epi32(u[4], K32One); - u[5] = _mm256_add_epi32(u[5], K32One); - u[6] = _mm256_add_epi32(u[6], K32One); - u[7] = _mm256_add_epi32(u[7], K32One); - - u[0] = _mm256_srai_epi32(u[0], 2); - u[1] = _mm256_srai_epi32(u[1], 2); - u[2] = _mm256_srai_epi32(u[2], 2); - u[3] = _mm256_srai_epi32(u[3], 2); - u[4] = _mm256_srai_epi32(u[4], 2); - u[5] = _mm256_srai_epi32(u[5], 2); - u[6] = _mm256_srai_epi32(u[6], 2); - u[7] = _mm256_srai_epi32(u[7], 2); - - // Combine - out[ 0] = _mm256_packs_epi32(u[0], u[1]); - out[16] = _mm256_packs_epi32(u[2], u[3]); - out[ 8] = _mm256_packs_epi32(u[4], u[5]); - out[24] = _mm256_packs_epi32(u[6], u[7]); - } - { - const __m256i k32_m08_p24 = pair256_set_epi32(-cospi_8_64, cospi_24_64); - const __m256i k32_m24_m08 = pair256_set_epi32(-cospi_24_64, -cospi_8_64); - const __m256i k32_p24_p08 = pair256_set_epi32(cospi_24_64, cospi_8_64); - - u[0] = _mm256_unpacklo_epi32(lstep1[18], lstep1[28]); - u[1] = _mm256_unpackhi_epi32(lstep1[18], lstep1[28]); - u[2] = _mm256_unpacklo_epi32(lstep1[19], lstep1[29]); - u[3] = 
_mm256_unpackhi_epi32(lstep1[19], lstep1[29]); - u[4] = _mm256_unpacklo_epi32(lstep1[20], lstep1[26]); - u[5] = _mm256_unpackhi_epi32(lstep1[20], lstep1[26]); - u[6] = _mm256_unpacklo_epi32(lstep1[21], lstep1[27]); - u[7] = _mm256_unpackhi_epi32(lstep1[21], lstep1[27]); - - v[0] = k_madd_epi32_avx2(u[0], k32_m08_p24); - v[1] = k_madd_epi32_avx2(u[1], k32_m08_p24); - v[2] = k_madd_epi32_avx2(u[2], k32_m08_p24); - v[3] = k_madd_epi32_avx2(u[3], k32_m08_p24); - v[4] = k_madd_epi32_avx2(u[4], k32_m24_m08); - v[5] = k_madd_epi32_avx2(u[5], k32_m24_m08); - v[6] = k_madd_epi32_avx2(u[6], k32_m24_m08); - v[7] = k_madd_epi32_avx2(u[7], k32_m24_m08); - v[ 8] = k_madd_epi32_avx2(u[4], k32_m08_p24); - v[ 9] = k_madd_epi32_avx2(u[5], k32_m08_p24); - v[10] = k_madd_epi32_avx2(u[6], k32_m08_p24); - v[11] = k_madd_epi32_avx2(u[7], k32_m08_p24); - v[12] = k_madd_epi32_avx2(u[0], k32_p24_p08); - v[13] = k_madd_epi32_avx2(u[1], k32_p24_p08); - v[14] = k_madd_epi32_avx2(u[2], k32_p24_p08); - v[15] = k_madd_epi32_avx2(u[3], k32_p24_p08); - - u[0] = k_packs_epi64_avx2(v[0], v[1]); - u[1] = k_packs_epi64_avx2(v[2], v[3]); - u[2] = k_packs_epi64_avx2(v[4], v[5]); - u[3] = k_packs_epi64_avx2(v[6], v[7]); - u[4] = k_packs_epi64_avx2(v[8], v[9]); - u[5] = k_packs_epi64_avx2(v[10], v[11]); - u[6] = k_packs_epi64_avx2(v[12], v[13]); - u[7] = k_packs_epi64_avx2(v[14], v[15]); - - u[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); - u[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); - u[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); - u[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); - u[4] = _mm256_add_epi32(u[4], k__DCT_CONST_ROUNDING); - u[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); - u[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); - u[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); - - lstep2[18] = _mm256_srai_epi32(u[0], DCT_CONST_BITS); - lstep2[19] = _mm256_srai_epi32(u[1], DCT_CONST_BITS); - lstep2[20] = _mm256_srai_epi32(u[2], DCT_CONST_BITS); - 
lstep2[21] = _mm256_srai_epi32(u[3], DCT_CONST_BITS); - lstep2[26] = _mm256_srai_epi32(u[4], DCT_CONST_BITS); - lstep2[27] = _mm256_srai_epi32(u[5], DCT_CONST_BITS); - lstep2[28] = _mm256_srai_epi32(u[6], DCT_CONST_BITS); - lstep2[29] = _mm256_srai_epi32(u[7], DCT_CONST_BITS); - } - { - lstep2[32] = _mm256_add_epi32(lstep1[38], lstep3[32]); - lstep2[33] = _mm256_add_epi32(lstep1[39], lstep3[33]); - lstep2[34] = _mm256_add_epi32(lstep1[36], lstep3[34]); - lstep2[35] = _mm256_add_epi32(lstep1[37], lstep3[35]); - lstep2[36] = _mm256_sub_epi32(lstep3[34], lstep1[36]); - lstep2[37] = _mm256_sub_epi32(lstep3[35], lstep1[37]); - lstep2[38] = _mm256_sub_epi32(lstep3[32], lstep1[38]); - lstep2[39] = _mm256_sub_epi32(lstep3[33], lstep1[39]); - lstep2[40] = _mm256_sub_epi32(lstep3[46], lstep1[40]); - lstep2[41] = _mm256_sub_epi32(lstep3[47], lstep1[41]); - lstep2[42] = _mm256_sub_epi32(lstep3[44], lstep1[42]); - lstep2[43] = _mm256_sub_epi32(lstep3[45], lstep1[43]); - lstep2[44] = _mm256_add_epi32(lstep1[42], lstep3[44]); - lstep2[45] = _mm256_add_epi32(lstep1[43], lstep3[45]); - lstep2[46] = _mm256_add_epi32(lstep1[40], lstep3[46]); - lstep2[47] = _mm256_add_epi32(lstep1[41], lstep3[47]); - lstep2[48] = _mm256_add_epi32(lstep1[54], lstep3[48]); - lstep2[49] = _mm256_add_epi32(lstep1[55], lstep3[49]); - lstep2[50] = _mm256_add_epi32(lstep1[52], lstep3[50]); - lstep2[51] = _mm256_add_epi32(lstep1[53], lstep3[51]); - lstep2[52] = _mm256_sub_epi32(lstep3[50], lstep1[52]); - lstep2[53] = _mm256_sub_epi32(lstep3[51], lstep1[53]); - lstep2[54] = _mm256_sub_epi32(lstep3[48], lstep1[54]); - lstep2[55] = _mm256_sub_epi32(lstep3[49], lstep1[55]); - lstep2[56] = _mm256_sub_epi32(lstep3[62], lstep1[56]); - lstep2[57] = _mm256_sub_epi32(lstep3[63], lstep1[57]); - lstep2[58] = _mm256_sub_epi32(lstep3[60], lstep1[58]); - lstep2[59] = _mm256_sub_epi32(lstep3[61], lstep1[59]); - lstep2[60] = _mm256_add_epi32(lstep1[58], lstep3[60]); - lstep2[61] = _mm256_add_epi32(lstep1[59], lstep3[61]); - 
lstep2[62] = _mm256_add_epi32(lstep1[56], lstep3[62]); - lstep2[63] = _mm256_add_epi32(lstep1[57], lstep3[63]); - } - // stage 6 - { - const __m256i k32_p28_p04 = pair256_set_epi32(cospi_28_64, cospi_4_64); - const __m256i k32_p12_p20 = pair256_set_epi32(cospi_12_64, cospi_20_64); - const __m256i k32_m20_p12 = pair256_set_epi32(-cospi_20_64, cospi_12_64); - const __m256i k32_m04_p28 = pair256_set_epi32(-cospi_4_64, cospi_28_64); - - u[0] = _mm256_unpacklo_epi32(lstep2[ 8], lstep2[14]); - u[1] = _mm256_unpackhi_epi32(lstep2[ 8], lstep2[14]); - u[2] = _mm256_unpacklo_epi32(lstep2[ 9], lstep2[15]); - u[3] = _mm256_unpackhi_epi32(lstep2[ 9], lstep2[15]); - u[4] = _mm256_unpacklo_epi32(lstep2[10], lstep2[12]); - u[5] = _mm256_unpackhi_epi32(lstep2[10], lstep2[12]); - u[6] = _mm256_unpacklo_epi32(lstep2[11], lstep2[13]); - u[7] = _mm256_unpackhi_epi32(lstep2[11], lstep2[13]); - u[8] = _mm256_unpacklo_epi32(lstep2[10], lstep2[12]); - u[9] = _mm256_unpackhi_epi32(lstep2[10], lstep2[12]); - u[10] = _mm256_unpacklo_epi32(lstep2[11], lstep2[13]); - u[11] = _mm256_unpackhi_epi32(lstep2[11], lstep2[13]); - u[12] = _mm256_unpacklo_epi32(lstep2[ 8], lstep2[14]); - u[13] = _mm256_unpackhi_epi32(lstep2[ 8], lstep2[14]); - u[14] = _mm256_unpacklo_epi32(lstep2[ 9], lstep2[15]); - u[15] = _mm256_unpackhi_epi32(lstep2[ 9], lstep2[15]); - - v[0] = k_madd_epi32_avx2(u[0], k32_p28_p04); - v[1] = k_madd_epi32_avx2(u[1], k32_p28_p04); - v[2] = k_madd_epi32_avx2(u[2], k32_p28_p04); - v[3] = k_madd_epi32_avx2(u[3], k32_p28_p04); - v[4] = k_madd_epi32_avx2(u[4], k32_p12_p20); - v[5] = k_madd_epi32_avx2(u[5], k32_p12_p20); - v[6] = k_madd_epi32_avx2(u[6], k32_p12_p20); - v[7] = k_madd_epi32_avx2(u[7], k32_p12_p20); - v[ 8] = k_madd_epi32_avx2(u[ 8], k32_m20_p12); - v[ 9] = k_madd_epi32_avx2(u[ 9], k32_m20_p12); - v[10] = k_madd_epi32_avx2(u[10], k32_m20_p12); - v[11] = k_madd_epi32_avx2(u[11], k32_m20_p12); - v[12] = k_madd_epi32_avx2(u[12], k32_m04_p28); - v[13] = k_madd_epi32_avx2(u[13], 
k32_m04_p28); - v[14] = k_madd_epi32_avx2(u[14], k32_m04_p28); - v[15] = k_madd_epi32_avx2(u[15], k32_m04_p28); - - u[0] = k_packs_epi64_avx2(v[0], v[1]); - u[1] = k_packs_epi64_avx2(v[2], v[3]); - u[2] = k_packs_epi64_avx2(v[4], v[5]); - u[3] = k_packs_epi64_avx2(v[6], v[7]); - u[4] = k_packs_epi64_avx2(v[8], v[9]); - u[5] = k_packs_epi64_avx2(v[10], v[11]); - u[6] = k_packs_epi64_avx2(v[12], v[13]); - u[7] = k_packs_epi64_avx2(v[14], v[15]); - - v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); - v[4] = _mm256_add_epi32(u[4], k__DCT_CONST_ROUNDING); - v[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); - v[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); - v[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); - - u[0] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); - u[4] = _mm256_srai_epi32(v[4], DCT_CONST_BITS); - u[5] = _mm256_srai_epi32(v[5], DCT_CONST_BITS); - u[6] = _mm256_srai_epi32(v[6], DCT_CONST_BITS); - u[7] = _mm256_srai_epi32(v[7], DCT_CONST_BITS); - - sign[0] = _mm256_cmpgt_epi32(kZero,u[0]); - sign[1] = _mm256_cmpgt_epi32(kZero,u[1]); - sign[2] = _mm256_cmpgt_epi32(kZero,u[2]); - sign[3] = _mm256_cmpgt_epi32(kZero,u[3]); - sign[4] = _mm256_cmpgt_epi32(kZero,u[4]); - sign[5] = _mm256_cmpgt_epi32(kZero,u[5]); - sign[6] = _mm256_cmpgt_epi32(kZero,u[6]); - sign[7] = _mm256_cmpgt_epi32(kZero,u[7]); - - u[0] = _mm256_sub_epi32(u[0], sign[0]); - u[1] = _mm256_sub_epi32(u[1], sign[1]); - u[2] = _mm256_sub_epi32(u[2], sign[2]); - u[3] = _mm256_sub_epi32(u[3], sign[3]); - u[4] = _mm256_sub_epi32(u[4], sign[4]); - u[5] = _mm256_sub_epi32(u[5], sign[5]); - u[6] = _mm256_sub_epi32(u[6], sign[6]); - u[7] = _mm256_sub_epi32(u[7], sign[7]); - - 
u[0] = _mm256_add_epi32(u[0], K32One); - u[1] = _mm256_add_epi32(u[1], K32One); - u[2] = _mm256_add_epi32(u[2], K32One); - u[3] = _mm256_add_epi32(u[3], K32One); - u[4] = _mm256_add_epi32(u[4], K32One); - u[5] = _mm256_add_epi32(u[5], K32One); - u[6] = _mm256_add_epi32(u[6], K32One); - u[7] = _mm256_add_epi32(u[7], K32One); - - u[0] = _mm256_srai_epi32(u[0], 2); - u[1] = _mm256_srai_epi32(u[1], 2); - u[2] = _mm256_srai_epi32(u[2], 2); - u[3] = _mm256_srai_epi32(u[3], 2); - u[4] = _mm256_srai_epi32(u[4], 2); - u[5] = _mm256_srai_epi32(u[5], 2); - u[6] = _mm256_srai_epi32(u[6], 2); - u[7] = _mm256_srai_epi32(u[7], 2); - - out[ 4] = _mm256_packs_epi32(u[0], u[1]); - out[20] = _mm256_packs_epi32(u[2], u[3]); - out[12] = _mm256_packs_epi32(u[4], u[5]); - out[28] = _mm256_packs_epi32(u[6], u[7]); - } - { - lstep3[16] = _mm256_add_epi32(lstep2[18], lstep1[16]); - lstep3[17] = _mm256_add_epi32(lstep2[19], lstep1[17]); - lstep3[18] = _mm256_sub_epi32(lstep1[16], lstep2[18]); - lstep3[19] = _mm256_sub_epi32(lstep1[17], lstep2[19]); - lstep3[20] = _mm256_sub_epi32(lstep1[22], lstep2[20]); - lstep3[21] = _mm256_sub_epi32(lstep1[23], lstep2[21]); - lstep3[22] = _mm256_add_epi32(lstep2[20], lstep1[22]); - lstep3[23] = _mm256_add_epi32(lstep2[21], lstep1[23]); - lstep3[24] = _mm256_add_epi32(lstep2[26], lstep1[24]); - lstep3[25] = _mm256_add_epi32(lstep2[27], lstep1[25]); - lstep3[26] = _mm256_sub_epi32(lstep1[24], lstep2[26]); - lstep3[27] = _mm256_sub_epi32(lstep1[25], lstep2[27]); - lstep3[28] = _mm256_sub_epi32(lstep1[30], lstep2[28]); - lstep3[29] = _mm256_sub_epi32(lstep1[31], lstep2[29]); - lstep3[30] = _mm256_add_epi32(lstep2[28], lstep1[30]); - lstep3[31] = _mm256_add_epi32(lstep2[29], lstep1[31]); - } - { - const __m256i k32_m04_p28 = pair256_set_epi32(-cospi_4_64, cospi_28_64); - const __m256i k32_m28_m04 = pair256_set_epi32(-cospi_28_64, -cospi_4_64); - const __m256i k32_m20_p12 = pair256_set_epi32(-cospi_20_64, cospi_12_64); - const __m256i k32_m12_m20 = 
pair256_set_epi32(-cospi_12_64, - -cospi_20_64); - const __m256i k32_p12_p20 = pair256_set_epi32(cospi_12_64, cospi_20_64); - const __m256i k32_p28_p04 = pair256_set_epi32(cospi_28_64, cospi_4_64); - - u[ 0] = _mm256_unpacklo_epi32(lstep2[34], lstep2[60]); - u[ 1] = _mm256_unpackhi_epi32(lstep2[34], lstep2[60]); - u[ 2] = _mm256_unpacklo_epi32(lstep2[35], lstep2[61]); - u[ 3] = _mm256_unpackhi_epi32(lstep2[35], lstep2[61]); - u[ 4] = _mm256_unpacklo_epi32(lstep2[36], lstep2[58]); - u[ 5] = _mm256_unpackhi_epi32(lstep2[36], lstep2[58]); - u[ 6] = _mm256_unpacklo_epi32(lstep2[37], lstep2[59]); - u[ 7] = _mm256_unpackhi_epi32(lstep2[37], lstep2[59]); - u[ 8] = _mm256_unpacklo_epi32(lstep2[42], lstep2[52]); - u[ 9] = _mm256_unpackhi_epi32(lstep2[42], lstep2[52]); - u[10] = _mm256_unpacklo_epi32(lstep2[43], lstep2[53]); - u[11] = _mm256_unpackhi_epi32(lstep2[43], lstep2[53]); - u[12] = _mm256_unpacklo_epi32(lstep2[44], lstep2[50]); - u[13] = _mm256_unpackhi_epi32(lstep2[44], lstep2[50]); - u[14] = _mm256_unpacklo_epi32(lstep2[45], lstep2[51]); - u[15] = _mm256_unpackhi_epi32(lstep2[45], lstep2[51]); - - v[ 0] = k_madd_epi32_avx2(u[ 0], k32_m04_p28); - v[ 1] = k_madd_epi32_avx2(u[ 1], k32_m04_p28); - v[ 2] = k_madd_epi32_avx2(u[ 2], k32_m04_p28); - v[ 3] = k_madd_epi32_avx2(u[ 3], k32_m04_p28); - v[ 4] = k_madd_epi32_avx2(u[ 4], k32_m28_m04); - v[ 5] = k_madd_epi32_avx2(u[ 5], k32_m28_m04); - v[ 6] = k_madd_epi32_avx2(u[ 6], k32_m28_m04); - v[ 7] = k_madd_epi32_avx2(u[ 7], k32_m28_m04); - v[ 8] = k_madd_epi32_avx2(u[ 8], k32_m20_p12); - v[ 9] = k_madd_epi32_avx2(u[ 9], k32_m20_p12); - v[10] = k_madd_epi32_avx2(u[10], k32_m20_p12); - v[11] = k_madd_epi32_avx2(u[11], k32_m20_p12); - v[12] = k_madd_epi32_avx2(u[12], k32_m12_m20); - v[13] = k_madd_epi32_avx2(u[13], k32_m12_m20); - v[14] = k_madd_epi32_avx2(u[14], k32_m12_m20); - v[15] = k_madd_epi32_avx2(u[15], k32_m12_m20); - v[16] = k_madd_epi32_avx2(u[12], k32_m20_p12); - v[17] = k_madd_epi32_avx2(u[13], k32_m20_p12); - 
v[18] = k_madd_epi32_avx2(u[14], k32_m20_p12); - v[19] = k_madd_epi32_avx2(u[15], k32_m20_p12); - v[20] = k_madd_epi32_avx2(u[ 8], k32_p12_p20); - v[21] = k_madd_epi32_avx2(u[ 9], k32_p12_p20); - v[22] = k_madd_epi32_avx2(u[10], k32_p12_p20); - v[23] = k_madd_epi32_avx2(u[11], k32_p12_p20); - v[24] = k_madd_epi32_avx2(u[ 4], k32_m04_p28); - v[25] = k_madd_epi32_avx2(u[ 5], k32_m04_p28); - v[26] = k_madd_epi32_avx2(u[ 6], k32_m04_p28); - v[27] = k_madd_epi32_avx2(u[ 7], k32_m04_p28); - v[28] = k_madd_epi32_avx2(u[ 0], k32_p28_p04); - v[29] = k_madd_epi32_avx2(u[ 1], k32_p28_p04); - v[30] = k_madd_epi32_avx2(u[ 2], k32_p28_p04); - v[31] = k_madd_epi32_avx2(u[ 3], k32_p28_p04); - - u[ 0] = k_packs_epi64_avx2(v[ 0], v[ 1]); - u[ 1] = k_packs_epi64_avx2(v[ 2], v[ 3]); - u[ 2] = k_packs_epi64_avx2(v[ 4], v[ 5]); - u[ 3] = k_packs_epi64_avx2(v[ 6], v[ 7]); - u[ 4] = k_packs_epi64_avx2(v[ 8], v[ 9]); - u[ 5] = k_packs_epi64_avx2(v[10], v[11]); - u[ 6] = k_packs_epi64_avx2(v[12], v[13]); - u[ 7] = k_packs_epi64_avx2(v[14], v[15]); - u[ 8] = k_packs_epi64_avx2(v[16], v[17]); - u[ 9] = k_packs_epi64_avx2(v[18], v[19]); - u[10] = k_packs_epi64_avx2(v[20], v[21]); - u[11] = k_packs_epi64_avx2(v[22], v[23]); - u[12] = k_packs_epi64_avx2(v[24], v[25]); - u[13] = k_packs_epi64_avx2(v[26], v[27]); - u[14] = k_packs_epi64_avx2(v[28], v[29]); - u[15] = k_packs_epi64_avx2(v[30], v[31]); - - v[ 0] = _mm256_add_epi32(u[ 0], k__DCT_CONST_ROUNDING); - v[ 1] = _mm256_add_epi32(u[ 1], k__DCT_CONST_ROUNDING); - v[ 2] = _mm256_add_epi32(u[ 2], k__DCT_CONST_ROUNDING); - v[ 3] = _mm256_add_epi32(u[ 3], k__DCT_CONST_ROUNDING); - v[ 4] = _mm256_add_epi32(u[ 4], k__DCT_CONST_ROUNDING); - v[ 5] = _mm256_add_epi32(u[ 5], k__DCT_CONST_ROUNDING); - v[ 6] = _mm256_add_epi32(u[ 6], k__DCT_CONST_ROUNDING); - v[ 7] = _mm256_add_epi32(u[ 7], k__DCT_CONST_ROUNDING); - v[ 8] = _mm256_add_epi32(u[ 8], k__DCT_CONST_ROUNDING); - v[ 9] = _mm256_add_epi32(u[ 9], k__DCT_CONST_ROUNDING); - v[10] = 
_mm256_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = _mm256_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm256_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm256_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm256_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm256_add_epi32(u[15], k__DCT_CONST_ROUNDING); - - lstep3[34] = _mm256_srai_epi32(v[ 0], DCT_CONST_BITS); - lstep3[35] = _mm256_srai_epi32(v[ 1], DCT_CONST_BITS); - lstep3[36] = _mm256_srai_epi32(v[ 2], DCT_CONST_BITS); - lstep3[37] = _mm256_srai_epi32(v[ 3], DCT_CONST_BITS); - lstep3[42] = _mm256_srai_epi32(v[ 4], DCT_CONST_BITS); - lstep3[43] = _mm256_srai_epi32(v[ 5], DCT_CONST_BITS); - lstep3[44] = _mm256_srai_epi32(v[ 6], DCT_CONST_BITS); - lstep3[45] = _mm256_srai_epi32(v[ 7], DCT_CONST_BITS); - lstep3[50] = _mm256_srai_epi32(v[ 8], DCT_CONST_BITS); - lstep3[51] = _mm256_srai_epi32(v[ 9], DCT_CONST_BITS); - lstep3[52] = _mm256_srai_epi32(v[10], DCT_CONST_BITS); - lstep3[53] = _mm256_srai_epi32(v[11], DCT_CONST_BITS); - lstep3[58] = _mm256_srai_epi32(v[12], DCT_CONST_BITS); - lstep3[59] = _mm256_srai_epi32(v[13], DCT_CONST_BITS); - lstep3[60] = _mm256_srai_epi32(v[14], DCT_CONST_BITS); - lstep3[61] = _mm256_srai_epi32(v[15], DCT_CONST_BITS); - } - // stage 7 - { - const __m256i k32_p30_p02 = pair256_set_epi32(cospi_30_64, cospi_2_64); - const __m256i k32_p14_p18 = pair256_set_epi32(cospi_14_64, cospi_18_64); - const __m256i k32_p22_p10 = pair256_set_epi32(cospi_22_64, cospi_10_64); - const __m256i k32_p06_p26 = pair256_set_epi32(cospi_6_64, cospi_26_64); - const __m256i k32_m26_p06 = pair256_set_epi32(-cospi_26_64, cospi_6_64); - const __m256i k32_m10_p22 = pair256_set_epi32(-cospi_10_64, cospi_22_64); - const __m256i k32_m18_p14 = pair256_set_epi32(-cospi_18_64, cospi_14_64); - const __m256i k32_m02_p30 = pair256_set_epi32(-cospi_2_64, cospi_30_64); - - u[ 0] = _mm256_unpacklo_epi32(lstep3[16], lstep3[30]); - u[ 1] = _mm256_unpackhi_epi32(lstep3[16], lstep3[30]); - u[ 2] = 
_mm256_unpacklo_epi32(lstep3[17], lstep3[31]); - u[ 3] = _mm256_unpackhi_epi32(lstep3[17], lstep3[31]); - u[ 4] = _mm256_unpacklo_epi32(lstep3[18], lstep3[28]); - u[ 5] = _mm256_unpackhi_epi32(lstep3[18], lstep3[28]); - u[ 6] = _mm256_unpacklo_epi32(lstep3[19], lstep3[29]); - u[ 7] = _mm256_unpackhi_epi32(lstep3[19], lstep3[29]); - u[ 8] = _mm256_unpacklo_epi32(lstep3[20], lstep3[26]); - u[ 9] = _mm256_unpackhi_epi32(lstep3[20], lstep3[26]); - u[10] = _mm256_unpacklo_epi32(lstep3[21], lstep3[27]); - u[11] = _mm256_unpackhi_epi32(lstep3[21], lstep3[27]); - u[12] = _mm256_unpacklo_epi32(lstep3[22], lstep3[24]); - u[13] = _mm256_unpackhi_epi32(lstep3[22], lstep3[24]); - u[14] = _mm256_unpacklo_epi32(lstep3[23], lstep3[25]); - u[15] = _mm256_unpackhi_epi32(lstep3[23], lstep3[25]); - - v[ 0] = k_madd_epi32_avx2(u[ 0], k32_p30_p02); - v[ 1] = k_madd_epi32_avx2(u[ 1], k32_p30_p02); - v[ 2] = k_madd_epi32_avx2(u[ 2], k32_p30_p02); - v[ 3] = k_madd_epi32_avx2(u[ 3], k32_p30_p02); - v[ 4] = k_madd_epi32_avx2(u[ 4], k32_p14_p18); - v[ 5] = k_madd_epi32_avx2(u[ 5], k32_p14_p18); - v[ 6] = k_madd_epi32_avx2(u[ 6], k32_p14_p18); - v[ 7] = k_madd_epi32_avx2(u[ 7], k32_p14_p18); - v[ 8] = k_madd_epi32_avx2(u[ 8], k32_p22_p10); - v[ 9] = k_madd_epi32_avx2(u[ 9], k32_p22_p10); - v[10] = k_madd_epi32_avx2(u[10], k32_p22_p10); - v[11] = k_madd_epi32_avx2(u[11], k32_p22_p10); - v[12] = k_madd_epi32_avx2(u[12], k32_p06_p26); - v[13] = k_madd_epi32_avx2(u[13], k32_p06_p26); - v[14] = k_madd_epi32_avx2(u[14], k32_p06_p26); - v[15] = k_madd_epi32_avx2(u[15], k32_p06_p26); - v[16] = k_madd_epi32_avx2(u[12], k32_m26_p06); - v[17] = k_madd_epi32_avx2(u[13], k32_m26_p06); - v[18] = k_madd_epi32_avx2(u[14], k32_m26_p06); - v[19] = k_madd_epi32_avx2(u[15], k32_m26_p06); - v[20] = k_madd_epi32_avx2(u[ 8], k32_m10_p22); - v[21] = k_madd_epi32_avx2(u[ 9], k32_m10_p22); - v[22] = k_madd_epi32_avx2(u[10], k32_m10_p22); - v[23] = k_madd_epi32_avx2(u[11], k32_m10_p22); - v[24] = k_madd_epi32_avx2(u[ 
4], k32_m18_p14); - v[25] = k_madd_epi32_avx2(u[ 5], k32_m18_p14); - v[26] = k_madd_epi32_avx2(u[ 6], k32_m18_p14); - v[27] = k_madd_epi32_avx2(u[ 7], k32_m18_p14); - v[28] = k_madd_epi32_avx2(u[ 0], k32_m02_p30); - v[29] = k_madd_epi32_avx2(u[ 1], k32_m02_p30); - v[30] = k_madd_epi32_avx2(u[ 2], k32_m02_p30); - v[31] = k_madd_epi32_avx2(u[ 3], k32_m02_p30); - - u[ 0] = k_packs_epi64_avx2(v[ 0], v[ 1]); - u[ 1] = k_packs_epi64_avx2(v[ 2], v[ 3]); - u[ 2] = k_packs_epi64_avx2(v[ 4], v[ 5]); - u[ 3] = k_packs_epi64_avx2(v[ 6], v[ 7]); - u[ 4] = k_packs_epi64_avx2(v[ 8], v[ 9]); - u[ 5] = k_packs_epi64_avx2(v[10], v[11]); - u[ 6] = k_packs_epi64_avx2(v[12], v[13]); - u[ 7] = k_packs_epi64_avx2(v[14], v[15]); - u[ 8] = k_packs_epi64_avx2(v[16], v[17]); - u[ 9] = k_packs_epi64_avx2(v[18], v[19]); - u[10] = k_packs_epi64_avx2(v[20], v[21]); - u[11] = k_packs_epi64_avx2(v[22], v[23]); - u[12] = k_packs_epi64_avx2(v[24], v[25]); - u[13] = k_packs_epi64_avx2(v[26], v[27]); - u[14] = k_packs_epi64_avx2(v[28], v[29]); - u[15] = k_packs_epi64_avx2(v[30], v[31]); - - v[ 0] = _mm256_add_epi32(u[ 0], k__DCT_CONST_ROUNDING); - v[ 1] = _mm256_add_epi32(u[ 1], k__DCT_CONST_ROUNDING); - v[ 2] = _mm256_add_epi32(u[ 2], k__DCT_CONST_ROUNDING); - v[ 3] = _mm256_add_epi32(u[ 3], k__DCT_CONST_ROUNDING); - v[ 4] = _mm256_add_epi32(u[ 4], k__DCT_CONST_ROUNDING); - v[ 5] = _mm256_add_epi32(u[ 5], k__DCT_CONST_ROUNDING); - v[ 6] = _mm256_add_epi32(u[ 6], k__DCT_CONST_ROUNDING); - v[ 7] = _mm256_add_epi32(u[ 7], k__DCT_CONST_ROUNDING); - v[ 8] = _mm256_add_epi32(u[ 8], k__DCT_CONST_ROUNDING); - v[ 9] = _mm256_add_epi32(u[ 9], k__DCT_CONST_ROUNDING); - v[10] = _mm256_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = _mm256_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm256_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm256_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm256_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm256_add_epi32(u[15], k__DCT_CONST_ROUNDING); - 
- u[ 0] = _mm256_srai_epi32(v[ 0], DCT_CONST_BITS); - u[ 1] = _mm256_srai_epi32(v[ 1], DCT_CONST_BITS); - u[ 2] = _mm256_srai_epi32(v[ 2], DCT_CONST_BITS); - u[ 3] = _mm256_srai_epi32(v[ 3], DCT_CONST_BITS); - u[ 4] = _mm256_srai_epi32(v[ 4], DCT_CONST_BITS); - u[ 5] = _mm256_srai_epi32(v[ 5], DCT_CONST_BITS); - u[ 6] = _mm256_srai_epi32(v[ 6], DCT_CONST_BITS); - u[ 7] = _mm256_srai_epi32(v[ 7], DCT_CONST_BITS); - u[ 8] = _mm256_srai_epi32(v[ 8], DCT_CONST_BITS); - u[ 9] = _mm256_srai_epi32(v[ 9], DCT_CONST_BITS); - u[10] = _mm256_srai_epi32(v[10], DCT_CONST_BITS); - u[11] = _mm256_srai_epi32(v[11], DCT_CONST_BITS); - u[12] = _mm256_srai_epi32(v[12], DCT_CONST_BITS); - u[13] = _mm256_srai_epi32(v[13], DCT_CONST_BITS); - u[14] = _mm256_srai_epi32(v[14], DCT_CONST_BITS); - u[15] = _mm256_srai_epi32(v[15], DCT_CONST_BITS); - - v[ 0] = _mm256_cmpgt_epi32(kZero,u[ 0]); - v[ 1] = _mm256_cmpgt_epi32(kZero,u[ 1]); - v[ 2] = _mm256_cmpgt_epi32(kZero,u[ 2]); - v[ 3] = _mm256_cmpgt_epi32(kZero,u[ 3]); - v[ 4] = _mm256_cmpgt_epi32(kZero,u[ 4]); - v[ 5] = _mm256_cmpgt_epi32(kZero,u[ 5]); - v[ 6] = _mm256_cmpgt_epi32(kZero,u[ 6]); - v[ 7] = _mm256_cmpgt_epi32(kZero,u[ 7]); - v[ 8] = _mm256_cmpgt_epi32(kZero,u[ 8]); - v[ 9] = _mm256_cmpgt_epi32(kZero,u[ 9]); - v[10] = _mm256_cmpgt_epi32(kZero,u[10]); - v[11] = _mm256_cmpgt_epi32(kZero,u[11]); - v[12] = _mm256_cmpgt_epi32(kZero,u[12]); - v[13] = _mm256_cmpgt_epi32(kZero,u[13]); - v[14] = _mm256_cmpgt_epi32(kZero,u[14]); - v[15] = _mm256_cmpgt_epi32(kZero,u[15]); - - u[ 0] = _mm256_sub_epi32(u[ 0], v[ 0]); - u[ 1] = _mm256_sub_epi32(u[ 1], v[ 1]); - u[ 2] = _mm256_sub_epi32(u[ 2], v[ 2]); - u[ 3] = _mm256_sub_epi32(u[ 3], v[ 3]); - u[ 4] = _mm256_sub_epi32(u[ 4], v[ 4]); - u[ 5] = _mm256_sub_epi32(u[ 5], v[ 5]); - u[ 6] = _mm256_sub_epi32(u[ 6], v[ 6]); - u[ 7] = _mm256_sub_epi32(u[ 7], v[ 7]); - u[ 8] = _mm256_sub_epi32(u[ 8], v[ 8]); - u[ 9] = _mm256_sub_epi32(u[ 9], v[ 9]); - u[10] = _mm256_sub_epi32(u[10], v[10]); - u[11] = 
_mm256_sub_epi32(u[11], v[11]); - u[12] = _mm256_sub_epi32(u[12], v[12]); - u[13] = _mm256_sub_epi32(u[13], v[13]); - u[14] = _mm256_sub_epi32(u[14], v[14]); - u[15] = _mm256_sub_epi32(u[15], v[15]); - - v[ 0] = _mm256_add_epi32(u[ 0], K32One); - v[ 1] = _mm256_add_epi32(u[ 1], K32One); - v[ 2] = _mm256_add_epi32(u[ 2], K32One); - v[ 3] = _mm256_add_epi32(u[ 3], K32One); - v[ 4] = _mm256_add_epi32(u[ 4], K32One); - v[ 5] = _mm256_add_epi32(u[ 5], K32One); - v[ 6] = _mm256_add_epi32(u[ 6], K32One); - v[ 7] = _mm256_add_epi32(u[ 7], K32One); - v[ 8] = _mm256_add_epi32(u[ 8], K32One); - v[ 9] = _mm256_add_epi32(u[ 9], K32One); - v[10] = _mm256_add_epi32(u[10], K32One); - v[11] = _mm256_add_epi32(u[11], K32One); - v[12] = _mm256_add_epi32(u[12], K32One); - v[13] = _mm256_add_epi32(u[13], K32One); - v[14] = _mm256_add_epi32(u[14], K32One); - v[15] = _mm256_add_epi32(u[15], K32One); - - u[ 0] = _mm256_srai_epi32(v[ 0], 2); - u[ 1] = _mm256_srai_epi32(v[ 1], 2); - u[ 2] = _mm256_srai_epi32(v[ 2], 2); - u[ 3] = _mm256_srai_epi32(v[ 3], 2); - u[ 4] = _mm256_srai_epi32(v[ 4], 2); - u[ 5] = _mm256_srai_epi32(v[ 5], 2); - u[ 6] = _mm256_srai_epi32(v[ 6], 2); - u[ 7] = _mm256_srai_epi32(v[ 7], 2); - u[ 8] = _mm256_srai_epi32(v[ 8], 2); - u[ 9] = _mm256_srai_epi32(v[ 9], 2); - u[10] = _mm256_srai_epi32(v[10], 2); - u[11] = _mm256_srai_epi32(v[11], 2); - u[12] = _mm256_srai_epi32(v[12], 2); - u[13] = _mm256_srai_epi32(v[13], 2); - u[14] = _mm256_srai_epi32(v[14], 2); - u[15] = _mm256_srai_epi32(v[15], 2); - - out[ 2] = _mm256_packs_epi32(u[0], u[1]); - out[18] = _mm256_packs_epi32(u[2], u[3]); - out[10] = _mm256_packs_epi32(u[4], u[5]); - out[26] = _mm256_packs_epi32(u[6], u[7]); - out[ 6] = _mm256_packs_epi32(u[8], u[9]); - out[22] = _mm256_packs_epi32(u[10], u[11]); - out[14] = _mm256_packs_epi32(u[12], u[13]); - out[30] = _mm256_packs_epi32(u[14], u[15]); - } - { - lstep1[32] = _mm256_add_epi32(lstep3[34], lstep2[32]); - lstep1[33] = _mm256_add_epi32(lstep3[35], lstep2[33]); - 
lstep1[34] = _mm256_sub_epi32(lstep2[32], lstep3[34]); - lstep1[35] = _mm256_sub_epi32(lstep2[33], lstep3[35]); - lstep1[36] = _mm256_sub_epi32(lstep2[38], lstep3[36]); - lstep1[37] = _mm256_sub_epi32(lstep2[39], lstep3[37]); - lstep1[38] = _mm256_add_epi32(lstep3[36], lstep2[38]); - lstep1[39] = _mm256_add_epi32(lstep3[37], lstep2[39]); - lstep1[40] = _mm256_add_epi32(lstep3[42], lstep2[40]); - lstep1[41] = _mm256_add_epi32(lstep3[43], lstep2[41]); - lstep1[42] = _mm256_sub_epi32(lstep2[40], lstep3[42]); - lstep1[43] = _mm256_sub_epi32(lstep2[41], lstep3[43]); - lstep1[44] = _mm256_sub_epi32(lstep2[46], lstep3[44]); - lstep1[45] = _mm256_sub_epi32(lstep2[47], lstep3[45]); - lstep1[46] = _mm256_add_epi32(lstep3[44], lstep2[46]); - lstep1[47] = _mm256_add_epi32(lstep3[45], lstep2[47]); - lstep1[48] = _mm256_add_epi32(lstep3[50], lstep2[48]); - lstep1[49] = _mm256_add_epi32(lstep3[51], lstep2[49]); - lstep1[50] = _mm256_sub_epi32(lstep2[48], lstep3[50]); - lstep1[51] = _mm256_sub_epi32(lstep2[49], lstep3[51]); - lstep1[52] = _mm256_sub_epi32(lstep2[54], lstep3[52]); - lstep1[53] = _mm256_sub_epi32(lstep2[55], lstep3[53]); - lstep1[54] = _mm256_add_epi32(lstep3[52], lstep2[54]); - lstep1[55] = _mm256_add_epi32(lstep3[53], lstep2[55]); - lstep1[56] = _mm256_add_epi32(lstep3[58], lstep2[56]); - lstep1[57] = _mm256_add_epi32(lstep3[59], lstep2[57]); - lstep1[58] = _mm256_sub_epi32(lstep2[56], lstep3[58]); - lstep1[59] = _mm256_sub_epi32(lstep2[57], lstep3[59]); - lstep1[60] = _mm256_sub_epi32(lstep2[62], lstep3[60]); - lstep1[61] = _mm256_sub_epi32(lstep2[63], lstep3[61]); - lstep1[62] = _mm256_add_epi32(lstep3[60], lstep2[62]); - lstep1[63] = _mm256_add_epi32(lstep3[61], lstep2[63]); - } - // stage 8 - { - const __m256i k32_p31_p01 = pair256_set_epi32(cospi_31_64, cospi_1_64); - const __m256i k32_p15_p17 = pair256_set_epi32(cospi_15_64, cospi_17_64); - const __m256i k32_p23_p09 = pair256_set_epi32(cospi_23_64, cospi_9_64); - const __m256i k32_p07_p25 = 
pair256_set_epi32(cospi_7_64, cospi_25_64); - const __m256i k32_m25_p07 = pair256_set_epi32(-cospi_25_64, cospi_7_64); - const __m256i k32_m09_p23 = pair256_set_epi32(-cospi_9_64, cospi_23_64); - const __m256i k32_m17_p15 = pair256_set_epi32(-cospi_17_64, cospi_15_64); - const __m256i k32_m01_p31 = pair256_set_epi32(-cospi_1_64, cospi_31_64); - - u[ 0] = _mm256_unpacklo_epi32(lstep1[32], lstep1[62]); - u[ 1] = _mm256_unpackhi_epi32(lstep1[32], lstep1[62]); - u[ 2] = _mm256_unpacklo_epi32(lstep1[33], lstep1[63]); - u[ 3] = _mm256_unpackhi_epi32(lstep1[33], lstep1[63]); - u[ 4] = _mm256_unpacklo_epi32(lstep1[34], lstep1[60]); - u[ 5] = _mm256_unpackhi_epi32(lstep1[34], lstep1[60]); - u[ 6] = _mm256_unpacklo_epi32(lstep1[35], lstep1[61]); - u[ 7] = _mm256_unpackhi_epi32(lstep1[35], lstep1[61]); - u[ 8] = _mm256_unpacklo_epi32(lstep1[36], lstep1[58]); - u[ 9] = _mm256_unpackhi_epi32(lstep1[36], lstep1[58]); - u[10] = _mm256_unpacklo_epi32(lstep1[37], lstep1[59]); - u[11] = _mm256_unpackhi_epi32(lstep1[37], lstep1[59]); - u[12] = _mm256_unpacklo_epi32(lstep1[38], lstep1[56]); - u[13] = _mm256_unpackhi_epi32(lstep1[38], lstep1[56]); - u[14] = _mm256_unpacklo_epi32(lstep1[39], lstep1[57]); - u[15] = _mm256_unpackhi_epi32(lstep1[39], lstep1[57]); - - v[ 0] = k_madd_epi32_avx2(u[ 0], k32_p31_p01); - v[ 1] = k_madd_epi32_avx2(u[ 1], k32_p31_p01); - v[ 2] = k_madd_epi32_avx2(u[ 2], k32_p31_p01); - v[ 3] = k_madd_epi32_avx2(u[ 3], k32_p31_p01); - v[ 4] = k_madd_epi32_avx2(u[ 4], k32_p15_p17); - v[ 5] = k_madd_epi32_avx2(u[ 5], k32_p15_p17); - v[ 6] = k_madd_epi32_avx2(u[ 6], k32_p15_p17); - v[ 7] = k_madd_epi32_avx2(u[ 7], k32_p15_p17); - v[ 8] = k_madd_epi32_avx2(u[ 8], k32_p23_p09); - v[ 9] = k_madd_epi32_avx2(u[ 9], k32_p23_p09); - v[10] = k_madd_epi32_avx2(u[10], k32_p23_p09); - v[11] = k_madd_epi32_avx2(u[11], k32_p23_p09); - v[12] = k_madd_epi32_avx2(u[12], k32_p07_p25); - v[13] = k_madd_epi32_avx2(u[13], k32_p07_p25); - v[14] = k_madd_epi32_avx2(u[14], k32_p07_p25); - 
v[15] = k_madd_epi32_avx2(u[15], k32_p07_p25); - v[16] = k_madd_epi32_avx2(u[12], k32_m25_p07); - v[17] = k_madd_epi32_avx2(u[13], k32_m25_p07); - v[18] = k_madd_epi32_avx2(u[14], k32_m25_p07); - v[19] = k_madd_epi32_avx2(u[15], k32_m25_p07); - v[20] = k_madd_epi32_avx2(u[ 8], k32_m09_p23); - v[21] = k_madd_epi32_avx2(u[ 9], k32_m09_p23); - v[22] = k_madd_epi32_avx2(u[10], k32_m09_p23); - v[23] = k_madd_epi32_avx2(u[11], k32_m09_p23); - v[24] = k_madd_epi32_avx2(u[ 4], k32_m17_p15); - v[25] = k_madd_epi32_avx2(u[ 5], k32_m17_p15); - v[26] = k_madd_epi32_avx2(u[ 6], k32_m17_p15); - v[27] = k_madd_epi32_avx2(u[ 7], k32_m17_p15); - v[28] = k_madd_epi32_avx2(u[ 0], k32_m01_p31); - v[29] = k_madd_epi32_avx2(u[ 1], k32_m01_p31); - v[30] = k_madd_epi32_avx2(u[ 2], k32_m01_p31); - v[31] = k_madd_epi32_avx2(u[ 3], k32_m01_p31); - - u[ 0] = k_packs_epi64_avx2(v[ 0], v[ 1]); - u[ 1] = k_packs_epi64_avx2(v[ 2], v[ 3]); - u[ 2] = k_packs_epi64_avx2(v[ 4], v[ 5]); - u[ 3] = k_packs_epi64_avx2(v[ 6], v[ 7]); - u[ 4] = k_packs_epi64_avx2(v[ 8], v[ 9]); - u[ 5] = k_packs_epi64_avx2(v[10], v[11]); - u[ 6] = k_packs_epi64_avx2(v[12], v[13]); - u[ 7] = k_packs_epi64_avx2(v[14], v[15]); - u[ 8] = k_packs_epi64_avx2(v[16], v[17]); - u[ 9] = k_packs_epi64_avx2(v[18], v[19]); - u[10] = k_packs_epi64_avx2(v[20], v[21]); - u[11] = k_packs_epi64_avx2(v[22], v[23]); - u[12] = k_packs_epi64_avx2(v[24], v[25]); - u[13] = k_packs_epi64_avx2(v[26], v[27]); - u[14] = k_packs_epi64_avx2(v[28], v[29]); - u[15] = k_packs_epi64_avx2(v[30], v[31]); - - v[ 0] = _mm256_add_epi32(u[ 0], k__DCT_CONST_ROUNDING); - v[ 1] = _mm256_add_epi32(u[ 1], k__DCT_CONST_ROUNDING); - v[ 2] = _mm256_add_epi32(u[ 2], k__DCT_CONST_ROUNDING); - v[ 3] = _mm256_add_epi32(u[ 3], k__DCT_CONST_ROUNDING); - v[ 4] = _mm256_add_epi32(u[ 4], k__DCT_CONST_ROUNDING); - v[ 5] = _mm256_add_epi32(u[ 5], k__DCT_CONST_ROUNDING); - v[ 6] = _mm256_add_epi32(u[ 6], k__DCT_CONST_ROUNDING); - v[ 7] = _mm256_add_epi32(u[ 7], 
k__DCT_CONST_ROUNDING); - v[ 8] = _mm256_add_epi32(u[ 8], k__DCT_CONST_ROUNDING); - v[ 9] = _mm256_add_epi32(u[ 9], k__DCT_CONST_ROUNDING); - v[10] = _mm256_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = _mm256_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm256_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm256_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm256_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm256_add_epi32(u[15], k__DCT_CONST_ROUNDING); - - u[ 0] = _mm256_srai_epi32(v[ 0], DCT_CONST_BITS); - u[ 1] = _mm256_srai_epi32(v[ 1], DCT_CONST_BITS); - u[ 2] = _mm256_srai_epi32(v[ 2], DCT_CONST_BITS); - u[ 3] = _mm256_srai_epi32(v[ 3], DCT_CONST_BITS); - u[ 4] = _mm256_srai_epi32(v[ 4], DCT_CONST_BITS); - u[ 5] = _mm256_srai_epi32(v[ 5], DCT_CONST_BITS); - u[ 6] = _mm256_srai_epi32(v[ 6], DCT_CONST_BITS); - u[ 7] = _mm256_srai_epi32(v[ 7], DCT_CONST_BITS); - u[ 8] = _mm256_srai_epi32(v[ 8], DCT_CONST_BITS); - u[ 9] = _mm256_srai_epi32(v[ 9], DCT_CONST_BITS); - u[10] = _mm256_srai_epi32(v[10], DCT_CONST_BITS); - u[11] = _mm256_srai_epi32(v[11], DCT_CONST_BITS); - u[12] = _mm256_srai_epi32(v[12], DCT_CONST_BITS); - u[13] = _mm256_srai_epi32(v[13], DCT_CONST_BITS); - u[14] = _mm256_srai_epi32(v[14], DCT_CONST_BITS); - u[15] = _mm256_srai_epi32(v[15], DCT_CONST_BITS); - - v[ 0] = _mm256_cmpgt_epi32(kZero,u[ 0]); - v[ 1] = _mm256_cmpgt_epi32(kZero,u[ 1]); - v[ 2] = _mm256_cmpgt_epi32(kZero,u[ 2]); - v[ 3] = _mm256_cmpgt_epi32(kZero,u[ 3]); - v[ 4] = _mm256_cmpgt_epi32(kZero,u[ 4]); - v[ 5] = _mm256_cmpgt_epi32(kZero,u[ 5]); - v[ 6] = _mm256_cmpgt_epi32(kZero,u[ 6]); - v[ 7] = _mm256_cmpgt_epi32(kZero,u[ 7]); - v[ 8] = _mm256_cmpgt_epi32(kZero,u[ 8]); - v[ 9] = _mm256_cmpgt_epi32(kZero,u[ 9]); - v[10] = _mm256_cmpgt_epi32(kZero,u[10]); - v[11] = _mm256_cmpgt_epi32(kZero,u[11]); - v[12] = _mm256_cmpgt_epi32(kZero,u[12]); - v[13] = _mm256_cmpgt_epi32(kZero,u[13]); - v[14] = _mm256_cmpgt_epi32(kZero,u[14]); - v[15] = 
_mm256_cmpgt_epi32(kZero,u[15]); - - u[ 0] = _mm256_sub_epi32(u[ 0], v[ 0]); - u[ 1] = _mm256_sub_epi32(u[ 1], v[ 1]); - u[ 2] = _mm256_sub_epi32(u[ 2], v[ 2]); - u[ 3] = _mm256_sub_epi32(u[ 3], v[ 3]); - u[ 4] = _mm256_sub_epi32(u[ 4], v[ 4]); - u[ 5] = _mm256_sub_epi32(u[ 5], v[ 5]); - u[ 6] = _mm256_sub_epi32(u[ 6], v[ 6]); - u[ 7] = _mm256_sub_epi32(u[ 7], v[ 7]); - u[ 8] = _mm256_sub_epi32(u[ 8], v[ 8]); - u[ 9] = _mm256_sub_epi32(u[ 9], v[ 9]); - u[10] = _mm256_sub_epi32(u[10], v[10]); - u[11] = _mm256_sub_epi32(u[11], v[11]); - u[12] = _mm256_sub_epi32(u[12], v[12]); - u[13] = _mm256_sub_epi32(u[13], v[13]); - u[14] = _mm256_sub_epi32(u[14], v[14]); - u[15] = _mm256_sub_epi32(u[15], v[15]); - - v[0] = _mm256_add_epi32(u[0], K32One); - v[1] = _mm256_add_epi32(u[1], K32One); - v[2] = _mm256_add_epi32(u[2], K32One); - v[3] = _mm256_add_epi32(u[3], K32One); - v[4] = _mm256_add_epi32(u[4], K32One); - v[5] = _mm256_add_epi32(u[5], K32One); - v[6] = _mm256_add_epi32(u[6], K32One); - v[7] = _mm256_add_epi32(u[7], K32One); - v[8] = _mm256_add_epi32(u[8], K32One); - v[9] = _mm256_add_epi32(u[9], K32One); - v[10] = _mm256_add_epi32(u[10], K32One); - v[11] = _mm256_add_epi32(u[11], K32One); - v[12] = _mm256_add_epi32(u[12], K32One); - v[13] = _mm256_add_epi32(u[13], K32One); - v[14] = _mm256_add_epi32(u[14], K32One); - v[15] = _mm256_add_epi32(u[15], K32One); - - u[0] = _mm256_srai_epi32(v[0], 2); - u[1] = _mm256_srai_epi32(v[1], 2); - u[2] = _mm256_srai_epi32(v[2], 2); - u[3] = _mm256_srai_epi32(v[3], 2); - u[4] = _mm256_srai_epi32(v[4], 2); - u[5] = _mm256_srai_epi32(v[5], 2); - u[6] = _mm256_srai_epi32(v[6], 2); - u[7] = _mm256_srai_epi32(v[7], 2); - u[8] = _mm256_srai_epi32(v[8], 2); - u[9] = _mm256_srai_epi32(v[9], 2); - u[10] = _mm256_srai_epi32(v[10], 2); - u[11] = _mm256_srai_epi32(v[11], 2); - u[12] = _mm256_srai_epi32(v[12], 2); - u[13] = _mm256_srai_epi32(v[13], 2); - u[14] = _mm256_srai_epi32(v[14], 2); - u[15] = _mm256_srai_epi32(v[15], 2); - - out[ 1] = 
_mm256_packs_epi32(u[0], u[1]); - out[17] = _mm256_packs_epi32(u[2], u[3]); - out[ 9] = _mm256_packs_epi32(u[4], u[5]); - out[25] = _mm256_packs_epi32(u[6], u[7]); - out[ 7] = _mm256_packs_epi32(u[8], u[9]); - out[23] = _mm256_packs_epi32(u[10], u[11]); - out[15] = _mm256_packs_epi32(u[12], u[13]); - out[31] = _mm256_packs_epi32(u[14], u[15]); - } - { - const __m256i k32_p27_p05 = pair256_set_epi32(cospi_27_64, cospi_5_64); - const __m256i k32_p11_p21 = pair256_set_epi32(cospi_11_64, cospi_21_64); - const __m256i k32_p19_p13 = pair256_set_epi32(cospi_19_64, cospi_13_64); - const __m256i k32_p03_p29 = pair256_set_epi32(cospi_3_64, cospi_29_64); - const __m256i k32_m29_p03 = pair256_set_epi32(-cospi_29_64, cospi_3_64); - const __m256i k32_m13_p19 = pair256_set_epi32(-cospi_13_64, cospi_19_64); - const __m256i k32_m21_p11 = pair256_set_epi32(-cospi_21_64, cospi_11_64); - const __m256i k32_m05_p27 = pair256_set_epi32(-cospi_5_64, cospi_27_64); - - u[ 0] = _mm256_unpacklo_epi32(lstep1[40], lstep1[54]); - u[ 1] = _mm256_unpackhi_epi32(lstep1[40], lstep1[54]); - u[ 2] = _mm256_unpacklo_epi32(lstep1[41], lstep1[55]); - u[ 3] = _mm256_unpackhi_epi32(lstep1[41], lstep1[55]); - u[ 4] = _mm256_unpacklo_epi32(lstep1[42], lstep1[52]); - u[ 5] = _mm256_unpackhi_epi32(lstep1[42], lstep1[52]); - u[ 6] = _mm256_unpacklo_epi32(lstep1[43], lstep1[53]); - u[ 7] = _mm256_unpackhi_epi32(lstep1[43], lstep1[53]); - u[ 8] = _mm256_unpacklo_epi32(lstep1[44], lstep1[50]); - u[ 9] = _mm256_unpackhi_epi32(lstep1[44], lstep1[50]); - u[10] = _mm256_unpacklo_epi32(lstep1[45], lstep1[51]); - u[11] = _mm256_unpackhi_epi32(lstep1[45], lstep1[51]); - u[12] = _mm256_unpacklo_epi32(lstep1[46], lstep1[48]); - u[13] = _mm256_unpackhi_epi32(lstep1[46], lstep1[48]); - u[14] = _mm256_unpacklo_epi32(lstep1[47], lstep1[49]); - u[15] = _mm256_unpackhi_epi32(lstep1[47], lstep1[49]); - - v[ 0] = k_madd_epi32_avx2(u[ 0], k32_p27_p05); - v[ 1] = k_madd_epi32_avx2(u[ 1], k32_p27_p05); - v[ 2] = k_madd_epi32_avx2(u[ 
2], k32_p27_p05); - v[ 3] = k_madd_epi32_avx2(u[ 3], k32_p27_p05); - v[ 4] = k_madd_epi32_avx2(u[ 4], k32_p11_p21); - v[ 5] = k_madd_epi32_avx2(u[ 5], k32_p11_p21); - v[ 6] = k_madd_epi32_avx2(u[ 6], k32_p11_p21); - v[ 7] = k_madd_epi32_avx2(u[ 7], k32_p11_p21); - v[ 8] = k_madd_epi32_avx2(u[ 8], k32_p19_p13); - v[ 9] = k_madd_epi32_avx2(u[ 9], k32_p19_p13); - v[10] = k_madd_epi32_avx2(u[10], k32_p19_p13); - v[11] = k_madd_epi32_avx2(u[11], k32_p19_p13); - v[12] = k_madd_epi32_avx2(u[12], k32_p03_p29); - v[13] = k_madd_epi32_avx2(u[13], k32_p03_p29); - v[14] = k_madd_epi32_avx2(u[14], k32_p03_p29); - v[15] = k_madd_epi32_avx2(u[15], k32_p03_p29); - v[16] = k_madd_epi32_avx2(u[12], k32_m29_p03); - v[17] = k_madd_epi32_avx2(u[13], k32_m29_p03); - v[18] = k_madd_epi32_avx2(u[14], k32_m29_p03); - v[19] = k_madd_epi32_avx2(u[15], k32_m29_p03); - v[20] = k_madd_epi32_avx2(u[ 8], k32_m13_p19); - v[21] = k_madd_epi32_avx2(u[ 9], k32_m13_p19); - v[22] = k_madd_epi32_avx2(u[10], k32_m13_p19); - v[23] = k_madd_epi32_avx2(u[11], k32_m13_p19); - v[24] = k_madd_epi32_avx2(u[ 4], k32_m21_p11); - v[25] = k_madd_epi32_avx2(u[ 5], k32_m21_p11); - v[26] = k_madd_epi32_avx2(u[ 6], k32_m21_p11); - v[27] = k_madd_epi32_avx2(u[ 7], k32_m21_p11); - v[28] = k_madd_epi32_avx2(u[ 0], k32_m05_p27); - v[29] = k_madd_epi32_avx2(u[ 1], k32_m05_p27); - v[30] = k_madd_epi32_avx2(u[ 2], k32_m05_p27); - v[31] = k_madd_epi32_avx2(u[ 3], k32_m05_p27); - - u[ 0] = k_packs_epi64_avx2(v[ 0], v[ 1]); - u[ 1] = k_packs_epi64_avx2(v[ 2], v[ 3]); - u[ 2] = k_packs_epi64_avx2(v[ 4], v[ 5]); - u[ 3] = k_packs_epi64_avx2(v[ 6], v[ 7]); - u[ 4] = k_packs_epi64_avx2(v[ 8], v[ 9]); - u[ 5] = k_packs_epi64_avx2(v[10], v[11]); - u[ 6] = k_packs_epi64_avx2(v[12], v[13]); - u[ 7] = k_packs_epi64_avx2(v[14], v[15]); - u[ 8] = k_packs_epi64_avx2(v[16], v[17]); - u[ 9] = k_packs_epi64_avx2(v[18], v[19]); - u[10] = k_packs_epi64_avx2(v[20], v[21]); - u[11] = k_packs_epi64_avx2(v[22], v[23]); - u[12] = 
k_packs_epi64_avx2(v[24], v[25]); - u[13] = k_packs_epi64_avx2(v[26], v[27]); - u[14] = k_packs_epi64_avx2(v[28], v[29]); - u[15] = k_packs_epi64_avx2(v[30], v[31]); - - v[ 0] = _mm256_add_epi32(u[ 0], k__DCT_CONST_ROUNDING); - v[ 1] = _mm256_add_epi32(u[ 1], k__DCT_CONST_ROUNDING); - v[ 2] = _mm256_add_epi32(u[ 2], k__DCT_CONST_ROUNDING); - v[ 3] = _mm256_add_epi32(u[ 3], k__DCT_CONST_ROUNDING); - v[ 4] = _mm256_add_epi32(u[ 4], k__DCT_CONST_ROUNDING); - v[ 5] = _mm256_add_epi32(u[ 5], k__DCT_CONST_ROUNDING); - v[ 6] = _mm256_add_epi32(u[ 6], k__DCT_CONST_ROUNDING); - v[ 7] = _mm256_add_epi32(u[ 7], k__DCT_CONST_ROUNDING); - v[ 8] = _mm256_add_epi32(u[ 8], k__DCT_CONST_ROUNDING); - v[ 9] = _mm256_add_epi32(u[ 9], k__DCT_CONST_ROUNDING); - v[10] = _mm256_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = _mm256_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm256_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm256_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm256_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm256_add_epi32(u[15], k__DCT_CONST_ROUNDING); - - u[ 0] = _mm256_srai_epi32(v[ 0], DCT_CONST_BITS); - u[ 1] = _mm256_srai_epi32(v[ 1], DCT_CONST_BITS); - u[ 2] = _mm256_srai_epi32(v[ 2], DCT_CONST_BITS); - u[ 3] = _mm256_srai_epi32(v[ 3], DCT_CONST_BITS); - u[ 4] = _mm256_srai_epi32(v[ 4], DCT_CONST_BITS); - u[ 5] = _mm256_srai_epi32(v[ 5], DCT_CONST_BITS); - u[ 6] = _mm256_srai_epi32(v[ 6], DCT_CONST_BITS); - u[ 7] = _mm256_srai_epi32(v[ 7], DCT_CONST_BITS); - u[ 8] = _mm256_srai_epi32(v[ 8], DCT_CONST_BITS); - u[ 9] = _mm256_srai_epi32(v[ 9], DCT_CONST_BITS); - u[10] = _mm256_srai_epi32(v[10], DCT_CONST_BITS); - u[11] = _mm256_srai_epi32(v[11], DCT_CONST_BITS); - u[12] = _mm256_srai_epi32(v[12], DCT_CONST_BITS); - u[13] = _mm256_srai_epi32(v[13], DCT_CONST_BITS); - u[14] = _mm256_srai_epi32(v[14], DCT_CONST_BITS); - u[15] = _mm256_srai_epi32(v[15], DCT_CONST_BITS); - - v[ 0] = _mm256_cmpgt_epi32(kZero,u[ 0]); - v[ 1] = 
_mm256_cmpgt_epi32(kZero,u[ 1]); - v[ 2] = _mm256_cmpgt_epi32(kZero,u[ 2]); - v[ 3] = _mm256_cmpgt_epi32(kZero,u[ 3]); - v[ 4] = _mm256_cmpgt_epi32(kZero,u[ 4]); - v[ 5] = _mm256_cmpgt_epi32(kZero,u[ 5]); - v[ 6] = _mm256_cmpgt_epi32(kZero,u[ 6]); - v[ 7] = _mm256_cmpgt_epi32(kZero,u[ 7]); - v[ 8] = _mm256_cmpgt_epi32(kZero,u[ 8]); - v[ 9] = _mm256_cmpgt_epi32(kZero,u[ 9]); - v[10] = _mm256_cmpgt_epi32(kZero,u[10]); - v[11] = _mm256_cmpgt_epi32(kZero,u[11]); - v[12] = _mm256_cmpgt_epi32(kZero,u[12]); - v[13] = _mm256_cmpgt_epi32(kZero,u[13]); - v[14] = _mm256_cmpgt_epi32(kZero,u[14]); - v[15] = _mm256_cmpgt_epi32(kZero,u[15]); - - u[ 0] = _mm256_sub_epi32(u[ 0], v[ 0]); - u[ 1] = _mm256_sub_epi32(u[ 1], v[ 1]); - u[ 2] = _mm256_sub_epi32(u[ 2], v[ 2]); - u[ 3] = _mm256_sub_epi32(u[ 3], v[ 3]); - u[ 4] = _mm256_sub_epi32(u[ 4], v[ 4]); - u[ 5] = _mm256_sub_epi32(u[ 5], v[ 5]); - u[ 6] = _mm256_sub_epi32(u[ 6], v[ 6]); - u[ 7] = _mm256_sub_epi32(u[ 7], v[ 7]); - u[ 8] = _mm256_sub_epi32(u[ 8], v[ 8]); - u[ 9] = _mm256_sub_epi32(u[ 9], v[ 9]); - u[10] = _mm256_sub_epi32(u[10], v[10]); - u[11] = _mm256_sub_epi32(u[11], v[11]); - u[12] = _mm256_sub_epi32(u[12], v[12]); - u[13] = _mm256_sub_epi32(u[13], v[13]); - u[14] = _mm256_sub_epi32(u[14], v[14]); - u[15] = _mm256_sub_epi32(u[15], v[15]); - - v[0] = _mm256_add_epi32(u[0], K32One); - v[1] = _mm256_add_epi32(u[1], K32One); - v[2] = _mm256_add_epi32(u[2], K32One); - v[3] = _mm256_add_epi32(u[3], K32One); - v[4] = _mm256_add_epi32(u[4], K32One); - v[5] = _mm256_add_epi32(u[5], K32One); - v[6] = _mm256_add_epi32(u[6], K32One); - v[7] = _mm256_add_epi32(u[7], K32One); - v[8] = _mm256_add_epi32(u[8], K32One); - v[9] = _mm256_add_epi32(u[9], K32One); - v[10] = _mm256_add_epi32(u[10], K32One); - v[11] = _mm256_add_epi32(u[11], K32One); - v[12] = _mm256_add_epi32(u[12], K32One); - v[13] = _mm256_add_epi32(u[13], K32One); - v[14] = _mm256_add_epi32(u[14], K32One); - v[15] = _mm256_add_epi32(u[15], K32One); - - u[0] = 
_mm256_srai_epi32(v[0], 2); - u[1] = _mm256_srai_epi32(v[1], 2); - u[2] = _mm256_srai_epi32(v[2], 2); - u[3] = _mm256_srai_epi32(v[3], 2); - u[4] = _mm256_srai_epi32(v[4], 2); - u[5] = _mm256_srai_epi32(v[5], 2); - u[6] = _mm256_srai_epi32(v[6], 2); - u[7] = _mm256_srai_epi32(v[7], 2); - u[8] = _mm256_srai_epi32(v[8], 2); - u[9] = _mm256_srai_epi32(v[9], 2); - u[10] = _mm256_srai_epi32(v[10], 2); - u[11] = _mm256_srai_epi32(v[11], 2); - u[12] = _mm256_srai_epi32(v[12], 2); - u[13] = _mm256_srai_epi32(v[13], 2); - u[14] = _mm256_srai_epi32(v[14], 2); - u[15] = _mm256_srai_epi32(v[15], 2); - - out[ 5] = _mm256_packs_epi32(u[0], u[1]); - out[21] = _mm256_packs_epi32(u[2], u[3]); - out[13] = _mm256_packs_epi32(u[4], u[5]); - out[29] = _mm256_packs_epi32(u[6], u[7]); - out[ 3] = _mm256_packs_epi32(u[8], u[9]); - out[19] = _mm256_packs_epi32(u[10], u[11]); - out[11] = _mm256_packs_epi32(u[12], u[13]); - out[27] = _mm256_packs_epi32(u[14], u[15]); - } - } -#endif - // Transpose the results, do it as four 8x8 transposes. 
- { - int transpose_block; - int16_t *output_currStep,*output_nextStep; - if (0 == pass){ - output_currStep = &intermediate[column_start * 32]; - output_nextStep = &intermediate[(column_start + 8) * 32]; - } else{ - output_currStep = &output_org[column_start * 32]; - output_nextStep = &output_org[(column_start + 8) * 32]; - } - for (transpose_block = 0; transpose_block < 4; ++transpose_block) { - __m256i *this_out = &out[8 * transpose_block]; - // 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - // 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 - // 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 - // 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 - // 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - // 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 - // 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 - // 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 - const __m256i tr0_0 = _mm256_unpacklo_epi16(this_out[0], this_out[1]); - const __m256i tr0_1 = _mm256_unpacklo_epi16(this_out[2], this_out[3]); - const __m256i tr0_2 = _mm256_unpackhi_epi16(this_out[0], this_out[1]); - const __m256i tr0_3 = _mm256_unpackhi_epi16(this_out[2], this_out[3]); - const __m256i tr0_4 = _mm256_unpacklo_epi16(this_out[4], this_out[5]); - const __m256i tr0_5 = _mm256_unpacklo_epi16(this_out[6], this_out[7]); - const __m256i tr0_6 = _mm256_unpackhi_epi16(this_out[4], this_out[5]); - const __m256i tr0_7 = _mm256_unpackhi_epi16(this_out[6], this_out[7]); - // 00 20 01 21 02 22 03 23 08 28 09 29 10 30 11 31 - // 40 60 41 61 42 62 43 63 48 68 49 69 50 70 51 71 - // 04 24 05 25 06 26 07 27 12 32 13 33 14 34 15 35 - // 44 64 45 65 46 66 47 67 52 72 53 73 54 74 55 75 - // 80 100 81 101 82 102 83 103 88 108 89 109 90 110 91 101 - // 120 140 121 141 122 142 123 143 128 148 129 149 130 150 131 151 - // 84 104 85 105 86 106 87 107 92 112 93 113 94 114 95 115 - // 124 144 125 145 126 146 127 147 132 152 133 153 134 154 135 155 - - const __m256i tr1_0 
= _mm256_unpacklo_epi32(tr0_0, tr0_1); - const __m256i tr1_1 = _mm256_unpacklo_epi32(tr0_2, tr0_3); - const __m256i tr1_2 = _mm256_unpackhi_epi32(tr0_0, tr0_1); - const __m256i tr1_3 = _mm256_unpackhi_epi32(tr0_2, tr0_3); - const __m256i tr1_4 = _mm256_unpacklo_epi32(tr0_4, tr0_5); - const __m256i tr1_5 = _mm256_unpacklo_epi32(tr0_6, tr0_7); - const __m256i tr1_6 = _mm256_unpackhi_epi32(tr0_4, tr0_5); - const __m256i tr1_7 = _mm256_unpackhi_epi32(tr0_6, tr0_7); - // 00 20 40 60 01 21 41 61 08 28 48 68 09 29 49 69 - // 04 24 44 64 05 25 45 65 12 32 52 72 13 33 53 73 - // 02 22 42 62 03 23 43 63 10 30 50 70 11 31 51 71 - // 06 26 46 66 07 27 47 67 14 34 54 74 15 35 55 75 - // 80 100 120 140 81 101 121 141 88 108 128 148 89 109 129 149 - // 84 104 124 144 85 105 125 145 92 112 132 152 93 113 133 153 - // 82 102 122 142 83 103 123 143 90 110 130 150 91 101 131 151 - // 86 106 126 146 87 107 127 147 94 114 134 154 95 115 135 155 - __m256i tr2_0 = _mm256_unpacklo_epi64(tr1_0, tr1_4); - __m256i tr2_1 = _mm256_unpackhi_epi64(tr1_0, tr1_4); - __m256i tr2_2 = _mm256_unpacklo_epi64(tr1_2, tr1_6); - __m256i tr2_3 = _mm256_unpackhi_epi64(tr1_2, tr1_6); - __m256i tr2_4 = _mm256_unpacklo_epi64(tr1_1, tr1_5); - __m256i tr2_5 = _mm256_unpackhi_epi64(tr1_1, tr1_5); - __m256i tr2_6 = _mm256_unpacklo_epi64(tr1_3, tr1_7); - __m256i tr2_7 = _mm256_unpackhi_epi64(tr1_3, tr1_7); - // 00 20 40 60 80 100 120 140 08 28 48 68 88 108 128 148 - // 01 21 41 61 81 101 121 141 09 29 49 69 89 109 129 149 - // 02 22 42 62 82 102 122 142 10 30 50 70 90 110 130 150 - // 03 23 43 63 83 103 123 143 11 31 51 71 91 101 131 151 - // 04 24 44 64 84 104 124 144 12 32 52 72 92 112 132 152 - // 05 25 45 65 85 105 125 145 13 33 53 73 93 113 133 153 - // 06 26 46 66 86 106 126 146 14 34 54 74 94 114 134 154 - // 07 27 47 67 87 107 127 147 15 35 55 75 95 115 135 155 - if (0 == pass) { - // output[j] = (output[j] + 1 + (output[j] > 0)) >> 2; - // TODO(cd): see quality impact of only doing - // output[j] = 
(output[j] + 1) >> 2; - // which would remove the code between here ... - __m256i tr2_0_0 = _mm256_cmpgt_epi16(tr2_0, kZero); - __m256i tr2_1_0 = _mm256_cmpgt_epi16(tr2_1, kZero); - __m256i tr2_2_0 = _mm256_cmpgt_epi16(tr2_2, kZero); - __m256i tr2_3_0 = _mm256_cmpgt_epi16(tr2_3, kZero); - __m256i tr2_4_0 = _mm256_cmpgt_epi16(tr2_4, kZero); - __m256i tr2_5_0 = _mm256_cmpgt_epi16(tr2_5, kZero); - __m256i tr2_6_0 = _mm256_cmpgt_epi16(tr2_6, kZero); - __m256i tr2_7_0 = _mm256_cmpgt_epi16(tr2_7, kZero); - tr2_0 = _mm256_sub_epi16(tr2_0, tr2_0_0); - tr2_1 = _mm256_sub_epi16(tr2_1, tr2_1_0); - tr2_2 = _mm256_sub_epi16(tr2_2, tr2_2_0); - tr2_3 = _mm256_sub_epi16(tr2_3, tr2_3_0); - tr2_4 = _mm256_sub_epi16(tr2_4, tr2_4_0); - tr2_5 = _mm256_sub_epi16(tr2_5, tr2_5_0); - tr2_6 = _mm256_sub_epi16(tr2_6, tr2_6_0); - tr2_7 = _mm256_sub_epi16(tr2_7, tr2_7_0); - // ... and here. - // PS: also change code in vp9/encoder/vp9_dct.c - tr2_0 = _mm256_add_epi16(tr2_0, kOne); - tr2_1 = _mm256_add_epi16(tr2_1, kOne); - tr2_2 = _mm256_add_epi16(tr2_2, kOne); - tr2_3 = _mm256_add_epi16(tr2_3, kOne); - tr2_4 = _mm256_add_epi16(tr2_4, kOne); - tr2_5 = _mm256_add_epi16(tr2_5, kOne); - tr2_6 = _mm256_add_epi16(tr2_6, kOne); - tr2_7 = _mm256_add_epi16(tr2_7, kOne); - tr2_0 = _mm256_srai_epi16(tr2_0, 2); - tr2_1 = _mm256_srai_epi16(tr2_1, 2); - tr2_2 = _mm256_srai_epi16(tr2_2, 2); - tr2_3 = _mm256_srai_epi16(tr2_3, 2); - tr2_4 = _mm256_srai_epi16(tr2_4, 2); - tr2_5 = _mm256_srai_epi16(tr2_5, 2); - tr2_6 = _mm256_srai_epi16(tr2_6, 2); - tr2_7 = _mm256_srai_epi16(tr2_7, 2); - } - // Note: even though all these stores are aligned, using the aligned - // intrinsic make the code slightly slower. 
- _mm_storeu_si128((__m128i *)(output_currStep + 0 * 32), _mm256_castsi256_si128(tr2_0)); - _mm_storeu_si128((__m128i *)(output_currStep + 1 * 32), _mm256_castsi256_si128(tr2_1)); - _mm_storeu_si128((__m128i *)(output_currStep + 2 * 32), _mm256_castsi256_si128(tr2_2)); - _mm_storeu_si128((__m128i *)(output_currStep + 3 * 32), _mm256_castsi256_si128(tr2_3)); - _mm_storeu_si128((__m128i *)(output_currStep + 4 * 32), _mm256_castsi256_si128(tr2_4)); - _mm_storeu_si128((__m128i *)(output_currStep + 5 * 32), _mm256_castsi256_si128(tr2_5)); - _mm_storeu_si128((__m128i *)(output_currStep + 6 * 32), _mm256_castsi256_si128(tr2_6)); - _mm_storeu_si128((__m128i *)(output_currStep + 7 * 32), _mm256_castsi256_si128(tr2_7)); - - _mm_storeu_si128((__m128i *)(output_nextStep + 0 * 32), _mm256_extractf128_si256(tr2_0,1)); - _mm_storeu_si128((__m128i *)(output_nextStep + 1 * 32), _mm256_extractf128_si256(tr2_1,1)); - _mm_storeu_si128((__m128i *)(output_nextStep + 2 * 32), _mm256_extractf128_si256(tr2_2,1)); - _mm_storeu_si128((__m128i *)(output_nextStep + 3 * 32), _mm256_extractf128_si256(tr2_3,1)); - _mm_storeu_si128((__m128i *)(output_nextStep + 4 * 32), _mm256_extractf128_si256(tr2_4,1)); - _mm_storeu_si128((__m128i *)(output_nextStep + 5 * 32), _mm256_extractf128_si256(tr2_5,1)); - _mm_storeu_si128((__m128i *)(output_nextStep + 6 * 32), _mm256_extractf128_si256(tr2_6,1)); - _mm_storeu_si128((__m128i *)(output_nextStep + 7 * 32), _mm256_extractf128_si256(tr2_7,1)); - // Process next 8x8 - output_currStep += 8; - output_nextStep += 8; - } - } - } - } -} // NOLINT diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c deleted file mode 100644 index 3a19f52746c..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include // AVX2 -#include "vp9/common/vp9_idct.h" // for cospi constants -#include "vpx_ports/mem.h" - - -#define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2 -#define FDCT32x32_HIGH_PRECISION 0 -#include "vp9/encoder/x86/vp9_dct32x32_avx2.c" -#undef FDCT32x32_2D_AVX2 -#undef FDCT32x32_HIGH_PRECISION - -#define FDCT32x32_2D_AVX2 vp9_fdct32x32_avx2 -#define FDCT32x32_HIGH_PRECISION 1 -#include "vp9/encoder/x86/vp9_dct32x32_avx2.c" // NOLINT -#undef FDCT32x32_2D_AVX2 -#undef FDCT32x32_HIGH_PRECISION diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm b/media/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm deleted file mode 100644 index f71181c5e91..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm +++ /dev/null @@ -1,70 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; -%include "third_party/x86inc/x86inc.asm" - -SECTION .text - -%macro TRANSFORM_COLS 0 - paddw m0, m1 - movq m4, m0 - psubw m3, m2 - psubw m4, m3 - psraw m4, 1 - movq m5, m4 - psubw m5, m1 ;b1 - psubw m4, m2 ;c1 - psubw m0, m4 - paddw m3, m5 - ; m0 a0 - SWAP 1, 4 ; m1 c1 - SWAP 2, 3 ; m2 d1 - SWAP 3, 5 ; m3 b1 -%endmacro - -%macro TRANSPOSE_4X4 0 - movq m4, m0 - movq m5, m2 - punpcklwd m4, m1 - punpckhwd m0, m1 - punpcklwd m5, m3 - punpckhwd m2, m3 - movq m1, m4 - movq m3, m0 - punpckldq m1, m5 - punpckhdq m4, m5 - punpckldq m3, m2 - punpckhdq m0, m2 - SWAP 2, 3, 0, 1, 4 -%endmacro - -INIT_MMX mmx -cglobal fwht4x4, 3, 4, 8, input, output, stride - lea r3q, [inputq + strideq*4] - movq m0, [inputq] ;a1 - movq m1, [inputq + strideq*2] ;b1 - movq m2, [r3q] ;c1 - movq m3, [r3q + strideq*2] ;d1 - - TRANSFORM_COLS - TRANSPOSE_4X4 - TRANSFORM_COLS - TRANSPOSE_4X4 - - psllw m0, 2 - psllw m1, 2 - psllw m2, 2 - psllw m3, 2 - - movq [outputq], m0 - movq [outputq + 8], m1 - movq [outputq + 16], m2 - movq [outputq + 24], m3 - - RET diff --git a/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm deleted file mode 100644 index 28458dcdd52..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm +++ /dev/null @@ -1,182 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; -%include "third_party/x86inc/x86inc.asm" - -; This file provides SSSE3 version of the forward transformation. Part -; of the macro definitions are originally derived from the ffmpeg project. -; The current version applies to x86 64-bit only. 
- -SECTION_RODATA - -pw_11585x2: times 8 dw 23170 -pd_8192: times 4 dd 8192 - -%macro TRANSFORM_COEFFS 2 -pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 -pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1 -%endmacro - -TRANSFORM_COEFFS 11585, 11585 -TRANSFORM_COEFFS 15137, 6270 -TRANSFORM_COEFFS 16069, 3196 -TRANSFORM_COEFFS 9102, 13623 - -SECTION .text - -%if ARCH_X86_64 -%macro SUM_SUB 3 - psubw m%3, m%1, m%2 - paddw m%1, m%2 - SWAP %2, %3 -%endmacro - -; butterfly operation -%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2 - pmaddwd m%1, m%3, %5 - pmaddwd m%2, m%3, %6 - paddd m%1, %4 - paddd m%2, %4 - psrad m%1, 14 - psrad m%2, 14 -%endmacro - -%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2 - punpckhwd m%6, m%2, m%1 - MUL_ADD_2X %7, %6, %6, %5, [pw_%4_%3], [pw_%3_m%4] - punpcklwd m%2, m%1 - MUL_ADD_2X %1, %2, %2, %5, [pw_%4_%3], [pw_%3_m%4] - packssdw m%1, m%7 - packssdw m%2, m%6 -%endmacro - -; matrix transpose -%macro INTERLEAVE_2X 4 - punpckh%1 m%4, m%2, m%3 - punpckl%1 m%2, m%3 - SWAP %3, %4 -%endmacro - -%macro TRANSPOSE8X8 9 - INTERLEAVE_2X wd, %1, %2, %9 - INTERLEAVE_2X wd, %3, %4, %9 - INTERLEAVE_2X wd, %5, %6, %9 - INTERLEAVE_2X wd, %7, %8, %9 - - INTERLEAVE_2X dq, %1, %3, %9 - INTERLEAVE_2X dq, %2, %4, %9 - INTERLEAVE_2X dq, %5, %7, %9 - INTERLEAVE_2X dq, %6, %8, %9 - - INTERLEAVE_2X qdq, %1, %5, %9 - INTERLEAVE_2X qdq, %3, %7, %9 - INTERLEAVE_2X qdq, %2, %6, %9 - INTERLEAVE_2X qdq, %4, %8, %9 - - SWAP %2, %5 - SWAP %4, %7 -%endmacro - -; 1D forward 8x8 DCT transform -%macro FDCT8_1D 1 - SUM_SUB 0, 7, 9 - SUM_SUB 1, 6, 9 - SUM_SUB 2, 5, 9 - SUM_SUB 3, 4, 9 - - SUM_SUB 0, 3, 9 - SUM_SUB 1, 2, 9 - SUM_SUB 6, 5, 9 -%if %1 == 0 - SUM_SUB 0, 1, 9 -%endif - - BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 - - pmulhrsw m6, m12 - pmulhrsw m5, m12 -%if %1 == 0 - pmulhrsw m0, m12 - pmulhrsw m1, m12 -%else - BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 - SWAP 0, 1 -%endif - - SUM_SUB 4, 5, 9 - SUM_SUB 7, 6, 9 - BUTTERFLY_4X 4, 7, 3196, 16069, 
m8, 9, 10 - BUTTERFLY_4X 5, 6, 13623, 9102, m8, 9, 10 - SWAP 1, 4 - SWAP 3, 6 -%endmacro - -%macro DIVIDE_ROUND_2X 4 ; dst1, dst2, tmp1, tmp2 - psraw m%3, m%1, 15 - psraw m%4, m%2, 15 - psubw m%1, m%3 - psubw m%2, m%4 - psraw m%1, 1 - psraw m%2, 1 -%endmacro - -INIT_XMM ssse3 -cglobal fdct8x8, 3, 5, 13, input, output, stride - - mova m8, [pd_8192] - mova m12, [pw_11585x2] - pxor m11, m11 - - lea r3, [2 * strideq] - lea r4, [4 * strideq] - mova m0, [inputq] - mova m1, [inputq + r3] - lea inputq, [inputq + r4] - mova m2, [inputq] - mova m3, [inputq + r3] - lea inputq, [inputq + r4] - mova m4, [inputq] - mova m5, [inputq + r3] - lea inputq, [inputq + r4] - mova m6, [inputq] - mova m7, [inputq + r3] - - ; left shift by 2 to increase forward transformation precision - psllw m0, 2 - psllw m1, 2 - psllw m2, 2 - psllw m3, 2 - psllw m4, 2 - psllw m5, 2 - psllw m6, 2 - psllw m7, 2 - - ; column transform - FDCT8_1D 0 - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - FDCT8_1D 1 - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - DIVIDE_ROUND_2X 0, 1, 9, 10 - DIVIDE_ROUND_2X 2, 3, 9, 10 - DIVIDE_ROUND_2X 4, 5, 9, 10 - DIVIDE_ROUND_2X 6, 7, 9, 10 - - mova [outputq + 0], m0 - mova [outputq + 16], m1 - mova [outputq + 32], m2 - mova [outputq + 48], m3 - mova [outputq + 64], m4 - mova [outputq + 80], m5 - mova [outputq + 96], m6 - mova [outputq + 112], m7 - - RET -%endif diff --git a/media/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c deleted file mode 100644 index c67490fad34..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Usee of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include // AVX2 -#include "vpx/vpx_integer.h" - - -int64_t vp9_block_error_avx2(const int16_t *coeff, - const int16_t *dqcoeff, - intptr_t block_size, - int64_t *ssz) { - __m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg; - __m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi; - __m256i sse_reg_64hi, ssz_reg_64hi; - __m128i sse_reg128, ssz_reg128; - int64_t sse; - int i; - const __m256i zero_reg = _mm256_set1_epi16(0); - - // init sse and ssz registerd to zero - sse_reg = _mm256_set1_epi16(0); - ssz_reg = _mm256_set1_epi16(0); - - for (i = 0 ; i < block_size ; i+= 16) { - // load 32 bytes from coeff and dqcoeff - coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i)); - dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i)); - // dqcoeff - coeff - dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg); - // madd (dqcoeff - coeff) - dqcoeff_reg = _mm256_madd_epi16(dqcoeff_reg, dqcoeff_reg); - // madd coeff - coeff_reg = _mm256_madd_epi16(coeff_reg, coeff_reg); - // expand each double word of madd (dqcoeff - coeff) to quad word - exp_dqcoeff_lo = _mm256_unpacklo_epi32(dqcoeff_reg, zero_reg); - exp_dqcoeff_hi = _mm256_unpackhi_epi32(dqcoeff_reg, zero_reg); - // expand each double word of madd (coeff) to quad word - exp_coeff_lo = _mm256_unpacklo_epi32(coeff_reg, zero_reg); - exp_coeff_hi = _mm256_unpackhi_epi32(coeff_reg, zero_reg); - // add each quad word of madd (dqcoeff - coeff) and madd (coeff) - sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_lo); - ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_lo); - sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_hi); - ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_hi); - } - // save the higher 64 bit of each 128 bit lane - sse_reg_64hi = _mm256_srli_si256(sse_reg, 8); - ssz_reg_64hi = _mm256_srli_si256(ssz_reg, 8); - // add the higher 64 bit to the low 64 bit - sse_reg = 
_mm256_add_epi64(sse_reg, sse_reg_64hi); - ssz_reg = _mm256_add_epi64(ssz_reg, ssz_reg_64hi); - - // add each 64 bit from each of the 128 bit lane of the 256 bit - sse_reg128 = _mm_add_epi64(_mm256_castsi256_si128(sse_reg), - _mm256_extractf128_si256(sse_reg, 1)); - - ssz_reg128 = _mm_add_epi64(_mm256_castsi256_si128(ssz_reg), - _mm256_extractf128_si256(ssz_reg, 1)); - - // store the results - _mm_storel_epi64((__m128i*)(&sse), sse_reg128); - - _mm_storel_epi64((__m128i*)(ssz), ssz_reg128); - return sse; -} diff --git a/media/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/media/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm deleted file mode 100644 index 508e1d4f55a..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm +++ /dev/null @@ -1,402 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA -pw_1: times 8 dw 1 - -SECTION .text - -%macro QUANTIZE_FN 2 -cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ - shift, qcoeff, dqcoeff, dequant, zbin_oq, \ - eob, scan, iscan - cmp dword skipm, 0 - jne .blank - - ; actual quantize loop - setup pointers, rounders, etc. 
- movifnidn coeffq, coeffmp - movifnidn ncoeffq, ncoeffmp - mov r2, dequantmp - movifnidn zbinq, zbinmp - movifnidn roundq, roundmp - movifnidn quantq, quantmp - movd m4, dword zbin_oqm ; m4 = zbin_oq - mova m0, [zbinq] ; m0 = zbin - punpcklwd m4, m4 - mova m1, [roundq] ; m1 = round - pshufd m4, m4, 0 - mova m2, [quantq] ; m2 = quant - paddw m0, m4 ; m0 = zbin + zbin_oq -%ifidn %1, b_32x32 - pcmpeqw m5, m5 - psrlw m5, 15 - paddw m0, m5 - paddw m1, m5 - psrlw m0, 1 ; m0 = (m0 + 1) / 2 - psrlw m1, 1 ; m1 = (m1 + 1) / 2 -%endif - mova m3, [r2q] ; m3 = dequant - psubw m0, [pw_1] - mov r2, shiftmp - mov r3, qcoeffmp - mova m4, [r2] ; m4 = shift - mov r4, dqcoeffmp - mov r5, iscanmp -%ifidn %1, b_32x32 - psllw m4, 1 -%endif - pxor m5, m5 ; m5 = dedicated zero - DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob - lea coeffq, [ coeffq+ncoeffq*2] - lea iscanq, [ iscanq+ncoeffq*2] - lea qcoeffq, [ qcoeffq+ncoeffq*2] - lea dqcoeffq, [dqcoeffq+ncoeffq*2] - neg ncoeffq - - ; get DC and first 15 AC coeffs - mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] - mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] - pabsw m6, m9 ; m6 = abs(m9) - pabsw m11, m10 ; m11 = abs(m10) - pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin - punpckhqdq m0, m0 - pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin - paddsw m6, m1 ; m6 += round - punpckhqdq m1, m1 - paddsw m11, m1 ; m11 += round - pmulhw m8, m6, m2 ; m8 = m6*q>>16 - punpckhqdq m2, m2 - pmulhw m13, m11, m2 ; m13 = m11*q>>16 - paddw m8, m6 ; m8 += m6 - paddw m13, m11 ; m13 += m11 - pmulhw m8, m4 ; m8 = m8*qsh>>16 - punpckhqdq m4, m4 - pmulhw m13, m4 ; m13 = m13*qsh>>16 - psignw m8, m9 ; m8 = reinsert sign - psignw m13, m10 ; m13 = reinsert sign - pand m8, m7 - pand m13, m12 - mova [qcoeffq+ncoeffq*2+ 0], m8 - mova [qcoeffq+ncoeffq*2+16], m13 -%ifidn %1, b_32x32 - pabsw m8, m8 - pabsw m13, m13 -%endif - pmullw m8, m3 ; dqc[i] = qc[i] * q - punpckhqdq m3, m3 - pmullw m13, m3 ; dqc[i] = qc[i] * q -%ifidn %1, b_32x32 - psrlw m8, 1 - psrlw 
m13, 1 - psignw m8, m9 - psignw m13, m10 -%endif - mova [dqcoeffq+ncoeffq*2+ 0], m8 - mova [dqcoeffq+ncoeffq*2+16], m13 - pcmpeqw m8, m5 ; m8 = c[i] == 0 - pcmpeqw m13, m5 ; m13 = c[i] == 0 - mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] - mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] - psubw m6, m7 ; m6 = scan[i] + 1 - psubw m11, m12 ; m11 = scan[i] + 1 - pandn m8, m6 ; m8 = max(eob) - pandn m13, m11 ; m13 = max(eob) - pmaxsw m8, m13 - add ncoeffq, mmsize - jz .accumulate_eob - -.ac_only_loop: - mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] - mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] - pabsw m6, m9 ; m6 = abs(m9) - pabsw m11, m10 ; m11 = abs(m10) - pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin - pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin -%ifidn %1, b_32x32 - pmovmskb r6, m7 - pmovmskb r2, m12 - or r6, r2 - jz .skip_iter -%endif - paddsw m6, m1 ; m6 += round - paddsw m11, m1 ; m11 += round - pmulhw m14, m6, m2 ; m14 = m6*q>>16 - pmulhw m13, m11, m2 ; m13 = m11*q>>16 - paddw m14, m6 ; m14 += m6 - paddw m13, m11 ; m13 += m11 - pmulhw m14, m4 ; m14 = m14*qsh>>16 - pmulhw m13, m4 ; m13 = m13*qsh>>16 - psignw m14, m9 ; m14 = reinsert sign - psignw m13, m10 ; m13 = reinsert sign - pand m14, m7 - pand m13, m12 - mova [qcoeffq+ncoeffq*2+ 0], m14 - mova [qcoeffq+ncoeffq*2+16], m13 -%ifidn %1, b_32x32 - pabsw m14, m14 - pabsw m13, m13 -%endif - pmullw m14, m3 ; dqc[i] = qc[i] * q - pmullw m13, m3 ; dqc[i] = qc[i] * q -%ifidn %1, b_32x32 - psrlw m14, 1 - psrlw m13, 1 - psignw m14, m9 - psignw m13, m10 -%endif - mova [dqcoeffq+ncoeffq*2+ 0], m14 - mova [dqcoeffq+ncoeffq*2+16], m13 - pcmpeqw m14, m5 ; m14 = c[i] == 0 - pcmpeqw m13, m5 ; m13 = c[i] == 0 - mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] - mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] - psubw m6, m7 ; m6 = scan[i] + 1 - psubw m11, m12 ; m11 = scan[i] + 1 - pandn m14, m6 ; m14 = max(eob) - pandn m13, m11 ; m13 = max(eob) - pmaxsw m8, m14 - pmaxsw m8, m13 - add ncoeffq, mmsize - jl .ac_only_loop - -%ifidn %1, 
b_32x32 - jmp .accumulate_eob -.skip_iter: - mova [qcoeffq+ncoeffq*2+ 0], m5 - mova [qcoeffq+ncoeffq*2+16], m5 - mova [dqcoeffq+ncoeffq*2+ 0], m5 - mova [dqcoeffq+ncoeffq*2+16], m5 - add ncoeffq, mmsize - jl .ac_only_loop -%endif - -.accumulate_eob: - ; horizontally accumulate/max eobs and write into [eob] memory pointer - mov r2, eobmp - pshufd m7, m8, 0xe - pmaxsw m8, m7 - pshuflw m7, m8, 0xe - pmaxsw m8, m7 - pshuflw m7, m8, 0x1 - pmaxsw m8, m7 - pextrw r6, m8, 0 - mov [r2], r6 - RET - - ; skip-block, i.e. just write all zeroes -.blank: - mov r0, dqcoeffmp - movifnidn ncoeffq, ncoeffmp - mov r2, qcoeffmp - mov r3, eobmp - DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob - lea dqcoeffq, [dqcoeffq+ncoeffq*2] - lea qcoeffq, [ qcoeffq+ncoeffq*2] - neg ncoeffq - pxor m7, m7 -.blank_loop: - mova [dqcoeffq+ncoeffq*2+ 0], m7 - mova [dqcoeffq+ncoeffq*2+16], m7 - mova [qcoeffq+ncoeffq*2+ 0], m7 - mova [qcoeffq+ncoeffq*2+16], m7 - add ncoeffq, mmsize - jl .blank_loop - mov word [eobq], 0 - RET -%endmacro - -INIT_XMM ssse3 -QUANTIZE_FN b, 7 -QUANTIZE_FN b_32x32, 7 - -%macro QUANTIZE_FP 2 -cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ - shift, qcoeff, dqcoeff, dequant, zbin_oq, \ - eob, scan, iscan - cmp dword skipm, 0 - jne .blank - - ; actual quantize loop - setup pointers, rounders, etc. 
- movifnidn coeffq, coeffmp - movifnidn ncoeffq, ncoeffmp - mov r2, dequantmp - movifnidn zbinq, zbinmp - movifnidn roundq, roundmp - movifnidn quantq, quantmp - mova m1, [roundq] ; m1 = round - mova m2, [quantq] ; m2 = quant -%ifidn %1, fp_32x32 - pcmpeqw m5, m5 - psrlw m5, 15 - paddw m1, m5 - psrlw m1, 1 ; m1 = (m1 + 1) / 2 -%endif - mova m3, [r2q] ; m3 = dequant - mov r3, qcoeffmp - mov r4, dqcoeffmp - mov r5, iscanmp -%ifidn %1, fp_32x32 - psllw m2, 1 -%endif - pxor m5, m5 ; m5 = dedicated zero - DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob - lea coeffq, [ coeffq+ncoeffq*2] - lea iscanq, [ iscanq+ncoeffq*2] - lea qcoeffq, [ qcoeffq+ncoeffq*2] - lea dqcoeffq, [dqcoeffq+ncoeffq*2] - neg ncoeffq - - ; get DC and first 15 AC coeffs - mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] - mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] - pabsw m6, m9 ; m6 = abs(m9) - pabsw m11, m10 ; m11 = abs(m10) - pcmpeqw m7, m7 - - paddsw m6, m1 ; m6 += round - punpckhqdq m1, m1 - paddsw m11, m1 ; m11 += round - pmulhw m8, m6, m2 ; m8 = m6*q>>16 - punpckhqdq m2, m2 - pmulhw m13, m11, m2 ; m13 = m11*q>>16 - psignw m8, m9 ; m8 = reinsert sign - psignw m13, m10 ; m13 = reinsert sign - mova [qcoeffq+ncoeffq*2+ 0], m8 - mova [qcoeffq+ncoeffq*2+16], m13 -%ifidn %1, fp_32x32 - pabsw m8, m8 - pabsw m13, m13 -%endif - pmullw m8, m3 ; dqc[i] = qc[i] * q - punpckhqdq m3, m3 - pmullw m13, m3 ; dqc[i] = qc[i] * q -%ifidn %1, fp_32x32 - psrlw m8, 1 - psrlw m13, 1 - psignw m8, m9 - psignw m13, m10 - psrlw m0, m3, 2 -%endif - mova [dqcoeffq+ncoeffq*2+ 0], m8 - mova [dqcoeffq+ncoeffq*2+16], m13 - pcmpeqw m8, m5 ; m8 = c[i] == 0 - pcmpeqw m13, m5 ; m13 = c[i] == 0 - mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] - mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] - psubw m6, m7 ; m6 = scan[i] + 1 - psubw m11, m7 ; m11 = scan[i] + 1 - pandn m8, m6 ; m8 = max(eob) - pandn m13, m11 ; m13 = max(eob) - pmaxsw m8, m13 - add ncoeffq, mmsize - jz .accumulate_eob - -.ac_only_loop: - 
mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] - mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] - pabsw m6, m9 ; m6 = abs(m9) - pabsw m11, m10 ; m11 = abs(m10) -%ifidn %1, fp_32x32 - pcmpgtw m7, m6, m0 - pcmpgtw m12, m11, m0 - pmovmskb r6, m7 - pmovmskb r2, m12 - - or r6, r2 - jz .skip_iter -%endif - pcmpeqw m7, m7 - - paddsw m6, m1 ; m6 += round - paddsw m11, m1 ; m11 += round - pmulhw m14, m6, m2 ; m14 = m6*q>>16 - pmulhw m13, m11, m2 ; m13 = m11*q>>16 - psignw m14, m9 ; m14 = reinsert sign - psignw m13, m10 ; m13 = reinsert sign - mova [qcoeffq+ncoeffq*2+ 0], m14 - mova [qcoeffq+ncoeffq*2+16], m13 -%ifidn %1, fp_32x32 - pabsw m14, m14 - pabsw m13, m13 -%endif - pmullw m14, m3 ; dqc[i] = qc[i] * q - pmullw m13, m3 ; dqc[i] = qc[i] * q -%ifidn %1, fp_32x32 - psrlw m14, 1 - psrlw m13, 1 - psignw m14, m9 - psignw m13, m10 -%endif - mova [dqcoeffq+ncoeffq*2+ 0], m14 - mova [dqcoeffq+ncoeffq*2+16], m13 - pcmpeqw m14, m5 ; m14 = c[i] == 0 - pcmpeqw m13, m5 ; m13 = c[i] == 0 - mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] - mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] - psubw m6, m7 ; m6 = scan[i] + 1 - psubw m11, m7 ; m11 = scan[i] + 1 - pandn m14, m6 ; m14 = max(eob) - pandn m13, m11 ; m13 = max(eob) - pmaxsw m8, m14 - pmaxsw m8, m13 - add ncoeffq, mmsize - jl .ac_only_loop - -%ifidn %1, fp_32x32 - jmp .accumulate_eob -.skip_iter: - mova [qcoeffq+ncoeffq*2+ 0], m5 - mova [qcoeffq+ncoeffq*2+16], m5 - mova [dqcoeffq+ncoeffq*2+ 0], m5 - mova [dqcoeffq+ncoeffq*2+16], m5 - add ncoeffq, mmsize - jl .ac_only_loop -%endif - -.accumulate_eob: - ; horizontally accumulate/max eobs and write into [eob] memory pointer - mov r2, eobmp - pshufd m7, m8, 0xe - pmaxsw m8, m7 - pshuflw m7, m8, 0xe - pmaxsw m8, m7 - pshuflw m7, m8, 0x1 - pmaxsw m8, m7 - pextrw r6, m8, 0 - mov [r2], r6 - RET - - ; skip-block, i.e. 
just write all zeroes -.blank: - mov r0, dqcoeffmp - movifnidn ncoeffq, ncoeffmp - mov r2, qcoeffmp - mov r3, eobmp - DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob - lea dqcoeffq, [dqcoeffq+ncoeffq*2] - lea qcoeffq, [ qcoeffq+ncoeffq*2] - neg ncoeffq - pxor m7, m7 -.blank_loop: - mova [dqcoeffq+ncoeffq*2+ 0], m7 - mova [dqcoeffq+ncoeffq*2+16], m7 - mova [qcoeffq+ncoeffq*2+ 0], m7 - mova [qcoeffq+ncoeffq*2+16], m7 - add ncoeffq, mmsize - jl .blank_loop - mov word [eobq], 0 - RET -%endmacro - -INIT_XMM ssse3 -QUANTIZE_FP fp, 7 -QUANTIZE_FP fp_32x32, 7 diff --git a/media/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c deleted file mode 100644 index 1feed62566b..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ -#include // AVX2 -#include "vpx/vpx_integer.h" - -void vp9_sad32x32x4d_avx2(uint8_t *src, - int src_stride, - uint8_t *ref[4], - int ref_stride, - unsigned int res[4]) { - __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; - __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; - __m256i sum_mlow, sum_mhigh; - int i; - uint8_t *ref0, *ref1, *ref2, *ref3; - - ref0 = ref[0]; - ref1 = ref[1]; - ref2 = ref[2]; - ref3 = ref[3]; - sum_ref0 = _mm256_set1_epi16(0); - sum_ref1 = _mm256_set1_epi16(0); - sum_ref2 = _mm256_set1_epi16(0); - sum_ref3 = _mm256_set1_epi16(0); - for (i = 0; i < 32 ; i++) { - // load src and all refs - src_reg = _mm256_loadu_si256((__m256i *)(src)); - ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); - ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); - ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); - ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); - // sum of the absolute differences between every ref-i to src - ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); - ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); - ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); - ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); - // sum every ref-i - sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); - sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); - sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); - sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); - - src+= src_stride; - ref0+= ref_stride; - ref1+= ref_stride; - ref2+= ref_stride; - ref3+= ref_stride; - } - { - __m128i sum; - // in sum_ref-i the result is saved in the first 4 bytes - // the other 4 bytes are zeroed. 
- // sum_ref1 and sum_ref3 are shifted left by 4 bytes - sum_ref1 = _mm256_slli_si256(sum_ref1, 4); - sum_ref3 = _mm256_slli_si256(sum_ref3, 4); - - // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 - sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); - sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); - - // merge every 64 bit from each sum_ref-i - sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); - sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); - - // add the low 64 bit to the high 64 bit - sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); - - // add the low 128 bit to the high 128 bit - sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), - _mm256_extractf128_si256(sum_mlow, 1)); - - _mm_storeu_si128((__m128i *)(res), sum); - } -} - -void vp9_sad64x64x4d_avx2(uint8_t *src, - int src_stride, - uint8_t *ref[4], - int ref_stride, - unsigned int res[4]) { - __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg; - __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg; - __m256i ref3_reg, ref3next_reg; - __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; - __m256i sum_mlow, sum_mhigh; - int i; - uint8_t *ref0, *ref1, *ref2, *ref3; - - ref0 = ref[0]; - ref1 = ref[1]; - ref2 = ref[2]; - ref3 = ref[3]; - sum_ref0 = _mm256_set1_epi16(0); - sum_ref1 = _mm256_set1_epi16(0); - sum_ref2 = _mm256_set1_epi16(0); - sum_ref3 = _mm256_set1_epi16(0); - for (i = 0; i < 64 ; i++) { - // load 64 bytes from src and all refs - src_reg = _mm256_loadu_si256((__m256i *)(src)); - srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32)); - ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); - ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32)); - ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); - ref1next_reg = _mm256_loadu_si256((__m256i *) (ref1 + 32)); - ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); - ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32)); - ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); - ref3next_reg = _mm256_loadu_si256((__m256i *) 
(ref3 + 32)); - // sum of the absolute differences between every ref-i to src - ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); - ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); - ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); - ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); - ref0next_reg = _mm256_sad_epu8(ref0next_reg, srcnext_reg); - ref1next_reg = _mm256_sad_epu8(ref1next_reg, srcnext_reg); - ref2next_reg = _mm256_sad_epu8(ref2next_reg, srcnext_reg); - ref3next_reg = _mm256_sad_epu8(ref3next_reg, srcnext_reg); - - // sum every ref-i - sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); - sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); - sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); - sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); - sum_ref0 = _mm256_add_epi32(sum_ref0, ref0next_reg); - sum_ref1 = _mm256_add_epi32(sum_ref1, ref1next_reg); - sum_ref2 = _mm256_add_epi32(sum_ref2, ref2next_reg); - sum_ref3 = _mm256_add_epi32(sum_ref3, ref3next_reg); - src+= src_stride; - ref0+= ref_stride; - ref1+= ref_stride; - ref2+= ref_stride; - ref3+= ref_stride; - } - { - __m128i sum; - - // in sum_ref-i the result is saved in the first 4 bytes - // the other 4 bytes are zeroed. 
- // sum_ref1 and sum_ref3 are shifted left by 4 bytes - sum_ref1 = _mm256_slli_si256(sum_ref1, 4); - sum_ref3 = _mm256_slli_si256(sum_ref3, 4); - - // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 - sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); - sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); - - // merge every 64 bit from each sum_ref-i - sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); - sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); - - // add the low 64 bit to the high 64 bit - sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); - - // add the low 128 bit to the high 128 bit - sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), - _mm256_extractf128_si256(sum_mlow, 1)); - - _mm_storeu_si128((__m128i *)(res), sum); - } -} diff --git a/media/libvpx/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm b/media/libvpx/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm deleted file mode 100644 index 455d10d2c8a..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm +++ /dev/null @@ -1,216 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - -%include "vpx_ports/x86_abi_support.asm" - -; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr -%macro TABULATE_SSIM 0 - paddusw xmm15, xmm3 ; sum_s - paddusw xmm14, xmm4 ; sum_r - movdqa xmm1, xmm3 - pmaddwd xmm1, xmm1 - paddd xmm13, xmm1 ; sum_sq_s - movdqa xmm2, xmm4 - pmaddwd xmm2, xmm2 - paddd xmm12, xmm2 ; sum_sq_r - pmaddwd xmm3, xmm4 - paddd xmm11, xmm3 ; sum_sxr -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_Q 1 - movdqa xmm2,%1 - punpckldq %1,xmm0 - punpckhdq xmm2,xmm0 - paddq %1,xmm2 - movdqa xmm2,%1 - punpcklqdq %1,xmm0 - punpckhqdq xmm2,xmm0 - paddq %1,xmm2 -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_W 1 - movdqa xmm1, %1 - punpcklwd %1,xmm0 - punpckhwd xmm1,xmm0 - paddd %1, xmm1 - SUM_ACROSS_Q %1 -%endmacro -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. 
-global sym(vp9_ssim_parms_16x16_sse2) PRIVATE -sym(vp9_ssim_parms_16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 16 ;row counter -.NextRow: - - ;grab source and reference pixels - movdqu xmm5, [rsi] - movdqu xmm6, [rdi] - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpckhbw xmm3, xmm0 ; high_s - punpckhbw xmm4, xmm0 ; high_r - - TABULATE_SSIM - - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. 
-global sym(vp9_ssim_parms_8x8_sse2) PRIVATE -sym(vp9_ssim_parms_8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 8 ;row counter -.NextRow: - - ;grab source and reference pixels - movq xmm3, [rsi] - movq xmm4, [rdi] - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/media/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c deleted file mode 100644 index a441cadaf70..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include // AVX2 -#include "vpx_ports/mem.h" -#include "vp9/encoder/vp9_variance.h" - -DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { - 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, - 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, - 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, - 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, - 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, - 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, - 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, - 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, - 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, - 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, - 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, - 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, - 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, - 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, - 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, - 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, - 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, - 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, - 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, - 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, - 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, - 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, - 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, - 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, - 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, - 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, - 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, - 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, - 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15 -}; - -#define 
FILTER_SRC(filter) \ - /* filter the source */ \ - exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \ - exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter); \ - \ - /* add 8 to source */ \ - exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8); \ - exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8); \ - \ - /* divide source by 16 */ \ - exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4); \ - exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4); - -#define MERGE_WITH_SRC(src_reg, reg) \ - exp_src_lo = _mm256_unpacklo_epi8(src_reg, reg); \ - exp_src_hi = _mm256_unpackhi_epi8(src_reg, reg); - -#define LOAD_SRC_DST \ - /* load source and destination */ \ - src_reg = _mm256_loadu_si256((__m256i const *) (src)); \ - dst_reg = _mm256_loadu_si256((__m256i const *) (dst)); - -#define AVG_NEXT_SRC(src_reg, size_stride) \ - src_next_reg = _mm256_loadu_si256((__m256i const *) \ - (src + size_stride)); \ - /* average between current and next stride source */ \ - src_reg = _mm256_avg_epu8(src_reg, src_next_reg); - -#define MERGE_NEXT_SRC(src_reg, size_stride) \ - src_next_reg = _mm256_loadu_si256((__m256i const *) \ - (src + size_stride)); \ - MERGE_WITH_SRC(src_reg, src_next_reg) - -#define CALC_SUM_SSE_INSIDE_LOOP \ - /* expand each byte to 2 bytes */ \ - exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg); \ - exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg); \ - /* source - dest */ \ - exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo); \ - exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi); \ - /* caculate sum */ \ - sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo); \ - exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo); \ - sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi); \ - exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi); \ - /* calculate sse */ \ - sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo); \ - sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi); - -// final calculation to sum and sse -#define CALC_SUM_AND_SSE \ - res_cmp = _mm256_cmpgt_epi16(zero_reg, 
sum_reg); \ - sse_reg_hi = _mm256_srli_si256(sse_reg, 8); \ - sum_reg_lo = _mm256_unpacklo_epi16(sum_reg, res_cmp); \ - sum_reg_hi = _mm256_unpackhi_epi16(sum_reg, res_cmp); \ - sse_reg = _mm256_add_epi32(sse_reg, sse_reg_hi); \ - sum_reg = _mm256_add_epi32(sum_reg_lo, sum_reg_hi); \ - \ - sse_reg_hi = _mm256_srli_si256(sse_reg, 4); \ - sum_reg_hi = _mm256_srli_si256(sum_reg, 8); \ - \ - sse_reg = _mm256_add_epi32(sse_reg, sse_reg_hi); \ - sum_reg = _mm256_add_epi32(sum_reg, sum_reg_hi); \ - *((int*)sse)= _mm_cvtsi128_si32(_mm256_castsi256_si128(sse_reg)) + \ - _mm_cvtsi128_si32(_mm256_extractf128_si256(sse_reg, 1)); \ - sum_reg_hi = _mm256_srli_si256(sum_reg, 4); \ - sum_reg = _mm256_add_epi32(sum_reg, sum_reg_hi); \ - sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_reg)) + \ - _mm_cvtsi128_si32(_mm256_extractf128_si256(sum_reg, 1)); - - -unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src, - int src_stride, - int x_offset, - int y_offset, - const uint8_t *dst, - int dst_stride, - int height, - unsigned int *sse) { - __m256i src_reg, dst_reg, exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; - __m256i sse_reg, sum_reg, sse_reg_hi, res_cmp, sum_reg_lo, sum_reg_hi; - __m256i zero_reg; - int i, sum; - sum_reg = _mm256_set1_epi16(0); - sse_reg = _mm256_set1_epi16(0); - zero_reg = _mm256_set1_epi16(0); - - // x_offset = 0 and y_offset = 0 - if (x_offset == 0) { - if (y_offset == 0) { - for (i = 0; i < height ; i++) { - LOAD_SRC_DST - // expend each byte to 2 bytes - MERGE_WITH_SRC(src_reg, zero_reg) - CALC_SUM_SSE_INSIDE_LOOP - src+= src_stride; - dst+= dst_stride; - } - // x_offset = 0 and y_offset = 8 - } else if (y_offset == 8) { - __m256i src_next_reg; - for (i = 0; i < height ; i++) { - LOAD_SRC_DST - AVG_NEXT_SRC(src_reg, src_stride) - // expend each byte to 2 bytes - MERGE_WITH_SRC(src_reg, zero_reg) - CALC_SUM_SSE_INSIDE_LOOP - src+= src_stride; - dst+= dst_stride; - } - // x_offset = 0 and y_offset = bilin interpolation - } else { - __m256i 
filter, pw8, src_next_reg; - - y_offset <<= 5; - filter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + y_offset)); - pw8 = _mm256_set1_epi16(8); - for (i = 0; i < height ; i++) { - LOAD_SRC_DST - MERGE_NEXT_SRC(src_reg, src_stride) - FILTER_SRC(filter) - CALC_SUM_SSE_INSIDE_LOOP - src+= src_stride; - dst+= dst_stride; - } - } - // x_offset = 8 and y_offset = 0 - } else if (x_offset == 8) { - if (y_offset == 0) { - __m256i src_next_reg; - for (i = 0; i < height ; i++) { - LOAD_SRC_DST - AVG_NEXT_SRC(src_reg, 1) - // expand each byte to 2 bytes - MERGE_WITH_SRC(src_reg, zero_reg) - CALC_SUM_SSE_INSIDE_LOOP - src+= src_stride; - dst+= dst_stride; - } - // x_offset = 8 and y_offset = 8 - } else if (y_offset == 8) { - __m256i src_next_reg, src_avg; - // load source and another source starting from the next - // following byte - src_reg = _mm256_loadu_si256((__m256i const *) (src)); - AVG_NEXT_SRC(src_reg, 1) - for (i = 0; i < height ; i++) { - src_avg = src_reg; - src+= src_stride; - LOAD_SRC_DST - AVG_NEXT_SRC(src_reg, 1) - // average between previous average to current average - src_avg = _mm256_avg_epu8(src_avg, src_reg); - // expand each byte to 2 bytes - MERGE_WITH_SRC(src_avg, zero_reg) - // save current source average - CALC_SUM_SSE_INSIDE_LOOP - dst+= dst_stride; - } - // x_offset = 8 and y_offset = bilin interpolation - } else { - __m256i filter, pw8, src_next_reg, src_avg; - y_offset <<= 5; - filter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + y_offset)); - pw8 = _mm256_set1_epi16(8); - // load source and another source starting from the next - // following byte - src_reg = _mm256_loadu_si256((__m256i const *) (src)); - AVG_NEXT_SRC(src_reg, 1) - for (i = 0; i < height ; i++) { - // save current source average - src_avg = src_reg; - src+= src_stride; - LOAD_SRC_DST - AVG_NEXT_SRC(src_reg, 1) - MERGE_WITH_SRC(src_avg, src_reg) - FILTER_SRC(filter) - CALC_SUM_SSE_INSIDE_LOOP - dst+= dst_stride; - } - } - // x_offset = bilin 
interpolation and y_offset = 0 - } else { - if (y_offset == 0) { - __m256i filter, pw8, src_next_reg; - x_offset <<= 5; - filter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + x_offset)); - pw8 = _mm256_set1_epi16(8); - for (i = 0; i < height ; i++) { - LOAD_SRC_DST - MERGE_NEXT_SRC(src_reg, 1) - FILTER_SRC(filter) - CALC_SUM_SSE_INSIDE_LOOP - src+= src_stride; - dst+= dst_stride; - } - // x_offset = bilin interpolation and y_offset = 8 - } else if (y_offset == 8) { - __m256i filter, pw8, src_next_reg, src_pack; - x_offset <<= 5; - filter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + x_offset)); - pw8 = _mm256_set1_epi16(8); - src_reg = _mm256_loadu_si256((__m256i const *) (src)); - MERGE_NEXT_SRC(src_reg, 1) - FILTER_SRC(filter) - // convert each 16 bit to 8 bit to each low and high lane source - src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - for (i = 0; i < height ; i++) { - src+= src_stride; - LOAD_SRC_DST - MERGE_NEXT_SRC(src_reg, 1) - FILTER_SRC(filter) - src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - // average between previous pack to the current - src_pack = _mm256_avg_epu8(src_pack, src_reg); - MERGE_WITH_SRC(src_pack, zero_reg) - CALC_SUM_SSE_INSIDE_LOOP - src_pack = src_reg; - dst+= dst_stride; - } - // x_offset = bilin interpolation and y_offset = bilin interpolation - } else { - __m256i xfilter, yfilter, pw8, src_next_reg, src_pack; - x_offset <<= 5; - xfilter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + x_offset)); - y_offset <<= 5; - yfilter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + y_offset)); - pw8 = _mm256_set1_epi16(8); - // load source and another source starting from the next - // following byte - src_reg = _mm256_loadu_si256((__m256i const *) (src)); - MERGE_NEXT_SRC(src_reg, 1) - - FILTER_SRC(xfilter) - // convert each 16 bit to 8 bit to each low and high lane source - src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - for (i = 0; i < 
height ; i++) { - src+= src_stride; - LOAD_SRC_DST - MERGE_NEXT_SRC(src_reg, 1) - FILTER_SRC(xfilter) - src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - // merge previous pack to current pack source - MERGE_WITH_SRC(src_pack, src_reg) - // filter the source - FILTER_SRC(yfilter) - src_pack = src_reg; - CALC_SUM_SSE_INSIDE_LOOP - dst+= dst_stride; - } - } - } - CALC_SUM_AND_SSE - return sum; -} - -unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src, - int src_stride, - int x_offset, - int y_offset, - const uint8_t *dst, - int dst_stride, - const uint8_t *sec, - int sec_stride, - int height, - unsigned int *sse) { - __m256i sec_reg; - __m256i src_reg, dst_reg, exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; - __m256i sse_reg, sum_reg, sse_reg_hi, res_cmp, sum_reg_lo, sum_reg_hi; - __m256i zero_reg; - int i, sum; - sum_reg = _mm256_set1_epi16(0); - sse_reg = _mm256_set1_epi16(0); - zero_reg = _mm256_set1_epi16(0); - - // x_offset = 0 and y_offset = 0 - if (x_offset == 0) { - if (y_offset == 0) { - for (i = 0; i < height ; i++) { - LOAD_SRC_DST - sec_reg = _mm256_loadu_si256((__m256i const *) (sec)); - src_reg = _mm256_avg_epu8(src_reg, sec_reg); - sec+= sec_stride; - // expend each byte to 2 bytes - MERGE_WITH_SRC(src_reg, zero_reg) - CALC_SUM_SSE_INSIDE_LOOP - src+= src_stride; - dst+= dst_stride; - } - } else if (y_offset == 8) { - __m256i src_next_reg; - for (i = 0; i < height ; i++) { - LOAD_SRC_DST - AVG_NEXT_SRC(src_reg, src_stride) - sec_reg = _mm256_loadu_si256((__m256i const *) (sec)); - src_reg = _mm256_avg_epu8(src_reg, sec_reg); - sec+= sec_stride; - // expend each byte to 2 bytes - MERGE_WITH_SRC(src_reg, zero_reg) - CALC_SUM_SSE_INSIDE_LOOP - src+= src_stride; - dst+= dst_stride; - } - // x_offset = 0 and y_offset = bilin interpolation - } else { - __m256i filter, pw8, src_next_reg; - - y_offset <<= 5; - filter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + y_offset)); - pw8 = _mm256_set1_epi16(8); - for (i = 0; 
i < height ; i++) { - LOAD_SRC_DST - MERGE_NEXT_SRC(src_reg, src_stride) - FILTER_SRC(filter) - src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - sec_reg = _mm256_loadu_si256((__m256i const *) (sec)); - src_reg = _mm256_avg_epu8(src_reg, sec_reg); - sec+= sec_stride; - MERGE_WITH_SRC(src_reg, zero_reg) - CALC_SUM_SSE_INSIDE_LOOP - src+= src_stride; - dst+= dst_stride; - } - } - // x_offset = 8 and y_offset = 0 - } else if (x_offset == 8) { - if (y_offset == 0) { - __m256i src_next_reg; - for (i = 0; i < height ; i++) { - LOAD_SRC_DST - AVG_NEXT_SRC(src_reg, 1) - sec_reg = _mm256_loadu_si256((__m256i const *) (sec)); - src_reg = _mm256_avg_epu8(src_reg, sec_reg); - sec+= sec_stride; - // expand each byte to 2 bytes - MERGE_WITH_SRC(src_reg, zero_reg) - CALC_SUM_SSE_INSIDE_LOOP - src+= src_stride; - dst+= dst_stride; - } - // x_offset = 8 and y_offset = 8 - } else if (y_offset == 8) { - __m256i src_next_reg, src_avg; - // load source and another source starting from the next - // following byte - src_reg = _mm256_loadu_si256((__m256i const *) (src)); - AVG_NEXT_SRC(src_reg, 1) - for (i = 0; i < height ; i++) { - // save current source average - src_avg = src_reg; - src+= src_stride; - LOAD_SRC_DST - AVG_NEXT_SRC(src_reg, 1) - // average between previous average to current average - src_avg = _mm256_avg_epu8(src_avg, src_reg); - sec_reg = _mm256_loadu_si256((__m256i const *) (sec)); - src_avg = _mm256_avg_epu8(src_avg, sec_reg); - sec+= sec_stride; - // expand each byte to 2 bytes - MERGE_WITH_SRC(src_avg, zero_reg) - CALC_SUM_SSE_INSIDE_LOOP - dst+= dst_stride; - } - // x_offset = 8 and y_offset = bilin interpolation - } else { - __m256i filter, pw8, src_next_reg, src_avg; - y_offset <<= 5; - filter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + y_offset)); - pw8 = _mm256_set1_epi16(8); - // load source and another source starting from the next - // following byte - src_reg = _mm256_loadu_si256((__m256i const *) (src)); - 
AVG_NEXT_SRC(src_reg, 1) - for (i = 0; i < height ; i++) { - // save current source average - src_avg = src_reg; - src+= src_stride; - LOAD_SRC_DST - AVG_NEXT_SRC(src_reg, 1) - MERGE_WITH_SRC(src_avg, src_reg) - FILTER_SRC(filter) - src_avg = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - sec_reg = _mm256_loadu_si256((__m256i const *) (sec)); - src_avg = _mm256_avg_epu8(src_avg, sec_reg); - // expand each byte to 2 bytes - MERGE_WITH_SRC(src_avg, zero_reg) - sec+= sec_stride; - CALC_SUM_SSE_INSIDE_LOOP - dst+= dst_stride; - } - } - // x_offset = bilin interpolation and y_offset = 0 - } else { - if (y_offset == 0) { - __m256i filter, pw8, src_next_reg; - x_offset <<= 5; - filter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + x_offset)); - pw8 = _mm256_set1_epi16(8); - for (i = 0; i < height ; i++) { - LOAD_SRC_DST - MERGE_NEXT_SRC(src_reg, 1) - FILTER_SRC(filter) - src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - sec_reg = _mm256_loadu_si256((__m256i const *) (sec)); - src_reg = _mm256_avg_epu8(src_reg, sec_reg); - MERGE_WITH_SRC(src_reg, zero_reg) - sec+= sec_stride; - CALC_SUM_SSE_INSIDE_LOOP - src+= src_stride; - dst+= dst_stride; - } - // x_offset = bilin interpolation and y_offset = 8 - } else if (y_offset == 8) { - __m256i filter, pw8, src_next_reg, src_pack; - x_offset <<= 5; - filter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + x_offset)); - pw8 = _mm256_set1_epi16(8); - src_reg = _mm256_loadu_si256((__m256i const *) (src)); - MERGE_NEXT_SRC(src_reg, 1) - FILTER_SRC(filter) - // convert each 16 bit to 8 bit to each low and high lane source - src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - for (i = 0; i < height ; i++) { - src+= src_stride; - LOAD_SRC_DST - MERGE_NEXT_SRC(src_reg, 1) - FILTER_SRC(filter) - src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - // average between previous pack to the current - src_pack = _mm256_avg_epu8(src_pack, src_reg); - sec_reg = _mm256_loadu_si256((__m256i const 
*) (sec)); - src_pack = _mm256_avg_epu8(src_pack, sec_reg); - sec+= sec_stride; - MERGE_WITH_SRC(src_pack, zero_reg) - src_pack = src_reg; - CALC_SUM_SSE_INSIDE_LOOP - dst+= dst_stride; - } - // x_offset = bilin interpolation and y_offset = bilin interpolation - } else { - __m256i xfilter, yfilter, pw8, src_next_reg, src_pack; - x_offset <<= 5; - xfilter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + x_offset)); - y_offset <<= 5; - yfilter = _mm256_load_si256((__m256i const *) - (bilinear_filters_avx2 + y_offset)); - pw8 = _mm256_set1_epi16(8); - // load source and another source starting from the next - // following byte - src_reg = _mm256_loadu_si256((__m256i const *) (src)); - MERGE_NEXT_SRC(src_reg, 1) - - FILTER_SRC(xfilter) - // convert each 16 bit to 8 bit to each low and high lane source - src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - for (i = 0; i < height ; i++) { - src+= src_stride; - LOAD_SRC_DST - MERGE_NEXT_SRC(src_reg, 1) - FILTER_SRC(xfilter) - src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - // merge previous pack to current pack source - MERGE_WITH_SRC(src_pack, src_reg) - // filter the source - FILTER_SRC(yfilter) - src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi); - sec_reg = _mm256_loadu_si256((__m256i const *) (sec)); - src_pack = _mm256_avg_epu8(src_pack, sec_reg); - MERGE_WITH_SRC(src_pack, zero_reg) - src_pack = src_reg; - sec+= sec_stride; - CALC_SUM_SSE_INSIDE_LOOP - dst+= dst_stride; - } - } - } - CALC_SUM_AND_SSE - return sum; -} diff --git a/media/libvpx/vp9/encoder/x86/vp9_variance_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_variance_avx2.c deleted file mode 100644 index ea09b959e12..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_variance_avx2.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "./vpx_config.h" - -#include "vp9/encoder/vp9_variance.h" -#include "vpx_ports/mem.h" - -typedef void (*get_var_avx2)(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum); - -void vp9_get16x16var_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum); - -void vp9_get32x32var_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum); - -unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride, - int x_offset, int y_offset, - const uint8_t *dst, int dst_stride, - int height, - unsigned int *sse); - -unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src, - int src_stride, - int x_offset, - int y_offset, - const uint8_t *dst, - int dst_stride, - const uint8_t *sec, - int sec_stride, - int height, - unsigned int *sseptr); - -static void variance_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - int w, int h, unsigned int *sse, int *sum, - get_var_avx2 var_fn, int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += 16) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(&src[src_stride * i + j], src_stride, - &ref[ref_stride * i + j], ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } - } -} - - -unsigned int vp9_variance16x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 16, 16, - sse, &sum, vp9_get16x16var_avx2, 16); - 
return *sse - (((unsigned int)sum * sum) >> 8); -} - -unsigned int vp9_mse16x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - vp9_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse; -} - -unsigned int vp9_variance32x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 32, 16, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 9); -} - -unsigned int vp9_variance32x32_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 32, 32, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 10); -} - -unsigned int vp9_variance64x64_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 64, 64, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 12); -} - -unsigned int vp9_variance64x32_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 64, 32, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 11); -} - -unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src, - int src_stride, - int x_offset, - int y_offset, - const uint8_t *dst, - int dst_stride, - unsigned int *sse) { - unsigned int sse1; - const int se1 = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset, - y_offset, dst, dst_stride, - 64, &sse1); - unsigned int sse2; - const int se2 = vp9_sub_pixel_variance32xh_avx2(src + 32, src_stride, - x_offset, y_offset, - dst + 32, dst_stride, - 64, &sse2); - const int se = se1 + se2; - *sse = sse1 + sse2; - return *sse - 
(((int64_t)se * se) >> 12); -} - -unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src, - int src_stride, - int x_offset, - int y_offset, - const uint8_t *dst, - int dst_stride, - unsigned int *sse) { - const int se = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset, - y_offset, dst, dst_stride, - 32, sse); - return *sse - (((int64_t)se * se) >> 10); -} - -unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src, - int src_stride, - int x_offset, - int y_offset, - const uint8_t *dst, - int dst_stride, - unsigned int *sse, - const uint8_t *sec) { - unsigned int sse1; - const int se1 = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset, - y_offset, dst, dst_stride, - sec, 64, 64, &sse1); - unsigned int sse2; - const int se2 = - vp9_sub_pixel_avg_variance32xh_avx2(src + 32, src_stride, x_offset, - y_offset, dst + 32, dst_stride, - sec + 32, 64, 64, &sse2); - const int se = se1 + se2; - - *sse = sse1 + sse2; - - return *sse - (((int64_t)se * se) >> 12); -} - -unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src, - int src_stride, - int x_offset, - int y_offset, - const uint8_t *dst, - int dst_stride, - unsigned int *sse, - const uint8_t *sec) { - // processing 32 element in parallel - const int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset, - y_offset, dst, dst_stride, - sec, 32, 32, sse); - return *sse - (((int64_t)se * se) >> 10); -} diff --git a/media/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c b/media/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c deleted file mode 100644 index f9923280a34..00000000000 --- a/media/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include // AVX2 - -void vp9_get16x16var_avx2(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum) { - __m256i src, src_expand_low, src_expand_high, ref, ref_expand_low; - __m256i ref_expand_high, madd_low, madd_high; - unsigned int i, src_2strides, ref_2strides; - __m256i zero_reg = _mm256_set1_epi16(0); - __m256i sum_ref_src = _mm256_set1_epi16(0); - __m256i madd_ref_src = _mm256_set1_epi16(0); - - // processing two strides in a 256 bit register reducing the number - // of loop stride by half (comparing to the sse2 code) - src_2strides = source_stride << 1; - ref_2strides = recon_stride << 1; - for (i = 0; i < 8; i++) { - src = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i const *) (src_ptr))); - src = _mm256_inserti128_si256(src, - _mm_loadu_si128((__m128i const *)(src_ptr+source_stride)), 1); - - ref =_mm256_castsi128_si256( - _mm_loadu_si128((__m128i const *) (ref_ptr))); - ref = _mm256_inserti128_si256(ref, - _mm_loadu_si128((__m128i const *)(ref_ptr+recon_stride)), 1); - - // expanding to 16 bit each lane - src_expand_low = _mm256_unpacklo_epi8(src, zero_reg); - src_expand_high = _mm256_unpackhi_epi8(src, zero_reg); - - ref_expand_low = _mm256_unpacklo_epi8(ref, zero_reg); - ref_expand_high = _mm256_unpackhi_epi8(ref, zero_reg); - - // src-ref - src_expand_low = _mm256_sub_epi16(src_expand_low, ref_expand_low); - src_expand_high = _mm256_sub_epi16(src_expand_high, ref_expand_high); - - // madd low (src - ref) - madd_low = _mm256_madd_epi16(src_expand_low, src_expand_low); - - // add high to low - src_expand_low = _mm256_add_epi16(src_expand_low, src_expand_high); - - // madd high (src - ref) - madd_high = _mm256_madd_epi16(src_expand_high, src_expand_high); - - sum_ref_src = 
_mm256_add_epi16(sum_ref_src, src_expand_low); - - // add high to low - madd_ref_src = _mm256_add_epi32(madd_ref_src, - _mm256_add_epi32(madd_low, madd_high)); - - src_ptr+= src_2strides; - ref_ptr+= ref_2strides; - } - - { - __m128i sum_res, madd_res; - __m128i expand_sum_low, expand_sum_high, expand_sum; - __m128i expand_madd_low, expand_madd_high, expand_madd; - __m128i ex_expand_sum_low, ex_expand_sum_high, ex_expand_sum; - - // extract the low lane and add it to the high lane - sum_res = _mm_add_epi16(_mm256_castsi256_si128(sum_ref_src), - _mm256_extractf128_si256(sum_ref_src, 1)); - - madd_res = _mm_add_epi32(_mm256_castsi256_si128(madd_ref_src), - _mm256_extractf128_si256(madd_ref_src, 1)); - - // padding each 2 bytes with another 2 zeroed bytes - expand_sum_low = _mm_unpacklo_epi16(_mm256_castsi256_si128(zero_reg), - sum_res); - expand_sum_high = _mm_unpackhi_epi16(_mm256_castsi256_si128(zero_reg), - sum_res); - - // shifting the sign 16 bits right - expand_sum_low = _mm_srai_epi32(expand_sum_low, 16); - expand_sum_high = _mm_srai_epi32(expand_sum_high, 16); - - expand_sum = _mm_add_epi32(expand_sum_low, expand_sum_high); - - // expand each 32 bits of the madd result to 64 bits - expand_madd_low = _mm_unpacklo_epi32(madd_res, - _mm256_castsi256_si128(zero_reg)); - expand_madd_high = _mm_unpackhi_epi32(madd_res, - _mm256_castsi256_si128(zero_reg)); - - expand_madd = _mm_add_epi32(expand_madd_low, expand_madd_high); - - ex_expand_sum_low = _mm_unpacklo_epi32(expand_sum, - _mm256_castsi256_si128(zero_reg)); - ex_expand_sum_high = _mm_unpackhi_epi32(expand_sum, - _mm256_castsi256_si128(zero_reg)); - - ex_expand_sum = _mm_add_epi32(ex_expand_sum_low, ex_expand_sum_high); - - // shift 8 bytes eight - madd_res = _mm_srli_si128(expand_madd, 8); - sum_res = _mm_srli_si128(ex_expand_sum, 8); - - madd_res = _mm_add_epi32(madd_res, expand_madd); - sum_res = _mm_add_epi32(sum_res, ex_expand_sum); - - *((int*)SSE)= _mm_cvtsi128_si32(madd_res); - - *((int*)Sum)= 
_mm_cvtsi128_si32(sum_res); - } -} - -void vp9_get32x32var_avx2(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum) { - __m256i src, src_expand_low, src_expand_high, ref, ref_expand_low; - __m256i ref_expand_high, madd_low, madd_high; - unsigned int i; - __m256i zero_reg = _mm256_set1_epi16(0); - __m256i sum_ref_src = _mm256_set1_epi16(0); - __m256i madd_ref_src = _mm256_set1_epi16(0); - - // processing 32 elements in parallel - for (i = 0; i < 16; i++) { - src = _mm256_loadu_si256((__m256i const *) (src_ptr)); - - ref = _mm256_loadu_si256((__m256i const *) (ref_ptr)); - - // expanding to 16 bit each lane - src_expand_low = _mm256_unpacklo_epi8(src, zero_reg); - src_expand_high = _mm256_unpackhi_epi8(src, zero_reg); - - ref_expand_low = _mm256_unpacklo_epi8(ref, zero_reg); - ref_expand_high = _mm256_unpackhi_epi8(ref, zero_reg); - - // src-ref - src_expand_low = _mm256_sub_epi16(src_expand_low, ref_expand_low); - src_expand_high = _mm256_sub_epi16(src_expand_high, ref_expand_high); - - // madd low (src - ref) - madd_low = _mm256_madd_epi16(src_expand_low, src_expand_low); - - // add high to low - src_expand_low = _mm256_add_epi16(src_expand_low, src_expand_high); - - // madd high (src - ref) - madd_high = _mm256_madd_epi16(src_expand_high, src_expand_high); - - sum_ref_src = _mm256_add_epi16(sum_ref_src, src_expand_low); - - // add high to low - madd_ref_src = _mm256_add_epi32(madd_ref_src, - _mm256_add_epi32(madd_low, madd_high)); - - src_ptr+= source_stride; - ref_ptr+= recon_stride; - } - - { - __m256i expand_sum_low, expand_sum_high, expand_sum; - __m256i expand_madd_low, expand_madd_high, expand_madd; - __m256i ex_expand_sum_low, ex_expand_sum_high, ex_expand_sum; - - // padding each 2 bytes with another 2 zeroed bytes - expand_sum_low = _mm256_unpacklo_epi16(zero_reg, sum_ref_src); - expand_sum_high = _mm256_unpackhi_epi16(zero_reg, sum_ref_src); - - // shifting the sign 16 
bits right - expand_sum_low = _mm256_srai_epi32(expand_sum_low, 16); - expand_sum_high = _mm256_srai_epi32(expand_sum_high, 16); - - expand_sum = _mm256_add_epi32(expand_sum_low, expand_sum_high); - - // expand each 32 bits of the madd result to 64 bits - expand_madd_low = _mm256_unpacklo_epi32(madd_ref_src, zero_reg); - expand_madd_high = _mm256_unpackhi_epi32(madd_ref_src, zero_reg); - - expand_madd = _mm256_add_epi32(expand_madd_low, expand_madd_high); - - ex_expand_sum_low = _mm256_unpacklo_epi32(expand_sum, zero_reg); - ex_expand_sum_high = _mm256_unpackhi_epi32(expand_sum, zero_reg); - - ex_expand_sum = _mm256_add_epi32(ex_expand_sum_low, ex_expand_sum_high); - - // shift 8 bytes eight - madd_ref_src = _mm256_srli_si256(expand_madd, 8); - sum_ref_src = _mm256_srli_si256(ex_expand_sum, 8); - - madd_ref_src = _mm256_add_epi32(madd_ref_src, expand_madd); - sum_ref_src = _mm256_add_epi32(sum_ref_src, ex_expand_sum); - - // extract the low lane and the high lane and add the results - *((int*)SSE)= _mm_cvtsi128_si32(_mm256_castsi256_si128(madd_ref_src)) + - _mm_cvtsi128_si32(_mm256_extractf128_si256(madd_ref_src, 1)); - - *((int*)Sum)= _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_ref_src)) + - _mm_cvtsi128_si32(_mm256_extractf128_si256(sum_ref_src, 1)); - } -} diff --git a/media/libvpx/vpx/internal/vpx_psnr.h b/media/libvpx/vpx/internal/vpx_psnr.h deleted file mode 100644 index 07d81bb8d90..00000000000 --- a/media/libvpx/vpx/internal/vpx_psnr.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VPX_INTERNAL_VPX_PSNR_H_ -#define VPX_INTERNAL_VPX_PSNR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t - -/*!\brief Converts SSE to PSNR - * - * Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR). - * - * \param[in] samples Number of samples - * \param[in] peak Max sample value - * \param[in] sse Sum of squared errors - */ -double vpx_sse_to_psnr(double samples, double peak, double sse); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_INTERNAL_VPX_PSNR_H_ diff --git a/media/libvpx/vpx/src/vpx_psnr.c b/media/libvpx/vpx/src/vpx_psnr.c deleted file mode 100644 index 05843acb61f..00000000000 --- a/media/libvpx/vpx/src/vpx_psnr.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vpx/internal/vpx_psnr.h" - -#define MAX_PSNR 100.0 - -double vpx_sse_to_psnr(double samples, double peak, double sse) { - if (sse > 0.0) { - const double psnr = 10.0 * log10(samples * peak * peak / sse); - return psnr > MAX_PSNR ? MAX_PSNR : psnr; - } else { - return MAX_PSNR; - } -} diff --git a/media/libvpx/vpx/vpx_frame_buffer.h b/media/libvpx/vpx/vpx_frame_buffer.h deleted file mode 100644 index 41038b10df6..00000000000 --- a/media/libvpx/vpx/vpx_frame_buffer.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_VPX_FRAME_BUFFER_H_ -#define VPX_VPX_FRAME_BUFFER_H_ - -/*!\file - * \brief Describes the decoder external frame buffer interface. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_integer.h" - -/*!\brief The maximum number of work buffers used by libvpx. - */ -#define VPX_MAXIMUM_WORK_BUFFERS 1 - -/*!\brief The maximum number of reference buffers that a VP9 encoder may use. - */ -#define VP9_MAXIMUM_REF_BUFFERS 8 - -/*!\brief External frame buffer - * - * This structure holds allocated frame buffers used by the decoder. - */ -typedef struct vpx_codec_frame_buffer { - uint8_t *data; /**< Pointer to the data buffer */ - size_t size; /**< Size of data in bytes */ - void *priv; /**< Frame's private data */ -} vpx_codec_frame_buffer_t; - -/*!\brief get frame buffer callback prototype - * - * This callback is invoked by the decoder to retrieve data for the frame - * buffer in order for the decode call to complete. The callback must - * allocate at least min_size in bytes and assign it to fb->data. The callback - * must zero out all the data allocated. Then the callback must set fb->size - * to the allocated size. The application does not need to align the allocated - * data. The callback is triggered when the decoder needs a frame buffer to - * decode a compressed image into. This function may be called more than once - * for every call to vpx_codec_decode. The application may set fb->priv to - * some data which will be passed back in the ximage and the release function - * call. |fb| is guaranteed to not be NULL. On success the callback must - * return 0. Any failure the callback must return a value less than 0. 
- * - * \param[in] priv Callback's private data - * \param[in] new_size Size in bytes needed by the buffer - * \param[in,out] fb Pointer to vpx_codec_frame_buffer_t - */ -typedef int (*vpx_get_frame_buffer_cb_fn_t)( - void *priv, size_t min_size, vpx_codec_frame_buffer_t *fb); - -/*!\brief release frame buffer callback prototype - * - * This callback is invoked by the decoder when the frame buffer is not - * referenced by any other buffers. |fb| is guaranteed to not be NULL. On - * success the callback must return 0. Any failure the callback must return - * a value less than 0. - * - * \param[in] priv Callback's private data - * \param[in] fb Pointer to vpx_codec_frame_buffer_t - */ -typedef int (*vpx_release_frame_buffer_cb_fn_t)( - void *priv, vpx_codec_frame_buffer_t *fb); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_VPX_FRAME_BUFFER_H_ From de132743edecd140f3039c679790acdeb00426f0 Mon Sep 17 00:00:00 2001 From: Ralph Giles Date: Wed, 1 Oct 2014 17:28:03 -0700 Subject: [PATCH 023/146] Bug 1063327 - Reject vp9 frames with invalid tiles. r=kinetik --- media/libvpx/vp9/decoder/vp9_decodframe.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/media/libvpx/vp9/decoder/vp9_decodframe.c b/media/libvpx/vp9/decoder/vp9_decodframe.c index 3c4781bde32..d20593a1119 100644 --- a/media/libvpx/vp9/decoder/vp9_decodframe.c +++ b/media/libvpx/vp9/decoder/vp9_decodframe.c @@ -868,6 +868,11 @@ static size_t get_tile(const uint8_t *const data_end, size = read_be32(*data); *data += 4; + + if (size > data_end - *data) { + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile size"); + } } else { size = data_end - *data; } From a8aeeace85750b24480c4095100ba57710f51fdc Mon Sep 17 00:00:00 2001 From: Xidorn Quan Date: Thu, 2 Oct 2014 14:29:04 -0400 Subject: [PATCH 024/146] Bug 1075336 - Fix lifetime management of CounterStyle. 
r=dbaron --HG-- extra : rebase_source : c9fe6b023024c11147372cd4a9d7f25fdfaabed8 --- layout/style/CounterStyleManager.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/layout/style/CounterStyleManager.cpp b/layout/style/CounterStyleManager.cpp index cb905331ea6..9f42c3617e3 100644 --- a/layout/style/CounterStyleManager.cpp +++ b/layout/style/CounterStyleManager.cpp @@ -2038,11 +2038,6 @@ InvalidateOldStyle(const nsSubstring& aKey, static_cast(aStyle.get()); if (style->GetRule() != newRule) { toBeRemoved = true; - // Since |style| is being removed from mCacheTable, it won't be visited - // by our post-removal InvalidateDependentData() traversal. So, we have - // to give it a manual ResetDependentData() call. (This only really - // matters if something else is holding a reference & keeping it alive.) - style->ResetDependentData(); } else if (style->GetRuleGeneration() != newRule->GetGeneration()) { toBeUpdated = true; style->ResetCachedData(); @@ -2052,6 +2047,13 @@ InvalidateOldStyle(const nsSubstring& aKey, data->mChanged = data->mChanged || toBeUpdated || toBeRemoved; if (toBeRemoved) { if (aStyle->IsDependentStyle()) { + if (aStyle->IsCustomStyle()) { + // Since |aStyle| is being removed from mCacheTable, it won't be visited + // by our post-removal InvalidateDependentData() traversal. So, we have + // to give it a manual ResetDependentData() call. (This only really + // matters if something else is holding a reference & keeping it alive.) + static_cast(aStyle.get())->ResetDependentData(); + } // The object has to be held here so that it will not be released // before all pointers that refer to it are reset. It will be // released when the MarkAndCleanData goes out of scope at the end From 7b6f220caade484becbf64e1682ad628323b70f1 Mon Sep 17 00:00:00 2001 From: Eric Rahm Date: Thu, 2 Oct 2014 11:33:37 -0700 Subject: [PATCH 025/146] Bug 1074415 - Build error in nsNPAPIPluginInstance when enabling PR_LOGGING in non-debug builds. 
r=johns --- dom/plugins/base/nsNPAPIPluginInstance.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dom/plugins/base/nsNPAPIPluginInstance.cpp b/dom/plugins/base/nsNPAPIPluginInstance.cpp index eb70a3c8378..094635e422e 100644 --- a/dom/plugins/base/nsNPAPIPluginInstance.cpp +++ b/dom/plugins/base/nsNPAPIPluginInstance.cpp @@ -558,7 +558,11 @@ nsresult nsNPAPIPluginInstance::SetWindow(NPWindow* window) NPPAutoPusher nppPusher(&mNPP); +#ifndef PR_LOGGING DebugOnly error; +#else + NPError error; +#endif NS_TRY_SAFE_CALL_RETURN(error, (*pluginFunctions->setwindow)(&mNPP, (NPWindow*)window), this, NS_PLUGIN_CALL_UNSAFE_TO_REENTER_GECKO); From 6c4b49217d86936ebaf404de9a3a2677f01fd306 Mon Sep 17 00:00:00 2001 From: Joel Maher Date: Thu, 2 Oct 2014 14:36:54 -0400 Subject: [PATCH 026/146] Bug 1076990 - update talos.json on tip to capture mainthreadio and other talos cleanup. r=wlach --- testing/talos/talos.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/talos/talos.json b/testing/talos/talos.json index 7a62fb6d592..9111d75118d 100644 --- a/testing/talos/talos.json +++ b/testing/talos/talos.json @@ -1,11 +1,11 @@ { "talos.zip": { - "url": "http://talos-bundles.pvt.build.mozilla.org/zips/talos.49b74c08dad4.zip", + "url": "http://talos-bundles.pvt.build.mozilla.org/zips/talos.a8ce24a66add.zip", "path": "" }, "global": { "talos_repo": "https://hg.mozilla.org/build/talos", - "talos_revision": "5e18c3e56875" + "talos_revision": "a8ce24a66add" }, "suites": { "chromez": { From 0064bea8704357ed68626a4845522a597e452f13 Mon Sep 17 00:00:00 2001 From: Richard Barnes Date: Thu, 2 Oct 2014 14:47:19 -0400 Subject: [PATCH 027/146] Bug 1074001 - Expose WebCrypto by default (remove pref dom.webcrypto.enabled) r=bz --- modules/libpref/init/all.js | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index 1688fd90d75..9edd814f0a2 100644 --- a/modules/libpref/init/all.js +++ 
b/modules/libpref/init/all.js @@ -153,11 +153,7 @@ pref("dom.keyboardevent.code.enabled", true); #endif // Whether the WebCrypto API is enabled -#ifdef RELEASE_BUILD -pref("dom.webcrypto.enabled", false); -#else pref("dom.webcrypto.enabled", true); -#endif // Whether the UndoManager API is enabled pref("dom.undo_manager.enabled", false); From d047384ad3273a0efb13a6b5d1e565d14543d108 Mon Sep 17 00:00:00 2001 From: Ryan VanderMeulen Date: Thu, 2 Oct 2014 15:14:28 -0400 Subject: [PATCH 028/146] Backed out changeset fd8e58a613da (bug 1074415) for Linux Werror bustage. CLOSED TREE --- dom/plugins/base/nsNPAPIPluginInstance.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dom/plugins/base/nsNPAPIPluginInstance.cpp b/dom/plugins/base/nsNPAPIPluginInstance.cpp index 094635e422e..eb70a3c8378 100644 --- a/dom/plugins/base/nsNPAPIPluginInstance.cpp +++ b/dom/plugins/base/nsNPAPIPluginInstance.cpp @@ -558,11 +558,7 @@ nsresult nsNPAPIPluginInstance::SetWindow(NPWindow* window) NPPAutoPusher nppPusher(&mNPP); -#ifndef PR_LOGGING DebugOnly error; -#else - NPError error; -#endif NS_TRY_SAFE_CALL_RETURN(error, (*pluginFunctions->setwindow)(&mNPP, (NPWindow*)window), this, NS_PLUGIN_CALL_UNSAFE_TO_REENTER_GECKO); From e5793f09bd1a20bb659caab3103da03147e8afc2 Mon Sep 17 00:00:00 2001 From: Ryan VanderMeulen Date: Thu, 2 Oct 2014 15:53:21 -0400 Subject: [PATCH 029/146] Backed out 3 changesets (bug 1076129, bug 1003448) for frequent xpcshell crashes on a CLOSED TREE. 
Backed out changeset 3034162ee435 (bug 1003448) Backed out changeset 086fe4b0ba14 (bug 1003448) Backed out changeset 1babd65ebec7 (bug 1076129) --- modules/libpref/init/all.js | 10 - netwerk/base/public/nsISpeculativeConnect.idl | 7 +- netwerk/base/src/Predictor.cpp | 7 - netwerk/base/src/nsSocketTransport2.cpp | 4 +- netwerk/protocol/http/AlternateServices.cpp | 452 ------------------ netwerk/protocol/http/AlternateServices.h | 128 ----- netwerk/protocol/http/Http2Session.cpp | 221 +-------- netwerk/protocol/http/Http2Session.h | 24 +- netwerk/protocol/http/NullHttpTransaction.cpp | 4 +- netwerk/protocol/http/NullHttpTransaction.h | 8 +- netwerk/protocol/http/SpdySession3.cpp | 30 +- netwerk/protocol/http/SpdySession31.cpp | 31 +- netwerk/protocol/http/moz.build | 1 - netwerk/protocol/http/nsAHttpConnection.h | 9 - netwerk/protocol/http/nsAHttpTransaction.h | 2 +- netwerk/protocol/http/nsHttp.cpp | 122 ----- netwerk/protocol/http/nsHttp.h | 51 -- netwerk/protocol/http/nsHttpAtomList.h | 2 - netwerk/protocol/http/nsHttpChannel.cpp | 211 +------- netwerk/protocol/http/nsHttpChannel.h | 3 - netwerk/protocol/http/nsHttpConnection.cpp | 69 +-- netwerk/protocol/http/nsHttpConnection.h | 2 - .../protocol/http/nsHttpConnectionInfo.cpp | 98 +--- netwerk/protocol/http/nsHttpConnectionInfo.h | 40 +- netwerk/protocol/http/nsHttpConnectionMgr.cpp | 73 +-- netwerk/protocol/http/nsHttpConnectionMgr.h | 13 +- netwerk/protocol/http/nsHttpHandler.cpp | 34 +- netwerk/protocol/http/nsHttpHandler.h | 21 - netwerk/protocol/http/nsHttpRequestHead.cpp | 12 - netwerk/protocol/http/nsHttpRequestHead.h | 4 - netwerk/protocol/http/nsHttpTransaction.cpp | 28 +- netwerk/protocol/http/nsHttpTransaction.h | 1 - netwerk/socket/nsISSLSocketControl.idl | 28 +- netwerk/test/unit/test_http2.js | 55 --- .../ssl/src/SSLServerCertVerification.cpp | 12 +- security/manager/ssl/src/nsNSSIOLayer.cpp | 92 +--- security/manager/ssl/src/nsNSSIOLayer.h | 18 - toolkit/components/telemetry/Histograms.json | 10 - 
38 files changed, 148 insertions(+), 1789 deletions(-) delete mode 100644 netwerk/protocol/http/AlternateServices.cpp delete mode 100644 netwerk/protocol/http/AlternateServices.h diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index 9edd814f0a2..4300b439fb8 100644 --- a/modules/libpref/init/all.js +++ b/modules/libpref/init/all.js @@ -1242,16 +1242,6 @@ pref("network.http.spdy.send-buffer-size", 131072); pref("network.http.spdy.allow-push", true); pref("network.http.spdy.push-allowance", 131072); -// alt-svc allows separation of transport routing from -// the origin host without using a proxy. -#ifdef RELEASE_BUILD -pref("network.http.altsvc.enabled", false); -pref("network.http.altsvc.oe", false); -#else -pref("network.http.altsvc.enabled", true); -pref("network.http.altsvc.oe", true); -#endif - pref("network.http.diagnostics", false); pref("network.http.pacing.requests.enabled", true); diff --git a/netwerk/base/public/nsISpeculativeConnect.idl b/netwerk/base/public/nsISpeculativeConnect.idl index 4ef559d6337..aabe54278db 100644 --- a/netwerk/base/public/nsISpeculativeConnect.idl +++ b/netwerk/base/public/nsISpeculativeConnect.idl @@ -35,7 +35,7 @@ interface nsISpeculativeConnect : nsISupports * inline) to determine whether or not to actually make a speculative * connection. */ -[builtinclass, uuid(f6a0d1e5-369f-4abc-81ae-d370d36e4006)] +[builtinclass, uuid(a9cdd875-2ef8-4d53-95d6-e4e18f65e0db)] interface nsISpeculativeConnectionOverrider : nsISupports { /** @@ -63,9 +63,4 @@ interface nsISpeculativeConnectionOverrider : nsISupports * usage. 
*/ [infallible] readonly attribute boolean isFromPredictor; - - /** - * by default speculative connections are not made to RFC 1918 addresses - */ - [infallible] readonly attribute boolean allow1918; }; diff --git a/netwerk/base/src/Predictor.cpp b/netwerk/base/src/Predictor.cpp index e827bd5e875..08904e7ad0a 100644 --- a/netwerk/base/src/Predictor.cpp +++ b/netwerk/base/src/Predictor.cpp @@ -395,13 +395,6 @@ Predictor::GetIsFromPredictor(bool *isFromPredictor) return NS_OK; } -NS_IMETHODIMP -Predictor::GetAllow1918(bool *allow1918) -{ - *allow1918 = false; - return NS_OK; -} - // Predictor::nsIInterfaceRequestor NS_IMETHODIMP diff --git a/netwerk/base/src/nsSocketTransport2.cpp b/netwerk/base/src/nsSocketTransport2.cpp index 3b149ecca55..19233f554e0 100644 --- a/netwerk/base/src/nsSocketTransport2.cpp +++ b/netwerk/base/src/nsSocketTransport2.cpp @@ -1239,9 +1239,7 @@ nsSocketTransport::InitiateSocket() netAddrCString.get())); } #endif - mCondition = NS_ERROR_CONNECTION_REFUSED; - OnSocketDetached(nullptr); - return mCondition; + return NS_ERROR_CONNECTION_REFUSED; } // diff --git a/netwerk/protocol/http/AlternateServices.cpp b/netwerk/protocol/http/AlternateServices.cpp deleted file mode 100644 index 542568e9145..00000000000 --- a/netwerk/protocol/http/AlternateServices.cpp +++ /dev/null @@ -1,452 +0,0 @@ -/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* vim: set sw=2 ts=8 et tw=80 : */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#include "HttpLog.h" - -#include "AlternateServices.h" -#include "nsHttpConnectionInfo.h" -#include "nsHttpHandler.h" -#include "nsThreadUtils.h" -#include "NullHttpTransaction.h" -#include "nsISSLStatusProvider.h" -#include "nsISSLStatus.h" -#include "nsISSLSocketControl.h" - -namespace mozilla { -namespace net { - -AltSvcMapping::AltSvcMapping(const nsACString &originScheme, - const nsACString &originHost, - int32_t originPort, - const nsACString &username, - bool privateBrowsing, - uint32_t expiresAt, - const nsACString &alternateHost, - int32_t alternatePort, - const nsACString &npnToken) - : mAlternateHost(alternateHost) - , mAlternatePort(alternatePort) - , mOriginHost(originHost) - , mOriginPort(originPort) - , mUsername(username) - , mPrivate(privateBrowsing) - , mExpiresAt(expiresAt) - , mValidated(false) - , mRunning(false) - , mNPNToken(npnToken) -{ - mHttps = originScheme.Equals("https"); - - if (mAlternatePort == -1) { - mAlternatePort = mHttps ? NS_HTTPS_DEFAULT_PORT : NS_HTTP_DEFAULT_PORT; - } - if (mOriginPort == -1) { - mOriginPort = mHttps ? NS_HTTPS_DEFAULT_PORT : NS_HTTP_DEFAULT_PORT; - } - - LOG(("AltSvcMapping ctor %p %s://%s:%d to %s:%d\n", this, - nsCString(originScheme).get(), mOriginHost.get(), mOriginPort, - mAlternateHost.get(), mAlternatePort)); - - if (mAlternateHost.IsEmpty()) { - mAlternateHost = mOriginHost; - } - MakeHashKey(mHashKey, originScheme, mOriginHost, mOriginPort, mPrivate); -} - -void -AltSvcMapping::MakeHashKey(nsCString &outKey, - const nsACString &originScheme, - const nsACString &originHost, - int32_t originPort, - bool privateBrowsing) -{ - if (originPort == -1) { - bool isHttps = originScheme.Equals("https"); - originPort = isHttps ? NS_HTTPS_DEFAULT_PORT : NS_HTTP_DEFAULT_PORT; - } - - outKey.Append(originScheme); - outKey.Append(':'); - outKey.Append(originHost); - outKey.Append(':'); - outKey.AppendInt(originPort); - outKey.Append(':'); - outKey.Append(privateBrowsing ? 
'P' : '.'); -} - -int32_t -AltSvcMapping::TTL() -{ - return mExpiresAt - NowInSeconds(); -} - -void -AltSvcMapping::SetExpired() -{ - mExpiresAt = NowInSeconds() - 1; -} - -bool -AltSvcMapping::RouteEquals(AltSvcMapping *map) -{ - MOZ_ASSERT(map->mHashKey.Equals(mHashKey)); - return mAlternateHost.Equals(map->mAlternateHost) && - (mAlternatePort == map->mAlternatePort) && - mNPNToken.Equals(map->mNPNToken); - - return false; -} - -void -AltSvcMapping::GetConnectionInfo(nsHttpConnectionInfo **outCI, - nsProxyInfo *pi) -{ - nsRefPtr ci = - new nsHttpConnectionInfo(mAlternateHost, mAlternatePort, mNPNToken, - mUsername, pi, mOriginHost, mOriginPort); - if (!mHttps) { - ci->SetRelaxed(true); - } - ci->SetPrivate(mPrivate); - ci.forget(outCI); -} - -// This is the asynchronous null transaction used to validate -// an alt-svc advertisement -class AltSvcTransaction MOZ_FINAL : public NullHttpTransaction -{ -public: - AltSvcTransaction(AltSvcMapping *map, - nsHttpConnectionInfo *ci, - nsIInterfaceRequestor *callbacks, - uint32_t caps) - : NullHttpTransaction(ci, callbacks, caps) - , mMapping(map) - , mRunning(false) - , mTriedToValidate(false) - , mTriedToWrite(false) - { - MOZ_ASSERT(mMapping); - LOG(("AltSvcTransaction ctor %p map %p [%s -> %s]", - this, map, map->OriginHost().get(), map->AlternateHost().get())); - } - - ~AltSvcTransaction() - { - LOG(("AltSvcTransaction dtor %p map %p running %d", - this, mMapping.get(), mRunning)); - - if (mRunning) { - MOZ_ASSERT(mMapping->IsRunning()); - MaybeValidate(NS_OK); - } - if (!mMapping->Validated()) { - // try again later - mMapping->SetExpiresAt(NowInSeconds() + 2); - } - LOG(("AltSvcTransaction dtor %p map %p validated %d [%s]", - this, mMapping.get(), mMapping->Validated(), - mMapping->HashKey().get())); - mMapping->SetRunning(false); - } - - void StartTransaction() - { - LOG(("AltSvcTransaction::StartTransaction() %p", this)); - - MOZ_ASSERT(!mRunning); - MOZ_ASSERT(!mMapping->IsRunning()); - mCaps &= 
~NS_HTTP_ALLOW_KEEPALIVE; - mRunning = true; - mMapping->SetRunning(true); - } - - void MaybeValidate(nsresult reason) - { - if (mTriedToValidate) { - return; - } - mTriedToValidate = true; - - LOG(("AltSvcTransaction::MaybeValidate() %p reason=%x running=%d conn=%p write=%d", - this, reason, mRunning, mConnection.get(), mTriedToWrite)); - - if (mTriedToWrite && reason == NS_BASE_STREAM_CLOSED) { - // The normal course of events is to cause the transaction to fail with CLOSED - // on a write - so that's a success that means the HTTP/2 session is setup. - reason = NS_OK; - } - - if (NS_FAILED(reason) || !mRunning || !mConnection) { - LOG(("AltSvcTransaction::MaybeValidate %p Failed due to precondition", this)); - return; - } - - // insist on spdy/3* or >= http/2 - uint32_t version = mConnection->Version(); - LOG(("AltSvcTransaction::MaybeValidate() %p version %d\n", this, version)); - if ((version < HTTP_VERSION_2) && - (version != SPDY_VERSION_31) && (version != SPDY_VERSION_3)) { - LOG(("AltSvcTransaction::MaybeValidate %p Failed due to protocol version", this)); - return; - } - - nsCOMPtr secInfo; - mConnection->GetSecurityInfo(getter_AddRefs(secInfo)); - nsCOMPtr socketControl = do_QueryInterface(secInfo); - bool bypassAuth = false; - - if (!socketControl || - NS_FAILED(socketControl->GetBypassAuthentication(&bypassAuth))) { - bypassAuth = false; - } - - LOG(("AltSvcTransaction::MaybeValidate() %p socketControl=%p bypass=%d", - this, socketControl.get(), bypassAuth)); - - if (bypassAuth) { - LOG(("AltSvcTransaction::MaybeValidate() %p " - "validating alternate service because relaxed", this)); - mMapping->SetValidated(true); - return; - } - - if (socketControl->GetFailedVerification()) { - LOG(("AltSvcTransaction::MaybeValidate() %p " - "not validated due to auth error", this)); - return; - } - - LOG(("AltSvcTransaction::MaybeValidate() %p " - "validating alternate service with auth check", this)); - mMapping->SetValidated(true); - } - - void Close(nsresult 
reason) MOZ_OVERRIDE - { - LOG(("AltSvcTransaction::Close() %p reason=%x running %d", - this, reason, mRunning)); - - MaybeValidate(reason); - if (!mMapping->Validated() && mConnection) { - mConnection->DontReuse(); - } - NullHttpTransaction::Close(reason); - } - - nsresult ReadSegments(nsAHttpSegmentReader *reader, - uint32_t count, uint32_t *countRead) MOZ_OVERRIDE - { - LOG(("AltSvcTransaction::ReadSegements() %p\n")); - mTriedToWrite = true; - return NullHttpTransaction::ReadSegments(reader, count, countRead); - } - -private: - nsRefPtr mMapping; - uint32_t mRunning : 1; - uint32_t mTriedToValidate : 1; - uint32_t mTriedToWrite : 1; -}; - -void -AltSvcCache::UpdateAltServiceMapping(AltSvcMapping *map, nsProxyInfo *pi, - nsIInterfaceRequestor *aCallbacks, - uint32_t caps) -{ - MOZ_ASSERT(NS_IsMainThread()); - AltSvcMapping *existing = mHash.GetWeak(map->mHashKey); - LOG(("AltSvcCache::UpdateAltServiceMapping %p map %p existing %p %s", - this, map, existing, map->AlternateHost().get())); - - if (existing && (existing->TTL() <= 0)) { - LOG(("AltSvcCache::UpdateAltServiceMapping %p map %p is expired", - this, map)); - existing = nullptr; - mHash.Remove(map->mHashKey); - } - - if (existing && existing->mValidated) { - if (existing->RouteEquals(map)) { - // update expires - LOG(("AltSvcCache::UpdateAltServiceMapping %p map %p updates ttl of %p\n", - this, map, existing)); - existing->SetExpiresAt(map->GetExpiresAt()); - return; - } - - LOG(("AltSvcCache::UpdateAltServiceMapping %p map %p overwrites %p\n", - this, map, existing)); - existing = nullptr; - mHash.Remove(map->mHashKey); - } - - if (existing) { - LOG(("AltSvcCache::UpdateAltServiceMapping %p map %p ignored because %p " - "still in progress\n", this, map, existing)); - return; - } - - mHash.Put(map->mHashKey, map); - - nsRefPtr ci; - map->GetConnectionInfo(getter_AddRefs(ci), pi); - caps |= ci->GetAnonymous() ? 
NS_HTTP_LOAD_ANONYMOUS : 0; - - nsCOMPtr callbacks = new AltSvcOverride(aCallbacks); - - nsRefPtr nullTransaction = - new AltSvcTransaction(map, ci, aCallbacks, caps); - nullTransaction->StartTransaction(); - gHttpHandler->ConnMgr()->SpeculativeConnect(ci, callbacks, caps, nullTransaction); -} - -AltSvcMapping * -AltSvcCache::GetAltServiceMapping(const nsACString &scheme, const nsACString &host, - int32_t port, bool privateBrowsing) -{ - MOZ_ASSERT(NS_IsMainThread()); - if (!gHttpHandler->AllowAltSvc()) { - return nullptr; - } - if (!gHttpHandler->AllowAltSvcOE() && scheme.Equals(NS_LITERAL_CSTRING("http"))) { - return nullptr; - } - - nsAutoCString key; - AltSvcMapping::MakeHashKey(key, scheme, host, port, privateBrowsing); - AltSvcMapping *existing = mHash.GetWeak(key); - LOG(("AltSvcCache::GetAltServiceMapping %p key=%s " - "existing=%p validated=%d running=%d ttl=%d", - this, key.get(), existing, existing ? existing->mValidated : 0, - existing ? existing->mRunning : 0, - existing ? existing->TTL() : 0)); - if (existing && (existing->TTL() <= 0)) { - LOG(("AltSvcCache::GetAltServiceMapping %p map %p is expired", this, existing)); - mHash.Remove(existing->mHashKey); - existing = nullptr; - } - if (existing && existing->mValidated) - return existing; - return nullptr; -} - -class ProxyClearHostMapping : public nsRunnable { -public: - explicit ProxyClearHostMapping(const nsACString &host, int32_t port) - : mHost(host) - , mPort(port) - {} - - NS_IMETHOD Run() - { - MOZ_ASSERT(NS_IsMainThread()); - gHttpHandler->ConnMgr()->ClearHostMapping(mHost, mPort); - return NS_OK; - } -private: - nsCString mHost; - int32_t mPort; -}; - -void -AltSvcCache::ClearHostMapping(const nsACString &host, int32_t port) -{ - if (!NS_IsMainThread()) { - nsCOMPtr event = new ProxyClearHostMapping(host, port); - if (event) { - NS_DispatchToMainThread(event); - } - return; - } - - nsAutoCString key; - - AltSvcMapping::MakeHashKey(key, NS_LITERAL_CSTRING("http"), host, port, true); - 
AltSvcMapping *existing = mHash.GetWeak(key); - if (existing) { - existing->SetExpired(); - } - - AltSvcMapping::MakeHashKey(key, NS_LITERAL_CSTRING("https"), host, port, true); - existing = mHash.GetWeak(key); - if (existing) { - existing->SetExpired(); - } - - AltSvcMapping::MakeHashKey(key, NS_LITERAL_CSTRING("http"), host, port, false); - existing = mHash.GetWeak(key); - if (existing) { - existing->SetExpired(); - } - - AltSvcMapping::MakeHashKey(key, NS_LITERAL_CSTRING("https"), host, port, false); - existing = mHash.GetWeak(key); - if (existing) { - existing->SetExpired(); - } -} - -void -AltSvcCache::ClearAltServiceMappings() -{ - MOZ_ASSERT(NS_IsMainThread()); - mHash.Clear(); -} - -NS_IMETHODIMP -AltSvcOverride::GetInterface(const nsIID &iid, void **result) -{ - if (NS_SUCCEEDED(QueryInterface(iid, result)) && *result) { - return NS_OK; - } - return mCallbacks->GetInterface(iid, result); -} - -NS_IMETHODIMP -AltSvcOverride::GetIgnoreIdle(bool *ignoreIdle) -{ - *ignoreIdle = true; - return NS_OK; -} - -NS_IMETHODIMP -AltSvcOverride::GetIgnorePossibleSpdyConnections(bool *ignorePossibleSpdyConnections) -{ - *ignorePossibleSpdyConnections = true; - return NS_OK; -} - -NS_IMETHODIMP -AltSvcOverride::GetParallelSpeculativeConnectLimit( - uint32_t *parallelSpeculativeConnectLimit) -{ - *parallelSpeculativeConnectLimit = 32; - return NS_OK; -} - -NS_IMETHODIMP -AltSvcOverride::GetIsFromPredictor(bool *isFromPredictor) -{ - *isFromPredictor = false; - return NS_OK; -} - -NS_IMETHODIMP -AltSvcOverride::GetAllow1918(bool *allow) -{ - // normally we don't do speculative connects to 1918.. 
and we use - // speculative connects for the mapping validation, so override - // that default here for alt-svc - *allow = true; - return NS_OK; -} - -NS_IMPL_ISUPPORTS(AltSvcOverride, nsIInterfaceRequestor, nsISpeculativeConnectionOverrider) - -} // namespace mozilla::net -} // namespace mozilla diff --git a/netwerk/protocol/http/AlternateServices.h b/netwerk/protocol/http/AlternateServices.h deleted file mode 100644 index 75d7bab6279..00000000000 --- a/netwerk/protocol/http/AlternateServices.h +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* vim: set sw=2 ts=8 et tw=80 : */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/* -Alt-Svc allows separation of transport routing from the origin host without -using a proxy. See https://httpwg.github.io/http-extensions/alt-svc.html - - Nice To Have Future Enhancements:: - * flush on network change event when we have an indicator - * use established https channel for http instead separate of conninfo hash - * pin via http-tls header - * clear based on origin when a random fail happens not just 421 - * upon establishment of channel, cancel and retry trans that have not yet written anything - * persistent storage (including private browsing filter) - * memory reporter for cache, but this is rather tiny -*/ - -#ifndef mozilla_net_AlternateServices_h -#define mozilla_net_AlternateServices_h - -#include "nsRefPtrHashtable.h" -#include "nsString.h" -#include "nsIInterfaceRequestor.h" -#include "nsISpeculativeConnect.h" - -class nsProxyInfo; - -namespace mozilla { namespace net { - -class nsHttpConnectionInfo; - -class AltSvcMapping -{ - NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AltSvcMapping) - friend class AltSvcCache; - -public: - AltSvcMapping(const nsACString &originScheme, - const nsACString 
&originHost, - int32_t originPort, - const nsACString &username, - bool privateBrowsing, - uint32_t expiresAt, - const nsACString &alternateHost, - int32_t alternatePort, - const nsACString &npnToken); - - const nsCString &AlternateHost() const { return mAlternateHost; } - const nsCString &OriginHost() const { return mOriginHost; } - const nsCString &HashKey() const { return mHashKey; } - uint32_t AlternatePort() const { return mAlternatePort; } - bool Validated() { return mValidated; } - void SetValidated(bool val) { mValidated = val; } - bool IsRunning() { return mRunning; } - void SetRunning(bool val) { mRunning = val; } - int32_t GetExpiresAt() { return mExpiresAt; } - void SetExpiresAt(int32_t val) { mExpiresAt = val; } - void SetExpired(); - bool RouteEquals(AltSvcMapping *map); - - void GetConnectionInfo(nsHttpConnectionInfo **outCI, nsProxyInfo *pi); - int32_t TTL(); - -private: - virtual ~AltSvcMapping() {}; - static void MakeHashKey(nsCString &outKey, - const nsACString &originScheme, - const nsACString &originHost, - int32_t originPort, - bool privateBrowsing); - - nsCString mHashKey; - - nsCString mAlternateHost; - int32_t mAlternatePort; - - nsCString mOriginHost; - int32_t mOriginPort; - - nsCString mUsername; - bool mPrivate; - - uint32_t mExpiresAt; - - bool mValidated; - bool mRunning; - bool mHttps; - - nsCString mNPNToken; -}; - -class AltSvcOverride : public nsIInterfaceRequestor - , public nsISpeculativeConnectionOverrider -{ -public: - NS_DECL_THREADSAFE_ISUPPORTS - NS_DECL_NSISPECULATIVECONNECTIONOVERRIDER - NS_DECL_NSIINTERFACEREQUESTOR - - AltSvcOverride(nsIInterfaceRequestor *aRequestor) - : mCallbacks(aRequestor) {} - -private: - virtual ~AltSvcOverride() {} - nsCOMPtr mCallbacks; -}; - -class AltSvcCache -{ -public: - void UpdateAltServiceMapping(AltSvcMapping *map, nsProxyInfo *pi, - nsIInterfaceRequestor *, uint32_t caps); // main thread - AltSvcMapping *GetAltServiceMapping(const nsACString &scheme, - const nsACString &host, - int32_t 
port, bool pb); - void ClearAltServiceMappings(); - void ClearHostMapping(const nsACString &host, int32_t port); - -private: - nsRefPtrHashtable mHash; -}; - -}} // namespace mozilla::net - -#endif // include guard diff --git a/netwerk/protocol/http/Http2Session.cpp b/netwerk/protocol/http/Http2Session.cpp index 8c8fe1a12c4..547a5cd5780 100644 --- a/netwerk/protocol/http/Http2Session.cpp +++ b/netwerk/protocol/http/Http2Session.cpp @@ -258,8 +258,7 @@ static Http2ControlFx sControlFunctions[] = { Http2Session::RecvPing, Http2Session::RecvGoAway, Http2Session::RecvWindowUpdate, - Http2Session::RecvContinuation, - Http2Session::RecvAltSvc // extension for type 0x0A + Http2Session::RecvContinuation }; bool @@ -443,8 +442,7 @@ Http2Session::AddStream(nsAHttpTransaction *aHttpTransaction, mQueuedStreams.Push(stream); } - if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE) && - !aHttpTransaction->IsNullTransaction()) { + if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE)) { LOG3(("Http2Session::AddStream %p transaction %p forces keep-alive off.\n", this, aHttpTransaction)); DontReuse(); @@ -1860,208 +1858,6 @@ Http2Session::RecvContinuation(Http2Session *self) return RecvPushPromise(self); } -class UpdateAltSvcEvent : public nsRunnable -{ -public: - UpdateAltSvcEvent(const nsCString &host, const uint16_t port, - const nsCString &npnToken, const uint32_t expires, - const nsCString &aOrigin, - nsHttpConnectionInfo *aCI, - nsIInterfaceRequestor *callbacks) - : mHost(host) - , mPort(port) - , mNPNToken(npnToken) - , mExpires(expires) - , mOrigin(aOrigin) - , mCI(aCI) - , mCallbacks(callbacks) - { - } - - NS_IMETHOD Run() MOZ_OVERRIDE - { - MOZ_ASSERT(NS_IsMainThread()); - - nsCString originScheme; - nsCString originHost; - int32_t originPort = -1; - - nsCOMPtr uri; - if (NS_FAILED(NS_NewURI(getter_AddRefs(uri), mOrigin))) { - LOG(("UpdateAltSvcEvent origin does not parse %s\n", - mOrigin.get())); - return NS_OK; - } - uri->GetScheme(originScheme); - 
uri->GetHost(originHost); - uri->GetPort(&originPort); - - const char *username = mCI->Username(); - const bool privateBrowsing = mCI->GetPrivate(); - - LOG(("UpdateAltSvcEvent location=%s:%u protocol=%s expires=%u " - "origin=%s://%s:%u user=%s private=%d", mHost.get(), mPort, - mNPNToken.get(), mExpires, originScheme.get(), originHost.get(), - originPort, username, privateBrowsing)); - nsRefPtr mapping = new AltSvcMapping( - nsDependentCString(originScheme.get()), - nsDependentCString(originHost.get()), - originPort, nsDependentCString(username), privateBrowsing, mExpires, - mHost, mPort, mNPNToken); - - nsProxyInfo *proxyInfo = mCI->ProxyInfo(); - gHttpHandler->UpdateAltServiceMapping(mapping, proxyInfo, mCallbacks, 0); - return NS_OK; - } - -private: - nsCString mHost; - uint16_t mPort; - nsCString mNPNToken; - uint32_t mExpires; - nsCString mOrigin; - nsRefPtr mCI; - nsCOMPtr mCallbacks; -}; - -// defined as an http2 extension - alt-svc -// defines receipt of frame type 0x0A.. See AlternateSevices.h -nsresult -Http2Session::RecvAltSvc(Http2Session *self) -{ - MOZ_ASSERT(self->mInputFrameType == FRAME_TYPE_ALTSVC); - LOG3(("Http2Session::RecvAltSvc %p Flags 0x%X id 0x%X\n", self, - self->mInputFrameFlags, self->mInputFrameID)); - - if (self->mInputFrameDataSize < 8) { - LOG3(("Http2Session::RecvAltSvc %p frame too small", self)); - RETURN_SESSION_ERROR(self, FRAME_SIZE_ERROR); - } - - uint32_t maxAge = - PR_ntohl(*reinterpret_cast(self->mInputFrameBuffer.get() + kFrameHeaderBytes)); - uint16_t portRoute = - PR_ntohs(*reinterpret_cast(self->mInputFrameBuffer.get() + kFrameHeaderBytes + 4)); - uint8_t protoLen = self->mInputFrameBuffer.get()[kFrameHeaderBytes + 6]; - LOG3(("Http2Session::RecvAltSvc %p maxAge=%d port=%d protoLen=%d", self, - maxAge, portRoute, protoLen)); - - if (self->mInputFrameDataSize < (8U + protoLen)) { - LOG3(("Http2Session::RecvAltSvc %p frame too small for protocol", self)); - RETURN_SESSION_ERROR(self, FRAME_SIZE_ERROR); - } - 
nsAutoCString protocol; - protocol.Assign(self->mInputFrameBuffer.get() + kFrameHeaderBytes + 7, protoLen); - - uint32_t spdyIndex; - SpdyInformation *spdyInfo = gHttpHandler->SpdyInfo(); - if (!(NS_SUCCEEDED(spdyInfo->GetNPNIndex(protocol, &spdyIndex)) && - spdyInfo->ProtocolEnabled(spdyIndex))) { - LOG3(("Http2Session::RecvAltSvc %p unknown protocol %s, ignoring", self, - protocol.BeginReading())); - self->ResetDownstreamState(); - return NS_OK; - } - - uint8_t hostLen = self->mInputFrameBuffer.get()[kFrameHeaderBytes + 7 + protoLen]; - if (self->mInputFrameDataSize < (8U + protoLen + hostLen)) { - LOG3(("Http2Session::RecvAltSvc %p frame too small for host", self)); - RETURN_SESSION_ERROR(self, FRAME_SIZE_ERROR); - } - - nsRefPtr ci(self->ConnectionInfo()); - if (!self->mConnection || !ci) { - LOG3(("Http2Session::RecvAltSvc %p no connection or conninfo for %d", self, - self->mInputFrameID)); - self->ResetDownstreamState(); - return NS_OK; - } - - nsAutoCString hostRoute; - hostRoute.Assign(self->mInputFrameBuffer.get() + kFrameHeaderBytes + 8 + protoLen, hostLen); - - uint32_t originLen = self->mInputFrameDataSize - 8 - protoLen - hostLen; - nsAutoCString specifiedOrigin; - if (originLen) { - if (self->mInputFrameID) { - LOG3(("Http2Session::RecvAltSvc %p got frame w/origin on non zero stream", self)); - self->ResetDownstreamState(); - return NS_OK; - } - specifiedOrigin.Assign( - self->mInputFrameBuffer.get() + kFrameHeaderBytes + 8 + protoLen + hostLen, - originLen); - - bool okToReroute = true; - nsCOMPtr securityInfo; - self->mConnection->GetSecurityInfo(getter_AddRefs(securityInfo)); - nsCOMPtr ssl = do_QueryInterface(securityInfo); - if (!ssl) { - okToReroute = false; - } - - // a little off main thread origin parser. 
This is a non critical function because - // any alternate route created has to be verified anyhow - nsAutoCString specifiedOriginHost; - if (specifiedOrigin.EqualsIgnoreCase("https://", 8)) { - specifiedOriginHost.Assign(specifiedOrigin.get() + 8, - specifiedOrigin.Length() - 8); - if (ci->GetRelaxed()) { - // technically this is ok because it will still be confirmed before being used - // but let's not support it. - okToReroute = false; - } - } else if (specifiedOrigin.EqualsIgnoreCase("http://", 7)) { - specifiedOriginHost.Assign(specifiedOrigin.get() + 7, - specifiedOrigin.Length() - 7); - } - - int32_t colonOffset = specifiedOriginHost.FindCharInSet(":", 0); - if (colonOffset != kNotFound) { - specifiedOriginHost.Truncate(colonOffset); - } - - if (okToReroute) { - ssl->IsAcceptableForHost(specifiedOriginHost, &okToReroute); - } - if (!okToReroute) { - LOG3(("Http2Session::RecvAltSvc %p can't reroute non-authoritative origin %s", - self, specifiedOrigin.BeginReading())); - self->ResetDownstreamState(); - return NS_OK; - } - } else { - // no origin specified in frame. We need to have an active pull stream to match - // this up to as if it were a response header. 
- if (!(self->mInputFrameID & 0x1) || - NS_FAILED(self->SetInputFrameDataStream(self->mInputFrameID)) || - !self->mInputFrameDataStream->Transaction() || - !self->mInputFrameDataStream->Transaction()->RequestHead()) { - LOG3(("Http2Session::RecvAltSvc %p got frame w/o origin on invalid stream", self)); - self->ResetDownstreamState(); - return NS_OK; - } - - specifiedOrigin.Assign( - self->mInputFrameDataStream->Transaction()->RequestHead()->Origin()); - } - - nsCOMPtr callbacks; - self->mConnection->GetSecurityInfo(getter_AddRefs(callbacks)); - nsCOMPtr irCallbacks = do_QueryInterface(callbacks); - - nsRefPtr event = new UpdateAltSvcEvent( - hostRoute, portRoute, protocol, NowInSeconds() + maxAge, - specifiedOrigin, ci, irCallbacks); - NS_DispatchToMainThread(event); - - LOG3(("Http2Session::RecvAltSvc %p processed location=%s:%u protocol=%s " - "maxAge=%u origin=%s", self, hostRoute.get(), portRoute, - protocol.get(), maxAge, specifiedOrigin.get())); - self->ResetDownstreamState(); - return NS_OK; -} - //----------------------------------------------------------------------------- // nsAHttpTransaction. 
It is expected that nsHttpConnection is the caller // of these methods @@ -2187,17 +1983,10 @@ Http2Session::ReadSegments(nsAHttpSegmentReader *reader, } if (NS_FAILED(rv)) { - LOG3(("Http2Session::ReadSegments %p may return FAIL code %X", + LOG3(("Http2Session::ReadSegments %p returning FAIL code %X", this, rv)); - if (rv == NS_BASE_STREAM_WOULD_BLOCK) { - return rv; - } - - CleanupStream(stream, rv, CANCEL_ERROR); - if (SoftStreamError(rv)) { - LOG3(("Http2Session::ReadSegments %p soft error override\n", this)); - rv = NS_OK; - } + if (rv != NS_BASE_STREAM_WOULD_BLOCK) + CleanupStream(stream, rv, CANCEL_ERROR); return rv; } diff --git a/netwerk/protocol/http/Http2Session.h b/netwerk/protocol/http/Http2Session.h index 1c1d0bcd9da..e59d3828547 100644 --- a/netwerk/protocol/http/Http2Session.h +++ b/netwerk/protocol/http/Http2Session.h @@ -75,18 +75,17 @@ public: */ enum frameType { - FRAME_TYPE_DATA = 0x0, - FRAME_TYPE_HEADERS = 0x1, - FRAME_TYPE_PRIORITY = 0x2, - FRAME_TYPE_RST_STREAM = 0x3, - FRAME_TYPE_SETTINGS = 0x4, - FRAME_TYPE_PUSH_PROMISE = 0x5, - FRAME_TYPE_PING = 0x6, - FRAME_TYPE_GOAWAY = 0x7, - FRAME_TYPE_WINDOW_UPDATE = 0x8, - FRAME_TYPE_CONTINUATION = 0x9, - FRAME_TYPE_ALTSVC = 0xA, - FRAME_TYPE_LAST = 0xB + FRAME_TYPE_DATA = 0, + FRAME_TYPE_HEADERS = 1, + FRAME_TYPE_PRIORITY = 2, + FRAME_TYPE_RST_STREAM = 3, + FRAME_TYPE_SETTINGS = 4, + FRAME_TYPE_PUSH_PROMISE = 5, + FRAME_TYPE_PING = 6, + FRAME_TYPE_GOAWAY = 7, + FRAME_TYPE_WINDOW_UPDATE = 8, + FRAME_TYPE_CONTINUATION = 9, + FRAME_TYPE_LAST = 10 }; // NO_ERROR is a macro defined on windows, so we'll name the HTTP2 goaway @@ -169,7 +168,6 @@ public: static nsresult RecvGoAway(Http2Session *); static nsresult RecvWindowUpdate(Http2Session *); static nsresult RecvContinuation(Http2Session *); - static nsresult RecvAltSvc(Http2Session *); char *EnsureOutputBuffer(uint32_t needed); diff --git a/netwerk/protocol/http/NullHttpTransaction.cpp b/netwerk/protocol/http/NullHttpTransaction.cpp index 
edc1c5bee2f..58666533085 100644 --- a/netwerk/protocol/http/NullHttpTransaction.cpp +++ b/netwerk/protocol/http/NullHttpTransaction.cpp @@ -23,10 +23,10 @@ NullHttpTransaction::NullHttpTransaction(nsHttpConnectionInfo *ci, : mStatus(NS_OK) , mCaps(caps | NS_HTTP_ALLOW_KEEPALIVE) , mCapsToClear(0) - , mRequestHead(nullptr) - , mIsDone(false) , mCallbacks(callbacks) , mConnectionInfo(ci) + , mRequestHead(nullptr) + , mIsDone(false) { } diff --git a/netwerk/protocol/http/NullHttpTransaction.h b/netwerk/protocol/http/NullHttpTransaction.h index d32ebf622f6..45c1f6c7ef7 100644 --- a/netwerk/protocol/http/NullHttpTransaction.h +++ b/netwerk/protocol/http/NullHttpTransaction.h @@ -49,22 +49,18 @@ protected: private: nsresult mStatus; -protected: uint32_t mCaps; -private: // mCapsToClear holds flags that should be cleared in mCaps, e.g. unset // NS_HTTP_REFRESH_DNS when DNS refresh request has completed to avoid // redundant requests on the network. To deal with raciness, only unsetting // bitfields should be allowed: 'lost races' will thus err on the // conservative side, e.g. by going ahead with a 2nd DNS refresh. 
uint32_t mCapsToClear; - nsHttpRequestHead *mRequestHead; - bool mIsDone; - -protected: nsRefPtr mConnection; nsCOMPtr mCallbacks; nsRefPtr mConnectionInfo; + nsHttpRequestHead *mRequestHead; + bool mIsDone; }; NS_DEFINE_STATIC_IID_ACCESSOR(NullHttpTransaction, NS_NULLHTTPTRANSACTION_IID) diff --git a/netwerk/protocol/http/SpdySession3.cpp b/netwerk/protocol/http/SpdySession3.cpp index b556cac6610..93a25a625b5 100644 --- a/netwerk/protocol/http/SpdySession3.cpp +++ b/netwerk/protocol/http/SpdySession3.cpp @@ -386,8 +386,7 @@ SpdySession3::AddStream(nsAHttpTransaction *aHttpTransaction, mQueuedStreams.Push(stream); } - if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE) && - !aHttpTransaction->IsNullTransaction()) { + if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE)) { LOG3(("SpdySession3::AddStream %p transaction %p forces keep-alive off.\n", this, aHttpTransaction)); DontReuse(); @@ -402,14 +401,12 @@ SpdySession3::ActivateStream(SpdyStream3 *stream) MOZ_ASSERT(!stream->StreamID() || (stream->StreamID() & 1), "Do not activate pushed streams"); - if (!(stream->Transaction() && stream->Transaction()->IsNullTransaction())) { - ++mConcurrent; - if (mConcurrent > mConcurrentHighWater) - mConcurrentHighWater = mConcurrent; - LOG3(("SpdySession3::AddStream %p activating stream %p Currently %d " - "streams in session, high water mark is %d", - this, stream, mConcurrent, mConcurrentHighWater)); - } + ++mConcurrent; + if (mConcurrent > mConcurrentHighWater) + mConcurrentHighWater = mConcurrent; + LOG3(("SpdySession3::AddStream %p activating stream %p Currently %d " + "streams in session, high water mark is %d", + this, stream, mConcurrent, mConcurrentHighWater)); mReadyForWrite.Push(stream); SetWriteCallbacks(); @@ -1764,17 +1761,10 @@ SpdySession3::ReadSegments(nsAHttpSegmentReader *reader, } if (NS_FAILED(rv)) { - LOG3(("SpdySession3::ReadSegments %p may return FAIL code %X", + LOG3(("SpdySession3::ReadSegments %p returning FAIL code %X", this, rv)); - if 
(rv == NS_BASE_STREAM_WOULD_BLOCK) { - return rv; - } - - CleanupStream(stream, rv, RST_CANCEL); - if (SoftStreamError(rv)) { - LOG3(("SpdySession3::ReadSegments %p soft error override\n", this)); - rv = NS_OK; - } + if (rv != NS_BASE_STREAM_WOULD_BLOCK) + CleanupStream(stream, rv, RST_CANCEL); return rv; } diff --git a/netwerk/protocol/http/SpdySession31.cpp b/netwerk/protocol/http/SpdySession31.cpp index fbe4a77d469..d570d56f671 100644 --- a/netwerk/protocol/http/SpdySession31.cpp +++ b/netwerk/protocol/http/SpdySession31.cpp @@ -389,8 +389,7 @@ SpdySession31::AddStream(nsAHttpTransaction *aHttpTransaction, mQueuedStreams.Push(stream); } - if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE) && - !aHttpTransaction->IsNullTransaction()) { + if (!(aHttpTransaction->Caps() & NS_HTTP_ALLOW_KEEPALIVE)) { LOG3(("SpdySession31::AddStream %p transaction %p forces keep-alive off.\n", this, aHttpTransaction)); DontReuse(); @@ -406,15 +405,12 @@ SpdySession31::ActivateStream(SpdyStream31 *stream) MOZ_ASSERT(!stream->StreamID() || (stream->StreamID() & 1), "Do not activate pushed streams"); - if (!(stream->Transaction() && stream->Transaction()->IsNullTransaction())) { - ++mConcurrent; - if (mConcurrent > mConcurrentHighWater) { - mConcurrentHighWater = mConcurrent; - } - LOG3(("SpdySession31::AddStream %p activating stream %p Currently %d " - "streams in session, high water mark is %d", - this, stream, mConcurrent, mConcurrentHighWater)); - } + ++mConcurrent; + if (mConcurrent > mConcurrentHighWater) + mConcurrentHighWater = mConcurrent; + LOG3(("SpdySession31::AddStream %p activating stream %p Currently %d " + "streams in session, high water mark is %d", + this, stream, mConcurrent, mConcurrentHighWater)); mReadyForWrite.Push(stream); SetWriteCallbacks(); @@ -1831,17 +1827,10 @@ SpdySession31::ReadSegments(nsAHttpSegmentReader *reader, } if (NS_FAILED(rv)) { - LOG3(("SpdySession31::ReadSegments %p may return FAIL code %X", + LOG3(("SpdySession31::ReadSegments %p 
returning FAIL code %X", this, rv)); - if (rv == NS_BASE_STREAM_WOULD_BLOCK) { - return rv; - } - - CleanupStream(stream, rv, RST_CANCEL); - if (SoftStreamError(rv)) { - LOG3(("SpdySession31::ReadSegments %p soft error override\n", this)); - rv = NS_OK; - } + if (rv != NS_BASE_STREAM_WOULD_BLOCK) + CleanupStream(stream, rv, RST_CANCEL); return rv; } diff --git a/netwerk/protocol/http/moz.build b/netwerk/protocol/http/moz.build index b84b8eb4d9c..64f90f36f3d 100644 --- a/netwerk/protocol/http/moz.build +++ b/netwerk/protocol/http/moz.build @@ -41,7 +41,6 @@ EXPORTS.mozilla.net += [ # The rest of these files cannot be built in unified mode because they want to # force NSPR logging. SOURCES += [ - 'AlternateServices.cpp', 'ASpdySession.cpp', 'ConnectionDiagnostics.cpp', 'Http2Compression.cpp', diff --git a/netwerk/protocol/http/nsAHttpConnection.h b/netwerk/protocol/http/nsAHttpConnection.h index 994be14e281..7060f004442 100644 --- a/netwerk/protocol/http/nsAHttpConnection.h +++ b/netwerk/protocol/http/nsAHttpConnection.h @@ -140,9 +140,6 @@ public: // Update the callbacks used to provide security info. May be called on // any thread. virtual void SetSecurityCallbacks(nsIInterfaceRequestor* aCallbacks) = 0; - - // nsHttp.h version - virtual uint32_t Version() = 0; }; NS_DEFINE_STATIC_IID_ACCESSOR(nsAHttpConnection, NS_AHTTPCONNECTION_IID) @@ -210,12 +207,6 @@ NS_DEFINE_STATIC_IID_ACCESSOR(nsAHttpConnection, NS_AHTTPCONNECTION_IID) return nullptr; \ return (fwdObject)->Transport(); \ } \ - uint32_t Version() \ - { \ - return (fwdObject) ? 
\ - (fwdObject)->Version() : \ - NS_HTTP_VERSION_UNKNOWN; \ - } \ bool IsProxyConnectInProgress() \ { \ return (fwdObject)->IsProxyConnectInProgress(); \ diff --git a/netwerk/protocol/http/nsAHttpTransaction.h b/netwerk/protocol/http/nsAHttpTransaction.h index 547e7942c55..6aed565e5a5 100644 --- a/netwerk/protocol/http/nsAHttpTransaction.h +++ b/netwerk/protocol/http/nsAHttpTransaction.h @@ -203,7 +203,7 @@ NS_DEFINE_STATIC_IID_ACCESSOR(nsAHttpTransaction, NS_AHTTPTRANSACTION_IID) uint64_t Available(); \ virtual nsresult ReadSegments(nsAHttpSegmentReader *, uint32_t, uint32_t *); \ virtual nsresult WriteSegments(nsAHttpSegmentWriter *, uint32_t, uint32_t *); \ - virtual void Close(nsresult reason); \ + void Close(nsresult reason); \ nsHttpConnectionInfo *ConnectionInfo(); \ void SetProxyConnectFailed(); \ virtual nsHttpRequestHead *RequestHead(); \ diff --git a/netwerk/protocol/http/nsHttp.cpp b/netwerk/protocol/http/nsHttp.cpp index cae91990d52..ac52e0e24ae 100644 --- a/netwerk/protocol/http/nsHttp.cpp +++ b/netwerk/protocol/http/nsHttp.cpp @@ -346,128 +346,6 @@ void EnsureBuffer(nsAutoArrayPtr &buf, uint32_t newSize, { localEnsureBuffer (buf, newSize, preserve, objSize); } -/// - -void -ParsedHeaderValueList::Tokenize(char *input, uint32_t inputLen, char **token, - uint32_t *tokenLen, bool *foundEquals, char **next) -{ - if (foundEquals) { - *foundEquals = false; - } - if (next) { - *next = nullptr; - } - if (inputLen < 1 || !input || !token) { - return; - } - - bool foundFirst = false; - bool inQuote = false; - bool foundToken = false; - *token = input; - *tokenLen = inputLen; - - for (uint32_t index = 0; !foundToken && index < inputLen; ++index) { - // strip leading cruft - if (!foundFirst && - (input[index] == ' ' || input[index] == '"' || input[index] == '\t')) { - (*token)++; - } else { - foundFirst = true; - } - - if (input[index] == '"') { - inQuote = !inQuote; - continue; - } - - if (inQuote) { - continue; - } - - if (input[index] == '=' || input[index] 
== ';') { - *tokenLen = (input + index) - *token; - if (next && ((index + 1) < inputLen)) { - *next = input + index + 1; - } - foundToken = true; - if (foundEquals && input[index] == '=') { - *foundEquals = true; - } - break; - } - } - - if (!foundToken) { - *tokenLen = (input + inputLen) - *token; - } - - // strip trailing cruft - for (char *index = *token + *tokenLen - 1; index >= *token; --index) { - if (*index != ' ' && *index != '\t' && *index != '"') { - break; - } - --(*tokenLen); - if (*index == '"') { - break; - } - } -} - -ParsedHeaderValueList::ParsedHeaderValueList(char *t, uint32_t len) -{ - char *name = nullptr; - uint32_t nameLen = 0; - char *value = nullptr; - uint32_t valueLen = 0; - char *next = nullptr; - bool foundEquals; - - while (t) { - Tokenize(t, len, &name, &nameLen, &foundEquals, &next); - if (next) { - len -= next - t; - } - t = next; - if (foundEquals && t) { - Tokenize(t, len, &value, &valueLen, nullptr, &next); - if (next) { - len -= next - t; - } - t = next; - } - mValues.AppendElement(ParsedHeaderPair(name, nameLen, value, valueLen)); - value = name = nullptr; - valueLen = nameLen = 0; - next = nullptr; - } -} - -ParsedHeaderValueListList::ParsedHeaderValueListList(const nsCString &fullHeader) - : mFull(fullHeader) -{ - char *t = mFull.BeginWriting(); - uint32_t len = mFull.Length(); - char *last = t; - bool inQuote = false; - for (uint32_t index = 0; index < len; ++index) { - if (t[index] == '"') { - inQuote = !inQuote; - continue; - } - if (inQuote) { - continue; - } - if (t[index] == ',') { - mValues.AppendElement(ParsedHeaderValueList(last, (t + index) - last)); - last = t + index + 1; - } - } - if (!inQuote) { - mValues.AppendElement(ParsedHeaderValueList(last, (t + len) - last)); - } -} } // namespace mozilla::net } // namespace mozilla diff --git a/netwerk/protocol/http/nsHttp.h b/netwerk/protocol/http/nsHttp.h index c05c4a1c92d..9be18962d8d 100644 --- a/netwerk/protocol/http/nsHttp.h +++ b/netwerk/protocol/http/nsHttp.h @@ 
-12,7 +12,6 @@ #include "nsAutoPtr.h" #include "nsString.h" #include "nsError.h" -#include "nsTArray.h" // http version codes #define NS_HTTP_VERSION_UNKNOWN 0 @@ -204,56 +203,6 @@ void EnsureBuffer(nsAutoArrayPtr &buf, uint32_t newSize, void EnsureBuffer(nsAutoArrayPtr &buf, uint32_t newSize, uint32_t preserve, uint32_t &objSize); -// h2=":443"; ma=60; single -// results in 3 mValues = {{h2, :443}, {ma, 60}, {single}} - -class ParsedHeaderPair -{ -public: - ParsedHeaderPair(const char *name, int32_t nameLen, - const char *val, int32_t valLen) - { - if (nameLen > 0) { - mName.Rebind(name, name + nameLen); - } - if (valLen > 0) { - mValue.Rebind(val, val + valLen); - } - } - - ParsedHeaderPair(ParsedHeaderPair const &copy) - : mName(copy.mName) - , mValue(copy.mValue) - { - } - - nsDependentCSubstring mName; - nsDependentCSubstring mValue; -}; - -class ParsedHeaderValueList -{ -public: - ParsedHeaderValueList(char *t, uint32_t len); - nsTArray mValues; - -private: - void ParsePair(char *t, uint32_t len); - void Tokenize(char *input, uint32_t inputLen, char **token, - uint32_t *tokenLen, bool *foundEquals, char **next); -}; - -class ParsedHeaderValueListList -{ -public: - explicit ParsedHeaderValueListList(const nsCString &txt); - nsTArray mValues; - -private: - nsCString mFull; -}; - - } // namespace mozilla::net } // namespace mozilla diff --git a/netwerk/protocol/http/nsHttpAtomList.h b/netwerk/protocol/http/nsHttpAtomList.h index 2ad6baa006d..cebe85e1771 100644 --- a/netwerk/protocol/http/nsHttpAtomList.h +++ b/netwerk/protocol/http/nsHttpAtomList.h @@ -23,8 +23,6 @@ HTTP_ATOM(Accept_Language, "Accept-Language") HTTP_ATOM(Accept_Ranges, "Accept-Ranges") HTTP_ATOM(Age, "Age") HTTP_ATOM(Allow, "Allow") -HTTP_ATOM(Alternate_Service, "Alt-Svc") -HTTP_ATOM(Alternate_Service_Used, "Alt-Svc-Used") HTTP_ATOM(Assoc_Req, "Assoc-Req") HTTP_ATOM(Authentication, "Authentication") HTTP_ATOM(Authorization, "Authorization") diff --git a/netwerk/protocol/http/nsHttpChannel.cpp
b/netwerk/protocol/http/nsHttpChannel.cpp index 24c2d0e1450..6224a4ead92 100644 --- a/netwerk/protocol/http/nsHttpChannel.cpp +++ b/netwerk/protocol/http/nsHttpChannel.cpp @@ -65,7 +65,6 @@ #include "nsPerformance.h" #include "CacheObserver.h" #include "mozilla/Telemetry.h" -#include "AlternateServices.h" namespace mozilla { namespace net { @@ -279,11 +278,11 @@ nsHttpChannel::Connect() // data (it is read-only). // if the connection is not using SSL and either the exact host matches or // a superdomain wants to force HTTPS, do it. - bool isHttps = false; - rv = mURI->SchemeIs("https", &isHttps); + bool usingSSL = false; + rv = mURI->SchemeIs("https", &usingSSL); NS_ENSURE_SUCCESS(rv,rv); - if (mAllowSTS && !isHttps) { + if (mAllowSTS && !usingSSL) { // enforce Strict-Transport-Security nsISiteSecurityService* sss = gHttpHandler->GetSSService(); NS_ENSURE_TRUE(sss, NS_ERROR_OUT_OF_MEMORY); @@ -327,7 +326,7 @@ nsHttpChannel::Connect() } // open a cache entry for this channel... - rv = OpenCacheEntry(isHttps); + rv = OpenCacheEntry(usingSSL); // do not continue if asyncOpenCacheEntry is in progress if (mCacheEntriesToWaitFor) { @@ -1231,127 +1230,6 @@ nsHttpChannel::ProcessSSLInformation() } } -void -nsHttpChannel::ProcessAltService() -{ - // e.g. Alt-Svc: h2=":443"; ma=60 - // e.g. 
Alt-Svc: h2="otherhost:443" - // Alt-Svc = 1#( alternative *( OWS ";" OWS parameter ) ) - // alternative = protocol-id "=" alt-authority - // protocol-id = token ; percent-encoded ALPN protocol identifier - // alt-authority = quoted-string ; containing [ uri-host ] ":" port - - if (!gHttpHandler->AllowAltSvc()) { - return; - } - - nsAutoCString scheme; - mURI->GetScheme(scheme); - bool isHttp = scheme.Equals(NS_LITERAL_CSTRING("http")); - if (!isHttp && !scheme.Equals(NS_LITERAL_CSTRING("https"))) { - return; - } - - if (isHttp && !gHttpHandler->AllowAltSvcOE()) { - return; - } - - const char *altSvc; - if (!(altSvc = mResponseHead->PeekHeader(nsHttp::Alternate_Service))) { - return; - } - - LOG(("nsHttpChannel %p Alt-Svc Response Header %s\n", this, altSvc)); - - nsCString buf(altSvc); - if (!nsHttp::IsReasonableHeaderValue(buf)) { - LOG(("Alt-Svc Response Header seems unreasonable - skipping\n")); - return; - } - - ParsedHeaderValueListList parsedAltSvc(buf); - nsRefPtr mapping; - - nsAutoCString originHost; - int32_t originPort = 80; - mURI->GetPort(&originPort); - if (NS_FAILED(mURI->GetHost(originHost))) { - return; - } - uint32_t now = NowInSeconds(), currentAge = 0; - mResponseHead->ComputeCurrentAge(now, mRequestTime, &currentAge); - - for (uint32_t index = 0; index < parsedAltSvc.mValues.Length(); ++index) { - uint32_t maxage = 86400; // default - nsAutoCString hostname; // Always empty in the header form - nsAutoCString npnToken; - int32_t portno = originPort; - - for (uint32_t pairIndex = 0; - pairIndex < parsedAltSvc.mValues[index].mValues.Length(); - ++pairIndex) { - nsDependentCSubstring &currentName = - parsedAltSvc.mValues[index].mValues[pairIndex].mName; - nsDependentCSubstring &currentValue = - parsedAltSvc.mValues[index].mValues[pairIndex].mValue; - - if (!pairIndex) { - // h2=:443 - npnToken = currentName; - int32_t colonIndex = currentValue.FindChar(':'); - if (colonIndex >= 0) { - portno = - atoi(PromiseFlatCString(currentValue).get() + colonIndex + 1); - }
else { - colonIndex = 0; - } - hostname.Assign(currentValue.BeginReading(), colonIndex); - } else if (currentName.Equals(NS_LITERAL_CSTRING("ma"))) { - maxage = atoi(PromiseFlatCString(currentValue).get()); - break; - } - } - - // unescape modifies a c string in place, so afterwards - // update nsCString length - nsUnescape(npnToken.BeginWriting()); - npnToken.SetLength(strlen(npnToken.BeginReading())); - - uint32_t spdyIndex; - SpdyInformation *spdyInfo = gHttpHandler->SpdyInfo(); - if (!(NS_SUCCEEDED(spdyInfo->GetNPNIndex(npnToken, &spdyIndex)) && - spdyInfo->ProtocolEnabled(spdyIndex))) { - LOG(("Alt Svc %p unknown protocol %s, ignoring", this, npnToken.get())); - continue; - } - - mapping = new AltSvcMapping(scheme, - originHost, originPort, - mUsername, mPrivateBrowsing, - NowInSeconds() + maxage, - hostname, portno, npnToken); - if (!mapping) { - continue; - } - - nsCOMPtr callbacks; - NS_NewNotificationCallbacksAggregation(mCallbacks, mLoadGroup, - getter_AddRefs(callbacks)); - if (!callbacks) { - return; - } - - nsCOMPtr proxyInfo; - if (mProxyInfo) { - proxyInfo = do_QueryInterface(mProxyInfo); - } - - gHttpHandler-> - UpdateAltServiceMapping(mapping, proxyInfo, callbacks, - mCaps & (NS_HTTP_ALLOW_RSA_FALSESTART | NS_HTTP_DISALLOW_SPDY)); - } -} - nsresult nsHttpChannel::ProcessResponse() { @@ -1406,10 +1284,6 @@ nsHttpChannel::ProcessResponse() LOG((" continuation state has been reset")); } - if (httpStatus < 500) { - ProcessAltService(); - } - bool successfulReval = false; // handle different server response categories. 
Note that we handle @@ -2639,7 +2513,7 @@ IsSubRangeRequest(nsHttpRequestHead &aRequestHead) } nsresult -nsHttpChannel::OpenCacheEntry(bool isHttps) +nsHttpChannel::OpenCacheEntry(bool usingSSL) { MOZ_EVENT_TRACER_EXEC(this, "net::http::OpenCacheEntry"); @@ -2966,8 +2840,8 @@ nsHttpChannel::OnCacheEntryCheck(nsICacheEntry* entry, nsIApplicationCache* appC } } - bool isHttps = false; - rv = mURI->SchemeIs("https", &isHttps); + bool usingSSL = false; + rv = mURI->SchemeIs("https", &usingSSL); NS_ENSURE_SUCCESS(rv,rv); bool doValidation = false; @@ -3006,7 +2880,7 @@ nsHttpChannel::OnCacheEntryCheck(nsICacheEntry* entry, nsIApplicationCache* appC // if no-store or if no-cache and ssl, validate cached response (see // bug 112564 for an explanation of this logic) if (mCachedResponseHead->NoStore() || - (mCachedResponseHead->NoCache() && isHttps)) { + (mCachedResponseHead->NoCache() && usingSSL)) { LOG(("Validating based on (no-store || (no-cache && ssl)) logic\n")); doValidation = true; } @@ -3561,11 +3435,11 @@ nsHttpChannel::OpenCacheInputStream(nsICacheEntry* cacheEntry, bool startBufferi { nsresult rv; - bool isHttps = false; - rv = mURI->SchemeIs("https", &isHttps); + bool usingSSL = false; + rv = mURI->SchemeIs("https", &usingSSL); NS_ENSURE_SUCCESS(rv,rv); - if (isHttps) { + if (usingSSL) { rv = cacheEntry->GetSecurityInfo( getter_AddRefs(mCachedSecurityInfo)); if (NS_FAILED(rv)) { @@ -3919,11 +3793,9 @@ nsHttpChannel::UpdateInhibitPersistentCachingFlag() mLoadFlags |= INHIBIT_PERSISTENT_CACHING; // Only cache SSL content on disk if the pref is set - bool isHttps; if (!gHttpHandler->IsPersistentHttpsCachingEnabled() && - NS_SUCCEEDED(mURI->SchemeIs("https", &isHttps)) && isHttps) { + mConnectionInfo->EndToEndSSL()) mLoadFlags |= INHIBIT_PERSISTENT_CACHING; - } } nsresult @@ -4694,19 +4566,17 @@ nsHttpChannel::BeginConnect() // Construct connection info object nsAutoCString host; - nsAutoCString scheme; int32_t port = -1; - bool isHttps = false; + nsAutoCString 
username; + bool usingSSL = false; - rv = mURI->GetScheme(scheme); - if (NS_SUCCEEDED(rv)) - rv = mURI->SchemeIs("https", &isHttps); + rv = mURI->SchemeIs("https", &usingSSL); if (NS_SUCCEEDED(rv)) rv = mURI->GetAsciiHost(host); if (NS_SUCCEEDED(rv)) rv = mURI->GetPort(&port); if (NS_SUCCEEDED(rv)) - mURI->GetUsername(mUsername); + mURI->GetUsername(username); if (NS_SUCCEEDED(rv)) rv = mURI->GetAsciiSpec(mSpec); if (NS_FAILED(rv)) @@ -4722,47 +4592,8 @@ nsHttpChannel::BeginConnect() if (mProxyInfo) proxyInfo = do_QueryInterface(mProxyInfo); - mRequestHead.SetHTTPS(isHttps); - mRequestHead.SetOrigin(scheme, host, port); - - nsRefPtr mapping; - if ((scheme.Equals(NS_LITERAL_CSTRING("http")) || - scheme.Equals(NS_LITERAL_CSTRING("https"))) && - (mapping = gHttpHandler->GetAltServiceMapping(scheme, - host, port, - mPrivateBrowsing))) { - LOG(("nsHttpChannel %p Alt Service Mapping Found %s://%s:%d\n", this, - scheme.get(), mapping->AlternateHost().get(), - mapping->AlternatePort())); - mRequestHead.SetHeader(nsHttp::Alternate_Service_Used, NS_LITERAL_CSTRING("1")); - - nsCOMPtr consoleService = - do_GetService(NS_CONSOLESERVICE_CONTRACTID); - if (consoleService) { - nsAutoString message(NS_LITERAL_STRING("Alternate Service Mapping found: ")); - AppendASCIItoUTF16(scheme.get(), message); - message.Append(NS_LITERAL_STRING("://")); - AppendASCIItoUTF16(host.get(), message); - message.Append(NS_LITERAL_STRING(":")); - message.AppendInt(port); - message.Append(NS_LITERAL_STRING(" to ")); - AppendASCIItoUTF16(scheme.get(), message); - message.Append(NS_LITERAL_STRING("://")); - AppendASCIItoUTF16(mapping->AlternateHost().get(), message); - message.Append(NS_LITERAL_STRING(":")); - message.AppendInt(mapping->AlternatePort()); - consoleService->LogStringMessage(message.get()); - } - - LOG(("nsHttpChannel %p Using connection info from altsvc mapping", this)); - mapping->GetConnectionInfo(getter_AddRefs(mConnectionInfo), proxyInfo); - 
Telemetry::Accumulate(Telemetry::HTTP_TRANSACTION_USE_ALTSVC, true); - Telemetry::Accumulate(Telemetry::HTTP_TRANSACTION_USE_ALTSVC_OE, !isHttps); - } else { - LOG(("nsHttpChannel %p Using default connection info", this)); - mConnectionInfo = new nsHttpConnectionInfo(host, port, EmptyCString(), mUsername, proxyInfo, isHttps); - Telemetry::Accumulate(Telemetry::HTTP_TRANSACTION_USE_ALTSVC, false); - } + mConnectionInfo = new nsHttpConnectionInfo(host, port, username, proxyInfo, usingSSL); + mRequestHead.SetHTTPS(usingSSL); mAuthProvider = do_CreateInstance("@mozilla.org/network/http-channel-auth-provider;1", @@ -5037,10 +4868,8 @@ nsHttpChannel::GetResponseEnd(TimeStamp* _retval) { NS_IMETHODIMP nsHttpChannel::GetIsSSL(bool *aIsSSL) { - // this attribute is really misnamed - it wants to know if - // https:// is being used. SSL might be used to cover http:// - // in some circumstances (proxies, http/2, etc..) - return mURI->SchemeIs("https", aIsSSL); + *aIsSSL = mConnectionInfo->EndToEndSSL(); + return NS_OK; } NS_IMETHODIMP diff --git a/netwerk/protocol/http/nsHttpChannel.h b/netwerk/protocol/http/nsHttpChannel.h index 36f42c03961..06debfe44df 100644 --- a/netwerk/protocol/http/nsHttpChannel.h +++ b/netwerk/protocol/http/nsHttpChannel.h @@ -205,7 +205,6 @@ private: nsresult ContinueProcessResponse(nsresult); nsresult ProcessNormal(); nsresult ContinueProcessNormal(nsresult); - void ProcessAltService(); nsresult ProcessNotModified(); nsresult AsyncProcessRedirection(uint32_t httpStatus); nsresult ContinueProcessRedirection(nsresult); @@ -428,8 +427,6 @@ private: void PushRedirectAsyncFunc(nsContinueRedirectionFunc func); void PopRedirectAsyncFunc(nsContinueRedirectionFunc func); - nsCString mUsername; - protected: virtual void DoNotifyListenerCleanup(); nsPerformance* GetPerformance(); diff --git a/netwerk/protocol/http/nsHttpConnection.cpp b/netwerk/protocol/http/nsHttpConnection.cpp index 1774df40d95..7e5f55e0966 100644 --- 
a/netwerk/protocol/http/nsHttpConnection.cpp +++ b/netwerk/protocol/http/nsHttpConnection.cpp @@ -483,48 +483,25 @@ nsHttpConnection::SetupNPNList(nsISSLSocketControl *ssl, uint32_t caps) { nsTArray protocolArray; - nsCString npnToken = mConnInfo->GetNPNToken(); - if (npnToken.IsEmpty()) { - // The first protocol is used as the fallback if none of the - // protocols supported overlap with the server's list. - // When using ALPN the advertised preferences are protocolArray indicies - // {1, .., N, 0} in decreasing order. - // For NPN, In the case of overlap, matching priority is driven by - // the order of the server's advertisement - with index 0 used when - // there is no match. - protocolArray.AppendElement(NS_LITERAL_CSTRING("http/1.1")); + // The first protocol is used as the fallback if none of the + // protocols supported overlap with the server's list. + // When using ALPN the advertised preferences are protocolArray indicies + // {1, .., N, 0} in decreasing order. + // For NPN, In the case of overlap, matching priority is driven by + // the order of the server's advertisement - with index 0 used when + // there is no match. 
+ protocolArray.AppendElement(NS_LITERAL_CSTRING("http/1.1")); - if (gHttpHandler->IsSpdyEnabled() && - !(caps & NS_HTTP_DISALLOW_SPDY)) { - LOG(("nsHttpConnection::SetupSSL Allow SPDY NPN selection")); - const SpdyInformation *info = gHttpHandler->SpdyInfo(); - for (uint32_t index = SpdyInformation::kCount; index > 0; --index) { - if (info->ProtocolEnabled(index - 1) && - info->ALPNCallbacks[index - 1](ssl)) { - protocolArray.AppendElement(info->VersionString[index - 1]); - } + if (gHttpHandler->IsSpdyEnabled() && + !(caps & NS_HTTP_DISALLOW_SPDY)) { + LOG(("nsHttpConnection::SetupSSL Allow SPDY NPN selection")); + const SpdyInformation *info = gHttpHandler->SpdyInfo(); + for (uint32_t index = SpdyInformation::kCount; index > 0; --index) { + if (info->ProtocolEnabled(index - 1) && + info->ALPNCallbacks[index - 1](ssl)) { + protocolArray.AppendElement(info->VersionString[index - 1]); } } - } else { - LOG(("nsHttpConnection::SetupSSL limiting NPN selection to %s", - npnToken.get())); - protocolArray.AppendElement(npnToken); - } - - nsCString authHost = mConnInfo->GetAuthenticationHost(); - int32_t authPort = mConnInfo->GetAuthenticationPort(); - - if (!authHost.IsEmpty()) { - ssl->SetAuthenticationName(authHost); - ssl->SetAuthenticationPort(authPort); - } - - if (mConnInfo->GetRelaxed()) { // http:// over tls - if (authHost.IsEmpty() || authHost.Equals(mConnInfo->GetHost())) { - LOG(("nsHttpConnection::SetupSSL %p TLS-Relaxed " - "with Same Host Auth Bypass", this)); - ssl->SetBypassAuthentication(true); - } } nsresult rv = ssl->SetNPNList(protocolArray); @@ -554,14 +531,6 @@ nsHttpConnection::AddTransaction(nsAHttpTransaction *httpTransaction, LOG(("nsHttpConnection::AddTransaction for SPDY%s", needTunnel ? 
" over tunnel" : "")); - // do a runtime check here just for defense in depth - if (transCI->GetRelaxed() && - httpTransaction->RequestHead() && httpTransaction->RequestHead()->IsHTTPS()) { - LOG(("This Cannot happen - https on relaxed tls stream\n")); - MOZ_ASSERT(false, "https:// on tls relaxed"); - return NS_ERROR_FAILURE; - } - if (!mSpdySession->AddStream(httpTransaction, priority, needTunnel, mCallbacks)) { MOZ_ASSERT(false); // this cannot happen! @@ -1438,12 +1407,6 @@ nsHttpConnection::EndIdleMonitoring() } } -uint32_t -nsHttpConnection::Version() -{ - return mUsingSpdyVersion ? mUsingSpdyVersion : mLastHttpResponseVersion; -} - //----------------------------------------------------------------------------- // nsHttpConnection //----------------------------------------------------------------------------- diff --git a/netwerk/protocol/http/nsHttpConnection.h b/netwerk/protocol/http/nsHttpConnection.h index 4d999d5d00d..c2e52ab9a83 100644 --- a/netwerk/protocol/http/nsHttpConnection.h +++ b/netwerk/protocol/http/nsHttpConnection.h @@ -211,8 +211,6 @@ public: return mTrafficStamp && (mTrafficCount == (mTotalBytesWritten + mTotalBytesRead)); } - // override of nsAHttpConnection - virtual uint32_t Version(); private: // Value (set in mTCPKeepaliveConfig) indicates which set of prefs to use. 
diff --git a/netwerk/protocol/http/nsHttpConnectionInfo.cpp b/netwerk/protocol/http/nsHttpConnectionInfo.cpp index 0453a599d50..d01ad60c987 100644 --- a/netwerk/protocol/http/nsHttpConnectionInfo.cpp +++ b/netwerk/protocol/http/nsHttpConnectionInfo.cpp @@ -20,49 +20,16 @@ namespace mozilla { namespace net { -nsHttpConnectionInfo::nsHttpConnectionInfo(const nsACString &physicalHost, - int32_t physicalPort, - const nsACString &npnToken, +nsHttpConnectionInfo::nsHttpConnectionInfo(const nsACString &host, int32_t port, const nsACString &username, - nsProxyInfo *proxyInfo, + nsProxyInfo* proxyInfo, bool endToEndSSL) - : mAuthenticationPort(443) + : mUsername(username) + , mProxyInfo(proxyInfo) + , mEndToEndSSL(endToEndSSL) + , mUsingConnect(false) { - Init(physicalHost, physicalPort, npnToken, username, proxyInfo, endToEndSSL); -} - -nsHttpConnectionInfo::nsHttpConnectionInfo(const nsACString &physicalHost, - int32_t physicalPort, - const nsACString &npnToken, - const nsACString &username, - nsProxyInfo *proxyInfo, - const nsACString &logicalHost, - int32_t logicalPort) - -{ - mEndToEndSSL = true; // so DefaultPort() works - mAuthenticationPort = logicalPort == -1 ? 
DefaultPort() : logicalPort; - - if (!physicalHost.Equals(logicalHost) || (physicalPort != logicalPort)) { - mAuthenticationHost = logicalHost; - } - Init(physicalHost, physicalPort, npnToken, username, proxyInfo, true); -} - -void -nsHttpConnectionInfo::Init(const nsACString &host, int32_t port, - const nsACString &npnToken, - const nsACString &username, - nsProxyInfo* proxyInfo, - bool e2eSSL) -{ - LOG(("Init nsHttpConnectionInfo @%p\n", this)); - - mUsername = username; - mProxyInfo = proxyInfo; - mEndToEndSSL = e2eSSL; - mUsingConnect = false; - mNPNToken = npnToken; + LOG(("Creating nsHttpConnectionInfo @%x\n", this)); mUsingHttpsProxy = (proxyInfo && proxyInfo->IsHTTPS()); mUsingHttpProxy = mUsingHttpsProxy || (proxyInfo && proxyInfo->IsHTTP()); @@ -111,9 +78,8 @@ nsHttpConnectionInfo::SetOriginServer(const nsACString &host, int32_t port) // byte 1 is S/. S is for end to end ssl such as https:// uris // byte 2 is A/. A is for an anonymous channel (no cookies, etc..) // byte 3 is P/. P is for a private browising channel - // byte 4 is R/. 
R is for 'relaxed' unauthed TLS for http:// uris + mHashKey.AssignLiteral("...."); - mHashKey.AssignLiteral("....."); mHashKey.Append(keyHost); mHashKey.Append(':'); mHashKey.AppendInt(keyPort); @@ -152,62 +118,20 @@ nsHttpConnectionInfo::SetOriginServer(const nsACString &host, int32_t port) mHashKey.AppendInt(ProxyPort()); mHashKey.Append(')'); } - - if(!mAuthenticationHost.IsEmpty()) { - mHashKey.AppendLiteral(" '); - } - - if (!mNPNToken.IsEmpty()) { - mHashKey.AppendLiteral(" {NPN-TOKEN "); - mHashKey.Append(mNPNToken); - mHashKey.AppendLiteral("}"); - } } nsHttpConnectionInfo* nsHttpConnectionInfo::Clone() const { - nsHttpConnectionInfo *clone; - if (mAuthenticationHost.IsEmpty()) { - clone = new nsHttpConnectionInfo(mHost, mPort, mNPNToken, mUsername, mProxyInfo, mEndToEndSSL); - } else { - MOZ_ASSERT(mEndToEndSSL); - clone = new nsHttpConnectionInfo(mHost, mPort, mNPNToken, mUsername, mProxyInfo, - mAuthenticationHost, - mAuthenticationPort); - } + nsHttpConnectionInfo* clone = new nsHttpConnectionInfo(mHost, mPort, mUsername, mProxyInfo, mEndToEndSSL); - // Make sure the anonymous, relaxed, and private flags are transferred + // Make sure the anonymous and private flags are transferred! 
clone->SetAnonymous(GetAnonymous()); clone->SetPrivate(GetPrivate()); - clone->SetRelaxed(GetRelaxed()); MOZ_ASSERT(clone->Equals(this)); - return clone; } -void -nsHttpConnectionInfo::CloneAsDirectRoute(nsHttpConnectionInfo **outCI) -{ - if (mAuthenticationHost.IsEmpty()) { - *outCI = Clone(); - return; - } - - nsRefPtr clone = - new nsHttpConnectionInfo(mAuthenticationHost, mAuthenticationPort, - EmptyCString(), mUsername, mProxyInfo, mEndToEndSSL); - // Make sure the anonymous, relaxed, and private flags are transferred - clone->SetAnonymous(GetAnonymous()); - clone->SetPrivate(GetPrivate()); - clone->SetRelaxed(GetRelaxed()); - clone.forget(outCI); -} - nsresult nsHttpConnectionInfo::CreateWildCard(nsHttpConnectionInfo **outParam) { @@ -221,7 +145,7 @@ nsHttpConnectionInfo::CreateWildCard(nsHttpConnectionInfo **outParam) nsRefPtr clone; clone = new nsHttpConnectionInfo(NS_LITERAL_CSTRING("*"), 0, - mNPNToken, mUsername, mProxyInfo, true); + mUsername, mProxyInfo, true); // Make sure the anonymous and private flags are transferred! 
clone->SetAnonymous(GetAnonymous()); clone->SetPrivate(GetPrivate()); diff --git a/netwerk/protocol/http/nsHttpConnectionInfo.h b/netwerk/protocol/http/nsHttpConnectionInfo.h index af440a79a7a..6f3d0bdd4b9 100644 --- a/netwerk/protocol/http/nsHttpConnectionInfo.h +++ b/netwerk/protocol/http/nsHttpConnectionInfo.h @@ -32,23 +32,11 @@ namespace mozilla { namespace net { class nsHttpConnectionInfo { public: - nsHttpConnectionInfo(const nsACString &physicalHost, - int32_t physicalPort, - const nsACString &npnToken, + nsHttpConnectionInfo(const nsACString &host, int32_t port, const nsACString &username, - nsProxyInfo *proxyInfo, + nsProxyInfo* proxyInfo, bool endToEndSSL = false); - // this version must use TLS and you may supply the domain - // information to be validated - nsHttpConnectionInfo(const nsACString &physicalHost, - int32_t physicalPort, - const nsACString &npnToken, - const nsACString &username, - nsProxyInfo *proxyInfo, - const nsACString &logicalHost, - int32_t logicalPort); - private: virtual ~nsHttpConnectionInfo() { @@ -58,12 +46,15 @@ private: public: const nsAFlatCString &HashKey() const { return mHashKey; } - const nsCString &GetAuthenticationHost() const { return mAuthenticationHost; } - int32_t GetAuthenticationPort() const { return mAuthenticationPort; } + void SetOriginServer(const nsACString &host, int32_t port); + + void SetOriginServer(const char *host, int32_t port) + { + SetOriginServer(nsDependentCString(host), port); + } // OK to treat these as an infalible allocation nsHttpConnectionInfo* Clone() const; - void CloneAsDirectRoute(nsHttpConnectionInfo **outParam); nsresult CreateWildCard(nsHttpConnectionInfo **outParam); const char *ProxyHost() const { return mProxyInfo ? mProxyInfo->Host().get() : nullptr; } @@ -92,12 +83,8 @@ public: bool GetAnonymous() const { return mHashKey.CharAt(2) == 'A'; } void SetPrivate(bool priv) { mHashKey.SetCharAt(priv ? 
'P' : '.', 3); } bool GetPrivate() const { return mHashKey.CharAt(3) == 'P'; } - void SetRelaxed(bool relaxed) - { mHashKey.SetCharAt(relaxed ? 'R' : '.', 4); } - bool GetRelaxed() const { return mHashKey.CharAt(4) == 'R'; } const nsCString &GetHost() { return mHost; } - const nsCString &GetNPNToken() { return mNPNToken; } // Returns true for any kind of proxy (http, socks, https, etc..) bool UsingProxy(); @@ -121,26 +108,15 @@ public: bool HostIsLocalIPLiteral() const; private: - void Init(const nsACString &host, - int32_t port, - const nsACString &npnToken, - const nsACString &username, - nsProxyInfo* proxyInfo, - bool EndToEndSSL); - void SetOriginServer(const nsACString &host, int32_t port); - nsCString mHashKey; nsCString mHost; int32_t mPort; nsCString mUsername; - nsCString mAuthenticationHost; - int32_t mAuthenticationPort; nsCOMPtr mProxyInfo; bool mUsingHttpProxy; bool mUsingHttpsProxy; bool mEndToEndSSL; bool mUsingConnect; // if will use CONNECT with http proxy - nsCString mNPNToken; // for nsRefPtr NS_INLINE_DECL_THREADSAFE_REFCOUNTING(nsHttpConnectionInfo) diff --git a/netwerk/protocol/http/nsHttpConnectionMgr.cpp b/netwerk/protocol/http/nsHttpConnectionMgr.cpp index 208d0a6e887..65ee70d7d94 100644 --- a/netwerk/protocol/http/nsHttpConnectionMgr.cpp +++ b/netwerk/protocol/http/nsHttpConnectionMgr.cpp @@ -382,7 +382,6 @@ public: // intentional! 
bool mIgnoreIdle; bool mIgnorePossibleSpdyConnections; bool mIsFromPredictor; - bool mAllow1918; // As above, added manually so we can use nsRefPtr without inheriting from // nsISupports @@ -397,25 +396,16 @@ NS_IMPL_RELEASE(SpeculativeConnectArgs) nsresult nsHttpConnectionMgr::SpeculativeConnect(nsHttpConnectionInfo *ci, nsIInterfaceRequestor *callbacks, - uint32_t caps, - NullHttpTransaction *nullTransaction) + uint32_t caps) { MOZ_ASSERT(NS_IsMainThread(), "nsHttpConnectionMgr::SpeculativeConnect called off main thread!"); LOG(("nsHttpConnectionMgr::SpeculativeConnect [ci=%s]\n", ci->HashKey().get())); - nsCOMPtr overrider = - do_GetInterface(callbacks); - - bool allow1918 = false; - if (overrider) { - overrider->GetAllow1918(&allow1918); - } - // Hosts that are Local IP Literals should not be speculatively // connected - Bug 853423. - if ((!allow1918) && ci && ci->HostIsLocalIPLiteral()) { + if (ci && ci->HostIsLocalIPLiteral()) { LOG(("nsHttpConnectionMgr::SpeculativeConnect skipping RFC1918 " "address [%s]", ci->Host())); return NS_OK; @@ -429,9 +419,10 @@ nsHttpConnectionMgr::SpeculativeConnect(nsHttpConnectionInfo *ci, NS_NewInterfaceRequestorAggregation(callbacks, nullptr, getter_AddRefs(wrappedCallbacks)); caps |= ci->GetAnonymous() ? NS_HTTP_LOAD_ANONYMOUS : 0; - args->mTrans = - nullTransaction ? 
nullTransaction : new NullHttpTransaction(ci, wrappedCallbacks, caps); + args->mTrans = new NullHttpTransaction(ci, wrappedCallbacks, caps); + nsCOMPtr overrider = + do_GetInterface(callbacks); if (overrider) { args->mOverridesOK = true; overrider->GetParallelSpeculativeConnectLimit( @@ -440,7 +431,6 @@ nsHttpConnectionMgr::SpeculativeConnect(nsHttpConnectionInfo *ci, overrider->GetIgnorePossibleSpdyConnections( &args->mIgnorePossibleSpdyConnections); overrider->GetIsFromPredictor(&args->mIsFromPredictor); - overrider->GetAllow1918(&args->mAllow1918); } nsresult rv = @@ -1307,7 +1297,7 @@ nsHttpConnectionMgr::ReportFailedToProcess(nsIURI *uri) // report the event for all the permutations of anonymous and // private versions of this host nsRefPtr ci = - new nsHttpConnectionInfo(host, port, EmptyCString(), username, nullptr, usingSSL); + new nsHttpConnectionInfo(host, port, username, nullptr, usingSSL); ci->SetAnonymous(false); ci->SetPrivate(false); PipelineFeedbackInfo(ci, RedCorruptedContent, nullptr, 0); @@ -1477,15 +1467,7 @@ nsHttpConnectionMgr::MakeNewConnection(nsConnectionEntry *ent, LOG(("nsHttpConnectionMgr::MakeNewConnection [ci = %s]\n" "Found a speculative half open connection\n", ent->mConnInfo->HashKey().get())); - - uint32_t flags; ent->mHalfOpens[i]->SetSpeculative(false); - nsISocketTransport *transport = ent->mHalfOpens[i]->SocketTransport(); - if (NS_SUCCEEDED(transport->GetConnectionFlags(&flags))) { - flags &= ~nsISocketTransport::DISABLE_RFC1918; - transport->SetConnectionFlags(flags); - } - Telemetry::AutoCounter usedSpeculativeConn; ++usedSpeculativeConn; @@ -1528,7 +1510,7 @@ nsHttpConnectionMgr::MakeNewConnection(nsConnectionEntry *ent, if (AtActiveConnectionLimit(ent, trans->Caps())) return NS_ERROR_NOT_AVAILABLE; - nsresult rv = CreateTransport(ent, trans, trans->Caps(), false, false, true); + nsresult rv = CreateTransport(ent, trans, trans->Caps(), false); if (NS_FAILED(rv)) { /* hard failure */ 
LOG(("nsHttpConnectionMgr::MakeNewConnection [ci = %s trans = %p] " @@ -2155,15 +2137,13 @@ nsHttpConnectionMgr::CreateTransport(nsConnectionEntry *ent, nsAHttpTransaction *trans, uint32_t caps, bool speculative, - bool isFromPredictor, - bool allow1918) + bool isFromPredictor) { MOZ_ASSERT(PR_GetCurrentThread() == gSocketThread); nsRefPtr sock = new nsHalfOpenSocket(ent, trans, caps); if (speculative) { sock->SetSpeculative(true); - sock->SetAllow1918(allow1918); Telemetry::AutoCounter totalSpeculativeConn; ++totalSpeculativeConn; @@ -2948,23 +2928,20 @@ nsHttpConnectionMgr::OnMsgSpeculativeConnect(int32_t, void *param) bool ignorePossibleSpdyConnections = false; bool ignoreIdle = false; bool isFromPredictor = false; - bool allow1918 = false; if (args->mOverridesOK) { parallelSpeculativeConnectLimit = args->mParallelSpeculativeConnectLimit; ignorePossibleSpdyConnections = args->mIgnorePossibleSpdyConnections; ignoreIdle = args->mIgnoreIdle; isFromPredictor = args->mIsFromPredictor; - allow1918 = args->mAllow1918; } - bool keepAlive = args->mTrans->Caps() & NS_HTTP_ALLOW_KEEPALIVE; if (mNumHalfOpenConns < parallelSpeculativeConnectLimit && ((ignoreIdle && (ent->mIdleConns.Length() < parallelSpeculativeConnectLimit)) || !ent->mIdleConns.Length()) && - !(keepAlive && RestrictConnections(ent, ignorePossibleSpdyConnections)) && + !RestrictConnections(ent, ignorePossibleSpdyConnections) && !AtActiveConnectionLimit(ent, args->mTrans->Caps())) { - CreateTransport(ent, args->mTrans, args->mTrans->Caps(), true, isFromPredictor, allow1918); + CreateTransport(ent, args->mTrans, args->mTrans->Caps(), true, isFromPredictor); } else { LOG((" Transport not created due to existing connection count\n")); @@ -2998,6 +2975,7 @@ nsHttpConnectionMgr::nsConnectionHandle::PushBack(const char *buf, uint32_t bufL //////////////////////// nsHalfOpenSocket + NS_IMPL_ISUPPORTS(nsHttpConnectionMgr::nsHalfOpenSocket, nsIOutputStreamCallback, nsITransportEventSink, @@ -3013,7 +2991,6 @@ 
nsHalfOpenSocket::nsHalfOpenSocket(nsConnectionEntry *ent, , mCaps(caps) , mSpeculative(false) , mIsFromPredictor(false) - , mAllow1918(true) , mHasConnected(false) , mPrimaryConnectedOK(false) , mBackupConnectedOK(false) @@ -3089,7 +3066,7 @@ nsHalfOpenSocket::SetupStreams(nsISocketTransport **transport, tmpFlags |= nsISocketTransport::DISABLE_IPV6; } - if (!Allow1918()) { + if (IsSpeculative()) { tmpFlags |= nsISocketTransport::DISABLE_RFC1918; } @@ -3153,8 +3130,6 @@ nsHttpConnectionMgr::nsHalfOpenSocket::SetupPrimaryStreams() nsresult nsHttpConnectionMgr::nsHalfOpenSocket::SetupBackupStreams() { - MOZ_ASSERT(mTransaction && !mTransaction->IsNullTransaction()); - mBackupSynStarted = TimeStamp::Now(); nsresult rv = SetupStreams(getter_AddRefs(mBackupTransport), getter_AddRefs(mBackupStreamIn), @@ -3177,8 +3152,8 @@ nsHttpConnectionMgr::nsHalfOpenSocket::SetupBackupTimer() { uint16_t timeout = gHttpHandler->GetIdleSynTimeout(); MOZ_ASSERT(!mSynTimer, "timer already initd"); - if (timeout && !mTransaction->IsDone() && - !mTransaction->IsNullTransaction()) { + + if (timeout && !mTransaction->IsDone()) { // Setup the timer that will establish a backup socket // if we do not get a writable event on the main one. // We do this because a lost SYN takes a very long time @@ -3364,7 +3339,8 @@ nsHalfOpenSocket::OnOutputStreamReady(nsIAsyncOutputStream *out) mEnt->mPendingQ.RemoveElementAt(index); gHttpHandler->ConnMgr()->AddActiveConn(conn, mEnt); rv = gHttpHandler->ConnMgr()->DispatchTransaction(mEnt, temp, conn); - } else { + } + else { // this transaction was dispatched off the pending q before all the // sockets established themselves. 
@@ -3382,22 +3358,17 @@ nsHalfOpenSocket::OnOutputStreamReady(nsIAsyncOutputStream *out) !mEnt->mConnInfo->UsingConnect()) { LOG(("nsHalfOpenSocket::OnOutputStreamReady null transaction will " "be used to finish SSL handshake on conn %p\n", conn.get())); - nsRefPtr trans; - if (mTransaction->IsNullTransaction()) { - // null transactions cannot be put in the entry queue, so that - // explains why it is not present. - trans = mTransaction; - } else { - trans = new NullHttpTransaction(mEnt->mConnInfo, - callbacks, - mCaps & ~NS_HTTP_ALLOW_PIPELINING); - } + nsRefPtr trans = + new NullHttpTransaction(mEnt->mConnInfo, + callbacks, + mCaps & ~NS_HTTP_ALLOW_PIPELINING); gHttpHandler->ConnMgr()->AddActiveConn(conn, mEnt); conn->Classify(nsAHttpTransaction::CLASS_SOLO); rv = gHttpHandler->ConnMgr()-> DispatchAbstractTransaction(mEnt, trans, mCaps, conn, 0); - } else { + } + else { // otherwise just put this in the persistent connection pool LOG(("nsHalfOpenSocket::OnOutputStreamReady no transaction match " "returning conn %p to pool\n", conn.get())); diff --git a/netwerk/protocol/http/nsHttpConnectionMgr.h b/netwerk/protocol/http/nsHttpConnectionMgr.h index 1fd8bff14e2..38f2cd04c50 100644 --- a/netwerk/protocol/http/nsHttpConnectionMgr.h +++ b/netwerk/protocol/http/nsHttpConnectionMgr.h @@ -16,7 +16,6 @@ #include "mozilla/ReentrantMonitor.h" #include "mozilla/TimeStamp.h" #include "mozilla/Attributes.h" -#include "AlternateServices.h" #include "nsIObserver.h" #include "nsITimer.h" @@ -26,13 +25,11 @@ class nsIHttpUpgradeListener; namespace mozilla { namespace net { class EventTokenBucket; -class NullHttpTransaction; struct HttpRetParams; //----------------------------------------------------------------------------- class nsHttpConnectionMgr : public nsIObserver - , public AltSvcCache { public: NS_DECL_THREADSAFE_ISUPPORTS @@ -118,8 +115,7 @@ public: // real transaction for this connectionInfo. 
nsresult SpeculativeConnect(nsHttpConnectionInfo *, nsIInterfaceRequestor *, - uint32_t caps = 0, - NullHttpTransaction * = nullptr); + uint32_t caps = 0); // called when a connection is done processing a transaction. if the // connection can be reused then it will be added to the idle list, else @@ -469,9 +465,6 @@ private: bool IsFromPredictor() { return mIsFromPredictor; } void SetIsFromPredictor(bool val) { mIsFromPredictor = val; } - bool Allow1918() { return mAllow1918; } - void SetAllow1918(bool val) { mAllow1918 = val; } - bool HasConnected() { return mHasConnected; } void PrintDiagnostics(nsCString &log); @@ -497,8 +490,6 @@ private: // connections from the predictor. bool mIsFromPredictor; - bool mAllow1918; - TimeStamp mPrimarySynStarted; TimeStamp mBackupSynStarted; @@ -571,7 +562,7 @@ private: void ClosePersistentConnections(nsConnectionEntry *ent); void ReportProxyTelemetry(nsConnectionEntry *ent); nsresult CreateTransport(nsConnectionEntry *, nsAHttpTransaction *, - uint32_t, bool, bool, bool); + uint32_t, bool, bool = false); void AddActiveConn(nsHttpConnection *, nsConnectionEntry *); void DecrementActiveConnCount(nsHttpConnection *); void StartedConnect(); diff --git a/netwerk/protocol/http/nsHttpHandler.cpp b/netwerk/protocol/http/nsHttpHandler.cpp index a681284cf6e..1aa2321da03 100644 --- a/netwerk/protocol/http/nsHttpHandler.cpp +++ b/netwerk/protocol/http/nsHttpHandler.cpp @@ -190,8 +190,6 @@ nsHttpHandler::nsHttpHandler() , mCoalesceSpdy(true) , mSpdyPersistentSettings(false) , mAllowPush(true) - , mEnableAltSvc(true) - , mEnableAltSvcOE(true) , mSpdySendingChunkSize(ASpdySession::kSendingChunkSize) , mSpdySendBufferSize(ASpdySession::kTCPSendBufferSize) , mSpdyPushAllowance(32768) @@ -1232,21 +1230,6 @@ nsHttpHandler::PrefsChanged(nsIPrefBranch *prefs, const char *pref) mAllowPush = cVar; } - if (PREF_CHANGED(HTTP_PREF("altsvc.enabled"))) { - rv = prefs->GetBoolPref(HTTP_PREF("atsvc.enabled"), - &cVar); - if (NS_SUCCEEDED(rv)) - 
mEnableAltSvc = cVar; - } - - - if (PREF_CHANGED(HTTP_PREF("altsvc.oe"))) { - rv = prefs->GetBoolPref(HTTP_PREF("atsvc.oe"), - &cVar); - if (NS_SUCCEEDED(rv)) - mEnableAltSvcOE = cVar; - } - if (PREF_CHANGED(HTTP_PREF("spdy.push-allowance"))) { rv = prefs->GetIntPref(HTTP_PREF("spdy.push-allowance"), &val); if (NS_SUCCEEDED(rv)) { @@ -1851,18 +1834,11 @@ nsHttpHandler::Observe(nsISupports *subject, } } else if (!strcmp(topic, "last-pb-context-exited")) { mPrivateAuthCache.ClearAll(); - if (mConnMgr) { - mConnMgr->ClearAltServiceMappings(); - } } else if (!strcmp(topic, "browser:purge-session-history")) { - if (mConnMgr) { - if (gSocketTransportService) { - nsCOMPtr event = - NS_NewRunnableMethod(mConnMgr, - &nsHttpConnectionMgr::ClearConnectionHistory); - gSocketTransportService->Dispatch(event, NS_DISPATCH_NORMAL); - } - mConnMgr->ClearAltServiceMappings(); + if (mConnMgr && gSocketTransportService) { + nsCOMPtr event = NS_NewRunnableMethod(mConnMgr, + &nsHttpConnectionMgr::ClearConnectionHistory); + gSocketTransportService->Dispatch(event, NS_DISPATCH_NORMAL); } } else if (!strcmp(topic, NS_NETWORK_LINK_TOPIC)) { nsAutoCString converted = NS_ConvertUTF16toUTF8(data); @@ -1941,7 +1917,7 @@ nsHttpHandler::SpeculativeConnect(nsIURI *aURI, aURI->GetUsername(username); nsHttpConnectionInfo *ci = - new nsHttpConnectionInfo(host, port, EmptyCString(), username, nullptr, usingSSL); + new nsHttpConnectionInfo(host, port, username, nullptr, usingSSL); return SpeculativeConnect(ci, aCallbacks); } diff --git a/netwerk/protocol/http/nsHttpHandler.h b/netwerk/protocol/http/nsHttpHandler.h index b124bef6c75..6fb1affb65d 100644 --- a/netwerk/protocol/http/nsHttpHandler.h +++ b/netwerk/protocol/http/nsHttpHandler.h @@ -37,7 +37,6 @@ class Tickler; class nsHttpConnection; class nsHttpConnectionInfo; class nsHttpTransaction; -class AltSvcMapping; //----------------------------------------------------------------------------- // nsHttpHandler - protocol handler for HTTP and HTTPS @@ 
-109,8 +108,6 @@ public: PRIntervalTime SpdyPingThreshold() { return mSpdyPingThreshold; } PRIntervalTime SpdyPingTimeout() { return mSpdyPingTimeout; } bool AllowPush() { return mAllowPush; } - bool AllowAltSvc() { return mEnableAltSvc; } - bool AllowAltSvcOE() { return mEnableAltSvcOE; } uint32_t ConnectTimeout() { return mConnectTimeout; } uint32_t ParallelSpeculativeConnectLimit() { return mParallelSpeculativeConnectLimit; } bool CriticalRequestPrioritization() { return mCriticalRequestPrioritization; } @@ -222,22 +219,6 @@ public: return mConnMgr->SpeculativeConnect(ci, callbacks, caps); } - // Alternate Services Maps are main thread only - void UpdateAltServiceMapping(AltSvcMapping *map, - nsProxyInfo *proxyInfo, - nsIInterfaceRequestor *callbacks, - uint32_t caps) - { - mConnMgr->UpdateAltServiceMapping(map, proxyInfo, callbacks, caps); - } - - AltSvcMapping *GetAltServiceMapping(const nsACString &scheme, - const nsACString &host, - int32_t port, bool pb) - { - return mConnMgr->GetAltServiceMapping(scheme, host, port, pb); - } - // // The HTTP handler caches pointers to specific XPCOM services, and // provides the following helper routines for accessing those services: @@ -473,8 +454,6 @@ private: uint32_t mCoalesceSpdy : 1; uint32_t mSpdyPersistentSettings : 1; uint32_t mAllowPush : 1; - uint32_t mEnableAltSvc : 1; - uint32_t mEnableAltSvcOE : 1; // Try to use SPDY features instead of HTTP/1.1 over SSL SpdyInformation mSpdyInfo; diff --git a/netwerk/protocol/http/nsHttpRequestHead.cpp b/netwerk/protocol/http/nsHttpRequestHead.cpp index 66835bffe90..4ca578a4144 100644 --- a/netwerk/protocol/http/nsHttpRequestHead.cpp +++ b/netwerk/protocol/http/nsHttpRequestHead.cpp @@ -51,18 +51,6 @@ nsHttpRequestHead::SetMethod(const nsACString &method) } } -void -nsHttpRequestHead::SetOrigin(const nsACString &scheme, const nsACString &host, int32_t port) -{ - mOrigin.Assign(scheme); - mOrigin.Append(NS_LITERAL_CSTRING("://")); - mOrigin.Append(host); - if (port >= 0) { - 
mOrigin.Append(NS_LITERAL_CSTRING(":")); - mOrigin.AppendInt(port); - } -} - bool nsHttpRequestHead::IsSafeMethod() const { diff --git a/netwerk/protocol/http/nsHttpRequestHead.h b/netwerk/protocol/http/nsHttpRequestHead.h index 5d9c0ce60ee..2a0c010b70c 100644 --- a/netwerk/protocol/http/nsHttpRequestHead.h +++ b/netwerk/protocol/http/nsHttpRequestHead.h @@ -36,9 +36,6 @@ public: void SetHTTPS(bool val) { mHTTPS = val; } bool IsHTTPS() const { return mHTTPS; } - void SetOrigin(const nsACString &scheme, const nsACString &host, int32_t port); - const nsCString &Origin() const { return mOrigin; } - const char *PeekHeader(nsHttpAtom h) const { return mHeaders.PeekHeader(h); @@ -100,7 +97,6 @@ private: nsCString mMethod; nsHttpVersion mVersion; nsCString mRequestURI; - nsCString mOrigin; ParsedMethodType mParsedMethod; bool mHTTPS; }; diff --git a/netwerk/protocol/http/nsHttpTransaction.cpp b/netwerk/protocol/http/nsHttpTransaction.cpp index 161f8fcb377..15c50476424 100644 --- a/netwerk/protocol/http/nsHttpTransaction.cpp +++ b/netwerk/protocol/http/nsHttpTransaction.cpp @@ -91,6 +91,7 @@ nsHttpTransaction::nsHttpTransaction() : mLock("transaction lock") , mRequestSize(0) , mConnection(nullptr) + , mConnInfo(nullptr) , mRequestHead(nullptr) , mResponseHead(nullptr) , mContentLength(-1) @@ -123,7 +124,6 @@ nsHttpTransaction::nsHttpTransaction() , mDispatchedAsBlocking(false) , mResponseTimeoutEnabled(true) , mDontRouteViaWildCard(false) - , mForceRestart(false) , mReportedStart(false) , mReportedResponseHeader(false) , mForTakeResponseHead(nullptr) @@ -848,11 +848,6 @@ nsHttpTransaction::Close(nsresult reason) // if (reason == NS_ERROR_NET_RESET || reason == NS_OK) { - if (mForceRestart && NS_SUCCEEDED(Restart())) { - LOG(("transaction force restarted\n")); - return; - } - // reallySentData is meant to separate the instances where data has // been sent by this transaction but buffered at a higher level while // a TLS session (perhaps via a tunnel) is setup. 
@@ -1115,17 +1110,6 @@ nsHttpTransaction::Restart() mCaps &= ~NS_HTTP_ALLOW_PIPELINING; SetPipelinePosition(0); - if (!mConnInfo->GetAuthenticationHost().IsEmpty()) { - MutexAutoLock lock(*nsHttp::GetLock()); - nsRefPtr ci; - mConnInfo->CloneAsDirectRoute(getter_AddRefs(ci)); - mConnInfo = ci; - if (mRequestHead) { - mRequestHead->SetHeader(nsHttp::Alternate_Service_Used, NS_LITERAL_CSTRING("0")); - } - } - mForceRestart = false; - return gHttpHandler->InitiateTransaction(this, mPriority); } @@ -1397,11 +1381,11 @@ nsHttpTransaction::ParseHead(char *buf, return NS_OK; } +// called on the socket thread nsresult nsHttpTransaction::HandleContentStart() { LOG(("nsHttpTransaction::HandleContentStart [this=%p]\n", this)); - MOZ_ASSERT(PR_GetCurrentThread() == gSocketThread); if (mResponseHead) { #if defined(PR_LOGGING) @@ -1445,14 +1429,6 @@ nsHttpTransaction::HandleContentStart() mNoContent = true; LOG(("this response should not contain a body.\n")); break; - case 421: - if (!mConnInfo->GetAuthenticationHost().IsEmpty()) { - LOG(("Not Authoritative.\n")); - gHttpHandler->ConnMgr()-> - ClearHostMapping(mConnInfo->GetHost(), mConnInfo->Port()); - mForceRestart = true; - } - break; } if (mResponseHead->Status() == 200 && diff --git a/netwerk/protocol/http/nsHttpTransaction.h b/netwerk/protocol/http/nsHttpTransaction.h index 6cae916d4b3..791b66f89c0 100644 --- a/netwerk/protocol/http/nsHttpTransaction.h +++ b/netwerk/protocol/http/nsHttpTransaction.h @@ -264,7 +264,6 @@ private: bool mDispatchedAsBlocking; bool mResponseTimeoutEnabled; bool mDontRouteViaWildCard; - bool mForceRestart; // mClosed := transaction has been explicitly closed // mTransactionDone := transaction ran to completion or was interrupted diff --git a/netwerk/socket/nsISSLSocketControl.idl b/netwerk/socket/nsISSLSocketControl.idl index 4bbf08af638..0288172dd0f 100644 --- a/netwerk/socket/nsISSLSocketControl.idl +++ b/netwerk/socket/nsISSLSocketControl.idl @@ -15,7 +15,7 @@ class nsCString; %} [ref] native 
nsCStringTArrayRef(nsTArray); -[scriptable, builtinclass, uuid(f160ec31-01f3-47f2-b542-0e12a647b07f)] +[scriptable, builtinclass, uuid(89b819dc-31b0-4d09-915a-66f8a3703483)] interface nsISSLSocketControl : nsISupports { attribute nsIInterfaceRequestor notificationCallbacks; @@ -53,11 +53,6 @@ interface nsISSLSocketControl : nsISupports { in ACString hostname, in long port); - /* Determine if existing connection should be trusted to convey information about - * a hostname. - */ - boolean isAcceptableForHost(in ACString hostname); - /* The Key Exchange Algorithm is used when determining whether or not to do false start and whether or not HTTP/2 can be used. @@ -108,26 +103,5 @@ interface nsISSLSocketControl : nsISupports { * the user or searching the set of rememebered user cert decisions. */ attribute nsIX509Cert clientCert; - - /** - * If you wish to verify the host certificate using a different name than - * was used for the tcp connection, but without using proxy semantics, you - * can set authenticationName and authenticationPort - */ - attribute ACString authenticationName; - [infallible] attribute long authenticationPort; - - /** - * set bypassAuthentication to true if the server certificate checks should - * not be enforced. This is to enable non-secure transport over TLS. - */ - [infallible] attribute boolean bypassAuthentication; - - /* - * failedVerification is true if any enforced certificate checks have failed. - * Connections that have not yet tried to verify, have verifications bypassed, - * or are using acceptable exceptions will all return false. 
- */ - [infallible] readonly attribute boolean failedVerification; }; diff --git a/netwerk/test/unit/test_http2.js b/netwerk/test/unit/test_http2.js index 5bf9284c708..885e5ea9466 100644 --- a/netwerk/test/unit/test_http2.js +++ b/netwerk/test/unit/test_http2.js @@ -335,53 +335,6 @@ function test_http2_post_big() { do_post(posts[1], chan, listener); } -Cu.import("resource://testing-common/httpd.js"); -var httpserv = null; -var ios = Components.classes["@mozilla.org/network/io-service;1"] - .getService(Components.interfaces.nsIIOService); - -var altsvcClientListener = { - onStartRequest: function test_onStartR(request, ctx) { - do_check_eq(request.status, Components.results.NS_OK); - }, - - onDataAvailable: function test_ODA(request, cx, stream, offset, cnt) { - read_stream(stream, cnt); - }, - - onStopRequest: function test_onStopR(request, ctx, status) { - var isHttp2Connection = checkIsHttp2(request); - if (!isHttp2Connection) { - // not over tls yet - retry. It's all async and transparent to client - var chan = ios.newChannel("http://localhost:" + httpserv.identity.primaryPort + "/altsvc1", - null, null).QueryInterface(Components.interfaces.nsIHttpChannel); - chan.asyncOpen(altsvcClientListener, null); - } else { - do_check_true(isHttp2Connection); - httpserv.stop(do_test_finished); - run_next_test(); - } - } -}; - -function altsvcHttp1Server(metadata, response) { - response.setStatusLine(metadata.httpVersion, 200, "OK"); - response.setHeader("Content-Type", "text/plain", false); - response.setHeader("Alt-Svc", 'h2=":6944"; ma=3200, h2-14=":6944"', false); - var body = "this is where a cool kid would write something neat.\n"; - response.bodyOutputStream.write(body, body.length); -} - -function test_http2_altsvc() { - httpserv = new HttpServer(); - httpserv.registerPathHandler("/altsvc1", altsvcHttp1Server); - httpserv.start(-1); - - var chan = ios.newChannel("http://localhost:" + httpserv.identity.primaryPort + "/altsvc1", - null, 
null).QueryInterface(Components.interfaces.nsIHttpChannel); - chan.asyncOpen(altsvcClientListener, null); -} - // hack - the header test resets the multiplex object on the server, // so make sure header is always run before the multiplex test. // @@ -393,7 +346,6 @@ var tests = [ test_http2_post_big , test_http2_push2 , test_http2_push3 , test_http2_push4 - , test_http2_altsvc , test_http2_doubleheader , test_http2_xhr , test_http2_header @@ -480,8 +432,6 @@ function resetPrefs() { prefs.setBoolPref("network.http.spdy.allow-push", spdypush); prefs.setBoolPref("network.http.spdy.enabled.http2draft", http2pref); prefs.setBoolPref("network.http.spdy.enforce-tls-profile", tlspref); - prefs.setBoolPref("network.http.altsvc.enabled", altsvcpref1); - prefs.setBoolPref("network.http.altsvc.oe", altsvcpref2); } function run_test() { @@ -504,16 +454,11 @@ function run_test() { spdypush = prefs.getBoolPref("network.http.spdy.allow-push"); http2pref = prefs.getBoolPref("network.http.spdy.enabled.http2draft"); tlspref = prefs.getBoolPref("network.http.spdy.enforce-tls-profile"); - altsvcpref1 = prefs.getBoolPref("network.http.altsvc.enabled"); - altsvcpref2 = prefs.getBoolPref("network.http.altsvc.oe", true); - prefs.setBoolPref("network.http.spdy.enabled", true); prefs.setBoolPref("network.http.spdy.enabled.v3", true); prefs.setBoolPref("network.http.spdy.allow-push", true); prefs.setBoolPref("network.http.spdy.enabled.http2draft", true); prefs.setBoolPref("network.http.spdy.enforce-tls-profile", false); - prefs.setBoolPref("network.http.altsvc.enabled", true); - prefs.setBoolPref("network.http.altsvc.oe", true); loadGroup = Cc["@mozilla.org/network/load-group;1"].createInstance(Ci.nsILoadGroup); diff --git a/security/manager/ssl/src/SSLServerCertVerification.cpp b/security/manager/ssl/src/SSLServerCertVerification.cpp index 224d008480b..41ee6b9fd83 100644 --- a/security/manager/ssl/src/SSLServerCertVerification.cpp +++ b/security/manager/ssl/src/SSLServerCertVerification.cpp 
@@ -400,16 +400,6 @@ CertErrorRunnable::CheckCertOverrides() mDefaultErrorCodeToReport); } - nsCOMPtr sslSocketControl = do_QueryInterface( - NS_ISUPPORTS_CAST(nsITransportSecurityInfo*, mInfoObject)); - if (sslSocketControl && - sslSocketControl->GetBypassAuthentication()) { - PR_LOG(gPIPNSSLog, PR_LOG_DEBUG, - ("[%p][%p] Bypass Auth in CheckCertOverrides\n", - mFdForLogging, this)); - return new SSLServerCertVerificationResult(mInfoObject, 0); - } - int32_t port; mInfoObject->GetPort(&port); @@ -500,6 +490,8 @@ CertErrorRunnable::CheckCertOverrides() // First, deliver the technical details of the broken SSL status. // Try to get a nsIBadCertListener2 implementation from the socket consumer. + nsCOMPtr sslSocketControl = do_QueryInterface( + NS_ISUPPORTS_CAST(nsITransportSecurityInfo*, mInfoObject)); if (sslSocketControl) { nsCOMPtr cb; sslSocketControl->GetNotificationCallbacks(getter_AddRefs(cb)); diff --git a/security/manager/ssl/src/nsNSSIOLayer.cpp b/security/manager/ssl/src/nsNSSIOLayer.cpp index 630885f8e81..7f915e7eb15 100644 --- a/security/manager/ssl/src/nsNSSIOLayer.cpp +++ b/security/manager/ssl/src/nsNSSIOLayer.cpp @@ -132,13 +132,11 @@ nsNSSSocketInfo::nsNSSSocketInfo(SharedSSLState& aState, uint32_t providerFlags) mJoined(false), mSentClientCert(false), mNotedTimeUntilReady(false), - mFailedVerification(false), mKEAUsed(nsISSLSocketControl::KEY_EXCHANGE_UNKNOWN), mKEAExpected(nsISSLSocketControl::KEY_EXCHANGE_UNKNOWN), mKEAKeyBits(0), mSSLVersionUsed(nsISSLSocketControl::SSL_VERSION_UNKNOWN), mMACAlgorithmUsed(nsISSLSocketControl::SSL_MAC_UNKNOWN), - mBypassAuthentication(false), mProviderFlags(providerFlags), mSocketCreationTimestamp(TimeStamp::Now()), mPlaintextBytesRead(0), @@ -228,52 +226,6 @@ nsNSSSocketInfo::SetClientCert(nsIX509Cert* aClientCert) return NS_OK; } -NS_IMETHODIMP -nsNSSSocketInfo::GetBypassAuthentication(bool* arg) -{ - *arg = mBypassAuthentication; - return NS_OK; -} - -NS_IMETHODIMP 
-nsNSSSocketInfo::SetBypassAuthentication(bool arg) -{ - mBypassAuthentication = arg; - return NS_OK; -} - -NS_IMETHODIMP -nsNSSSocketInfo::GetFailedVerification(bool* arg) -{ - *arg = mFailedVerification; - return NS_OK; -} - -NS_IMETHODIMP -nsNSSSocketInfo::GetAuthenticationName(nsACString& aAuthenticationName) -{ - aAuthenticationName = GetHostName(); - return NS_OK; -} - -NS_IMETHODIMP -nsNSSSocketInfo::SetAuthenticationName(const nsACString& aAuthenticationName) -{ - return SetHostName(PromiseFlatCString(aAuthenticationName).get()); -} - -NS_IMETHODIMP -nsNSSSocketInfo::GetAuthenticationPort(int32_t* aAuthenticationPort) -{ - return GetPort(aAuthenticationPort); -} - -NS_IMETHODIMP -nsNSSSocketInfo::SetAuthenticationPort(int32_t aAuthenticationPort) -{ - return SetPort(aAuthenticationPort); -} - NS_IMETHODIMP nsNSSSocketInfo::GetRememberClientAuthCertificate(bool* aRemember) { @@ -426,8 +378,21 @@ nsNSSSocketInfo::GetNegotiatedNPN(nsACString& aNegotiatedNPN) } NS_IMETHODIMP -nsNSSSocketInfo::IsAcceptableForHost(const nsACString& hostname, bool* _retval) +nsNSSSocketInfo::JoinConnection(const nsACString& npnProtocol, + const nsACString& hostname, + int32_t port, + bool* _retval) { + *_retval = false; + + // Different ports may not be joined together + if (port != GetPort()) + return NS_OK; + + // Make sure NPN has been completed and matches requested npnProtocol + if (!mNPNCompleted || !mNegotiatedNPN.Equals(npnProtocol)) + return NS_OK; + // If this is the same hostname then the certicate status does not // need to be considered. They are joinable. 
if (hostname.Equals(GetHostName())) { @@ -497,36 +462,12 @@ nsNSSSocketInfo::IsAcceptableForHost(const nsACString& hostname, bool* _retval) return NS_OK; } - // All tests pass + // All tests pass - this is joinable + mJoined = true; *_retval = true; return NS_OK; } -NS_IMETHODIMP -nsNSSSocketInfo::JoinConnection(const nsACString& npnProtocol, - const nsACString& hostname, - int32_t port, - bool* _retval) -{ - *_retval = false; - - // Different ports may not be joined together - if (port != GetPort()) - return NS_OK; - - // Make sure NPN has been completed and matches requested npnProtocol - if (!mNPNCompleted || !mNegotiatedNPN.Equals(npnProtocol)) - return NS_OK; - - IsAcceptableForHost(hostname, _retval); - - if (*_retval) { - // All tests pass - this is joinable - mJoined = true; - } - return NS_OK; -} - bool nsNSSSocketInfo::GetForSTARTTLS() { @@ -691,7 +632,6 @@ nsNSSSocketInfo::SetCertVerificationResult(PRErrorCode errorCode, } if (errorCode) { - mFailedVerification = true; SetCanceled(errorCode, errorMessageType); } diff --git a/security/manager/ssl/src/nsNSSIOLayer.h b/security/manager/ssl/src/nsNSSIOLayer.h index 6624881a8fe..1d3d735ded2 100644 --- a/security/manager/ssl/src/nsNSSIOLayer.h +++ b/security/manager/ssl/src/nsNSSIOLayer.h @@ -113,22 +113,6 @@ public: void SetMACAlgorithmUsed(int16_t mac) { mMACAlgorithmUsed = mac; } - inline bool GetBypassAuthentication() - { - bool result = false; - mozilla::DebugOnly rv = GetBypassAuthentication(&result); - MOZ_ASSERT(NS_SUCCEEDED(rv)); - return result; - } - - inline int32_t GetAuthenticationPort() - { - int32_t result = -1; - mozilla::DebugOnly rv = GetAuthenticationPort(&result); - MOZ_ASSERT(NS_SUCCEEDED(rv)); - return result; - } - protected: virtual ~nsNSSSocketInfo(); @@ -155,7 +139,6 @@ private: bool mJoined; bool mSentClientCert; bool mNotedTimeUntilReady; - bool mFailedVerification; // mKEA* are used in false start and http/2 detetermination // Values are from nsISSLSocketControl @@ -164,7 +147,6 
@@ private: uint32_t mKEAKeyBits; int16_t mSSLVersionUsed; int16_t mMACAlgorithmUsed; - bool mBypassAuthentication; uint32_t mProviderFlags; mozilla::TimeStamp mSocketCreationTimestamp; diff --git a/toolkit/components/telemetry/Histograms.json b/toolkit/components/telemetry/Histograms.json index 29477f05d1f..af74dc30358 100644 --- a/toolkit/components/telemetry/Histograms.json +++ b/toolkit/components/telemetry/Histograms.json @@ -1193,16 +1193,6 @@ "kind": "boolean", "description": "Whether a HTTP base page load was over SSL or not." }, - "HTTP_TRANSACTION_USE_ALTSVC": { - "expires_in_version": "never", - "kind": "boolean", - "description": "Whether a HTTP transaction was routed via Alt-Svc or not." - }, - "HTTP_TRANSACTION_USE_ALTSVC_OE": { - "expires_in_version": "never", - "kind": "boolean", - "description": "Whether a HTTP transaction routed via Alt-Svc was scheme=http" - }, "SSL_HANDSHAKE_VERSION": { "expires_in_version": "never", "kind": "enumerated", From 7ca0b27d04c7e5fe9393db1dfd851b4f9c28073b Mon Sep 17 00:00:00 2001 From: Brian Hackett Date: Thu, 2 Oct 2014 19:32:39 -0700 Subject: [PATCH 030/146] Bug 1073842 - Add NativeObject subclass of JSObject, remove ObjectImpl, r=luke. 
--- js/public/RootingAPI.h | 28 + js/src/asmjs/AsmJSLink.cpp | 5 +- js/src/asmjs/AsmJSModule.cpp | 5 +- js/src/asmjs/AsmJSModule.h | 2 +- js/src/builtin/Intl.cpp | 30 +- js/src/builtin/MapObject.cpp | 27 +- js/src/builtin/MapObject.h | 4 +- js/src/builtin/RegExp.cpp | 14 +- js/src/builtin/SymbolObject.h | 5 +- js/src/builtin/TestingFunctions.cpp | 10 +- js/src/builtin/TypedObject.cpp | 72 +- js/src/builtin/TypedObject.h | 27 +- js/src/builtin/WeakSetObject.cpp | 6 +- js/src/builtin/WeakSetObject.h | 4 +- js/src/ctypes/CTypes.cpp | 6 +- js/src/frontend/BytecodeEmitter.cpp | 24 +- js/src/frontend/FullParseHandler.h | 2 +- js/src/frontend/ParseNode.cpp | 2 +- js/src/frontend/ParseNode.h | 4 +- js/src/frontend/Parser.cpp | 6 +- js/src/frontend/Parser.h | 2 +- js/src/frontend/SyntaxParseHandler.h | 2 +- js/src/gc/Barrier.cpp | 12 +- js/src/gc/Barrier.h | 10 +- js/src/gc/ForkJoinNursery.cpp | 54 +- js/src/gc/Heap.h | 2 +- js/src/gc/Marking.cpp | 54 +- js/src/gc/Marking.h | 9 + js/src/gc/Nursery.cpp | 60 +- js/src/gc/Nursery.h | 3 +- js/src/gc/Rooting.h | 7 + js/src/gc/StoreBuffer.cpp | 8 +- js/src/gc/Tracer.cpp | 2 +- js/src/gc/Tracer.h | 6 +- js/src/gc/Verifier.cpp | 2 +- js/src/gc/Zone.cpp | 2 +- js/src/jit/BaselineCompiler.cpp | 23 +- js/src/jit/BaselineIC.cpp | 125 +- js/src/jit/BaselineIC.h | 63 +- js/src/jit/BaselineInspector.cpp | 6 +- js/src/jit/BaselineInspector.h | 4 +- js/src/jit/CodeGenerator.cpp | 98 +- js/src/jit/CodeGenerator.h | 2 +- js/src/jit/IonAnalysis.cpp | 10 +- js/src/jit/IonAnalysis.h | 2 +- js/src/jit/IonBuilder.cpp | 20 +- js/src/jit/IonCaches.cpp | 125 +- js/src/jit/IonCaches.h | 15 +- js/src/jit/IonMacroAssembler.cpp | 34 +- js/src/jit/IonMacroAssembler.h | 14 +- js/src/jit/MCallOptimize.cpp | 14 +- js/src/jit/MIR.cpp | 6 +- js/src/jit/MIR.h | 88 +- js/src/jit/ParallelFunctions.cpp | 36 +- js/src/jit/ParallelFunctions.h | 4 +- js/src/jit/ParallelSafetyAnalysis.cpp | 4 +- js/src/jit/RangeAnalysis.cpp | 2 +- js/src/jit/Recover.cpp | 15 +- 
js/src/jit/VMFunctions.cpp | 39 +- js/src/jit/VMFunctions.h | 23 +- .../testDefinePropertyIgnoredAttributes.cpp | 2 +- js/src/jsapi-tests/testLookup.cpp | 2 +- js/src/jsapi-tests/testResolveRecursion.cpp | 4 +- js/src/jsapi.cpp | 56 +- js/src/jsarray.cpp | 250 ++-- js/src/jsarray.h | 7 +- js/src/jsbool.cpp | 2 +- js/src/jscntxt.cpp | 2 +- js/src/jscntxt.h | 6 +- js/src/jsdate.cpp | 4 +- js/src/jsfriendapi.cpp | 9 +- js/src/jsfun.cpp | 10 +- js/src/jsfun.h | 4 +- js/src/jsgc.cpp | 22 +- js/src/jsgc.h | 9 +- js/src/jsgcinlines.h | 2 +- js/src/jsinfer.cpp | 86 +- js/src/jsinfer.h | 22 +- js/src/jsinferinlines.h | 4 +- js/src/jsiter.cpp | 39 +- js/src/jsiter.h | 2 +- js/src/jsobj.cpp | 564 ++++---- js/src/jsobj.h | 945 ++++--------- js/src/jsobjinlines.h | 540 +------- js/src/json.cpp | 7 +- js/src/jsonparser.cpp | 6 +- js/src/jsscript.cpp | 45 +- js/src/jsscript.h | 12 +- js/src/jsstr.cpp | 22 +- js/src/jswatchpoint.cpp | 5 +- js/src/perf/jsperf.cpp | 2 +- js/src/proxy/Proxy.cpp | 6 +- js/src/proxy/ScriptedIndirectProxyHandler.cpp | 10 +- js/src/shell/js.cpp | 10 +- js/src/vm/ArgumentsObject.cpp | 37 +- js/src/vm/ArgumentsObject.h | 10 +- js/src/vm/ArrayBufferObject.cpp | 15 +- js/src/vm/ArrayBufferObject.h | 10 +- js/src/vm/ArrayObject-inl.h | 97 +- js/src/vm/ArrayObject.h | 41 +- js/src/vm/BooleanObject.h | 5 +- js/src/vm/DateObject.h | 2 +- js/src/vm/Debugger-inl.h | 2 +- js/src/vm/Debugger.cpp | 152 ++- js/src/vm/Debugger.h | 14 +- js/src/vm/DebuggerMemory.cpp | 9 +- js/src/vm/DebuggerMemory.h | 2 +- js/src/vm/ErrorObject.cpp | 15 +- js/src/vm/ErrorObject.h | 5 +- js/src/vm/GeneratorObject.h | 4 +- js/src/vm/GlobalObject.cpp | 34 +- js/src/vm/GlobalObject.h | 98 +- js/src/vm/Interpreter-inl.h | 21 +- js/src/vm/Interpreter.cpp | 32 +- js/src/vm/NumberObject.h | 2 +- js/src/vm/ObjectImpl-inl.h | 467 ++++++- js/src/vm/ObjectImpl.cpp | 59 +- js/src/vm/ObjectImpl.h | 1179 ++++++++++++----- js/src/vm/PIC.cpp | 25 +- js/src/vm/PIC.h | 12 +- js/src/vm/ProxyObject.cpp | 8 
+- js/src/vm/ProxyObject.h | 13 +- js/src/vm/RegExpObject.cpp | 35 +- js/src/vm/RegExpObject.h | 8 +- js/src/vm/RegExpStatics.cpp | 12 +- js/src/vm/RegExpStatics.h | 3 +- js/src/vm/RegExpStaticsObject.h | 2 +- js/src/vm/Runtime.cpp | 4 +- js/src/vm/Runtime.h | 4 +- js/src/vm/SavedStacks.cpp | 17 +- js/src/vm/SavedStacks.h | 2 +- js/src/vm/ScopeObject-inl.h | 2 +- js/src/vm/ScopeObject.cpp | 50 +- js/src/vm/ScopeObject.h | 6 +- js/src/vm/SelfHosting.cpp | 46 +- js/src/vm/Shape-inl.h | 4 +- js/src/vm/Shape.cpp | 186 ++- js/src/vm/Shape.h | 5 +- js/src/vm/SharedArrayObject.cpp | 2 +- js/src/vm/SharedTypedArrayObject.cpp | 2 +- js/src/vm/SharedTypedArrayObject.h | 2 +- js/src/vm/Stack.cpp | 2 +- js/src/vm/StringObject-inl.h | 2 +- js/src/vm/StringObject.h | 2 +- js/src/vm/TypedArrayCommon.h | 42 +- js/src/vm/TypedArrayObject.cpp | 20 +- js/src/vm/TypedArrayObject.h | 8 +- js/src/vm/UbiNode.cpp | 2 +- js/src/vm/WeakMapObject.h | 2 +- js/src/vm/WrapperObject.h | 4 +- js/src/vm/Xdr.cpp | 2 +- js/src/vm/Xdr.h | 2 +- 152 files changed, 3671 insertions(+), 3187 deletions(-) diff --git a/js/public/RootingAPI.h b/js/public/RootingAPI.h index 5b7f76431fa..275d06005c2 100644 --- a/js/public/RootingAPI.h +++ b/js/public/RootingAPI.h @@ -885,6 +885,24 @@ class RootedBase JS::Handle as() const; }; +/* + * Augment the generic Handle interface when T = JSObject* with + * downcasting operations. + * + * Given a Handle obj, one can view + * Handle h = obj.as(); + * as an optimization of + * Rooted rooted(cx, &obj->as()); + * Handle h = rooted; + */ +template <> +class HandleBase +{ + public: + template + JS::Handle as() const; +}; + /* Interface substitute for Rooted which does not root the variable's memory. 
*/ template class FakeRooted : public RootedBase @@ -1003,6 +1021,11 @@ template class MaybeRooted static inline JS::MutableHandle toMutableHandle(MutableHandleType v) { return v; } + + template + static inline JS::Handle downcastHandle(HandleType v) { + return v.template as(); + } }; template class MaybeRooted @@ -1019,6 +1042,11 @@ template class MaybeRooted static JS::MutableHandle toMutableHandle(MutableHandleType v) { MOZ_CRASH("Bad conversion"); } + + template + static inline T2* downcastHandle(HandleType v) { + return &v->template as(); + } }; } /* namespace js */ diff --git a/js/src/asmjs/AsmJSLink.cpp b/js/src/asmjs/AsmJSLink.cpp index 1e3ea00b38c..f99f8703e8e 100644 --- a/js/src/asmjs/AsmJSLink.cpp +++ b/js/src/asmjs/AsmJSLink.cpp @@ -44,6 +44,7 @@ #include "jsobjinlines.h" #include "vm/ArrayBufferObject-inl.h" +#include "vm/ObjectImpl-inl.h" using namespace js; using namespace js::jit; @@ -117,7 +118,7 @@ HasPureCoercion(JSContext *cx, HandleValue v) jsid toString = NameToId(cx->names().toString); if (v.toObject().is() && HasObjectValueOf(&v.toObject(), cx) && - ClassMethodIsNative(cx, &v.toObject(), &JSFunction::class_, toString, fun_toString)) + ClassMethodIsNative(cx, &v.toObject().as(), &JSFunction::class_, toString, fun_toString)) { return true; } @@ -897,7 +898,7 @@ CreateExportObject(JSContext *cx, Handle moduleObj) } gc::AllocKind allocKind = gc::GetGCObjectKind(module.numExportedFunctions()); - RootedObject obj(cx, NewBuiltinClassInstance(cx, &JSObject::class_, allocKind)); + RootedNativeObject obj(cx, NewNativeBuiltinClassInstance(cx, &JSObject::class_, allocKind)); if (!obj) return nullptr; diff --git a/js/src/asmjs/AsmJSModule.cpp b/js/src/asmjs/AsmJSModule.cpp index 270c9372bdf..6bcef9a799e 100644 --- a/js/src/asmjs/AsmJSModule.cpp +++ b/js/src/asmjs/AsmJSModule.cpp @@ -868,9 +868,10 @@ AsmJSModuleObject::create(ExclusiveContext *cx, ScopedJSDeletePtr * JSObject *obj = NewObjectWithGivenProto(cx, &AsmJSModuleObject::class_, nullptr, 
nullptr); if (!obj) return nullptr; + AsmJSModuleObject *nobj = &obj->as(); - obj->setReservedSlot(MODULE_SLOT, PrivateValue(module->forget())); - return &obj->as(); + nobj->setReservedSlot(MODULE_SLOT, PrivateValue(module->forget())); + return nobj; } AsmJSModule & diff --git a/js/src/asmjs/AsmJSModule.h b/js/src/asmjs/AsmJSModule.h index fc7fc5e8b69..4ea4c91c466 100644 --- a/js/src/asmjs/AsmJSModule.h +++ b/js/src/asmjs/AsmJSModule.h @@ -1437,7 +1437,7 @@ LookupAsmJSModuleInCache(ExclusiveContext *cx, // directly to user script) which manages the lifetime of an AsmJSModule. A // JSObject is necessary since we want LinkAsmJS/CallAsmJS JSFunctions to be // able to point to their module via their extended slots. -class AsmJSModuleObject : public JSObject +class AsmJSModuleObject : public NativeObject { static const unsigned MODULE_SLOT = 0; diff --git a/js/src/builtin/Intl.cpp b/js/src/builtin/Intl.cpp index c505366de58..252475fdf76 100644 --- a/js/src/builtin/Intl.cpp +++ b/js/src/builtin/Intl.cpp @@ -633,7 +633,7 @@ Collator(JSContext *cx, CallArgs args, bool construct) if (!obj) return false; - obj->setReservedSlot(UCOLLATOR_SLOT, PrivateValue(nullptr)); + obj->as().setReservedSlot(UCOLLATOR_SLOT, PrivateValue(nullptr)); } // 10.1.2.1 steps 1 and 2; 10.1.3.1 steps 1 and 2 @@ -669,7 +669,7 @@ js::intl_Collator(JSContext *cx, unsigned argc, Value *vp) static void collator_finalize(FreeOp *fop, JSObject *obj) { - UCollator *coll = static_cast(obj->getReservedSlot(UCOLLATOR_SLOT).toPrivate()); + UCollator *coll = static_cast(obj->as().getReservedSlot(UCOLLATOR_SLOT).toPrivate()); if (coll) ucol_close(coll); } @@ -732,7 +732,7 @@ InitCollatorClass(JSContext *cx, HandleObject Intl, Handle global bool GlobalObject::initCollatorProto(JSContext *cx, Handle global) { - RootedObject proto(cx, global->createBlankPrototype(cx, &CollatorClass)); + RootedNativeObject proto(cx, global->createBlankPrototype(cx, &CollatorClass)); if (!proto) return false; 
proto->setReservedSlot(UCOLLATOR_SLOT, PrivateValue(nullptr)); @@ -1006,12 +1006,12 @@ js::intl_CompareStrings(JSContext *cx, unsigned argc, Value *vp) bool isCollatorInstance = collator->getClass() == &CollatorClass; UCollator *coll; if (isCollatorInstance) { - coll = static_cast(collator->getReservedSlot(UCOLLATOR_SLOT).toPrivate()); + coll = static_cast(collator->as().getReservedSlot(UCOLLATOR_SLOT).toPrivate()); if (!coll) { coll = NewUCollator(cx, collator); if (!coll) return false; - collator->setReservedSlot(UCOLLATOR_SLOT, PrivateValue(coll)); + collator->as().setReservedSlot(UCOLLATOR_SLOT, PrivateValue(coll)); } } else { // There's no good place to cache the ICU collator for an object @@ -1122,7 +1122,7 @@ NumberFormat(JSContext *cx, CallArgs args, bool construct) if (!obj) return false; - obj->setReservedSlot(UNUMBER_FORMAT_SLOT, PrivateValue(nullptr)); + obj->as().setReservedSlot(UNUMBER_FORMAT_SLOT, PrivateValue(nullptr)); } // 11.1.2.1 steps 1 and 2; 11.1.3.1 steps 1 and 2 @@ -1160,7 +1160,7 @@ static void numberFormat_finalize(FreeOp *fop, JSObject *obj) { UNumberFormat *nf = - static_cast(obj->getReservedSlot(UNUMBER_FORMAT_SLOT).toPrivate()); + static_cast(obj->as().getReservedSlot(UNUMBER_FORMAT_SLOT).toPrivate()); if (nf) unum_close(nf); } @@ -1223,7 +1223,7 @@ InitNumberFormatClass(JSContext *cx, HandleObject Intl, Handle gl bool GlobalObject::initNumberFormatProto(JSContext *cx, Handle global) { - RootedObject proto(cx, global->createBlankPrototype(cx, &NumberFormatClass)); + RootedNativeObject proto(cx, global->createBlankPrototype(cx, &NumberFormatClass)); if (!proto) return false; proto->setReservedSlot(UNUMBER_FORMAT_SLOT, PrivateValue(nullptr)); @@ -1465,12 +1465,12 @@ js::intl_FormatNumber(JSContext *cx, unsigned argc, Value *vp) bool isNumberFormatInstance = numberFormat->getClass() == &NumberFormatClass; UNumberFormat *nf; if (isNumberFormatInstance) { - nf = static_cast(numberFormat->getReservedSlot(UNUMBER_FORMAT_SLOT).toPrivate()); + 
nf = static_cast(numberFormat->as().getReservedSlot(UNUMBER_FORMAT_SLOT).toPrivate()); if (!nf) { nf = NewUNumberFormat(cx, numberFormat); if (!nf) return false; - numberFormat->setReservedSlot(UNUMBER_FORMAT_SLOT, PrivateValue(nf)); + numberFormat->as().setReservedSlot(UNUMBER_FORMAT_SLOT, PrivateValue(nf)); } } else { // There's no good place to cache the ICU number format for an object @@ -1579,7 +1579,7 @@ DateTimeFormat(JSContext *cx, CallArgs args, bool construct) if (!obj) return false; - obj->setReservedSlot(UDATE_FORMAT_SLOT, PrivateValue(nullptr)); + obj->as().setReservedSlot(UDATE_FORMAT_SLOT, PrivateValue(nullptr)); } // 12.1.2.1 steps 1 and 2; 12.1.3.1 steps 1 and 2 @@ -1616,7 +1616,7 @@ js::intl_DateTimeFormat(JSContext *cx, unsigned argc, Value *vp) static void dateTimeFormat_finalize(FreeOp *fop, JSObject *obj) { - UDateFormat *df = static_cast(obj->getReservedSlot(UDATE_FORMAT_SLOT).toPrivate()); + UDateFormat *df = static_cast(obj->as().getReservedSlot(UDATE_FORMAT_SLOT).toPrivate()); if (df) udat_close(df); } @@ -1679,7 +1679,7 @@ InitDateTimeFormatClass(JSContext *cx, HandleObject Intl, Handle bool GlobalObject::initDateTimeFormatProto(JSContext *cx, Handle global) { - RootedObject proto(cx, global->createBlankPrototype(cx, &DateTimeFormatClass)); + RootedNativeObject proto(cx, global->createBlankPrototype(cx, &DateTimeFormatClass)); if (!proto) return false; proto->setReservedSlot(UDATE_FORMAT_SLOT, PrivateValue(nullptr)); @@ -1966,12 +1966,12 @@ js::intl_FormatDateTime(JSContext *cx, unsigned argc, Value *vp) bool isDateTimeFormatInstance = dateTimeFormat->getClass() == &DateTimeFormatClass; UDateFormat *df; if (isDateTimeFormatInstance) { - df = static_cast(dateTimeFormat->getReservedSlot(UDATE_FORMAT_SLOT).toPrivate()); + df = static_cast(dateTimeFormat->as().getReservedSlot(UDATE_FORMAT_SLOT).toPrivate()); if (!df) { df = NewUDateFormat(cx, dateTimeFormat); if (!df) return false; - dateTimeFormat->setReservedSlot(UDATE_FORMAT_SLOT, 
PrivateValue(df)); + dateTimeFormat->as().setReservedSlot(UDATE_FORMAT_SLOT, PrivateValue(df)); } } else { // There's no good place to cache the ICU date-time format for an object diff --git a/js/src/builtin/MapObject.cpp b/js/src/builtin/MapObject.cpp index 5b1dae3747a..44cf8b2043e 100644 --- a/js/src/builtin/MapObject.cpp +++ b/js/src/builtin/MapObject.cpp @@ -19,6 +19,8 @@ #include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" + using namespace js; using mozilla::ArrayLength; @@ -840,7 +842,7 @@ HashableValue::mark(JSTracer *trc) const namespace { -class MapIteratorObject : public JSObject +class MapIteratorObject : public NativeObject { public: static const Class class_; @@ -901,8 +903,8 @@ GlobalObject::initMapIteratorProto(JSContext *cx, Handle global) JSObject *base = GlobalObject::getOrCreateIteratorPrototype(cx, global); if (!base) return false; - Rooted proto(cx, - NewObjectWithGivenProto(cx, &MapIteratorObject::class_, base, global)); + RootedNativeObject proto(cx, + NewNativeObjectWithGivenProto(cx, &MapIteratorObject::class_, base, global)); if (!proto) return false; proto->setSlot(MapIteratorObject::RangeSlot, PrivateValue(nullptr)); @@ -925,7 +927,7 @@ MapIteratorObject::create(JSContext *cx, HandleObject mapobj, ValueMap *data, if (!range) return nullptr; - JSObject *iterobj = NewObjectWithGivenProto(cx, &class_, proto, global); + NativeObject *iterobj = NewNativeObjectWithGivenProto(cx, &class_, proto, global); if (!iterobj) { js_delete(range); return nullptr; @@ -1044,7 +1046,7 @@ static JSObject * InitClass(JSContext *cx, Handle global, const Class *clasp, JSProtoKey key, Native construct, const JSPropertySpec *properties, const JSFunctionSpec *methods) { - Rooted proto(cx, global->createBlankPrototype(cx, clasp)); + RootedNativeObject proto(cx, global->createBlankPrototype(cx, clasp)); if (!proto) return nullptr; proto->setPrivate(nullptr); @@ -1198,7 +1200,7 @@ MapObject::set(JSContext *cx, HandleObject obj, HandleValue k, HandleValue v) 
MapObject* MapObject::create(JSContext *cx) { - RootedObject obj(cx, NewBuiltinClassInstance(cx, &class_)); + RootedNativeObject obj(cx, NewNativeBuiltinClassInstance(cx, &class_)); if (!obj) return nullptr; @@ -1278,7 +1280,7 @@ MapObject::construct(JSContext *cx, unsigned argc, Value *vp) bool MapObject::is(HandleValue v) { - return v.isObject() && v.toObject().hasClass(&class_) && v.toObject().getPrivate(); + return v.isObject() && v.toObject().hasClass(&class_) && v.toObject().as().getPrivate(); } #define ARG0_KEY(cx, args, key) \ @@ -1489,7 +1491,7 @@ js_InitMapClass(JSContext *cx, HandleObject obj) namespace { -class SetIteratorObject : public JSObject +class SetIteratorObject : public NativeObject { public: static const Class class_; @@ -1550,7 +1552,8 @@ GlobalObject::initSetIteratorProto(JSContext *cx, Handle global) JSObject *base = GlobalObject::getOrCreateIteratorPrototype(cx, global); if (!base) return false; - RootedObject proto(cx, NewObjectWithGivenProto(cx, &SetIteratorObject::class_, base, global)); + RootedNativeObject proto(cx, NewNativeObjectWithGivenProto(cx, &SetIteratorObject::class_, + base, global)); if (!proto) return false; proto->setSlot(SetIteratorObject::RangeSlot, PrivateValue(nullptr)); @@ -1573,7 +1576,7 @@ SetIteratorObject::create(JSContext *cx, HandleObject setobj, ValueSet *data, if (!range) return nullptr; - JSObject *iterobj = NewObjectWithGivenProto(cx, &class_, proto, global); + NativeObject *iterobj = NewNativeObjectWithGivenProto(cx, &class_, proto, global); if (!iterobj) { js_delete(range); return nullptr; @@ -1742,7 +1745,7 @@ SetObject::add(JSContext *cx, HandleObject obj, HandleValue k) SetObject* SetObject::create(JSContext *cx) { - RootedObject obj(cx, NewBuiltinClassInstance(cx, &class_)); + RootedNativeObject obj(cx, NewNativeBuiltinClassInstance(cx, &class_)); if (!obj) return nullptr; @@ -1812,7 +1815,7 @@ SetObject::construct(JSContext *cx, unsigned argc, Value *vp) bool SetObject::is(HandleValue v) { - return 
v.isObject() && v.toObject().hasClass(&class_) && v.toObject().getPrivate(); + return v.isObject() && v.toObject().hasClass(&class_) && v.toObject().as().getPrivate(); } ValueSet & diff --git a/js/src/builtin/MapObject.h b/js/src/builtin/MapObject.h index 4b000c1eacb..108bb2e2b27 100644 --- a/js/src/builtin/MapObject.h +++ b/js/src/builtin/MapObject.h @@ -85,7 +85,7 @@ typedef OrderedHashSet ValueSet; -class MapObject : public JSObject { +class MapObject : public NativeObject { public: enum IteratorKind { Keys, Values, Entries }; @@ -130,7 +130,7 @@ class MapObject : public JSObject { static bool clear(JSContext *cx, unsigned argc, Value *vp); }; -class SetObject : public JSObject { +class SetObject : public NativeObject { public: enum IteratorKind { Values, Entries }; static JSObject *initClass(JSContext *cx, JSObject *obj); diff --git a/js/src/builtin/RegExp.cpp b/js/src/builtin/RegExp.cpp index 27599a09037..c46cf1db23c 100644 --- a/js/src/builtin/RegExp.cpp +++ b/js/src/builtin/RegExp.cpp @@ -16,6 +16,8 @@ #include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" + using namespace js; using namespace js::types; @@ -46,7 +48,7 @@ js::CreateRegExpMatchResult(JSContext *cx, HandleString input, const MatchPairs size_t numPairs = matches.length(); MOZ_ASSERT(numPairs > 0); - RootedObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(cx, numPairs, templateObject)); + RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(cx, numPairs, templateObject)); if (!arr) return false; @@ -71,21 +73,21 @@ js::CreateRegExpMatchResult(JSContext *cx, HandleString input, const MatchPairs } /* Set the |index| property. (TemplateObject positions it in slot 0) */ - arr->nativeSetSlot(0, Int32Value(matches[0].start)); + arr->setSlot(0, Int32Value(matches[0].start)); /* Set the |input| property. 
(TemplateObject positions it in slot 1) */ - arr->nativeSetSlot(1, StringValue(input)); + arr->setSlot(1, StringValue(input)); #ifdef DEBUG RootedValue test(cx); RootedId id(cx, NameToId(cx->names().index)); if (!baseops::GetProperty(cx, arr, id, &test)) return false; - MOZ_ASSERT(test == arr->nativeGetSlot(0)); + MOZ_ASSERT(test == arr->getSlot(0)); id = NameToId(cx->names().input); if (!baseops::GetProperty(cx, arr, id, &test)) return false; - MOZ_ASSERT(test == arr->nativeGetSlot(1)); + MOZ_ASSERT(test == arr->getSlot(1)); #endif rval.setObject(*arr); @@ -508,7 +510,7 @@ js_InitRegExpClass(JSContext *cx, HandleObject obj) Rooted global(cx, &obj->as()); - RootedObject proto(cx, global->createBlankPrototype(cx, &RegExpObject::class_)); + RootedNativeObject proto(cx, global->createBlankPrototype(cx, &RegExpObject::class_)); if (!proto) return nullptr; proto->setPrivate(nullptr); diff --git a/js/src/builtin/SymbolObject.h b/js/src/builtin/SymbolObject.h index c61620d025b..f1e846d7562 100644 --- a/js/src/builtin/SymbolObject.h +++ b/js/src/builtin/SymbolObject.h @@ -7,13 +7,12 @@ #ifndef builtin_SymbolObject_h #define builtin_SymbolObject_h -#include "jsobj.h" - +#include "vm/ObjectImpl.h" #include "vm/Symbol.h" namespace js { -class SymbolObject : public JSObject +class SymbolObject : public NativeObject { /* Stores this Symbol object's [[PrimitiveValue]]. 
*/ static const unsigned PRIMITIVE_VALUE_SLOT = 0; diff --git a/js/src/builtin/TestingFunctions.cpp b/js/src/builtin/TestingFunctions.cpp index e52900c5785..fe30fb800d2 100644 --- a/js/src/builtin/TestingFunctions.cpp +++ b/js/src/builtin/TestingFunctions.cpp @@ -36,6 +36,8 @@ #include "jscntxtinlines.h" #include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" + using namespace js; using namespace JS; @@ -1369,7 +1371,7 @@ SetIonCheckGraphCoherency(JSContext *cx, unsigned argc, jsval *vp) return true; } -class CloneBufferObject : public JSObject { +class CloneBufferObject : public NativeObject { static const JSPropertySpec props_[2]; static const size_t DATA_SLOT = 0; static const size_t LENGTH_SLOT = 1; @@ -1382,8 +1384,8 @@ class CloneBufferObject : public JSObject { RootedObject obj(cx, JS_NewObject(cx, Jsvalify(&class_), JS::NullPtr(), JS::NullPtr())); if (!obj) return nullptr; - obj->setReservedSlot(DATA_SLOT, PrivateValue(nullptr)); - obj->setReservedSlot(LENGTH_SLOT, Int32Value(0)); + obj->as().setReservedSlot(DATA_SLOT, PrivateValue(nullptr)); + obj->as().setReservedSlot(LENGTH_SLOT, Int32Value(0)); if (!JS_DefineProperties(cx, obj, props_)) return nullptr; @@ -1976,7 +1978,7 @@ FindPath(JSContext *cx, unsigned argc, jsval *vp) // // { node: undefined, edge: } size_t length = nodes.length(); - RootedObject result(cx, NewDenseFullyAllocatedArray(cx, length)); + RootedArrayObject result(cx, NewDenseFullyAllocatedArray(cx, length)); if (!result) return false; result->ensureDenseInitializedLength(cx, 0, length); diff --git a/js/src/builtin/TypedObject.cpp b/js/src/builtin/TypedObject.cpp index 513e43ff746..878531fd6f5 100644 --- a/js/src/builtin/TypedObject.cpp +++ b/js/src/builtin/TypedObject.cpp @@ -25,6 +25,7 @@ #include "jsatominlines.h" #include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" #include "vm/Shape-inl.h" using mozilla::AssertedCast; @@ -1123,21 +1124,19 @@ StructMetaTypeDescr::construct(JSContext *cx, unsigned int argc, Value *vp) size_t 
StructTypeDescr::fieldCount() const { - return getReservedSlot(JS_DESCR_SLOT_STRUCT_FIELD_NAMES).toObject().getDenseInitializedLength(); + return fieldInfoObject(JS_DESCR_SLOT_STRUCT_FIELD_NAMES).getDenseInitializedLength(); } size_t StructTypeDescr::maybeForwardedFieldCount() const { - JSObject *fieldNames = - MaybeForwarded(&getReservedSlot(JS_DESCR_SLOT_STRUCT_FIELD_NAMES).toObject()); - return fieldNames->getDenseInitializedLength(); + return maybeForwardedFieldInfoObject(JS_DESCR_SLOT_STRUCT_FIELD_NAMES).getDenseInitializedLength(); } bool StructTypeDescr::fieldIndex(jsid id, size_t *out) const { - JSObject &fieldNames = getReservedSlot(JS_DESCR_SLOT_STRUCT_FIELD_NAMES).toObject(); + NativeObject &fieldNames = fieldInfoObject(JS_DESCR_SLOT_STRUCT_FIELD_NAMES); size_t l = fieldNames.getDenseInitializedLength(); for (size_t i = 0; i < l; i++) { JSAtom &a = fieldNames.getDenseElement(i).toString()->asAtom(); @@ -1152,15 +1151,13 @@ StructTypeDescr::fieldIndex(jsid id, size_t *out) const JSAtom & StructTypeDescr::fieldName(size_t index) const { - JSObject &fieldNames = getReservedSlot(JS_DESCR_SLOT_STRUCT_FIELD_NAMES).toObject(); - return fieldNames.getDenseElement(index).toString()->asAtom(); + return fieldInfoObject(JS_DESCR_SLOT_STRUCT_FIELD_NAMES).getDenseElement(index).toString()->asAtom(); } size_t StructTypeDescr::fieldOffset(size_t index) const { - JSObject &fieldOffsets = - getReservedSlot(JS_DESCR_SLOT_STRUCT_FIELD_OFFSETS).toObject(); + NativeObject &fieldOffsets = fieldInfoObject(JS_DESCR_SLOT_STRUCT_FIELD_OFFSETS); MOZ_ASSERT(index < fieldOffsets.getDenseInitializedLength()); return AssertedCast(fieldOffsets.getDenseElement(index).toInt32()); } @@ -1168,8 +1165,7 @@ StructTypeDescr::fieldOffset(size_t index) const size_t StructTypeDescr::maybeForwardedFieldOffset(size_t index) const { - JSObject &fieldOffsets = - *MaybeForwarded(&getReservedSlot(JS_DESCR_SLOT_STRUCT_FIELD_OFFSETS).toObject()); + NativeObject &fieldOffsets = 
maybeForwardedFieldInfoObject(JS_DESCR_SLOT_STRUCT_FIELD_OFFSETS); MOZ_ASSERT(index < fieldOffsets.getDenseInitializedLength()); return AssertedCast(fieldOffsets.getDenseElement(index).toInt32()); } @@ -1177,8 +1173,7 @@ StructTypeDescr::maybeForwardedFieldOffset(size_t index) const SizedTypeDescr& StructTypeDescr::fieldDescr(size_t index) const { - JSObject &fieldDescrs = - getReservedSlot(JS_DESCR_SLOT_STRUCT_FIELD_TYPES).toObject(); + NativeObject &fieldDescrs = fieldInfoObject(JS_DESCR_SLOT_STRUCT_FIELD_TYPES); MOZ_ASSERT(index < fieldDescrs.getDenseInitializedLength()); return fieldDescrs.getDenseElement(index).toObject().as(); } @@ -1186,8 +1181,7 @@ StructTypeDescr::fieldDescr(size_t index) const SizedTypeDescr& StructTypeDescr::maybeForwardedFieldDescr(size_t index) const { - JSObject &fieldDescrs = - *MaybeForwarded(&getReservedSlot(JS_DESCR_SLOT_STRUCT_FIELD_TYPES).toObject()); + NativeObject &fieldDescrs = maybeForwardedFieldInfoObject(JS_DESCR_SLOT_STRUCT_FIELD_TYPES); MOZ_ASSERT(index < fieldDescrs.getDenseInitializedLength()); JSObject &descr = *MaybeForwarded(&fieldDescrs.getDenseElement(index).toObject()); @@ -1301,7 +1295,7 @@ template static JSObject * DefineMetaTypeDescr(JSContext *cx, Handle global, - HandleObject module, + HandleNativeObject module, TypedObjectModuleObject::Slot protoSlot) { RootedAtom className(cx, Atomize(cx, T::class_.name, @@ -1470,7 +1464,7 @@ TypedObject::offset() const { if (is()) return 0; - return getReservedSlot(JS_BUFVIEW_SLOT_BYTEOFFSET).toInt32(); + return fakeNativeGetReservedSlot(JS_BUFVIEW_SLOT_BYTEOFFSET).toInt32(); } int32_t @@ -1586,10 +1580,10 @@ OutlineTypedObject::createUnattachedWithClass(JSContext *cx, if (!obj) return nullptr; - obj->initPrivate(nullptr); - obj->initReservedSlot(JS_BUFVIEW_SLOT_BYTEOFFSET, Int32Value(0)); - obj->initReservedSlot(JS_BUFVIEW_SLOT_LENGTH, Int32Value(length)); - obj->initReservedSlot(JS_BUFVIEW_SLOT_OWNER, NullValue()); + obj->fakeNativeInitPrivate(nullptr); + 
obj->fakeNativeInitReservedSlot(JS_BUFVIEW_SLOT_BYTEOFFSET, Int32Value(0)); + obj->fakeNativeInitReservedSlot(JS_BUFVIEW_SLOT_LENGTH, Int32Value(length)); + obj->fakeNativeInitReservedSlot(JS_BUFVIEW_SLOT_OWNER, NullValue()); return &obj->as(); } @@ -1603,9 +1597,9 @@ OutlineTypedObject::attach(JSContext *cx, ArrayBufferObject &buffer, int32_t off if (!buffer.addView(cx, this)) CrashAtUnhandlableOOM("TypedObject::attach"); - InitArrayBufferViewDataPointer(this, &buffer, offset); - setReservedSlot(JS_BUFVIEW_SLOT_BYTEOFFSET, Int32Value(offset)); - setReservedSlot(JS_BUFVIEW_SLOT_OWNER, ObjectValue(buffer)); + fakeNativeInitPrivate(buffer.dataPointer() + offset); + fakeNativeSetReservedSlot(JS_BUFVIEW_SLOT_BYTEOFFSET, Int32Value(offset)); + fakeNativeSetReservedSlot(JS_BUFVIEW_SLOT_OWNER, ObjectValue(buffer)); } void @@ -1623,11 +1617,11 @@ OutlineTypedObject::attach(JSContext *cx, TypedObject &typedObj, int32_t offset) attach(cx, owner->as(), offset); } else { MOZ_ASSERT(owner->is()); - initPrivate(owner->as().inlineTypedMem() + offset); + fakeNativeInitPrivate(owner->as().inlineTypedMem() + offset); PostBarrierTypedArrayObject(this); - setReservedSlot(JS_BUFVIEW_SLOT_BYTEOFFSET, Int32Value(offset)); - setReservedSlot(JS_BUFVIEW_SLOT_OWNER, ObjectValue(*owner)); + fakeNativeSetReservedSlot(JS_BUFVIEW_SLOT_BYTEOFFSET, Int32Value(offset)); + fakeNativeSetReservedSlot(JS_BUFVIEW_SLOT_OWNER, ObjectValue(*owner)); } } @@ -1767,7 +1761,7 @@ OutlineTypedObject::obj_trace(JSTracer *trc, JSObject *object) // Mark the owner, watching in case it is moved by the tracer. 
JSObject *oldOwner = typedObj.maybeOwner(); - gc::MarkSlot(trc, &typedObj.getFixedSlotRef(JS_BUFVIEW_SLOT_OWNER), "typed object owner"); + gc::MarkSlot(trc, &typedObj.fakeNativeGetSlotRef(JS_BUFVIEW_SLOT_OWNER), "typed object owner"); JSObject *owner = typedObj.maybeOwner(); uint8_t *mem = typedObj.outOfLineTypedMem(); @@ -1779,7 +1773,7 @@ OutlineTypedObject::obj_trace(JSTracer *trc, JSObject *object) owner->as().hasInlineData())) { mem += reinterpret_cast(owner) - reinterpret_cast(oldOwner); - typedObj.setPrivate(mem); + typedObj.fakeNativeSetPrivate(mem); } if (!descr.opaque() || !typedObj.maybeForwardedIsAttached()) @@ -2328,7 +2322,7 @@ TypedObject::obj_enumerate(JSContext *cx, HandleObject obj, JSIterateOp enum_op, /* static */ size_t OutlineTypedObject::offsetOfOwnerSlot() { - return JSObject::getFixedSlotOffset(JS_BUFVIEW_SLOT_OWNER); + return NativeObject::getFixedSlotOffset(JS_BUFVIEW_SLOT_OWNER); } /* static */ size_t @@ -2343,21 +2337,21 @@ OutlineTypedObject::offsetOfDataSlot() MOZ_ASSERT(DATA_SLOT == nfixed - 1); #endif - return JSObject::getPrivateDataOffset(DATA_SLOT); + return NativeObject::getPrivateDataOffset(DATA_SLOT); } /* static */ size_t OutlineTypedObject::offsetOfByteOffsetSlot() { - return JSObject::getFixedSlotOffset(JS_BUFVIEW_SLOT_BYTEOFFSET); + return NativeObject::getFixedSlotOffset(JS_BUFVIEW_SLOT_BYTEOFFSET); } void OutlineTypedObject::neuter(void *newData) { - setSlot(JS_BUFVIEW_SLOT_LENGTH, Int32Value(0)); - setSlot(JS_BUFVIEW_SLOT_BYTEOFFSET, Int32Value(0)); - setPrivate(newData); + fakeNativeSetSlot(JS_BUFVIEW_SLOT_LENGTH, Int32Value(0)); + fakeNativeSetSlot(JS_BUFVIEW_SLOT_BYTEOFFSET, Int32Value(0)); + fakeNativeSetPrivate(newData); } /****************************************************************************** @@ -2383,14 +2377,14 @@ InlineOpaqueTypedObject::create(JSContext *cx, HandleTypeDescr descr) uint8_t * InlineOpaqueTypedObject::inlineTypedMem() const { - return fixedData(0); + return fakeNativeFixedData(0); } /* 
static */ size_t InlineOpaqueTypedObject::offsetOfDataStart() { - return getFixedSlotOffset(0); + return NativeObject::getFixedSlotOffset(0); } /* static */ void @@ -2843,8 +2837,8 @@ js::SetTypedObjectOffset(ThreadSafeContext *, unsigned argc, Value *vp) MOZ_ASSERT(typedObj.isAttached()); int32_t oldOffset = typedObj.offset(); - typedObj.setPrivate((typedObj.typedMem() - oldOffset) + offset); - typedObj.setReservedSlot(JS_BUFVIEW_SLOT_BYTEOFFSET, Int32Value(offset)); + typedObj.fakeNativeSetPrivate((typedObj.typedMem() - oldOffset) + offset); + typedObj.fakeNativeSetReservedSlot(JS_BUFVIEW_SLOT_BYTEOFFSET, Int32Value(offset)); args.rval().setUndefined(); return true; } diff --git a/js/src/builtin/TypedObject.h b/js/src/builtin/TypedObject.h index de8f8389e7b..3046965d860 100644 --- a/js/src/builtin/TypedObject.h +++ b/js/src/builtin/TypedObject.h @@ -144,7 +144,7 @@ class SizedTypedProto; * type descriptor. Eventually will carry most of the type information * we want. */ -class TypedProto : public JSObject +class TypedProto : public NativeObject { public: static const Class class_; @@ -162,7 +162,7 @@ class TypedProto : public JSObject inline type::Kind kind() const; }; -class TypeDescr : public JSObject +class TypeDescr : public NativeObject { public: // This is *intentionally* not defined so as to produce link @@ -509,6 +509,15 @@ class StructTypeDescr : public ComplexTypeDescr // Return the offset of the field at index `index`. size_t fieldOffset(size_t index) const; size_t maybeForwardedFieldOffset(size_t index) const; + + private: + NativeObject &fieldInfoObject(size_t slot) const { + return getReservedSlot(slot).toObject().as(); + } + + NativeObject &maybeForwardedFieldInfoObject(size_t slot) const { + return *MaybeForwarded(&fieldInfoObject(slot)); + } }; typedef Handle HandleStructTypeDescr; @@ -518,7 +527,7 @@ typedef Handle HandleStructTypeDescr; * somewhat, rather than sticking them all into the global object. 
* Eventually it will go away and become a module. */ -class TypedObjectModuleObject : public JSObject { +class TypedObjectModuleObject : public NativeObject { public: enum Slot { ArrayTypePrototype, @@ -530,7 +539,7 @@ class TypedObjectModuleObject : public JSObject { }; /* Base type for transparent and opaque typed objects. */ -class TypedObject : public ArrayBufferViewObject +class TypedObject : public JSObject { private: static const bool IsTypedObjectClass = true; @@ -690,19 +699,19 @@ class OutlineTypedObject : public TypedObject static size_t offsetOfByteOffsetSlot(); JSObject &owner() const { - return getReservedSlot(JS_BUFVIEW_SLOT_OWNER).toObject(); + return fakeNativeGetReservedSlot(JS_BUFVIEW_SLOT_OWNER).toObject(); } JSObject *maybeOwner() const { - return getReservedSlot(JS_BUFVIEW_SLOT_OWNER).toObjectOrNull(); + return fakeNativeGetReservedSlot(JS_BUFVIEW_SLOT_OWNER).toObjectOrNull(); } uint8_t *outOfLineTypedMem() const { - return static_cast(getPrivate(DATA_SLOT)); + return static_cast(fakeNativeGetPrivate(DATA_SLOT)); } int32_t length() const { - return getReservedSlot(JS_BUFVIEW_SLOT_LENGTH).toInt32(); + return fakeNativeGetReservedSlot(JS_BUFVIEW_SLOT_LENGTH).toInt32(); } // Helper for createUnattached() @@ -763,7 +772,7 @@ class InlineOpaqueTypedObject : public TypedObject public: static const Class class_; - static const size_t MaximumSize = JSObject::MAX_FIXED_SLOTS * sizeof(Value); + static const size_t MaximumSize = NativeObject::MAX_FIXED_SLOTS * sizeof(Value); static gc::AllocKind allocKindForTypeDescriptor(TypeDescr *descr) { size_t nbytes = descr->as().size(); diff --git a/js/src/builtin/WeakSetObject.cpp b/js/src/builtin/WeakSetObject.cpp index dbaf1f8a5d9..3222186b735 100644 --- a/js/src/builtin/WeakSetObject.cpp +++ b/js/src/builtin/WeakSetObject.cpp @@ -15,6 +15,8 @@ #include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" + using namespace js; using namespace JS; @@ -48,7 +50,7 @@ WeakSetObject::initClass(JSContext *cx, JSObject 
*obj) { Rooted global(cx, &obj->as()); // Todo: WeakSet.prototype should not be a WeakSet! - Rooted proto(cx, global->createBlankPrototype(cx, &class_)); + RootedNativeObject proto(cx, global->createBlankPrototype(cx, &class_)); if (!proto) return nullptr; proto->setReservedSlot(WEAKSET_MAP_SLOT, UndefinedValue()); @@ -67,7 +69,7 @@ WeakSetObject::initClass(JSContext *cx, JSObject *obj) WeakSetObject* WeakSetObject::create(JSContext *cx) { - RootedObject obj(cx, NewBuiltinClassInstance(cx, &class_)); + RootedNativeObject obj(cx, NewNativeBuiltinClassInstance(cx, &class_)); if (!obj) return nullptr; diff --git a/js/src/builtin/WeakSetObject.h b/js/src/builtin/WeakSetObject.h index b45a514a50c..a4f9ae33a44 100644 --- a/js/src/builtin/WeakSetObject.h +++ b/js/src/builtin/WeakSetObject.h @@ -7,11 +7,11 @@ #ifndef builtin_WeakSetObject_h #define builtin_WeakSetObject_h -#include "jsobj.h" +#include "vm/ObjectImpl.h" namespace js { -class WeakSetObject : public JSObject +class WeakSetObject : public NativeObject { public: static const unsigned RESERVED_SLOTS = 1; diff --git a/js/src/ctypes/CTypes.cpp b/js/src/ctypes/CTypes.cpp index c38be3ee59b..5a61f9a8569 100644 --- a/js/src/ctypes/CTypes.cpp +++ b/js/src/ctypes/CTypes.cpp @@ -3393,14 +3393,14 @@ void CType::Trace(JSTracer* trc, JSObject* obj) { // Make sure our TypeCode slot is legit. If it's not, bail. - jsval slot = obj->getSlot(SLOT_TYPECODE); + jsval slot = obj->as().getSlot(SLOT_TYPECODE); if (slot.isUndefined()) return; // The contents of our slots depends on what kind of type we are. switch (TypeCode(slot.toInt32())) { case TYPE_struct: { - slot = obj->getReservedSlot(SLOT_FIELDINFO); + slot = obj->as().getReservedSlot(SLOT_FIELDINFO); if (slot.isUndefined()) return; @@ -3417,7 +3417,7 @@ CType::Trace(JSTracer* trc, JSObject* obj) } case TYPE_function: { // Check if we have a FunctionInfo. 
- slot = obj->getReservedSlot(SLOT_FNINFO); + slot = obj->as().getReservedSlot(SLOT_FNINFO); if (slot.isUndefined()) return; diff --git a/js/src/frontend/BytecodeEmitter.cpp b/js/src/frontend/BytecodeEmitter.cpp index a1bedbb650c..c3986fe68ff 100644 --- a/js/src/frontend/BytecodeEmitter.cpp +++ b/js/src/frontend/BytecodeEmitter.cpp @@ -39,6 +39,7 @@ #include "frontend/ParseMaps-inl.h" #include "frontend/ParseNode-inl.h" +#include "vm/ObjectImpl-inl.h" #include "vm/ScopeObject-inl.h" using namespace js; @@ -2166,9 +2167,9 @@ IteratorResultShape(ExclusiveContext *cx, BytecodeEmitter *bce, unsigned *shape) { MOZ_ASSERT(bce->script->compileAndGo()); - RootedObject obj(cx); + RootedNativeObject obj(cx); gc::AllocKind kind = GuessObjectGCKind(2); - obj = NewBuiltinClassInstance(cx, &JSObject::class_, kind); + obj = NewNativeBuiltinClassInstance(cx, &JSObject::class_, kind); if (!obj) return false; @@ -4065,7 +4066,7 @@ ParseNode::getConstantValue(ExclusiveContext *cx, AllowConstantObjects allowObje pn = pn_head; } - RootedObject obj(cx, NewDenseFullyAllocatedArray(cx, count, nullptr, MaybeSingletonObject)); + RootedArrayObject obj(cx, NewDenseFullyAllocatedArray(cx, count, nullptr, MaybeSingletonObject)); if (!obj) return false; @@ -4094,7 +4095,8 @@ ParseNode::getConstantValue(ExclusiveContext *cx, AllowConstantObjects allowObje allowObjects = DontAllowObjects; gc::AllocKind kind = GuessObjectGCKind(pn_count); - RootedObject obj(cx, NewBuiltinClassInstance(cx, &JSObject::class_, kind, MaybeSingletonObject)); + RootedNativeObject obj(cx, NewNativeBuiltinClassInstance(cx, &JSObject::class_, + kind, MaybeSingletonObject)); if (!obj) return false; @@ -4157,7 +4159,7 @@ EmitSingletonInitialiser(ExclusiveContext *cx, BytecodeEmitter *bce, ParseNode * if (!pn->getConstantValue(cx, ParseNode::AllowObjects, &value)) return false; - RootedObject obj(cx, &value.toObject()); + RootedNativeObject obj(cx, &value.toObject().as()); if (!obj->is() && !JSObject::setSingletonType(cx, 
obj)) return false; @@ -4177,7 +4179,7 @@ EmitCallSiteObject(ExclusiveContext *cx, BytecodeEmitter *bce, ParseNode *pn) MOZ_ASSERT(value.isObject()); - ObjectBox *objbox1 = bce->parser->newObjectBox(&value.toObject()); + ObjectBox *objbox1 = bce->parser->newObjectBox(&value.toObject().as()); if (!objbox1) return false; @@ -4186,7 +4188,7 @@ EmitCallSiteObject(ExclusiveContext *cx, BytecodeEmitter *bce, ParseNode *pn) MOZ_ASSERT(value.isObject()); - ObjectBox *objbox2 = bce->parser->newObjectBox(&value.toObject()); + ObjectBox *objbox2 = bce->parser->newObjectBox(&value.toObject().as()); if (!objbox2) return false; @@ -6219,10 +6221,10 @@ EmitObject(ExclusiveContext *cx, BytecodeEmitter *bce, ParseNode *pn) * Try to construct the shape of the object as we go, so we can emit a * JSOP_NEWOBJECT with the final shape instead. */ - RootedObject obj(cx); + RootedNativeObject obj(cx); if (bce->script->compileAndGo()) { gc::AllocKind kind = GuessObjectGCKind(pn->pn_count); - obj = NewBuiltinClassInstance(cx, &JSObject::class_, kind, TenuredObject); + obj = NewNativeBuiltinClassInstance(cx, &JSObject::class_, kind, TenuredObject); if (!obj) return false; } @@ -6904,7 +6906,7 @@ frontend::EmitTree(ExclusiveContext *cx, BytecodeEmitter *bce, ParseNode *pn) // for the template is accurate. We don't do this here as we // want to use types::InitObject, which requires a finished // script. 
- JSObject *obj = &value.toObject(); + NativeObject *obj = &value.toObject().as(); if (!ObjectElements::MakeElementsCopyOnWrite(cx, obj)) return false; @@ -7278,7 +7280,7 @@ CGObjectList::finish(ObjectArray *array) MOZ_ASSERT(length <= INDEX_LIMIT); MOZ_ASSERT(length == array->length); - js::HeapPtrObject *cursor = array->vector + array->length; + js::HeapPtrNativeObject *cursor = array->vector + array->length; ObjectBox *objbox = lastbox; do { --cursor; diff --git a/js/src/frontend/FullParseHandler.h b/js/src/frontend/FullParseHandler.h index 0cf427fe7ea..a7c16bf246b 100644 --- a/js/src/frontend/FullParseHandler.h +++ b/js/src/frontend/FullParseHandler.h @@ -172,7 +172,7 @@ class FullParseHandler // Specifically, a Boxer has a .newObjectBox(T) method that accepts a // Rooted argument and returns an ObjectBox*. template - ParseNode *newRegExp(HandleObject reobj, const TokenPos &pos, Boxer &boxer) { + ParseNode *newRegExp(RegExpObject *reobj, const TokenPos &pos, Boxer &boxer) { ObjectBox *objbox = boxer.newObjectBox(reobj); if (!objbox) return null(); diff --git a/js/src/frontend/ParseNode.cpp b/js/src/frontend/ParseNode.cpp index 32d737613d0..796a09c437f 100644 --- a/js/src/frontend/ParseNode.cpp +++ b/js/src/frontend/ParseNode.cpp @@ -754,7 +754,7 @@ NameNode::dump(int indent) } #endif -ObjectBox::ObjectBox(JSObject *object, ObjectBox* traceLink) +ObjectBox::ObjectBox(NativeObject *object, ObjectBox* traceLink) : object(object), traceLink(traceLink), emitLink(nullptr) diff --git a/js/src/frontend/ParseNode.h b/js/src/frontend/ParseNode.h index 56ec17981cb..8cf57a0cb76 100644 --- a/js/src/frontend/ParseNode.h +++ b/js/src/frontend/ParseNode.h @@ -1498,9 +1498,9 @@ ParseNode::isConstant() class ObjectBox { public: - JSObject *object; + NativeObject *object; - ObjectBox(JSObject *object, ObjectBox *traceLink); + ObjectBox(NativeObject *object, ObjectBox *traceLink); bool isFunctionBox() { return object->is(); } FunctionBox *asFunctionBox(); void trace(JSTracer 
*trc); diff --git a/js/src/frontend/Parser.cpp b/js/src/frontend/Parser.cpp index 5f2f3fbb05f..da4ee15c22c 100644 --- a/js/src/frontend/Parser.cpp +++ b/js/src/frontend/Parser.cpp @@ -537,7 +537,7 @@ Parser::~Parser() template ObjectBox * -Parser::newObjectBox(JSObject *obj) +Parser::newObjectBox(NativeObject *obj) { MOZ_ASSERT(obj && !IsPoisonedPtr(obj)); @@ -2961,7 +2961,7 @@ LexicalLookup(ContextT *ct, HandleAtom atom, int *slotp, typename ContextT::Stmt continue; StaticBlockObject &blockObj = stmt->staticBlock(); - Shape *shape = blockObj.nativeLookup(ct->sc->context, id); + Shape *shape = blockObj.lookup(ct->sc->context, id); if (shape) { if (slotp) *slotp = blockObj.shapeToIndex(*shape); @@ -7219,7 +7219,7 @@ Parser::arrayInitializer() bool spread = false, missingTrailingComma = false; uint32_t index = 0; for (; ; index++) { - if (index == JSObject::NELEMENTS_LIMIT) { + if (index == NativeObject::NELEMENTS_LIMIT) { report(ParseError, false, null(), JSMSG_ARRAY_INIT_TOO_BIG); return null(); } diff --git a/js/src/frontend/Parser.h b/js/src/frontend/Parser.h index 909fc5215de..d4e6ad9c1af 100644 --- a/js/src/frontend/Parser.h +++ b/js/src/frontend/Parser.h @@ -429,7 +429,7 @@ class Parser : private JS::AutoGCRooter, public StrictModeGetter * Allocate a new parsed object or function container from * cx->tempLifoAlloc. 
*/ - ObjectBox *newObjectBox(JSObject *obj); + ObjectBox *newObjectBox(NativeObject *obj); FunctionBox *newFunctionBox(Node fn, JSFunction *fun, ParseContext *pc, Directives directives, GeneratorKind generatorKind); diff --git a/js/src/frontend/SyntaxParseHandler.h b/js/src/frontend/SyntaxParseHandler.h index 01b01001bf7..7073a71bc07 100644 --- a/js/src/frontend/SyntaxParseHandler.h +++ b/js/src/frontend/SyntaxParseHandler.h @@ -95,7 +95,7 @@ class SyntaxParseHandler Node newNullLiteral(const TokenPos &pos) { return NodeGeneric; } template - Node newRegExp(JSObject *reobj, const TokenPos &pos, Boxer &boxer) { return NodeGeneric; } + Node newRegExp(RegExpObject *reobj, const TokenPos &pos, Boxer &boxer) { return NodeGeneric; } Node newConditional(Node cond, Node thenExpr, Node elseExpr) { return NodeGeneric; } diff --git a/js/src/gc/Barrier.cpp b/js/src/gc/Barrier.cpp index 812b2510ae6..99d5f1f3b8d 100644 --- a/js/src/gc/Barrier.cpp +++ b/js/src/gc/Barrier.cpp @@ -34,16 +34,16 @@ bool HeapSlot::preconditionForSet(JSObject *owner, Kind kind, uint32_t slot) { return kind == Slot - ? &owner->getSlotRef(slot) == this - : &owner->getDenseElement(slot) == (const Value *)this; + ? &owner->fakeNativeGetSlotRef(slot) == this + : &owner->fakeNativeGetDenseElement(slot) == (const Value *)this; } bool HeapSlot::preconditionForSet(Zone *zone, JSObject *owner, Kind kind, uint32_t slot) { bool ok = kind == Slot - ? &owner->getSlotRef(slot) == this - : &owner->getDenseElement(slot) == (const Value *)this; + ? &owner->fakeNativeGetSlotRef(slot) == this + : &owner->fakeNativeGetDenseElement(slot) == (const Value *)this; return ok && owner->zone() == zone; } @@ -51,8 +51,8 @@ bool HeapSlot::preconditionForWriteBarrierPost(JSObject *obj, Kind kind, uint32_t slot, Value target) const { return kind == Slot - ? obj->getSlotAddressUnchecked(slot)->get() == target - : static_cast(obj->getDenseElements() + slot)->get() == target; + ? 
obj->fakeNativeGetSlotAddressUnchecked(slot)->get() == target + : static_cast(obj->fakeNativeGetDenseElements() + slot)->get() == target; } bool diff --git a/js/src/gc/Barrier.h b/js/src/gc/Barrier.h index 166dc136664..cb5cfdc0b78 100644 --- a/js/src/gc/Barrier.h +++ b/js/src/gc/Barrier.h @@ -161,6 +161,8 @@ class Symbol; namespace js { +class NativeObject; +class ArrayObject; class ArgumentsObject; class ArrayBufferObjectMaybeShared; class ArrayBufferObject; @@ -173,7 +175,6 @@ class GlobalObject; class LazyScript; class NestedScopeObject; class Nursery; -class ObjectImpl; class PropertyName; class SavedFrame; class ScopeObject; @@ -202,6 +203,8 @@ StringIsPermanentAtom(JSString *str); namespace gc { template struct MapTypeToTraceKind {}; +template <> struct MapTypeToTraceKind { static const JSGCTraceKind kind = JSTRACE_OBJECT; }; +template <> struct MapTypeToTraceKind { static const JSGCTraceKind kind = JSTRACE_OBJECT; }; template <> struct MapTypeToTraceKind { static const JSGCTraceKind kind = JSTRACE_OBJECT; }; template <> struct MapTypeToTraceKind{ static const JSGCTraceKind kind = JSTRACE_OBJECT; }; template <> struct MapTypeToTraceKind{ static const JSGCTraceKind kind = JSTRACE_OBJECT; }; @@ -219,7 +222,6 @@ template <> struct MapTypeToTraceKind { static const JSGCTrace template <> struct MapTypeToTraceKind { static const JSGCTraceKind kind = JSTRACE_STRING; }; template <> struct MapTypeToTraceKind { static const JSGCTraceKind kind = JSTRACE_LAZY_SCRIPT; }; template <> struct MapTypeToTraceKind{ static const JSGCTraceKind kind = JSTRACE_OBJECT; }; -template <> struct MapTypeToTraceKind { static const JSGCTraceKind kind = JSTRACE_OBJECT; }; template <> struct MapTypeToTraceKind { static const JSGCTraceKind kind = JSTRACE_STRING; }; template <> struct MapTypeToTraceKind { static const JSGCTraceKind kind = JSTRACE_OBJECT; }; template <> struct MapTypeToTraceKind { static const JSGCTraceKind kind = JSTRACE_OBJECT; }; @@ -778,6 +780,7 @@ class ReadBarriered void 
set(T v) { value = v; } }; +class ArrayObject; class ArrayBufferObject; class NestedScopeObject; class DebugScopeObject; @@ -801,8 +804,11 @@ typedef PreBarriered PreBarrieredAtom; typedef RelocatablePtr RelocatablePtrObject; typedef RelocatablePtr RelocatablePtrScript; +typedef RelocatablePtr RelocatablePtrNativeObject; typedef RelocatablePtr RelocatablePtrNestedScopeObject; +typedef HeapPtr HeapPtrNativeObject; +typedef HeapPtr HeapPtrArrayObject; typedef HeapPtr HeapPtrArrayBufferObjectMaybeShared; typedef HeapPtr HeapPtrArrayBufferObject; typedef HeapPtr HeapPtrBaseShape; diff --git a/js/src/gc/ForkJoinNursery.cpp b/js/src/gc/ForkJoinNursery.cpp index c193a03d1fa..71d60fc16a7 100644 --- a/js/src/gc/ForkJoinNursery.cpp +++ b/js/src/gc/ForkJoinNursery.cpp @@ -538,9 +538,9 @@ ForkJoinNursery::allocateObject(size_t baseSize, size_t numDynamic, bool& tooLar tooLarge = false; return nullptr; } - obj->setInitialSlots(numDynamic - ? reinterpret_cast(size_t(obj) + baseSize) - : nullptr); + obj->fakeNativeSetInitialSlots(numDynamic + ? 
reinterpret_cast(size_t(obj) + baseSize) + : nullptr); return obj; } @@ -668,12 +668,13 @@ ForkJoinNursery::traceObject(ForkJoinNurseryCollectionTracer *trc, JSObject *obj if (!obj->isNative()) return; + NativeObject *nobj = &obj->as(); - if (!obj->hasEmptyElements()) - markSlots(obj->getDenseElements(), obj->getDenseInitializedLength()); + if (!nobj->hasEmptyElements()) + markSlots(nobj->getDenseElements(), nobj->getDenseInitializedLength()); HeapSlot *fixedStart, *fixedEnd, *dynStart, *dynEnd; - obj->getSlotRange(0, obj->slotSpan(), &fixedStart, &fixedEnd, &dynStart, &dynEnd); + nobj->getSlotRange(0, nobj->slotSpan(), &fixedStart, &fixedEnd, &dynStart, &dynEnd); markSlots(fixedStart, fixedEnd); markSlots(dynStart, dynEnd); } @@ -714,20 +715,21 @@ AllocKind ForkJoinNursery::getObjectAllocKind(JSObject *obj) { if (obj->is()) { - MOZ_ASSERT(obj->numFixedSlots() == 0); + ArrayObject *aobj = &obj->as(); + MOZ_ASSERT(aobj->numFixedSlots() == 0); // Use minimal size object if we are just going to copy the pointer. - if (!isInsideFromspace((void *)obj->getElementsHeader())) + if (!isInsideFromspace((void *)aobj->getElementsHeader())) return FINALIZE_OBJECT0_BACKGROUND; - size_t nelements = obj->getDenseCapacity(); + size_t nelements = aobj->getDenseCapacity(); return GetBackgroundAllocKind(GetGCArrayKind(nelements)); } if (obj->is()) return obj->as().getAllocKind(); - AllocKind kind = GetGCObjectFixedSlotsKind(obj->numFixedSlots()); + AllocKind kind = GetGCObjectFixedSlotsKind(obj->fakeNativeNumFixedSlots()); MOZ_ASSERT(!IsBackgroundFinalized(kind)); MOZ_ASSERT(CanBeFinalizedInBackground(kind, obj->getClass())); return GetBackgroundAllocKind(kind); @@ -808,7 +810,7 @@ ForkJoinNursery::copyObjectToTospace(JSObject *dst, JSObject *src, AllocKind dst // We deal with this by copying elements manually, possibly re-inlining // them if there is adequate room inline in dst. 
if (src->is()) - srcSize = movedSize = sizeof(ObjectImpl); + srcSize = movedSize = sizeof(NativeObject); js_memcpy(dst, src, srcSize); movedSize += copySlotsToTospace(dst, src, dstKind); @@ -827,38 +829,38 @@ size_t ForkJoinNursery::copySlotsToTospace(JSObject *dst, JSObject *src, AllocKind dstKind) { // Fixed slots have already been copied over. - if (!src->hasDynamicSlots()) + if (!src->fakeNativeHasDynamicSlots()) return 0; - if (!isInsideFromspace(src->slots)) { - hugeSlots[hugeSlotsFrom].remove(src->slots); + if (!isInsideFromspace(src->fakeNativeSlots())) { + hugeSlots[hugeSlotsFrom].remove(src->fakeNativeSlots()); if (!isEvacuating_) - hugeSlots[hugeSlotsNew].put(src->slots); + hugeSlots[hugeSlotsNew].put(src->fakeNativeSlots()); return 0; } - size_t count = src->numDynamicSlots(); - dst->slots = allocateInTospace(count); - if (!dst->slots) + size_t count = src->fakeNativeNumDynamicSlots(); + dst->fakeNativeSlots() = allocateInTospace(count); + if (!dst->fakeNativeSlots()) CrashAtUnhandlableOOM("Failed to allocate slots while moving object."); - js_memcpy(dst->slots, src->slots, count * sizeof(HeapSlot)); - setSlotsForwardingPointer(src->slots, dst->slots, count); + js_memcpy(dst->fakeNativeSlots(), src->fakeNativeSlots(), count * sizeof(HeapSlot)); + setSlotsForwardingPointer(src->fakeNativeSlots(), dst->fakeNativeSlots(), count); return count * sizeof(HeapSlot); } size_t ForkJoinNursery::copyElementsToTospace(JSObject *dst, JSObject *src, AllocKind dstKind) { - if (src->hasEmptyElements() || src->denseElementsAreCopyOnWrite()) + if (src->fakeNativeHasEmptyElements() || src->fakeNativeDenseElementsAreCopyOnWrite()) return 0; - ObjectElements *srcHeader = src->getElementsHeader(); + ObjectElements *srcHeader = src->fakeNativeGetElementsHeader(); ObjectElements *dstHeader; // TODO Bug 874151: Prefer to put element data inline if we have space. // (Note, not a correctness issue.) 
if (!isInsideFromspace(srcHeader)) { - MOZ_ASSERT(src->elements == dst->elements); + MOZ_ASSERT(src->fakeNativeElements() == dst->fakeNativeElements()); hugeSlots[hugeSlotsFrom].remove(reinterpret_cast(srcHeader)); if (!isEvacuating_) hugeSlots[hugeSlotsNew].put(reinterpret_cast(srcHeader)); @@ -869,8 +871,8 @@ ForkJoinNursery::copyElementsToTospace(JSObject *dst, JSObject *src, AllocKind d // Unlike other objects, Arrays can have fixed elements. if (src->is() && nslots <= GetGCKindSlots(dstKind)) { - dst->setFixedElements(); - dstHeader = dst->getElementsHeader(); + dst->as().setFixedElements(); + dstHeader = dst->as().getElementsHeader(); js_memcpy(dstHeader, srcHeader, nslots * sizeof(HeapSlot)); setElementsForwardingPointer(srcHeader, dstHeader, nslots); return nslots * sizeof(HeapSlot); @@ -882,7 +884,7 @@ ForkJoinNursery::copyElementsToTospace(JSObject *dst, JSObject *src, AllocKind d CrashAtUnhandlableOOM("Failed to allocate elements while moving object."); js_memcpy(dstHeader, srcHeader, nslots * sizeof(HeapSlot)); setElementsForwardingPointer(srcHeader, dstHeader, nslots); - dst->elements = dstHeader->elements(); + dst->fakeNativeElements() = dstHeader->elements(); return nslots * sizeof(HeapSlot); } diff --git a/js/src/gc/Heap.h b/js/src/gc/Heap.h index 6cc59f4a40c..ce5e6b9c5ca 100644 --- a/js/src/gc/Heap.h +++ b/js/src/gc/Heap.h @@ -183,7 +183,7 @@ class TenuredCell : public Cell MOZ_ALWAYS_INLINE void unmark(uint32_t color) const; MOZ_ALWAYS_INLINE void copyMarkBitsFrom(const TenuredCell *src); - // Note: this is in TenuredCell because ObjectImpl subclasses are sometimes + // Note: this is in TenuredCell because JSObject subclasses are sometimes // used tagged. 
static MOZ_ALWAYS_INLINE bool isNullLike(const Cell *thing) { return !thing; } diff --git a/js/src/gc/Marking.cpp b/js/src/gc/Marking.cpp index f1139f9bf49..17b081d8403 100644 --- a/js/src/gc/Marking.cpp +++ b/js/src/gc/Marking.cpp @@ -13,6 +13,7 @@ #include "jit/IonCode.h" #include "js/SliceBudget.h" #include "vm/ArgumentsObject.h" +#include "vm/ArrayObject.h" #include "vm/ScopeObject.h" #include "vm/Shape.h" #include "vm/Symbol.h" @@ -68,7 +69,7 @@ JS_PUBLIC_DATA(void * const) JS::NullPtr::constNullValue = nullptr; */ static inline void -PushMarkStack(GCMarker *gcmarker, ObjectImpl *thing); +PushMarkStack(GCMarker *gcmarker, JSObject *thing); static inline void PushMarkStack(GCMarker *gcmarker, JSFunction *thing); @@ -254,6 +255,13 @@ SetMaybeAliveFlag(JSObject *thing) thing->compartment()->maybeAlive = true; } +template<> +void +SetMaybeAliveFlag(NativeObject *thing) +{ + thing->compartment()->maybeAlive = true; +} + template<> void SetMaybeAliveFlag(JSScript *thing) @@ -629,6 +637,8 @@ Update##base##IfRelocated(JSRuntime *rt, type **thingp) DeclMarkerImpl(BaseShape, BaseShape) DeclMarkerImpl(BaseShape, UnownedBaseShape) DeclMarkerImpl(JitCode, jit::JitCode) +DeclMarkerImpl(Object, NativeObject) +DeclMarkerImpl(Object, ArrayObject) DeclMarkerImpl(Object, ArgumentsObject) DeclMarkerImpl(Object, ArrayBufferObject) DeclMarkerImpl(Object, ArrayBufferObjectMaybeShared) @@ -638,7 +648,6 @@ DeclMarkerImpl(Object, GlobalObject) DeclMarkerImpl(Object, JSObject) DeclMarkerImpl(Object, JSFunction) DeclMarkerImpl(Object, NestedScopeObject) -DeclMarkerImpl(Object, ObjectImpl) DeclMarkerImpl(Object, SavedFrame) DeclMarkerImpl(Object, ScopeObject) DeclMarkerImpl(Object, SharedArrayBufferObject) @@ -931,7 +940,7 @@ gc::MarkObjectSlots(JSTracer *trc, JSObject *obj, uint32_t start, uint32_t nslot MOZ_ASSERT(obj->isNative()); for (uint32_t i = start; i < (start + nslots); ++i) { trc->setTracingDetails(js_GetObjectSlotName, obj, i); - MarkValueInternal(trc, 
obj->nativeGetSlotRef(i).unsafeGet()); + MarkValueInternal(trc, obj->fakeNativeGetSlotRef(i).unsafeGet()); } } @@ -1036,7 +1045,7 @@ gc::IsCellAboutToBeFinalized(Cell **thingp) JS_COMPARTMENT_ASSERT_STR(rt, sym) static void -PushMarkStack(GCMarker *gcmarker, ObjectImpl *thing) +PushMarkStack(GCMarker *gcmarker, JSObject *thing) { JS_COMPARTMENT_ASSERT(gcmarker->runtime(), thing); MOZ_ASSERT(!IsInsideNursery(thing)); @@ -1541,7 +1550,7 @@ struct SlotArrayLayout HeapSlot *start; uintptr_t index; }; - JSObject *obj; + NativeObject *obj; static void staticAsserts() { /* This should have the same layout as three mark stack items. */ @@ -1566,7 +1575,7 @@ GCMarker::saveValueRanges() *p &= ~StackTagMask; p -= 2; SlotArrayLayout *arr = reinterpret_cast(p); - JSObject *obj = arr->obj; + NativeObject *obj = arr->obj; MOZ_ASSERT(obj->isNative()); HeapSlot *vp = obj->getDenseElements(); @@ -1597,7 +1606,7 @@ GCMarker::saveValueRanges() } bool -GCMarker::restoreValueArray(JSObject *obj, void **vpp, void **endp) +GCMarker::restoreValueArray(NativeObject *obj, void **vpp, void **endp) { uintptr_t start = stack.pop(); HeapSlot::Kind kind = (HeapSlot::Kind) stack.pop(); @@ -1645,7 +1654,7 @@ GCMarker::processMarkStackOther(uintptr_t tag, uintptr_t addr) ScanTypeObject(this, reinterpret_cast(addr)); } else if (tag == SavedValueArrayTag) { MOZ_ASSERT(!(addr & CellMask)); - JSObject *obj = reinterpret_cast(addr); + NativeObject *obj = reinterpret_cast(addr); HeapValue *vp, *end; if (restoreValueArray(obj, (void **)&vp, (void **)&end)) pushValueArray(obj, vp, end); @@ -1759,38 +1768,39 @@ GCMarker::processMarkStackTop(SliceBudget &budget) if (!shape->isNative()) return; - unsigned nslots = obj->slotSpan(); + NativeObject *nobj = &obj->as(); + unsigned nslots = nobj->slotSpan(); do { - if (obj->hasEmptyElements()) + if (nobj->hasEmptyElements()) break; - if (obj->denseElementsAreCopyOnWrite()) { - JSObject *owner = obj->getElementsHeader()->ownerObject(); - if (owner != obj) { + if 
(nobj->denseElementsAreCopyOnWrite()) { + JSObject *owner = nobj->getElementsHeader()->ownerObject(); + if (owner != nobj) { PushMarkStack(this, owner); break; } } - vp = obj->getDenseElementsAllowCopyOnWrite(); - end = vp + obj->getDenseInitializedLength(); + vp = nobj->getDenseElementsAllowCopyOnWrite(); + end = vp + nobj->getDenseInitializedLength(); if (!nslots) goto scan_value_array; - pushValueArray(obj, vp, end); + pushValueArray(nobj, vp, end); } while (false); - vp = obj->fixedSlots(); - if (obj->slots) { - unsigned nfixed = obj->numFixedSlots(); + vp = nobj->fixedSlots(); + if (nobj->slots) { + unsigned nfixed = nobj->numFixedSlots(); if (nslots > nfixed) { - pushValueArray(obj, vp, vp + nfixed); - vp = obj->slots; + pushValueArray(nobj, vp, vp + nfixed); + vp = nobj->slots; end = vp + (nslots - nfixed); goto scan_value_array; } } - MOZ_ASSERT(nslots <= obj->numFixedSlots()); + MOZ_ASSERT(nslots <= nobj->numFixedSlots()); end = vp + nslots; goto scan_value_array; } diff --git a/js/src/gc/Marking.h b/js/src/gc/Marking.h index ce2ecd1a95d..b1e732f61b6 100644 --- a/js/src/gc/Marking.h +++ b/js/src/gc/Marking.h @@ -105,6 +105,8 @@ type *Update##base##IfRelocated(JSRuntime *rt, type **thingp); DeclMarker(BaseShape, BaseShape) DeclMarker(BaseShape, UnownedBaseShape) DeclMarker(JitCode, jit::JitCode) +DeclMarker(Object, NativeObject) +DeclMarker(Object, ArrayObject) DeclMarker(Object, ArgumentsObject) DeclMarker(Object, ArrayBufferObject) DeclMarker(Object, ArrayBufferObjectMaybeShared) @@ -305,6 +307,13 @@ Mark(JSTracer *trc, JSObject **objp, const char *name) MarkObjectUnbarriered(trc, objp, name); } +/* For use by Debugger::WeakMap's missingScopes HashKeyRef instantiation. */ +inline void +Mark(JSTracer *trc, NativeObject **obj, const char *name) +{ + MarkObjectUnbarriered(trc, obj, name); +} + /* For use by Debugger::WeakMap's proxiedScopes HashKeyRef instantiation. 
*/ inline void Mark(JSTracer *trc, ScopeObject **obj, const char *name) diff --git a/js/src/gc/Nursery.cpp b/js/src/gc/Nursery.cpp index d16a40ba369..e5175347aa0 100644 --- a/js/src/gc/Nursery.cpp +++ b/js/src/gc/Nursery.cpp @@ -171,7 +171,7 @@ js::Nursery::allocateObject(JSContext *cx, size_t size, size_t numDynamic) size_t totalSize = size + sizeof(HeapSlot) * numDynamic; JSObject *obj = static_cast(allocate(totalSize)); if (obj) { - obj->setInitialSlots(reinterpret_cast(size_t(obj) + size)); + obj->fakeNativeSetInitialSlots(reinterpret_cast(size_t(obj) + size)); TraceNurseryAlloc(obj, size); return obj; } @@ -188,7 +188,7 @@ js::Nursery::allocateObject(JSContext *cx, size_t size, size_t numDynamic) JSObject *obj = static_cast(allocate(size)); if (obj) - obj->setInitialSlots(slots); + obj->fakeNativeSetInitialSlots(slots); else freeSlots(slots); @@ -367,13 +367,14 @@ static AllocKind GetObjectAllocKindForCopy(const Nursery &nursery, JSObject *obj) { if (obj->is()) { - MOZ_ASSERT(obj->numFixedSlots() == 0); + ArrayObject *aobj = &obj->as(); + MOZ_ASSERT(aobj->numFixedSlots() == 0); /* Use minimal size object if we are just going to copy the pointer. 
*/ - if (!nursery.isInside(obj->getElementsHeader())) + if (!nursery.isInside(aobj->getElementsHeader())) return FINALIZE_OBJECT0_BACKGROUND; - size_t nelements = obj->getDenseCapacity(); + size_t nelements = aobj->getDenseCapacity(); return GetBackgroundAllocKind(GetGCArrayKind(nelements)); } @@ -399,7 +400,7 @@ GetObjectAllocKindForCopy(const Nursery &nursery, JSObject *obj) return InlineOpaqueTypedObject::allocKindForTypeDescriptor(descr); } - AllocKind kind = GetGCObjectFixedSlotsKind(obj->numFixedSlots()); + AllocKind kind = GetGCObjectFixedSlotsKind(obj->fakeNativeNumFixedSlots()); MOZ_ASSERT(!IsBackgroundFinalized(kind)); MOZ_ASSERT(CanBeFinalizedInBackground(kind, obj->getClass())); return GetBackgroundAllocKind(kind); @@ -518,14 +519,15 @@ js::Nursery::traceObject(MinorCollectionTracer *trc, JSObject *obj) MOZ_ASSERT(obj->isNative() == clasp->isNative()); if (!clasp->isNative()) return; + NativeObject *nobj = &obj->as(); // Note: the contents of copy on write elements pointers are filled in // during parsing and cannot contain nursery pointers. - if (!obj->hasEmptyElements() && !obj->denseElementsAreCopyOnWrite()) - markSlots(trc, obj->getDenseElements(), obj->getDenseInitializedLength()); + if (!nobj->hasEmptyElements() && !nobj->denseElementsAreCopyOnWrite()) + markSlots(trc, nobj->getDenseElements(), nobj->getDenseInitializedLength()); HeapSlot *fixedStart, *fixedEnd, *dynStart, *dynEnd; - obj->getSlotRange(0, obj->slotSpan(), &fixedStart, &fixedEnd, &dynStart, &dynEnd); + nobj->getSlotRange(0, nobj->slotSpan(), &fixedStart, &fixedEnd, &dynStart, &dynEnd); markSlots(trc, fixedStart, fixedEnd); markSlots(trc, dynStart, dynEnd); } @@ -597,14 +599,14 @@ js::Nursery::moveObjectToTenured(JSObject *dst, JSObject *src, AllocKind dstKind * even if they are inlined. 
*/ if (src->is()) - tenuredSize = srcSize = sizeof(ObjectImpl); + tenuredSize = srcSize = sizeof(NativeObject); js_memcpy(dst, src, srcSize); tenuredSize += moveSlotsToTenured(dst, src, dstKind); tenuredSize += moveElementsToTenured(dst, src, dstKind); if (src->is()) - forwardTypedArrayPointers(dst, src); + forwardTypedArrayPointers(&dst->as(), &src->as()); /* The shape's list head may point into the old object. */ if (&src->shape_ == dst->shape_->listp) @@ -614,17 +616,17 @@ js::Nursery::moveObjectToTenured(JSObject *dst, JSObject *src, AllocKind dstKind } void -js::Nursery::forwardTypedArrayPointers(JSObject *dst, JSObject *src) +js::Nursery::forwardTypedArrayPointers(TypedArrayObject *dst, TypedArrayObject *src) { /* * Typed array data may be stored inline inside the object's fixed slots. If * so, we need update the private pointer and leave a forwarding pointer at * the start of the data. */ - TypedArrayObject &typedArray = src->as(); - MOZ_ASSERT_IF(typedArray.buffer(), !isInside(src->getPrivate())); - if (typedArray.buffer()) + if (src->buffer()) { + MOZ_ASSERT(!isInside(src->getPrivate())); return; + } void *srcData = src->fixedData(TypedArrayObject::FIXED_DATA_START); void *dstData = dst->fixedData(TypedArrayObject::FIXED_DATA_START); @@ -646,37 +648,37 @@ MOZ_ALWAYS_INLINE size_t js::Nursery::moveSlotsToTenured(JSObject *dst, JSObject *src, AllocKind dstKind) { /* Fixed slots have already been copied over. 
*/ - if (!src->hasDynamicSlots()) + if (!src->fakeNativeHasDynamicSlots()) return 0; - if (!isInside(src->slots)) { - hugeSlots.remove(src->slots); + if (!isInside(src->fakeNativeSlots())) { + hugeSlots.remove(src->fakeNativeSlots()); return 0; } Zone *zone = src->zone(); - size_t count = src->numDynamicSlots(); - dst->slots = zone->pod_malloc(count); - if (!dst->slots) + size_t count = src->fakeNativeNumDynamicSlots(); + dst->fakeNativeSlots() = zone->pod_malloc(count); + if (!dst->fakeNativeSlots()) CrashAtUnhandlableOOM("Failed to allocate slots while tenuring."); - PodCopy(dst->slots, src->slots, count); - setSlotsForwardingPointer(src->slots, dst->slots, count); + PodCopy(dst->fakeNativeSlots(), src->fakeNativeSlots(), count); + setSlotsForwardingPointer(src->fakeNativeSlots(), dst->fakeNativeSlots(), count); return count * sizeof(HeapSlot); } MOZ_ALWAYS_INLINE size_t js::Nursery::moveElementsToTenured(JSObject *dst, JSObject *src, AllocKind dstKind) { - if (src->hasEmptyElements() || src->denseElementsAreCopyOnWrite()) + if (src->fakeNativeHasEmptyElements() || src->fakeNativeDenseElementsAreCopyOnWrite()) return 0; Zone *zone = src->zone(); - ObjectElements *srcHeader = src->getElementsHeader(); + ObjectElements *srcHeader = src->fakeNativeGetElementsHeader(); ObjectElements *dstHeader; /* TODO Bug 874151: Prefer to put element data inline if we have space. */ if (!isInside(srcHeader)) { - MOZ_ASSERT(src->elements == dst->elements); + MOZ_ASSERT(src->fakeNativeElements() == dst->fakeNativeElements()); hugeSlots.remove(reinterpret_cast(srcHeader)); return 0; } @@ -685,8 +687,8 @@ js::Nursery::moveElementsToTenured(JSObject *dst, JSObject *src, AllocKind dstKi /* Unlike other objects, Arrays can have fixed elements. 
*/ if (src->is() && nslots <= GetGCKindSlots(dstKind)) { - dst->setFixedElements(); - dstHeader = dst->getElementsHeader(); + dst->as().setFixedElements(); + dstHeader = dst->as().getElementsHeader(); js_memcpy(dstHeader, srcHeader, nslots * sizeof(HeapSlot)); setElementsForwardingPointer(srcHeader, dstHeader, nslots); return nslots * sizeof(HeapSlot); @@ -698,7 +700,7 @@ js::Nursery::moveElementsToTenured(JSObject *dst, JSObject *src, AllocKind dstKi CrashAtUnhandlableOOM("Failed to allocate elements while tenuring."); js_memcpy(dstHeader, srcHeader, nslots * sizeof(HeapSlot)); setElementsForwardingPointer(srcHeader, dstHeader, nslots); - dst->elements = dstHeader->elements(); + dst->fakeNativeElements() = dstHeader->elements(); return nslots * sizeof(HeapSlot); } diff --git a/js/src/gc/Nursery.h b/js/src/gc/Nursery.h index 6eb368885a3..851ee29ef61 100644 --- a/js/src/gc/Nursery.h +++ b/js/src/gc/Nursery.h @@ -28,6 +28,7 @@ struct Zone; namespace js { +class TypedArrayObject; class ObjectElements; class HeapSlot; void SetGCZeal(JSRuntime *, uint8_t, uint32_t); @@ -286,7 +287,7 @@ class Nursery size_t moveObjectToTenured(JSObject *dst, JSObject *src, gc::AllocKind dstKind); size_t moveElementsToTenured(JSObject *dst, JSObject *src, gc::AllocKind dstKind); size_t moveSlotsToTenured(JSObject *dst, JSObject *src, gc::AllocKind dstKind); - void forwardTypedArrayPointers(JSObject *dst, JSObject *src); + void forwardTypedArrayPointers(TypedArrayObject *dst, TypedArrayObject *src); /* Handle relocation of slots/elements pointers stored in Ion frames. 
*/ void setSlotsForwardingPointer(HeapSlot *oldSlots, HeapSlot *newSlots, uint32_t nslots); diff --git a/js/src/gc/Rooting.h b/js/src/gc/Rooting.h index e5801a76686..1bfbba36276 100644 --- a/js/src/gc/Rooting.h +++ b/js/src/gc/Rooting.h @@ -15,6 +15,8 @@ class JSLinearString; namespace js { class PropertyName; +class NativeObject; +class ArrayObject; class ScriptSourceObject; class Shape; @@ -22,21 +24,26 @@ namespace types { struct TypeObject; } // These are internal counterparts to the public types such as HandleObject. +typedef JS::Handle HandleNativeObject; typedef JS::Handle HandleShape; typedef JS::Handle HandleTypeObject; typedef JS::Handle HandleAtom; typedef JS::Handle HandleLinearString; typedef JS::Handle HandlePropertyName; +typedef JS::Handle HandleArrayObject; typedef JS::Handle HandleScriptSource; typedef JS::MutableHandle MutableHandleShape; typedef JS::MutableHandle MutableHandleAtom; +typedef JS::MutableHandle MutableHandleNativeObject; +typedef JS::Rooted RootedNativeObject; typedef JS::Rooted RootedShape; typedef JS::Rooted RootedTypeObject; typedef JS::Rooted RootedAtom; typedef JS::Rooted RootedLinearString; typedef JS::Rooted RootedPropertyName; +typedef JS::Rooted RootedArrayObject; typedef JS::Rooted RootedScriptSource; } /* namespace js */ diff --git a/js/src/gc/StoreBuffer.cpp b/js/src/gc/StoreBuffer.cpp index ceef03a4aa2..a02fabacfab 100644 --- a/js/src/gc/StoreBuffer.cpp +++ b/js/src/gc/StoreBuffer.cpp @@ -37,14 +37,14 @@ StoreBuffer::SlotsEdge::mark(JSTracer *trc) } if (kind() == ElementKind) { - int32_t initLen = obj->getDenseInitializedLength(); + int32_t initLen = obj->fakeNativeGetDenseInitializedLength(); int32_t clampedStart = Min(start_, initLen); int32_t clampedEnd = Min(start_ + count_, initLen); gc::MarkArraySlots(trc, clampedEnd - clampedStart, - obj->getDenseElements() + clampedStart, "element"); + obj->fakeNativeGetDenseElements() + clampedStart, "element"); } else { - int32_t start = Min(uint32_t(start_), 
obj->slotSpan()); - int32_t end = Min(uint32_t(start_) + count_, obj->slotSpan()); + int32_t start = Min(uint32_t(start_), obj->fakeNativeSlotSpan()); + int32_t end = Min(uint32_t(start_) + count_, obj->fakeNativeSlotSpan()); MOZ_ASSERT(end >= start); MarkObjectSlots(trc, obj, start, end - start); } diff --git a/js/src/gc/Tracer.cpp b/js/src/gc/Tracer.cpp index 2271c961d1b..9ec47c8cc7d 100644 --- a/js/src/gc/Tracer.cpp +++ b/js/src/gc/Tracer.cpp @@ -204,7 +204,7 @@ JS_GetTraceThingInfo(char *buf, size_t bufsize, JSTracer *trc, void *thing, PutEscapedString(buf, bufsize, fun->displayAtom(), 0); } } else if (obj->getClass()->flags & JSCLASS_HAS_PRIVATE) { - JS_snprintf(buf, bufsize, " %p", obj->getPrivate()); + JS_snprintf(buf, bufsize, " %p", obj->fakeNativeGetPrivate()); } else { JS_snprintf(buf, bufsize, " "); } diff --git a/js/src/gc/Tracer.h b/js/src/gc/Tracer.h index 0665d597cfc..221ff81a099 100644 --- a/js/src/gc/Tracer.h +++ b/js/src/gc/Tracer.h @@ -14,8 +14,8 @@ #include "js/TracingAPI.h" namespace js { +class NativeObject; class GCMarker; -class ObjectImpl; namespace gc { struct ArenaHeader; } @@ -139,7 +139,7 @@ class GCMarker : public JSTracer void stop(); void reset(); - void pushObject(ObjectImpl *obj) { + void pushObject(JSObject *obj) { pushTaggedPtr(ObjectTag, obj); } @@ -270,7 +270,7 @@ class GCMarker : public JSTracer return stack.isEmpty(); } - bool restoreValueArray(JSObject *obj, void **vpp, void **endp); + bool restoreValueArray(NativeObject *obj, void **vpp, void **endp); void saveValueRanges(); inline void processMarkStackTop(SliceBudget &budget); void processMarkStackOther(uintptr_t tag, uintptr_t addr); diff --git a/js/src/gc/Verifier.cpp b/js/src/gc/Verifier.cpp index b614a9fef1c..f092b5e116a 100644 --- a/js/src/gc/Verifier.cpp +++ b/js/src/gc/Verifier.cpp @@ -478,7 +478,7 @@ PostVerifierVisitEdge(JSTracer *jstrc, void **thingp, JSGCTraceKind kind) /* * Values will be unpacked to the stack before getting here. 
However, the * only things that enter this callback are marked by the JS_TraceChildren - * below. Since ObjectImpl::markChildren handles this, the real trace + * below. Since JSObject::markChildren handles this, the real trace * location will be set correctly in these cases. */ void **loc = trc->tracingLocation(thingp); diff --git a/js/src/gc/Zone.cpp b/js/src/gc/Zone.cpp index dbcb5684ee9..c366735a5d1 100644 --- a/js/src/gc/Zone.cpp +++ b/js/src/gc/Zone.cpp @@ -155,7 +155,7 @@ Zone::sweepBreakpoints(FreeOp *fop) Breakpoint *nextbp; for (Breakpoint *bp = site->firstBreakpoint(); bp; bp = nextbp) { nextbp = bp->nextInSite(); - HeapPtrObject &dbgobj = bp->debugger->toJSObjectRef(); + HeapPtrNativeObject &dbgobj = bp->debugger->toJSObjectRef(); MOZ_ASSERT_IF(isGCSweeping() && dbgobj->zone()->isCollecting(), dbgobj->zone()->isGCSweeping()); bool dying = scriptGone || IsObjectAboutToBeFinalized(&dbgobj); diff --git a/js/src/jit/BaselineCompiler.cpp b/js/src/jit/BaselineCompiler.cpp index a02b8d7ab90..caa6ab3c8b9 100644 --- a/js/src/jit/BaselineCompiler.cpp +++ b/js/src/jit/BaselineCompiler.cpp @@ -23,6 +23,7 @@ #include "jsscriptinlines.h" #include "vm/Interpreter-inl.h" +#include "vm/ObjectImpl-inl.h" using namespace js; using namespace js::jit; @@ -1258,7 +1259,7 @@ BaselineCompiler::emit_JSOP_STRING() return true; } -typedef JSObject *(*DeepCloneObjectLiteralFn)(JSContext *, HandleObject, NewObjectKind); +typedef NativeObject *(*DeepCloneObjectLiteralFn)(JSContext *, HandleNativeObject, NewObjectKind); static const VMFunction DeepCloneObjectLiteralInfo = FunctionInfo(DeepCloneObjectLiteral); @@ -1646,7 +1647,7 @@ BaselineCompiler::emit_JSOP_NEWARRAY() masm.move32(Imm32(length), R0.scratchReg()); masm.movePtr(ImmGCPtr(type), R1.scratchReg()); - JSObject *templateObject = NewDenseUnallocatedArray(cx, length, nullptr, TenuredObject); + ArrayObject *templateObject = NewDenseUnallocatedArray(cx, length, nullptr, TenuredObject); if (!templateObject) return false; 
templateObject->setType(type); @@ -1659,7 +1660,7 @@ BaselineCompiler::emit_JSOP_NEWARRAY() return true; } -typedef JSObject *(*NewArrayCopyOnWriteFn)(JSContext *, HandleObject, gc::InitialHeap); +typedef JSObject *(*NewArrayCopyOnWriteFn)(JSContext *, HandleNativeObject, gc::InitialHeap); const VMFunction jit::NewArrayCopyOnWriteInfo = FunctionInfo(js::NewDenseCopyOnWriteArray); @@ -1717,8 +1718,8 @@ BaselineCompiler::emit_JSOP_NEWOBJECT() return false; } - RootedObject baseObject(cx, script->getObject(pc)); - RootedObject templateObject(cx, CopyInitializerObject(cx, baseObject, TenuredObject)); + RootedNativeObject baseObject(cx, script->getObject(pc)); + RootedNativeObject templateObject(cx, CopyInitializerObject(cx, baseObject, TenuredObject)); if (!templateObject) return false; @@ -1755,7 +1756,7 @@ BaselineCompiler::emit_JSOP_NEWINIT() masm.move32(Imm32(0), R0.scratchReg()); masm.movePtr(ImmGCPtr(type), R1.scratchReg()); - JSObject *templateObject = NewDenseUnallocatedArray(cx, 0, nullptr, TenuredObject); + ArrayObject *templateObject = NewDenseUnallocatedArray(cx, 0, nullptr, TenuredObject); if (!templateObject) return false; templateObject->setType(type); @@ -1766,8 +1767,8 @@ BaselineCompiler::emit_JSOP_NEWINIT() } else { MOZ_ASSERT(key == JSProto_Object); - RootedObject templateObject(cx); - templateObject = NewBuiltinClassInstance(cx, &JSObject::class_, TenuredObject); + RootedNativeObject templateObject(cx); + templateObject = NewNativeBuiltinClassInstance(cx, &JSObject::class_, TenuredObject); if (!templateObject) return false; @@ -2103,11 +2104,11 @@ BaselineCompiler::getScopeCoordinateAddressFromObject(Register objReg, Register Address addr; if (shape->numFixedSlots() <= sc.slot()) { - masm.loadPtr(Address(objReg, JSObject::offsetOfSlots()), reg); + masm.loadPtr(Address(objReg, NativeObject::offsetOfSlots()), reg); return Address(reg, (sc.slot() - shape->numFixedSlots()) * sizeof(Value)); } - return Address(objReg, 
JSObject::getFixedSlotOffset(sc.slot())); + return Address(objReg, NativeObject::getFixedSlotOffset(sc.slot())); } Address @@ -3217,7 +3218,7 @@ BaselineCompiler::emit_JSOP_REST() { frame.syncStack(0); - JSObject *templateObject = NewDenseUnallocatedArray(cx, 0, nullptr, TenuredObject); + ArrayObject *templateObject = NewDenseUnallocatedArray(cx, 0, nullptr, TenuredObject); if (!templateObject) return false; types::FixRestArgumentsType(cx, templateObject); diff --git a/js/src/jit/BaselineIC.cpp b/js/src/jit/BaselineIC.cpp index 280d9ca6319..4b32bf722de 100644 --- a/js/src/jit/BaselineIC.cpp +++ b/js/src/jit/BaselineIC.cpp @@ -1748,7 +1748,7 @@ DoNewObject(JSContext *cx, ICNewObject_Fallback *stub, MutableHandleValue res) { FallbackICSpew(cx, stub, "NewObject"); - RootedObject templateObject(cx, stub->templateObject()); + RootedNativeObject templateObject(cx, stub->templateObject()); JSObject *obj = NewInitObject(cx, templateObject); if (!obj) return false; @@ -3189,13 +3189,13 @@ ICUnaryArith_Double::Compiler::generateStubCode(MacroAssembler &masm) // GetElem_Fallback // -static void GetFixedOrDynamicSlotOffset(HandleObject obj, uint32_t slot, +static void GetFixedOrDynamicSlotOffset(NativeObject *obj, uint32_t slot, bool *isFixed, uint32_t *offset) { MOZ_ASSERT(isFixed); MOZ_ASSERT(offset); *isFixed = obj->isFixedSlot(slot); - *offset = *isFixed ? JSObject::getFixedSlotOffset(slot) + *offset = *isFixed ? NativeObject::getFixedSlotOffset(slot) : obj->dynamicSlotIndex(slot) * sizeof(Value); } @@ -3221,7 +3221,7 @@ GenerateDOMProxyChecks(JSContext *cx, MacroAssembler &masm, Register object, // 2. The object does not have expando properties, or has an expando // which is known to not have the desired property. 
Address handlerAddr(object, ProxyObject::offsetOfHandler()); - Address expandoAddr(object, JSObject::getFixedSlotOffset(GetDOMProxyExpandoSlot())); + Address expandoAddr(object, NativeObject::getFixedSlotOffset(GetDOMProxyExpandoSlot())); // Check that object is a DOMProxy. masm.loadPtr(checkProxyHandlerAddr, scratch); @@ -3333,7 +3333,7 @@ EffectlesslyLookupProperty(JSContext *cx, HandleObject obj, HandlePropertyName n if (!JSObject::lookupProperty(cx, checkObj, name, holder, shape)) return false; } else if (checkObj->isNative()) { - shape.set(checkObj->nativeLookup(cx, NameToId(name))); + shape.set(checkObj->as().lookup(cx, NameToId(name))); if (shape) holder.set(checkObj); } @@ -3352,7 +3352,7 @@ CheckHasNoSuchProperty(JSContext *cx, HandleObject obj, HandlePropertyName name, if (!curObj->isNative()) return false; - Shape *shape = curObj->nativeLookup(cx, NameToId(name)); + Shape *shape = curObj->as().lookup(cx, NameToId(name)); if (shape) return false; @@ -3510,7 +3510,7 @@ IsCacheableSetPropAddSlot(JSContext *cx, HandleObject obj, HandleShape oldShape, return false; // if prototype defines this property in a non-plain way, don't optimize - Shape *protoShape = proto->nativeLookup(cx, id); + Shape *protoShape = proto->as().lookup(cx, id); if (protoShape && !protoShape->hasDefaultSetter()) return false; @@ -3523,7 +3523,7 @@ IsCacheableSetPropAddSlot(JSContext *cx, HandleObject obj, HandleShape oldShape, // Only add a IC entry if the dynamic slots didn't change when the shapes // changed. Need to ensure that a shape change for a subsequent object // won't involve reallocating the slot array. 
- if (obj->numDynamicSlots() != oldSlots) + if (obj->as().numDynamicSlots() != oldSlots) return false; *protoChainDepth = chainDepth; @@ -3782,7 +3782,8 @@ static bool TryAttachNativeGetElemStub(JSContext *cx, HandleScript script, jsbyt bool isFixedSlot; uint32_t offset; - GetFixedOrDynamicSlotOffset(holder, shape->slot(), &isFixedSlot, &offset); + GetFixedOrDynamicSlotOffset(&holder->as(), + shape->slot(), &isFixedSlot, &offset); ICStub *monitorStub = stub->fallbackMonitorStub()->firstMonitorStub(); ICStub::Kind kind = (obj == holder) ? ICStub::GetElem_NativeSlot @@ -4326,7 +4327,7 @@ ICGetElemNativeCompiler::generateStubCode(MacroAssembler &masm) // Load from object. if (acctype_ == ICGetElemNativeStub::DynamicSlot) - masm.addPtr(Address(holderReg, JSObject::offsetOfSlots()), scratchReg); + masm.addPtr(Address(holderReg, NativeObject::offsetOfSlots()), scratchReg); else masm.addPtr(holderReg, scratchReg); @@ -4501,7 +4502,7 @@ ICGetElem_Dense::Compiler::generateStubCode(MacroAssembler &masm) masm.branchTestObjShape(Assembler::NotEqual, obj, scratchReg, &failure); // Load obj->elements. - masm.loadPtr(Address(obj, JSObject::offsetOfElements()), scratchReg); + masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), scratchReg); // Unbox key. 
Register key = masm.extractInt32(R1, ExtractTemp1); @@ -4902,8 +4903,8 @@ RemoveExistingTypedArraySetElemStub(JSContext *cx, ICSetElem_Fallback *stub, Han } static bool -CanOptimizeDenseSetElem(JSContext *cx, HandleObject obj, uint32_t index, - HandleShape oldShape, uint32_t oldCapacity, uint32_t oldInitLength, +CanOptimizeDenseSetElem(NativeObject *obj, uint32_t index, + Shape *oldShape, uint32_t oldCapacity, uint32_t oldInitLength, bool *isAddingCaseOut, size_t *protoDepthOut) { uint32_t initLength = obj->getDenseInitializedLength(); @@ -4916,7 +4917,7 @@ CanOptimizeDenseSetElem(JSContext *cx, HandleObject obj, uint32_t index, if (initLength < oldInitLength || capacity < oldCapacity) return false; - RootedShape shape(cx, obj->lastProperty()); + Shape *shape = obj->lastProperty(); // Cannot optimize if the shape changed. if (oldShape != shape) @@ -4950,7 +4951,7 @@ CanOptimizeDenseSetElem(JSContext *cx, HandleObject obj, uint32_t index, // either directly, or via a prototype, or via the target object for a prototype // which is a proxy, that handles a particular integer write. // Scan the prototype and shape chain to make sure that this is not the case. - RootedObject curObj(cx, obj); + JSObject *curObj = obj; while (curObj) { // Ensure object is native. 
if (!curObj->isNative()) @@ -5000,8 +5001,8 @@ DoSetElemFallback(JSContext *cx, BaselineFrame *frame, ICSetElem_Fallback *stub_ uint32_t oldCapacity = 0; uint32_t oldInitLength = 0; if (obj->isNative() && index.isInt32() && index.toInt32() >= 0) { - oldCapacity = obj->getDenseCapacity(); - oldInitLength = obj->getDenseInitializedLength(); + oldCapacity = obj->as().getDenseCapacity(); + oldInitLength = obj->as().getDenseInitializedLength(); } if (op == JSOP_INITELEM) { @@ -5042,7 +5043,8 @@ DoSetElemFallback(JSContext *cx, BaselineFrame *frame, ICSetElem_Fallback *stub_ bool addingCase; size_t protoDepth; - if (CanOptimizeDenseSetElem(cx, obj, index.toInt32(), oldShape, oldCapacity, oldInitLength, + if (CanOptimizeDenseSetElem(&obj->as(), index.toInt32(), + oldShape, oldCapacity, oldInitLength, &addingCase, &protoDepth)) { RootedShape shape(cx, obj->lastProperty()); @@ -5229,7 +5231,7 @@ ICSetElem_Dense::Compiler::generateStubCode(MacroAssembler &masm) Register key = masm.extractInt32(R1, ExtractTemp1); // Load obj->elements in scratchReg. - masm.loadPtr(Address(obj, JSObject::offsetOfElements()), scratchReg); + masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), scratchReg); // Bounds check. Address initLength(scratchReg, ObjectElements::offsetOfInitializedLength()); @@ -5412,7 +5414,7 @@ ICSetElemDenseAddCompiler::generateStubCode(MacroAssembler &masm) Register key = masm.extractInt32(R1, ExtractTemp1); // Load obj->elements in scratchReg. - masm.loadPtr(Address(obj, JSObject::offsetOfElements()), scratchReg); + masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), scratchReg); // Bounds check (key == initLength) Address initLength(scratchReg, ObjectElements::offsetOfInitializedLength()); @@ -5669,7 +5671,7 @@ ICIn_Fallback::Compiler::generateStubCode(MacroAssembler &masm) // Attach an optimized stub for a GETGNAME/CALLGNAME op. 
static bool TryAttachGlobalNameStub(JSContext *cx, HandleScript script, jsbytecode *pc, - ICGetName_Fallback *stub, HandleObject global, + ICGetName_Fallback *stub, Handle global, HandlePropertyName name) { MOZ_ASSERT(global->is()); @@ -5681,7 +5683,7 @@ TryAttachGlobalNameStub(JSContext *cx, HandleScript script, jsbytecode *pc, types::EnsureTrackPropertyTypes(cx, global, NameToId(name)); // The property must be found, and it must be found as a normal data property. - RootedShape shape(cx, global->nativeLookup(cx, id)); + RootedShape shape(cx, global->lookup(cx, id)); if (!shape) return true; @@ -5740,7 +5742,7 @@ TryAttachScopeNameStub(JSContext *cx, HandleScript script, ICGetName_Fallback *s return false; if (scopeChain->is()) { - shape = scopeChain->nativeLookup(cx, id); + shape = scopeChain->as().lookup(cx, id); if (shape) break; return true; @@ -5752,7 +5754,7 @@ TryAttachScopeNameStub(JSContext *cx, HandleScript script, ICGetName_Fallback *s // Check for an 'own' property on the scope. There is no need to // check the prototype as non-with scopes do not inherit properties // from any prototype. 
- shape = scopeChain->nativeLookup(cx, id); + shape = scopeChain->as().lookup(cx, id); if (shape) break; @@ -5768,7 +5770,8 @@ TryAttachScopeNameStub(JSContext *cx, HandleScript script, ICGetName_Fallback *s bool isFixedSlot; uint32_t offset; - GetFixedOrDynamicSlotOffset(scopeChain, shape->slot(), &isFixedSlot, &offset); + GetFixedOrDynamicSlotOffset(&scopeChain->as(), + shape->slot(), &isFixedSlot, &offset); ICStub *monitorStub = stub->fallbackMonitorStub()->firstMonitorStub(); ICStub *newStub; @@ -5861,7 +5864,7 @@ DoGetNameFallback(JSContext *cx, BaselineFrame *frame, ICGetName_Fallback *stub_ } if (js_CodeSpec[*pc].format & JOF_GNAME) { - if (!TryAttachGlobalNameStub(cx, script, pc, stub, scopeChain, name)) + if (!TryAttachGlobalNameStub(cx, script, pc, stub, scopeChain.as(), name)) return false; } else { if (!TryAttachScopeNameStub(cx, script, stub, scopeChain, name)) @@ -5901,7 +5904,7 @@ ICGetName_Global::Compiler::generateStubCode(MacroAssembler &masm) masm.branchTestObjShape(Assembler::NotEqual, obj, scratch, &failure); // Load dynamic slot. - masm.loadPtr(Address(obj, JSObject::offsetOfSlots()), obj); + masm.loadPtr(Address(obj, NativeObject::offsetOfSlots()), obj); masm.load32(Address(BaselineStubReg, ICGetName_Global::offsetOfSlot()), scratch); masm.loadValue(BaseIndex(obj, scratch, TimesEight), R0); @@ -5941,7 +5944,7 @@ ICGetName_Scope::Compiler::generateStubCode(MacroAssembler &masm) Register scope = NumHops ? 
walker : obj; if (!isFixedSlot_) { - masm.loadPtr(Address(scope, JSObject::offsetOfSlots()), walker); + masm.loadPtr(Address(scope, NativeObject::offsetOfSlots()), walker); scope = walker; } @@ -6185,7 +6188,7 @@ static bool UpdateExistingGenerationalDOMProxyStub(ICGetProp_Fallback *stub, HandleObject obj) { - Value expandoSlot = obj->getFixedSlot(GetDOMProxyExpandoSlot()); + Value expandoSlot = obj->fakeNativeGetReservedSlot(GetDOMProxyExpandoSlot()); MOZ_ASSERT(!expandoSlot.isObject() && !expandoSlot.isUndefined()); ExpandoAndGeneration *expandoAndGeneration = (ExpandoAndGeneration*)expandoSlot.toPrivate(); for (ICStubConstIterator iter = stub->beginChainConst(); !iter.atEnd(); iter++) { @@ -6272,7 +6275,7 @@ TryAttachNativeGetPropStub(JSContext *cx, HandleScript script, jsbytecode *pc, if (!isDOMProxy && IsCacheableGetPropReadSlot(obj, holder, shape)) { bool isFixedSlot; uint32_t offset; - GetFixedOrDynamicSlotOffset(holder, shape->slot(), &isFixedSlot, &offset); + GetFixedOrDynamicSlotOffset(&holder->as(), shape->slot(), &isFixedSlot, &offset); // Instantiate this property for singleton holders, for use during Ion compilation. if (IsIonEnabled(cx)) @@ -6417,7 +6420,7 @@ TryAttachPrimitiveGetPropStub(JSContext *cx, HandleScript script, jsbytecode *pc MOZ_ASSERT(!*attached); JSValueType primitiveType; - RootedObject proto(cx); + RootedNativeObject proto(cx); Rooted global(cx, &script->global()); if (val.isString()) { primitiveType = JSVAL_TYPE_STRING; @@ -6442,7 +6445,7 @@ TryAttachPrimitiveGetPropStub(JSContext *cx, HandleScript script, jsbytecode *pc types::EnsureTrackPropertyTypes(cx, proto, id); // For now, only look for properties directly set on the prototype. 
- RootedShape shape(cx, proto->nativeLookup(cx, id)); + RootedShape shape(cx, proto->lookup(cx, id)); if (!shape || !shape->hasSlot() || !shape->hasDefaultGetter()) return true; @@ -6693,7 +6696,7 @@ ICGetProp_ArrayLength::Compiler::generateStubCode(MacroAssembler &masm) masm.branchTestObjClass(Assembler::NotEqual, obj, scratch, &ArrayObject::class_, &failure); // Load obj->elements->length. - masm.loadPtr(Address(obj, JSObject::offsetOfElements()), scratch); + masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), scratch); masm.load32(Address(scratch, ObjectElements::offsetOfLength()), scratch); // Guard length fits in an int32. @@ -6760,7 +6763,7 @@ ICGetProp_Primitive::Compiler::generateStubCode(MacroAssembler &masm) masm.branchPtr(Assembler::NotEqual, shapeAddr, scratchReg, &failure); if (!isFixedSlot_) - masm.loadPtr(Address(holderReg, JSObject::offsetOfSlots()), holderReg); + masm.loadPtr(Address(holderReg, NativeObject::offsetOfSlots()), holderReg); masm.load32(Address(BaselineStubReg, ICGetPropNativeStub::offsetOfOffset()), scratchReg); masm.loadValue(BaseIndex(holderReg, scratchReg, TimesOne), R0); @@ -6807,7 +6810,7 @@ ICGetPropNativeCompiler::generateStubCode(MacroAssembler &masm) // Don't overwrite actual holderReg if we need to load a dynamic slots object. // May need to preserve object for noSuchMethod check later. 
Register nextHolder = regs.takeAny(); - masm.loadPtr(Address(holderReg, JSObject::offsetOfSlots()), nextHolder); + masm.loadPtr(Address(holderReg, NativeObject::offsetOfSlots()), nextHolder); holderReg = nextHolder; } @@ -7241,7 +7244,7 @@ ICGetPropCallDOMProxyNativeCompiler::getStub(ICStubSpace *space) RootedShape shape(cx, proxy_->lastProperty()); RootedShape holderShape(cx, holder_->lastProperty()); - Value expandoSlot = proxy_->getFixedSlot(GetDOMProxyExpandoSlot()); + Value expandoSlot = proxy_->fakeNativeGetReservedSlot(GetDOMProxyExpandoSlot()); RootedShape expandoShape(cx, nullptr); ExpandoAndGeneration *expandoAndGeneration; int32_t generation; @@ -7528,7 +7531,7 @@ TryAttachSetPropStub(JSContext *cx, HandleScript script, jsbytecode *pc, ICSetPr bool isFixedSlot; uint32_t offset; - GetFixedOrDynamicSlotOffset(obj, shape->slot(), &isFixedSlot, &offset); + GetFixedOrDynamicSlotOffset(&obj->as(), shape->slot(), &isFixedSlot, &offset); JitSpew(JitSpew_BaselineIC, " Generating SetProp(NativeObject.ADD) stub"); ICSetPropNativeAddCompiler compiler(cx, obj, oldShape, oldType, @@ -7557,7 +7560,7 @@ TryAttachSetPropStub(JSContext *cx, HandleScript script, jsbytecode *pc, ICSetPr bool isFixedSlot; uint32_t offset; - GetFixedOrDynamicSlotOffset(obj, shape->slot(), &isFixedSlot, &offset); + GetFixedOrDynamicSlotOffset(&obj->as(), shape->slot(), &isFixedSlot, &offset); JitSpew(JitSpew_BaselineIC, " Generating SetProp(NativeObject.PROP) stub"); ICSetProp_Native::Compiler compiler(cx, obj, isFixedSlot, offset); @@ -7654,12 +7657,15 @@ DoSetPropFallback(JSContext *cx, BaselineFrame *frame, ICSetProp_Fallback *stub_ RootedTypeObject oldType(cx, obj->getType(cx)); if (!oldType) return false; - uint32_t oldSlots = obj->numDynamicSlots(); + uint32_t oldSlots = obj->fakeNativeNumDynamicSlots(); if (op == JSOP_INITPROP) { MOZ_ASSERT(obj->is()); - if (!DefineNativeProperty(cx, obj, id, rhs, nullptr, nullptr, JSPROP_ENUMERATE)) + if (!DefineNativeProperty(cx, obj.as(), id, rhs, + 
nullptr, nullptr, JSPROP_ENUMERATE)) + { return false; + } } else if (op == JSOP_SETNAME || op == JSOP_SETGNAME) { if (!SetNameOperation(cx, script, pc, obj, rhs)) return false; @@ -7798,7 +7804,7 @@ ICSetProp_Native::Compiler::generateStubCode(MacroAssembler &masm) holderReg = objReg; } else { holderReg = regs.takeAny(); - masm.loadPtr(Address(objReg, JSObject::offsetOfSlots()), holderReg); + masm.loadPtr(Address(objReg, NativeObject::offsetOfSlots()), holderReg); } // Perform the store. @@ -7942,7 +7948,7 @@ ICSetPropNativeAddCompiler::generateStubCode(MacroAssembler &masm) holderReg = objReg; } else { holderReg = regs.takeAny(); - masm.loadPtr(Address(objReg, JSObject::offsetOfSlots()), holderReg); + masm.loadPtr(Address(objReg, NativeObject::offsetOfSlots()), holderReg); } // Perform the store. No write barrier required since this is a new @@ -8256,7 +8262,7 @@ TryAttachFunCallStub(JSContext *cx, ICCall_Fallback *stub, HandleScript script, static bool GetTemplateObjectForNative(JSContext *cx, HandleScript script, jsbytecode *pc, - Native native, const CallArgs &args, MutableHandleObject res) + Native native, const CallArgs &args, MutableHandleNativeObject res) { // Check for natives to which template objects can be attached. This is // done to provide templates to Ion for inlining these natives later on. @@ -8431,7 +8437,7 @@ TryAttachCallStub(JSContext *cx, ICCall_Fallback *stub, HandleScript script, jsb // Remember the template object associated with any script being called // as a constructor, for later use during Ion compilation. 
- RootedObject templateObject(cx); + RootedNativeObject templateObject(cx); if (constructing) { templateObject = CreateThisForFunction(cx, fun, MaybeSingletonObject); if (!templateObject) @@ -8496,7 +8502,7 @@ TryAttachCallStub(JSContext *cx, ICCall_Fallback *stub, HandleScript script, jsb return true; } - RootedObject templateObject(cx); + RootedNativeObject templateObject(cx); if (MOZ_LIKELY(!isSpread)) { CallArgs args = CallArgsFromVp(argc, vp); if (!GetTemplateObjectForNative(cx, script, pc, fun->native(), args, &templateObject)) @@ -8520,7 +8526,7 @@ TryAttachCallStub(JSContext *cx, ICCall_Fallback *stub, HandleScript script, jsb } static bool -CopyArray(JSContext *cx, HandleObject obj, MutableHandleValue result) +CopyArray(JSContext *cx, HandleArrayObject obj, MutableHandleValue result) { MOZ_ASSERT(obj->is()); uint32_t length = obj->as().length(); @@ -8530,7 +8536,7 @@ CopyArray(JSContext *cx, HandleObject obj, MutableHandleValue result) if (!type) return false; - RootedObject newObj(cx, NewDenseArray(cx, length, type, NewArray_FullyAllocating)); + RootedArrayObject newObj(cx, NewDenseArray(cx, length, type, NewArray_FullyAllocating)); if (!newObj) return false; @@ -8554,13 +8560,12 @@ TryAttachStringSplit(JSContext *cx, ICCall_Fallback *stub, HandleScript script, if (!IsOptimizableCallStringSplit(callee, thisv, argc, args)) return true; - MOZ_ASSERT(res.toObject().is()); MOZ_ASSERT(callee.isObject()); MOZ_ASSERT(callee.toObject().is()); RootedString thisString(cx, thisv.toString()); RootedString argString(cx, args[0].toString()); - RootedObject obj(cx, &res.toObject()); + RootedArrayObject obj(cx, &res.toObject().as()); RootedValue arr(cx); // Copy the array before storing in stub. 
@@ -8765,7 +8770,7 @@ void ICCallStubCompiler::guardSpreadCall(MacroAssembler &masm, Register argcReg, Label *failure) { masm.unboxObject(Address(BaselineStackReg, ICStackValueOffset), argcReg); - masm.loadPtr(Address(argcReg, JSObject::offsetOfElements()), argcReg); + masm.loadPtr(Address(argcReg, NativeObject::offsetOfElements()), argcReg); masm.load32(Address(argcReg, ObjectElements::offsetOfLength()), argcReg); // Limit actual argc to something reasonable (huge number of arguments can @@ -8784,7 +8789,7 @@ ICCallStubCompiler::pushSpreadCallArguments(MacroAssembler &masm, GeneralRegiste Register startReg = regs.takeAny(); Register endReg = regs.takeAny(); masm.unboxObject(Address(BaselineStackReg, STUB_FRAME_SIZE), startReg); - masm.loadPtr(Address(startReg, JSObject::offsetOfElements()), startReg); + masm.loadPtr(Address(startReg, NativeObject::offsetOfElements()), startReg); masm.mov(argcReg, endReg); static_assert(sizeof(Value) == 8, "Value must be 8 bytes"); masm.lshiftPtr(Imm32(3), endReg); @@ -8847,7 +8852,7 @@ ICCallStubCompiler::guardFunApply(MacroAssembler &masm, GeneralRegisterSet regs, &ArrayObject::class_, failure); // Get the array elements and ensure that initializedLength == length - masm.loadPtr(Address(secondArgObj, JSObject::offsetOfElements()), secondArgObj); + masm.loadPtr(Address(secondArgObj, NativeObject::offsetOfElements()), secondArgObj); Register lenReg = regsx.takeAny(); masm.load32(Address(secondArgObj, ObjectElements::offsetOfLength()), lenReg); @@ -8957,7 +8962,7 @@ ICCallStubCompiler::pushArrayArguments(MacroAssembler &masm, Address arrayVal, Register startReg = regs.takeAny(); Register endReg = regs.takeAny(); masm.extractObject(arrayVal, startReg); - masm.loadPtr(Address(startReg, JSObject::offsetOfElements()), startReg); + masm.loadPtr(Address(startReg, NativeObject::offsetOfElements()), startReg); masm.load32(Address(startReg, ObjectElements::offsetOfInitializedLength()), endReg); JS_STATIC_ASSERT(sizeof(Value) == 8); 
masm.lshiftPtr(Imm32(3), endReg); @@ -9355,7 +9360,7 @@ ICCallScriptedCompiler::generateStubCode(MacroAssembler &masm) return true; } -typedef bool (*CopyArrayFn)(JSContext *, HandleObject, MutableHandleValue); +typedef bool (*CopyArrayFn)(JSContext *, HandleArrayObject, MutableHandleValue); static const VMFunction CopyArrayInfo = FunctionInfo(CopyArray); bool @@ -9617,7 +9622,7 @@ ICCall_ScriptedApplyArray::Compiler::generateStubCode(MacroAssembler &masm) // Reload argc from length of array. masm.extractObject(arrayVal, argcReg); - masm.loadPtr(Address(argcReg, JSObject::offsetOfElements()), argcReg); + masm.loadPtr(Address(argcReg, NativeObject::offsetOfElements()), argcReg); masm.load32(Address(argcReg, ObjectElements::offsetOfInitializedLength()), argcReg); masm.Push(argcReg); @@ -10852,7 +10857,7 @@ ICSetProp_CallNative::Clone(JSContext *cx, ICStubSpace *space, ICStub *, } ICCall_Scripted::ICCall_Scripted(JitCode *stubCode, ICStub *firstMonitorStub, - HandleScript calleeScript, HandleObject templateObject, + HandleScript calleeScript, HandleNativeObject templateObject, uint32_t pcOffset) : ICMonitoredStub(ICStub::Call_Scripted, stubCode, firstMonitorStub), calleeScript_(calleeScript), @@ -10865,7 +10870,7 @@ ICCall_Scripted::Clone(JSContext *cx, ICStubSpace *space, ICStub *firstMonitorSt ICCall_Scripted &other) { RootedScript calleeScript(cx, other.calleeScript_); - RootedObject templateObject(cx, other.templateObject_); + RootedNativeObject templateObject(cx, other.templateObject_); return New(space, other.jitCode(), firstMonitorStub, calleeScript, templateObject, other.pcOffset_); } @@ -10878,7 +10883,7 @@ ICCall_AnyScripted::Clone(JSContext *, ICStubSpace *space, ICStub *firstMonitorS } ICCall_Native::ICCall_Native(JitCode *stubCode, ICStub *firstMonitorStub, - HandleFunction callee, HandleObject templateObject, + HandleFunction callee, HandleNativeObject templateObject, uint32_t pcOffset) : ICMonitoredStub(ICStub::Call_Native, stubCode, firstMonitorStub), 
callee_(callee), @@ -10899,7 +10904,7 @@ ICCall_Native::Clone(JSContext *cx, ICStubSpace *space, ICStub *firstMonitorStub ICCall_Native &other) { RootedFunction callee(cx, other.callee_); - RootedObject templateObject(cx, other.templateObject_); + RootedNativeObject templateObject(cx, other.templateObject_); return New(space, other.jitCode(), firstMonitorStub, callee, templateObject, other.pcOffset_); } @@ -11027,7 +11032,7 @@ static bool DoRestFallback(JSContext *cx, ICRest_Fallback *stub, unsigned numRest = numActuals > numFormals ? numActuals - numFormals : 0; Value *rest = frame->argv() + numFormals; - JSObject *obj = NewDenseCopiedArray(cx, numRest, rest, nullptr); + ArrayObject *obj = NewDenseCopiedArray(cx, numRest, rest, nullptr); if (!obj) return false; types::FixRestArgumentsType(cx, obj); diff --git a/js/src/jit/BaselineIC.h b/js/src/jit/BaselineIC.h index 4412e69ae38..d2c5cbb6d28 100644 --- a/js/src/jit/BaselineIC.h +++ b/js/src/jit/BaselineIC.h @@ -16,6 +16,7 @@ #include "jit/BaselineJIT.h" #include "jit/BaselineRegisters.h" +#include "vm/ArrayObject.h" namespace js { @@ -1878,26 +1879,26 @@ class ICNewArray_Fallback : public ICFallbackStub { friend class ICStubSpace; - HeapPtrObject templateObject_; + HeapPtrArrayObject templateObject_; - ICNewArray_Fallback(JitCode *stubCode, JSObject *templateObject) + ICNewArray_Fallback(JitCode *stubCode, ArrayObject *templateObject) : ICFallbackStub(ICStub::NewArray_Fallback, stubCode), templateObject_(templateObject) {} public: static inline ICNewArray_Fallback *New(ICStubSpace *space, JitCode *code, - JSObject *templateObject) { + ArrayObject *templateObject) { if (!code) return nullptr; return space->allocate(code, templateObject); } class Compiler : public ICStubCompiler { - RootedObject templateObject; + RootedArrayObject templateObject; bool generateStubCode(MacroAssembler &masm); public: - Compiler(JSContext *cx, JSObject *templateObject) + Compiler(JSContext *cx, ArrayObject *templateObject) : 
ICStubCompiler(cx, ICStub::NewArray_Fallback), templateObject(cx, templateObject) {} @@ -1907,7 +1908,7 @@ class ICNewArray_Fallback : public ICFallbackStub } }; - HeapPtrObject &templateObject() { + HeapPtrArrayObject &templateObject() { return templateObject_; } }; @@ -1916,26 +1917,26 @@ class ICNewObject_Fallback : public ICFallbackStub { friend class ICStubSpace; - HeapPtrObject templateObject_; + HeapPtrNativeObject templateObject_; - ICNewObject_Fallback(JitCode *stubCode, JSObject *templateObject) + ICNewObject_Fallback(JitCode *stubCode, NativeObject *templateObject) : ICFallbackStub(ICStub::NewObject_Fallback, stubCode), templateObject_(templateObject) {} public: static inline ICNewObject_Fallback *New(ICStubSpace *space, JitCode *code, - JSObject *templateObject) { + NativeObject *templateObject) { if (!code) return nullptr; return space->allocate(code, templateObject); } class Compiler : public ICStubCompiler { - RootedObject templateObject; + RootedNativeObject templateObject; bool generateStubCode(MacroAssembler &masm); public: - Compiler(JSContext *cx, JSObject *templateObject) + Compiler(JSContext *cx, NativeObject *templateObject) : ICStubCompiler(cx, ICStub::NewObject_Fallback), templateObject(cx, templateObject) {} @@ -1945,7 +1946,7 @@ class ICNewObject_Fallback : public ICFallbackStub } }; - HeapPtrObject &templateObject() { + HeapPtrNativeObject &templateObject() { return templateObject_; } }; @@ -5679,17 +5680,17 @@ class ICCall_Scripted : public ICMonitoredStub protected: HeapPtrScript calleeScript_; - HeapPtrObject templateObject_; + HeapPtrNativeObject templateObject_; uint32_t pcOffset_; ICCall_Scripted(JitCode *stubCode, ICStub *firstMonitorStub, - HandleScript calleeScript, HandleObject templateObject, + HandleScript calleeScript, HandleNativeObject templateObject, uint32_t pcOffset); public: static inline ICCall_Scripted *New( ICStubSpace *space, JitCode *code, ICStub *firstMonitorStub, - HandleScript calleeScript, HandleObject 
templateObject, + HandleScript calleeScript, HandleNativeObject templateObject, uint32_t pcOffset) { if (!code) @@ -5704,7 +5705,7 @@ class ICCall_Scripted : public ICMonitoredStub HeapPtrScript &calleeScript() { return calleeScript_; } - HeapPtrObject &templateObject() { + HeapPtrNativeObject &templateObject() { return templateObject_; } @@ -5752,7 +5753,7 @@ class ICCallScriptedCompiler : public ICCallStubCompiler { bool isConstructing_; bool isSpread_; RootedScript calleeScript_; - RootedObject templateObject_; + RootedNativeObject templateObject_; uint32_t pcOffset_; bool generateStubCode(MacroAssembler &masm); @@ -5763,7 +5764,7 @@ class ICCallScriptedCompiler : public ICCallStubCompiler { public: ICCallScriptedCompiler(JSContext *cx, ICStub *firstMonitorStub, - HandleScript calleeScript, HandleObject templateObject, + HandleScript calleeScript, HandleNativeObject templateObject, bool isConstructing, bool isSpread, uint32_t pcOffset) : ICCallStubCompiler(cx, ICStub::Call_Scripted), firstMonitorStub_(firstMonitorStub), @@ -5801,7 +5802,7 @@ class ICCall_Native : public ICMonitoredStub protected: HeapPtrFunction callee_; - HeapPtrObject templateObject_; + HeapPtrNativeObject templateObject_; uint32_t pcOffset_; #if defined(JS_ARM_SIMULATOR) || defined(JS_MIPS_SIMULATOR) @@ -5809,12 +5810,12 @@ class ICCall_Native : public ICMonitoredStub #endif ICCall_Native(JitCode *stubCode, ICStub *firstMonitorStub, - HandleFunction callee, HandleObject templateObject, + HandleFunction callee, HandleNativeObject templateObject, uint32_t pcOffset); public: static inline ICCall_Native *New(ICStubSpace *space, JitCode *code, ICStub *firstMonitorStub, - HandleFunction callee, HandleObject templateObject, + HandleFunction callee, HandleNativeObject templateObject, uint32_t pcOffset) { if (!code) @@ -5829,7 +5830,7 @@ class ICCall_Native : public ICMonitoredStub HeapPtrFunction &callee() { return callee_; } - HeapPtrObject &templateObject() { + HeapPtrNativeObject &templateObject() 
{ return templateObject_; } @@ -5853,7 +5854,7 @@ class ICCall_Native : public ICMonitoredStub bool isConstructing_; bool isSpread_; RootedFunction callee_; - RootedObject templateObject_; + RootedNativeObject templateObject_; uint32_t pcOffset_; bool generateStubCode(MacroAssembler &masm); @@ -5864,7 +5865,7 @@ class ICCall_Native : public ICMonitoredStub public: Compiler(JSContext *cx, ICStub *firstMonitorStub, - HandleFunction callee, HandleObject templateObject, + HandleFunction callee, HandleNativeObject templateObject, bool isConstructing, bool isSpread, uint32_t pcOffset) : ICCallStubCompiler(cx, ICStub::Call_Native), firstMonitorStub_(firstMonitorStub), @@ -6422,9 +6423,9 @@ class ICRest_Fallback : public ICFallbackStub { friend class ICStubSpace; - HeapPtrObject templateObject_; + HeapPtrArrayObject templateObject_; - ICRest_Fallback(JitCode *stubCode, JSObject *templateObject) + ICRest_Fallback(JitCode *stubCode, ArrayObject *templateObject) : ICFallbackStub(ICStub::Rest_Fallback, stubCode), templateObject_(templateObject) { } @@ -6432,23 +6433,23 @@ class ICRest_Fallback : public ICFallbackStub static const uint32_t MAX_OPTIMIZED_STUBS = 8; static inline ICRest_Fallback *New(ICStubSpace *space, JitCode *code, - JSObject *templateObject) { + ArrayObject *templateObject) { if (!code) return nullptr; return space->allocate(code, templateObject); } - HeapPtrObject &templateObject() { + HeapPtrArrayObject &templateObject() { return templateObject_; } class Compiler : public ICStubCompiler { protected: - RootedObject templateObject; + RootedArrayObject templateObject; bool generateStubCode(MacroAssembler &masm); public: - Compiler(JSContext *cx, JSObject *templateObject) + Compiler(JSContext *cx, ArrayObject *templateObject) : ICStubCompiler(cx, ICStub::Rest_Fallback), templateObject(cx, templateObject) { } @@ -6553,7 +6554,7 @@ IsCacheableDOMProxy(JSObject *obj) if (handler->family() != GetDOMProxyHandlerFamily()) return false; - if (obj->numFixedSlots() <= 
GetDOMProxyExpandoSlot()) + if (obj->fakeNativeNumFixedSlots() <= GetDOMProxyExpandoSlot()) return false; return true; diff --git a/js/src/jit/BaselineInspector.cpp b/js/src/jit/BaselineInspector.cpp index b678b136d10..8726ef5dfe6 100644 --- a/js/src/jit/BaselineInspector.cpp +++ b/js/src/jit/BaselineInspector.cpp @@ -413,7 +413,7 @@ BaselineInspector::hasSeenDoubleResult(jsbytecode *pc) return false; } -JSObject * +NativeObject * BaselineInspector::getTemplateObject(jsbytecode *pc) { if (!hasBaselineScript()) @@ -429,7 +429,7 @@ BaselineInspector::getTemplateObject(jsbytecode *pc) case ICStub::Rest_Fallback: return stub->toRest_Fallback()->templateObject(); case ICStub::Call_Scripted: - if (JSObject *obj = stub->toCall_Scripted()->templateObject()) + if (NativeObject *obj = stub->toCall_Scripted()->templateObject()) return obj; break; default: @@ -440,7 +440,7 @@ BaselineInspector::getTemplateObject(jsbytecode *pc) return nullptr; } -JSObject * +NativeObject * BaselineInspector::getTemplateObjectForNative(jsbytecode *pc, Native native) { if (!hasBaselineScript()) diff --git a/js/src/jit/BaselineInspector.h b/js/src/jit/BaselineInspector.h index 640a2f7a8dc..9520c5be5b2 100644 --- a/js/src/jit/BaselineInspector.h +++ b/js/src/jit/BaselineInspector.h @@ -109,8 +109,8 @@ class BaselineInspector bool hasSeenDoubleResult(jsbytecode *pc); bool hasSeenNonStringIterMore(jsbytecode *pc); - JSObject *getTemplateObject(jsbytecode *pc); - JSObject *getTemplateObjectForNative(jsbytecode *pc, Native native); + NativeObject *getTemplateObject(jsbytecode *pc); + NativeObject *getTemplateObjectForNative(jsbytecode *pc, Native native); DeclEnvObject *templateDeclEnvObject(); CallObject *templateCallObject(); diff --git a/js/src/jit/CodeGenerator.cpp b/js/src/jit/CodeGenerator.cpp index 88168c3c828..ade397b54c2 100644 --- a/js/src/jit/CodeGenerator.cpp +++ b/js/src/jit/CodeGenerator.cpp @@ -1376,7 +1376,7 @@ CodeGenerator::visitTableSwitchV(LTableSwitchV *ins) return 
emitTableSwitchDispatch(mir, index, ToRegisterOrInvalid(ins->tempPointer())); } -typedef JSObject *(*DeepCloneObjectLiteralFn)(JSContext *, HandleObject, NewObjectKind); +typedef NativeObject *(*DeepCloneObjectLiteralFn)(JSContext *, HandleNativeObject, NewObjectKind); static const VMFunction DeepCloneObjectLiteralInfo = FunctionInfo(DeepCloneObjectLiteral); @@ -1634,7 +1634,7 @@ CodeGenerator::visitPointer(LPointer *lir) bool CodeGenerator::visitSlots(LSlots *lir) { - Address slots(ToRegister(lir->object()), JSObject::offsetOfSlots()); + Address slots(ToRegister(lir->object()), NativeObject::offsetOfSlots()); masm.loadPtr(slots, ToRegister(lir->output())); return true; } @@ -1722,12 +1722,12 @@ CodeGenerator::emitGetPropertyPolymorphic(LInstruction *ins, Register obj, Regis Shape *shape = mir->shape(i); if (shape->slot() < shape->numFixedSlots()) { // Fixed slot. - masm.loadTypedOrValue(Address(obj, JSObject::getFixedSlotOffset(shape->slot())), + masm.loadTypedOrValue(Address(obj, NativeObject::getFixedSlotOffset(shape->slot())), output); } else { // Dynamic slot. uint32_t offset = (shape->slot() - shape->numFixedSlots()) * sizeof(js::Value); - masm.loadPtr(Address(obj, JSObject::offsetOfSlots()), scratch); + masm.loadPtr(Address(obj, NativeObject::offsetOfSlots()), scratch); masm.loadTypedOrValue(Address(scratch, offset), output); } @@ -1784,13 +1784,13 @@ CodeGenerator::emitSetPropertyPolymorphic(LInstruction *ins, Register obj, Regis Shape *shape = mir->shape(i); if (shape->slot() < shape->numFixedSlots()) { // Fixed slot. - Address addr(obj, JSObject::getFixedSlotOffset(shape->slot())); + Address addr(obj, NativeObject::getFixedSlotOffset(shape->slot())); if (mir->needsBarrier()) emitPreBarrier(addr); masm.storeConstantOrRegister(value, addr); } else { // Dynamic slot. 
- masm.loadPtr(Address(obj, JSObject::offsetOfSlots()), scratch); + masm.loadPtr(Address(obj, NativeObject::offsetOfSlots()), scratch); Address addr(scratch, (shape->slot() - shape->numFixedSlots()) * sizeof(js::Value)); if (mir->needsBarrier()) emitPreBarrier(addr); @@ -1833,7 +1833,7 @@ CodeGenerator::visitSetPropertyPolymorphicT(LSetPropertyPolymorphicT *ins) bool CodeGenerator::visitElements(LElements *lir) { - Address elements(ToRegister(lir->object()), JSObject::offsetOfElements()); + Address elements(ToRegister(lir->object()), NativeObject::offsetOfElements()); masm.loadPtr(elements, ToRegister(lir->output())); return true; } @@ -1887,9 +1887,9 @@ CodeGenerator::visitMaybeToDoubleElement(LMaybeToDoubleElement *lir) return true; } -typedef bool (*CopyElementsForWriteFn)(ThreadSafeContext *, JSObject *); +typedef bool (*CopyElementsForWriteFn)(ThreadSafeContext *, NativeObject *); static const VMFunction CopyElementsForWriteInfo = - FunctionInfo(JSObject::CopyElementsForWrite); + FunctionInfo(NativeObject::CopyElementsForWrite); bool CodeGenerator::visitMaybeCopyElementsForWrite(LMaybeCopyElementsForWrite *lir) @@ -1902,7 +1902,7 @@ CodeGenerator::visitMaybeCopyElementsForWrite(LMaybeCopyElementsForWrite *lir) if (!ool) return false; - masm.loadPtr(Address(object, JSObject::offsetOfElements()), temp); + masm.loadPtr(Address(object, NativeObject::offsetOfElements()), temp); masm.branchTest32(Assembler::NonZero, Address(temp, ObjectElements::offsetOfFlags()), Imm32(ObjectElements::COPY_ON_WRITE), @@ -2278,7 +2278,7 @@ CodeGenerator::visitCallDOMNative(LCallDOMNative *call) masm.computeEffectiveAddress(Address(StackPointer, 2 * sizeof(Value)), argArgs); // GetReservedSlot(obj, DOM_OBJECT_SLOT).toPrivate() - masm.loadPrivate(Address(obj, JSObject::getFixedSlotOffset(0)), argPrivate); + masm.loadPrivate(Address(obj, NativeObject::getFixedSlotOffset(0)), argPrivate); // Push argc from the call instruction into what will become the IonExitFrame 
masm.Push(Imm32(call->numStackArgs())); @@ -3595,10 +3595,10 @@ CodeGenerator::visitNewArray(LNewArray *lir) MOZ_ASSERT(gen->info().executionMode() == SequentialExecution); Register objReg = ToRegister(lir->output()); Register tempReg = ToRegister(lir->temp()); - JSObject *templateObject = lir->mir()->templateObject(); + ArrayObject *templateObject = lir->mir()->templateObject(); DebugOnly count = lir->mir()->count(); - MOZ_ASSERT(count < JSObject::NELEMENTS_LIMIT); + MOZ_ASSERT(count < NativeObject::NELEMENTS_LIMIT); if (lir->mir()->shouldUseVM()) return visitNewArrayCallVM(lir); @@ -3627,7 +3627,7 @@ CodeGenerator::visitNewArrayCopyOnWrite(LNewArrayCopyOnWrite *lir) { Register objReg = ToRegister(lir->output()); Register tempReg = ToRegister(lir->temp()); - JSObject *templateObject = lir->mir()->templateObject(); + ArrayObject *templateObject = lir->mir()->templateObject(); gc::InitialHeap initialHeap = lir->mir()->initialHeap(); // If we have a template object, we can inline call object creation. @@ -3662,7 +3662,7 @@ class OutOfLineNewObject : public OutOfLineCodeBase } }; -typedef JSObject *(*NewInitObjectFn)(JSContext *, HandleObject); +typedef JSObject *(*NewInitObjectFn)(JSContext *, HandleNativeObject); static const VMFunction NewInitObjectInfo = FunctionInfo(NewInitObject); typedef JSObject *(*NewInitObjectWithClassPrototypeFn)(JSContext *, HandleObject); @@ -3700,7 +3700,7 @@ CodeGenerator::visitNewObjectVMCall(LNewObject *lir) } static bool -ShouldInitFixedSlots(LInstruction *lir, JSObject *templateObj) +ShouldInitFixedSlots(LInstruction *lir, NativeObject *templateObj) { // Look for StoreFixedSlot instructions following an object allocation // that write to this object before a GC is triggered or this object is @@ -3721,8 +3721,8 @@ ShouldInitFixedSlots(LInstruction *lir, JSObject *templateObj) // Keep track of the fixed slots that are initialized. initializedSlots is // a bit mask with a bit for each slot. 
- MOZ_ASSERT(nfixed <= JSObject::MAX_FIXED_SLOTS); - static_assert(JSObject::MAX_FIXED_SLOTS <= 32, "Slot bits must fit in 32 bits"); + MOZ_ASSERT(nfixed <= NativeObject::MAX_FIXED_SLOTS); + static_assert(NativeObject::MAX_FIXED_SLOTS <= 32, "Slot bits must fit in 32 bits"); uint32_t initializedSlots = 0; uint32_t numInitialized = 0; @@ -3789,7 +3789,7 @@ CodeGenerator::visitNewObject(LNewObject *lir) MOZ_ASSERT(gen->info().executionMode() == SequentialExecution); Register objReg = ToRegister(lir->output()); Register tempReg = ToRegister(lir->temp()); - JSObject *templateObject = lir->mir()->templateObject(); + NativeObject *templateObject = lir->mir()->templateObject(); if (lir->mir()->shouldUseVM()) return visitNewObjectVMCall(lir); @@ -3824,7 +3824,7 @@ CodeGenerator::visitNewDeclEnvObject(LNewDeclEnvObject *lir) { Register objReg = ToRegister(lir->output()); Register tempReg = ToRegister(lir->temp()); - JSObject *templateObj = lir->mir()->templateObj(); + NativeObject *templateObj = lir->mir()->templateObj(); CompileInfo &info = lir->mir()->block()->info(); // If we have a template object, we can inline call object creation. 
@@ -3853,7 +3853,7 @@ CodeGenerator::visitNewCallObject(LNewCallObject *lir) Register objReg = ToRegister(lir->output()); Register tempReg = ToRegister(lir->temp()); - JSObject *templateObj = lir->mir()->templateObject(); + NativeObject *templateObj = lir->mir()->templateObject(); OutOfLineCode *ool = oolCallVM(NewCallObjectInfo, lir, (ArgList(), ImmGCPtr(templateObj->lastProperty()), @@ -3905,12 +3905,12 @@ CodeGenerator::visitNewCallObjectPar(LNewCallObjectPar *lir) Register cxReg = ToRegister(lir->forkJoinContext()); Register tempReg1 = ToRegister(lir->getTemp0()); Register tempReg2 = ToRegister(lir->getTemp1()); - JSObject *templateObj = lir->mir()->templateObj(); + NativeObject *templateObj = lir->mir()->templateObj(); return emitAllocateGCThingPar(lir, resultReg, cxReg, tempReg1, tempReg2, templateObj); } -typedef JSObject *(*ExtendArrayParFn)(ForkJoinContext*, JSObject*, uint32_t); +typedef ArrayObject *(*ExtendArrayParFn)(ForkJoinContext*, ArrayObject*, uint32_t); static const VMFunction ExtendArrayParInfo = FunctionInfo(ExtendArrayPar); @@ -3922,7 +3922,7 @@ CodeGenerator::visitNewDenseArrayPar(LNewDenseArrayPar *lir) Register tempReg0 = ToRegister(lir->getTemp0()); Register tempReg1 = ToRegister(lir->getTemp1()); Register tempReg2 = ToRegister(lir->getTemp2()); - JSObject *templateObj = lir->mir()->templateObject(); + ArrayObject *templateObj = lir->mir()->templateObject(); if (!emitAllocateGCThingPar(lir, tempReg2, cxReg, tempReg0, tempReg1, templateObj)) return false; @@ -3976,7 +3976,7 @@ CodeGenerator::visitNewPar(LNewPar *lir) Register cxReg = ToRegister(lir->forkJoinContext()); Register tempReg1 = ToRegister(lir->getTemp0()); Register tempReg2 = ToRegister(lir->getTemp1()); - JSObject *templateObject = lir->mir()->templateObject(); + NativeObject *templateObject = lir->mir()->templateObject(); return emitAllocateGCThingPar(lir, objReg, cxReg, tempReg1, tempReg2, templateObject); } @@ -4006,7 +4006,7 @@ static const VMFunction NewGCThingParInfo = 
bool CodeGenerator::emitAllocateGCThingPar(LInstruction *lir, Register objReg, Register cxReg, - Register tempReg1, Register tempReg2, JSObject *templateObj) + Register tempReg1, Register tempReg2, NativeObject *templateObj) { MOZ_ASSERT(lir->mirRaw()); MOZ_ASSERT(lir->mirRaw()->isInstruction()); @@ -4103,7 +4103,7 @@ CodeGenerator::visitMutateProto(LMutateProto *lir) return callVM(MutatePrototypeInfo, lir); } -typedef bool(*InitPropFn)(JSContext *cx, HandleObject obj, +typedef bool(*InitPropFn)(JSContext *cx, HandleNativeObject obj, HandlePropertyName name, HandleValue value); static const VMFunction InitPropInfo = FunctionInfo(InitProp); @@ -4192,7 +4192,7 @@ static const VMFunction NewGCObjectInfo = bool CodeGenerator::visitCreateThisWithTemplate(LCreateThisWithTemplate *lir) { - JSObject *templateObject = lir->mir()->templateObject(); + NativeObject *templateObject = lir->mir()->templateObject(); gc::AllocKind allocKind = templateObject->asTenured().getAllocKind(); gc::InitialHeap initialHeap = lir->mir()->initialHeap(); Register objReg = ToRegister(lir->output()); @@ -5947,7 +5947,7 @@ CodeGenerator::visitStoreElementHoleV(LStoreElementHoleV *lir) return true; } -typedef bool (*SetDenseElementFn)(JSContext *, HandleObject, int32_t, HandleValue, +typedef bool (*SetDenseElementFn)(JSContext *, HandleNativeObject, int32_t, HandleValue, bool strict); typedef bool (*SetDenseElementParFn)(ForkJoinContext *, HandleObject, int32_t, HandleValue, bool); static const VMFunctionsModal SetDenseElementInfo = VMFunctionsModal( @@ -6069,7 +6069,7 @@ CodeGenerator::emitArrayPopShift(LInstruction *lir, const MArrayPopShift *mir, R masm.branchTestNeedsIncrementalBarrier(Assembler::NonZero, ool->entry()); // Load elements and length. 
- masm.loadPtr(Address(obj, JSObject::offsetOfElements()), elementsTemp); + masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), elementsTemp); masm.load32(Address(elementsTemp, ObjectElements::offsetOfLength()), lengthTemp); // VM call if length != initializedLength. @@ -6152,7 +6152,7 @@ CodeGenerator::visitArrayPopShiftT(LArrayPopShiftT *lir) return emitArrayPopShift(lir, lir->mir(), obj, elements, length, out); } -typedef bool (*ArrayPushDenseFn)(JSContext *, HandleObject, HandleValue, uint32_t *); +typedef bool (*ArrayPushDenseFn)(JSContext *, HandleArrayObject, HandleValue, uint32_t *); static const VMFunction ArrayPushDenseInfo = FunctionInfo(jit::ArrayPushDense); @@ -6165,7 +6165,7 @@ CodeGenerator::emitArrayPush(LInstruction *lir, const MArrayPush *mir, Register return false; // Load elements and length. - masm.loadPtr(Address(obj, JSObject::offsetOfElements()), elementsTemp); + masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), elementsTemp); masm.load32(Address(elementsTemp, ObjectElements::offsetOfLength()), length); Int32Key key = Int32Key(length); @@ -6227,11 +6227,11 @@ CodeGenerator::visitArrayConcat(LArrayConcat *lir) // inline and pass it to the stub. Else, we just pass nullptr and the stub falls // back to a slow path. 
Label fail, call; - masm.loadPtr(Address(lhs, JSObject::offsetOfElements()), temp1); + masm.loadPtr(Address(lhs, NativeObject::offsetOfElements()), temp1); masm.load32(Address(temp1, ObjectElements::offsetOfInitializedLength()), temp2); masm.branch32(Assembler::NotEqual, Address(temp1, ObjectElements::offsetOfLength()), temp2, &fail); - masm.loadPtr(Address(rhs, JSObject::offsetOfElements()), temp1); + masm.loadPtr(Address(rhs, NativeObject::offsetOfElements()), temp1); masm.load32(Address(temp1, ObjectElements::offsetOfInitializedLength()), temp2); masm.branch32(Assembler::NotEqual, Address(temp1, ObjectElements::offsetOfLength()), temp2, &fail); @@ -6326,7 +6326,7 @@ CodeGenerator::visitIteratorStart(LIteratorStart *lir) // Ensure the object does not have any elements. The presence of dense // elements is not captured by the shape tests above. masm.branchPtr(Assembler::NotEqual, - Address(obj, JSObject::offsetOfElements()), + Address(obj, NativeObject::offsetOfElements()), ImmPtr(js::emptyObjectElements), ool->entry()); @@ -6571,7 +6571,7 @@ CodeGenerator::visitRunOncePrologue(LRunOncePrologue *lir) typedef JSObject *(*InitRestParameterFn)(JSContext *, uint32_t, Value *, HandleObject, HandleObject); typedef JSObject *(*InitRestParameterParFn)(ForkJoinContext *, uint32_t, Value *, - HandleObject, HandleObject); + HandleObject, HandleArrayObject); static const VMFunctionsModal InitRestParameterInfo = VMFunctionsModal( FunctionInfo(InitRestParameter), FunctionInfo(InitRestParameterPar)); @@ -6624,7 +6624,7 @@ CodeGenerator::visitRest(LRest *lir) Register temp1 = ToRegister(lir->getTemp(1)); Register temp2 = ToRegister(lir->getTemp(2)); unsigned numFormals = lir->mir()->numFormals(); - JSObject *templateObject = lir->mir()->templateObject(); + ArrayObject *templateObject = lir->mir()->templateObject(); Label joinAlloc, failAlloc; masm.createGCObject(temp2, temp0, templateObject, gc::DefaultHeap, &failAlloc); @@ -6650,7 +6650,7 @@ CodeGenerator::visitRestPar(LRestPar 
*lir) Register temp1 = ToRegister(lir->getTemp(1)); Register temp2 = ToRegister(lir->getTemp(2)); unsigned numFormals = lir->mir()->numFormals(); - JSObject *templateObject = lir->mir()->templateObject(); + ArrayObject *templateObject = lir->mir()->templateObject(); if (!emitAllocateGCThingPar(lir, temp2, cx, temp0, temp1, templateObject)) return false; @@ -7213,7 +7213,7 @@ CodeGenerator::visitLoadFixedSlotV(LLoadFixedSlotV *ins) size_t slot = ins->mir()->slot(); ValueOperand result = GetValueOutput(ins); - masm.loadValue(Address(obj, JSObject::getFixedSlotOffset(slot)), result); + masm.loadValue(Address(obj, NativeObject::getFixedSlotOffset(slot)), result); return true; } @@ -7225,7 +7225,7 @@ CodeGenerator::visitLoadFixedSlotT(LLoadFixedSlotT *ins) AnyRegister result = ToAnyRegister(ins->getDef(0)); MIRType type = ins->mir()->type(); - masm.loadUnboxedValue(Address(obj, JSObject::getFixedSlotOffset(slot)), type, result); + masm.loadUnboxedValue(Address(obj, NativeObject::getFixedSlotOffset(slot)), type, result); return true; } @@ -7238,7 +7238,7 @@ CodeGenerator::visitStoreFixedSlotV(LStoreFixedSlotV *ins) const ValueOperand value = ToValue(ins, LStoreFixedSlotV::Value); - Address address(obj, JSObject::getFixedSlotOffset(slot)); + Address address(obj, NativeObject::getFixedSlotOffset(slot)); if (ins->mir()->needsBarrier()) emitPreBarrier(address); @@ -7260,7 +7260,7 @@ CodeGenerator::visitStoreFixedSlotT(LStoreFixedSlotT *ins) ? ConstantOrRegister(*value->toConstant()) : TypedOrValueRegister(valueType, ToAnyRegister(value)); - Address address(obj, JSObject::getFixedSlotOffset(slot)); + Address address(obj, NativeObject::getFixedSlotOffset(slot)); if (ins->mir()->needsBarrier()) emitPreBarrier(address); @@ -8595,14 +8595,14 @@ CodeGenerator::visitGetDOMProperty(LGetDOMProperty *ins) // It's a bit annoying to redo these slot calculations, which duplcate // LSlots and a few other things like that, but I'm not sure there's a // way to reuse those here. 
- if (slot < JSObject::MAX_FIXED_SLOTS) { - masm.loadValue(Address(ObjectReg, JSObject::getFixedSlotOffset(slot)), + if (slot < NativeObject::MAX_FIXED_SLOTS) { + masm.loadValue(Address(ObjectReg, NativeObject::getFixedSlotOffset(slot)), JSReturnOperand); } else { // It's a dynamic slot. - slot -= JSObject::MAX_FIXED_SLOTS; + slot -= NativeObject::MAX_FIXED_SLOTS; // Use PrivateReg as a scratch register for the slots pointer. - masm.loadPtr(Address(ObjectReg, JSObject::offsetOfSlots()), + masm.loadPtr(Address(ObjectReg, NativeObject::offsetOfSlots()), PrivateReg); masm.loadValue(Address(PrivateReg, slot*sizeof(js::Value)), JSReturnOperand); @@ -8625,7 +8625,7 @@ CodeGenerator::visitGetDOMProperty(LGetDOMProperty *ins) masm.Push(ObjectReg); // GetReservedSlot(obj, DOM_OBJECT_SLOT).toPrivate() - masm.loadPrivate(Address(ObjectReg, JSObject::getFixedSlotOffset(0)), PrivateReg); + masm.loadPrivate(Address(ObjectReg, NativeObject::getFixedSlotOffset(0)), PrivateReg); // Rooting will happen at GC time. masm.movePtr(StackPointer, ObjectReg); @@ -8677,7 +8677,7 @@ CodeGenerator::visitGetDOMMember(LGetDOMMember *ins) size_t slot = ins->mir()->domMemberSlotIndex(); ValueOperand result = GetValueOutput(ins); - masm.loadValue(Address(object, JSObject::getFixedSlotOffset(slot)), result); + masm.loadValue(Address(object, NativeObject::getFixedSlotOffset(slot)), result); return true; } @@ -8704,7 +8704,7 @@ CodeGenerator::visitSetDOMProperty(LSetDOMProperty *ins) masm.Push(ObjectReg); // GetReservedSlot(obj, DOM_OBJECT_SLOT).toPrivate() - masm.loadPrivate(Address(ObjectReg, JSObject::getFixedSlotOffset(0)), PrivateReg); + masm.loadPrivate(Address(ObjectReg, NativeObject::getFixedSlotOffset(0)), PrivateReg); // Rooting will happen at GC time. 
masm.movePtr(StackPointer, ObjectReg); diff --git a/js/src/jit/CodeGenerator.h b/js/src/jit/CodeGenerator.h index efac65ff97b..e8296285821 100644 --- a/js/src/jit/CodeGenerator.h +++ b/js/src/jit/CodeGenerator.h @@ -388,7 +388,7 @@ class CodeGenerator : public CodeGeneratorSpecific bool emitAllocateGCThingPar(LInstruction *lir, Register objReg, Register cxReg, Register tempReg1, Register tempReg2, - JSObject *templateObj); + NativeObject *templateObj); bool emitCallToUncompiledScriptPar(LInstruction *lir, Register calleeReg); diff --git a/js/src/jit/IonAnalysis.cpp b/js/src/jit/IonAnalysis.cpp index b67db7e8833..f1de86155f6 100644 --- a/js/src/jit/IonAnalysis.cpp +++ b/js/src/jit/IonAnalysis.cpp @@ -2689,7 +2689,7 @@ jit::ConvertLinearInequality(TempAllocator &alloc, MBasicBlock *block, const Lin static bool AnalyzePoppedThis(JSContext *cx, types::TypeObject *type, MDefinition *thisValue, MInstruction *ins, bool definitelyExecuted, - HandleObject baseobj, + HandleNativeObject baseobj, Vector *initializerList, Vector *accessedProperties, bool *phandled) @@ -2714,7 +2714,7 @@ AnalyzePoppedThis(JSContext *cx, types::TypeObject *type, } // Ignore assignments to properties that were already written to. - if (baseobj->nativeLookup(cx, NameToId(setprop->name()))) { + if (baseobj->lookup(cx, NameToId(setprop->name()))) { *phandled = true; return true; } @@ -2744,7 +2744,7 @@ AnalyzePoppedThis(JSContext *cx, types::TypeObject *type, // Add the property to the object, being careful not to update type information. DebugOnly slotSpan = baseobj->slotSpan(); - MOZ_ASSERT(!baseobj->nativeContainsPure(id)); + MOZ_ASSERT(!baseobj->containsPure(id)); if (!baseobj->addDataProperty(cx, id, baseobj->slotSpan(), JSPROP_ENUMERATE)) return false; MOZ_ASSERT(baseobj->slotSpan() != slotSpan); @@ -2793,7 +2793,7 @@ AnalyzePoppedThis(JSContext *cx, types::TypeObject *type, * definite property before it is assigned could incorrectly hit. 
*/ RootedId id(cx, NameToId(get->name())); - if (!baseobj->nativeLookup(cx, id) && !accessedProperties->append(get->name())) + if (!baseobj->lookup(cx, id) && !accessedProperties->append(get->name())) return false; if (!types::AddClearDefiniteGetterSetterForPrototypeChain(cx, type, id)) { @@ -2823,7 +2823,7 @@ CmpInstructions(const void *a, const void *b) bool jit::AnalyzeNewScriptDefiniteProperties(JSContext *cx, JSFunction *fun, - types::TypeObject *type, HandleObject baseobj, + types::TypeObject *type, HandleNativeObject baseobj, Vector *initializerList) { MOZ_ASSERT(cx->compartment()->activeAnalysis); diff --git a/js/src/jit/IonAnalysis.h b/js/src/jit/IonAnalysis.h index 7bafb83d63e..3b964c6ceb3 100644 --- a/js/src/jit/IonAnalysis.h +++ b/js/src/jit/IonAnalysis.h @@ -162,7 +162,7 @@ ConvertLinearInequality(TempAllocator &alloc, MBasicBlock *block, const LinearSu bool AnalyzeNewScriptDefiniteProperties(JSContext *cx, JSFunction *fun, - types::TypeObject *type, HandleObject baseobj, + types::TypeObject *type, HandleNativeObject baseobj, Vector *initializerList); bool diff --git a/js/src/jit/IonBuilder.cpp b/js/src/jit/IonBuilder.cpp index 25bce093457..f943fdec923 100644 --- a/js/src/jit/IonBuilder.cpp +++ b/js/src/jit/IonBuilder.cpp @@ -29,6 +29,7 @@ #include "jit/CompileInfo-inl.h" #include "jit/ExecutionMode-inl.h" +#include "vm/ObjectImpl-inl.h" using namespace js; using namespace js::jit; @@ -5809,7 +5810,7 @@ IonBuilder::jsop_compare(JSOp op) bool IonBuilder::jsop_newarray(uint32_t count) { - JSObject *templateObject = inspector->getTemplateObject(pc); + NativeObject *templateObject = inspector->getTemplateObject(pc); if (!templateObject) { if (info().executionMode() == ArgumentsUsageAnalysis) { MUnknownValue *unknown = MUnknownValue::New(alloc()); @@ -5855,7 +5856,7 @@ IonBuilder::jsop_newarray(uint32_t count) bool IonBuilder::jsop_newarray_copyonwrite() { - JSObject *templateObject = types::GetCopyOnWriteObject(script(), pc); + ArrayObject *templateObject 
= types::GetCopyOnWriteObject(script(), pc); // The baseline compiler should have ensured the template object has a type // with the copy on write flag set already. During the arguments usage @@ -5958,7 +5959,7 @@ IonBuilder::jsop_initelem_array() MElements *elements = MElements::New(alloc(), obj); current->add(elements); - JSObject *templateObject = obj->toNewArray()->templateObject(); + NativeObject *templateObject = obj->toNewArray()->templateObject(); if (templateObject->shouldConvertDoubleElements()) { MInstruction *valueDouble = MToDouble::New(alloc(), value); @@ -5999,7 +6000,7 @@ IonBuilder::jsop_initprop(PropertyName *name) MDefinition *value = current->pop(); MDefinition *obj = current->peek(-1); - JSObject *templateObject = nullptr; + NativeObject *templateObject = nullptr; Shape *shape = nullptr; bool useSlowPath = false; @@ -8576,8 +8577,7 @@ IonBuilder::jsop_arguments() bool IonBuilder::jsop_rest() { - JSObject *templateObject = inspector->getTemplateObject(pc); - MOZ_ASSERT(templateObject->is()); + ArrayObject *templateObject = &inspector->getTemplateObject(pc)->as(); if (inliningDepth_ == 0) { // We don't know anything about the callee. 
@@ -9323,13 +9323,13 @@ IonBuilder::getPropTryDefiniteSlot(bool *emitted, MDefinition *obj, PropertyName } MInstruction *load; - if (slot < JSObject::MAX_FIXED_SLOTS) { + if (slot < NativeObject::MAX_FIXED_SLOTS) { load = MLoadFixedSlot::New(alloc(), obj, slot); } else { MInstruction *slots = MSlots::New(alloc(), obj); current->add(slots); - load = MLoadSlot::New(alloc(), slots, slot - JSObject::MAX_FIXED_SLOTS); + load = MLoadSlot::New(alloc(), slots, slot - NativeObject::MAX_FIXED_SLOTS); } if (barrier == BarrierKind::NoBarrier) @@ -9977,7 +9977,7 @@ IonBuilder::setPropTryDefiniteSlot(bool *emitted, MDefinition *obj, } MInstruction *store; - if (slot < JSObject::MAX_FIXED_SLOTS) { + if (slot < NativeObject::MAX_FIXED_SLOTS) { store = MStoreFixedSlot::New(alloc(), obj, slot, value); if (writeBarrier) store->toStoreFixedSlot()->setNeedsBarrier(); @@ -9985,7 +9985,7 @@ IonBuilder::setPropTryDefiniteSlot(bool *emitted, MDefinition *obj, MInstruction *slots = MSlots::New(alloc(), obj); current->add(slots); - store = MStoreSlot::New(alloc(), slots, slot - JSObject::MAX_FIXED_SLOTS, value); + store = MStoreSlot::New(alloc(), slots, slot - NativeObject::MAX_FIXED_SLOTS, value); if (writeBarrier) store->toStoreSlot()->setNeedsBarrier(); } diff --git a/js/src/jit/IonCaches.cpp b/js/src/jit/IonCaches.cpp index c2ff75cd54c..e12ae12df3f 100644 --- a/js/src/jit/IonCaches.cpp +++ b/js/src/jit/IonCaches.cpp @@ -654,15 +654,15 @@ IsCacheableGetPropCallPropertyOp(JSObject *obj, JSObject *holder, Shape *shape) } static inline void -EmitLoadSlot(MacroAssembler &masm, JSObject *holder, Shape *shape, Register holderReg, +EmitLoadSlot(MacroAssembler &masm, NativeObject *holder, Shape *shape, Register holderReg, TypedOrValueRegister output, Register scratchReg) { MOZ_ASSERT(holder); if (holder->isFixedSlot(shape->slot())) { - Address addr(holderReg, JSObject::getFixedSlotOffset(shape->slot())); + Address addr(holderReg, NativeObject::getFixedSlotOffset(shape->slot())); 
masm.loadTypedOrValue(addr, output); } else { - masm.loadPtr(Address(holderReg, JSObject::offsetOfSlots()), scratchReg); + masm.loadPtr(Address(holderReg, NativeObject::offsetOfSlots()), scratchReg); Address addr(scratchReg, holder->dynamicSlotIndex(shape->slot()) * sizeof(Value)); masm.loadTypedOrValue(addr, output); @@ -681,7 +681,7 @@ GenerateDOMProxyChecks(JSContext *cx, MacroAssembler &masm, JSObject *obj, // 2. The object does not have expando properties, or has an expando // which is known to not have the desired property. Address handlerAddr(object, ProxyObject::offsetOfHandler()); - Address expandoSlotAddr(object, JSObject::getFixedSlotOffset(GetDOMProxyExpandoSlot())); + Address expandoSlotAddr(object, NativeObject::getFixedSlotOffset(GetDOMProxyExpandoSlot())); // Check that object is a DOMProxy. masm.branchPrivatePtr(Assembler::NotEqual, handlerAddr, @@ -700,7 +700,7 @@ GenerateDOMProxyChecks(JSContext *cx, MacroAssembler &masm, JSObject *obj, Label failDOMProxyCheck; Label domProxyOk; - Value expandoVal = obj->getFixedSlot(GetDOMProxyExpandoSlot()); + Value expandoVal = obj->fakeNativeGetSlot(GetDOMProxyExpandoSlot()); masm.loadValue(expandoSlotAddr, tempVal); if (!expandoVal.isObject() && !expandoVal.isUndefined()) { @@ -726,7 +726,7 @@ GenerateDOMProxyChecks(JSContext *cx, MacroAssembler &masm, JSObject *obj, masm.branchTestUndefined(Assembler::Equal, tempVal, &domProxyOk); if (expandoVal.isObject()) { - MOZ_ASSERT(!expandoVal.toObject().nativeContains(cx, name)); + MOZ_ASSERT(!expandoVal.toObject().as().contains(cx, name)); // Reference object has an expando object that doesn't define the name. Check that // the incoming object has an expando object with the same shape. 
@@ -750,7 +750,7 @@ GenerateDOMProxyChecks(JSContext *cx, MacroAssembler &masm, JSObject *obj, static void GenerateReadSlot(JSContext *cx, IonScript *ion, MacroAssembler &masm, - IonCache::StubAttacher &attacher, JSObject *obj, JSObject *holder, + IonCache::StubAttacher &attacher, JSObject *obj, NativeObject *holder, Shape *shape, Register object, TypedOrValueRegister output, Label *failures = nullptr) { @@ -1075,7 +1075,7 @@ GenerateArrayLength(JSContext *cx, MacroAssembler &masm, IonCache::StubAttacher outReg = output.typedReg().gpr(); } - masm.loadPtr(Address(object, JSObject::offsetOfElements()), outReg); + masm.loadPtr(Address(object, NativeObject::offsetOfElements()), outReg); masm.load32(Address(outReg, ObjectElements::offsetOfLength()), outReg); // The length is an unsigned int, but the value encodes a signed int. @@ -1152,7 +1152,7 @@ template static GetPropertyIC::NativeGetPropCacheability CanAttachNativeGetProp(typename GetPropCache::Context cx, const GetPropCache &cache, HandleObject obj, HandlePropertyName name, - MutableHandleObject holder, MutableHandleShape shape, + MutableHandleNativeObject holder, MutableHandleShape shape, bool skipArrayLen = false) { if (!obj || !obj->isNative()) @@ -1241,7 +1241,7 @@ GetPropertyIC::tryAttachNative(JSContext *cx, HandleScript outerScript, IonScrip MOZ_ASSERT(outerScript->ionScript() == ion); RootedShape shape(cx); - RootedObject holder(cx); + RootedNativeObject holder(cx); NativeGetPropCacheability type = CanAttachNativeGetProp(cx, *this, obj, name, &holder, &shape); @@ -1448,7 +1448,7 @@ GetPropertyIC::tryAttachDOMProxyUnshadowed(JSContext *cx, HandleScript outerScri MOZ_ASSERT(output().hasValue()); RootedObject checkObj(cx, obj->getTaggedProto().toObjectOrNull()); - RootedObject holder(cx); + RootedNativeObject holder(cx); RootedShape shape(cx); NativeGetPropCacheability canCache = @@ -1844,7 +1844,7 @@ GetPropertyParIC::reset() bool GetPropertyParIC::attachReadSlot(LockedJSContext &cx, IonScript *ion, 
HandleObject obj, - HandleObject holder, HandleShape shape) + HandleNativeObject holder, HandleShape shape) { // Ready to generate the read slot stub. DispatchStubPrepender attacher(*this); @@ -1910,7 +1910,7 @@ GetPropertyParIC::update(ForkJoinContext *cx, size_t cacheIndex, { RootedShape shape(ncx); - RootedObject holder(ncx); + RootedNativeObject holder(ncx); RootedPropertyName name(ncx, cache.name()); GetPropertyIC::NativeGetPropCacheability canCache = @@ -1963,7 +1963,7 @@ IonCache::destroy() static void GenerateSetSlot(JSContext *cx, MacroAssembler &masm, IonCache::StubAttacher &attacher, - JSObject *obj, Shape *shape, Register object, ConstantOrRegister value, + NativeObject *obj, Shape *shape, Register object, ConstantOrRegister value, bool needsTypeBarrier, bool checkTypeset) { MOZ_ASSERT(obj->isNative()); @@ -2003,7 +2003,7 @@ GenerateSetSlot(JSContext *cx, MacroAssembler &masm, IonCache::StubAttacher &att } if (obj->isFixedSlot(shape->slot())) { - Address addr(object, JSObject::getFixedSlotOffset(shape->slot())); + Address addr(object, NativeObject::getFixedSlotOffset(shape->slot())); if (cx->zone()->needsIncrementalBarrier()) masm.callPreBarrier(addr, MIRType_Value); @@ -2011,7 +2011,7 @@ GenerateSetSlot(JSContext *cx, MacroAssembler &masm, IonCache::StubAttacher &att masm.storeConstantOrRegister(value, addr); } else { Register slotsReg = object; - masm.loadPtr(Address(object, JSObject::offsetOfSlots()), slotsReg); + masm.loadPtr(Address(object, NativeObject::offsetOfSlots()), slotsReg); Address addr(slotsReg, obj->dynamicSlotIndex(shape->slot()) * sizeof(Value)); @@ -2034,7 +2034,7 @@ GenerateSetSlot(JSContext *cx, MacroAssembler &masm, IonCache::StubAttacher &att bool SetPropertyIC::attachSetSlot(JSContext *cx, HandleScript outerScript, IonScript *ion, - HandleObject obj, HandleShape shape, bool checkTypeset) + HandleNativeObject obj, HandleShape shape, bool checkTypeset) { MacroAssembler masm(cx, ion, outerScript, profilerLeavePc_); 
RepatchStubAppender attacher(*this); @@ -2523,7 +2523,7 @@ SetPropertyIC::attachCallSetter(JSContext *cx, HandleScript outerScript, IonScri static void GenerateAddSlot(JSContext *cx, MacroAssembler &masm, IonCache::StubAttacher &attacher, - JSObject *obj, Shape *oldShape, types::TypeObject *oldType, + NativeObject *obj, Shape *oldShape, types::TypeObject *oldType, Register object, ConstantOrRegister value, bool checkTypeset) { @@ -2609,12 +2609,12 @@ GenerateAddSlot(JSContext *cx, MacroAssembler &masm, IonCache::StubAttacher &att // Set the value on the object. Since this is an add, obj->lastProperty() // must be the shape of the property we are adding. if (obj->isFixedSlot(newShape->slot())) { - Address addr(object, JSObject::getFixedSlotOffset(newShape->slot())); + Address addr(object, NativeObject::getFixedSlotOffset(newShape->slot())); masm.storeConstantOrRegister(value, addr); } else { Register slotsReg = object; - masm.loadPtr(Address(object, JSObject::offsetOfSlots()), slotsReg); + masm.loadPtr(Address(object, NativeObject::offsetOfSlots()), slotsReg); Address addr(slotsReg, obj->dynamicSlotIndex(newShape->slot()) * sizeof(Value)); masm.storeConstantOrRegister(value, addr); @@ -2633,7 +2633,7 @@ GenerateAddSlot(JSContext *cx, MacroAssembler &masm, IonCache::StubAttacher &att bool SetPropertyIC::attachAddSlot(JSContext *cx, HandleScript outerScript, IonScript *ion, - HandleObject obj, HandleShape oldShape, HandleTypeObject oldType, + HandleNativeObject obj, HandleShape oldShape, HandleTypeObject oldType, bool checkTypeset) { MOZ_ASSERT_IF(!needsTypeBarrier(), !checkTypeset); @@ -2683,14 +2683,12 @@ CanInlineSetPropTypeCheck(JSObject *obj, jsid id, ConstantOrRegister val, bool * } static bool -IsPropertySetInlineable(HandleObject obj, HandleId id, MutableHandleShape pshape, +IsPropertySetInlineable(NativeObject *obj, HandleId id, MutableHandleShape pshape, ConstantOrRegister val, bool needsTypeBarrier, bool *checkTypeset) { - MOZ_ASSERT(obj->isNative()); - // 
Do a pure non-proto chain climbing lookup. See note in // CanAttachNativeGetProp. - pshape.set(obj->nativeLookupPure(id)); + pshape.set(obj->lookupPure(id)); if (!pshape) return false; @@ -2711,16 +2709,14 @@ IsPropertySetInlineable(HandleObject obj, HandleId id, MutableHandleShape pshape } static bool -IsPropertyAddInlineable(HandleObject obj, HandleId id, ConstantOrRegister val, uint32_t oldSlots, +IsPropertyAddInlineable(NativeObject *obj, HandleId id, ConstantOrRegister val, uint32_t oldSlots, HandleShape oldShape, bool needsTypeBarrier, bool *checkTypeset) { - MOZ_ASSERT(obj->isNative()); - // If the shape of the object did not change, then this was not an add. if (obj->lastProperty() == oldShape) return false; - Shape *shape = obj->nativeLookupPure(id); + Shape *shape = obj->lookupPure(id); if (!shape || shape->inDictionary() || !shape->hasSlot() || !shape->hasDefaultSetter()) return false; @@ -2749,7 +2745,7 @@ IsPropertyAddInlineable(HandleObject obj, HandleId id, ConstantOrRegister val, u return false; // If prototype defines this property in a non-plain way, don't optimize - Shape *protoShape = proto->nativeLookupPure(id); + Shape *protoShape = proto->as().lookupPure(id); if (protoShape && !protoShape->hasDefaultSetter()) return false; @@ -2781,14 +2777,14 @@ IsPropertyAddInlineable(HandleObject obj, HandleId id, ConstantOrRegister val, u static SetPropertyIC::NativeSetPropCacheability CanAttachNativeSetProp(HandleObject obj, HandleId id, ConstantOrRegister val, - bool needsTypeBarrier, MutableHandleObject holder, + bool needsTypeBarrier, MutableHandleNativeObject holder, MutableHandleShape shape, bool *checkTypeset) { if (!obj->isNative()) return SetPropertyIC::CanAttachNone; // See if the property exists on the object. 
- if (IsPropertySetInlineable(obj, id, shape, val, needsTypeBarrier, checkTypeset)) + if (IsPropertySetInlineable(&obj->as(), id, shape, val, needsTypeBarrier, checkTypeset)) return SetPropertyIC::CanAttachSetSlot; // If we couldn't find the property on the object itself, do a full, but @@ -2859,13 +2855,14 @@ SetPropertyIC::update(JSContext *cx, size_t cacheIndex, HandleObject obj, } RootedShape shape(cx); - RootedObject holder(cx); + RootedNativeObject holder(cx); bool checkTypeset; canCache = CanAttachNativeSetProp(obj, id, cache.value(), cache.needsTypeBarrier(), &holder, &shape, &checkTypeset); if (!addedSetterStub && canCache == CanAttachSetSlot) { - if (!cache.attachSetSlot(cx, script, ion, obj, shape, checkTypeset)) + RootedNativeObject nobj(cx, &obj->as()); + if (!cache.attachSetSlot(cx, script, ion, nobj, shape, checkTypeset)) return false; addedSetterStub = true; } @@ -2877,7 +2874,7 @@ SetPropertyIC::update(JSContext *cx, size_t cacheIndex, HandleObject obj, } } - uint32_t oldSlots = obj->numDynamicSlots(); + uint32_t oldSlots = obj->fakeNativeNumDynamicSlots(); RootedShape oldShape(cx, obj->lastProperty()); // Set/Add the property on the object, the inlined cache are setup for the next execution. @@ -2887,10 +2884,12 @@ SetPropertyIC::update(JSContext *cx, size_t cacheIndex, HandleObject obj, // The property did not exist before, now we can try to inline the property add. 
bool checkTypeset; if (!addedSetterStub && canCache == MaybeCanAttachAddSlot && - IsPropertyAddInlineable(obj, id, cache.value(), oldSlots, oldShape, cache.needsTypeBarrier(), + IsPropertyAddInlineable(&obj->as(), id, + cache.value(), oldSlots, oldShape, cache.needsTypeBarrier(), &checkTypeset)) { - if (!cache.attachAddSlot(cx, script, ion, obj, oldShape, oldType, checkTypeset)) + RootedNativeObject nobj(cx, &obj->as()); + if (!cache.attachAddSlot(cx, script, ion, nobj, oldShape, oldType, checkTypeset)) return false; } @@ -2916,10 +2915,14 @@ SetPropertyParIC::update(ForkJoinContext *cx, size_t cacheIndex, HandleObject ob RootedValue v(cx, value); RootedId id(cx, AtomToId(cache.name())); + if (!obj->isNative()) + return false; + RootedNativeObject nobj(cx, &obj->as()); + // Avoid unnecessary locking if cannot attach stubs. if (!cache.canAttachStub()) { return baseops::SetPropertyHelper( - cx, obj, obj, id, baseops::Qualified, &v, cache.strict()); + cx, nobj, nobj, id, baseops::Qualified, &v, cache.strict()); } SetPropertyIC::NativeSetPropCacheability canCache = SetPropertyIC::CanAttachNone; @@ -2931,27 +2934,27 @@ SetPropertyParIC::update(ForkJoinContext *cx, size_t cacheIndex, HandleObject ob if (cache.canAttachStub()) { bool alreadyStubbed; - if (!cache.hasOrAddStubbedShape(ncx, obj->lastProperty(), &alreadyStubbed)) + if (!cache.hasOrAddStubbedShape(ncx, nobj->lastProperty(), &alreadyStubbed)) return cx->setPendingAbortFatal(ParallelBailoutOutOfMemory); if (alreadyStubbed) { return baseops::SetPropertyHelper( - cx, obj, obj, id, baseops::Qualified, &v, cache.strict()); + cx, nobj, nobj, id, baseops::Qualified, &v, cache.strict()); } // If the object has a lazy type, we need to de-lazify it, but // this is not safe in parallel. 
- if (obj->hasLazyType()) + if (nobj->hasLazyType()) return false; { RootedShape shape(cx); - RootedObject holder(cx); + RootedNativeObject holder(cx); bool checkTypeset; - canCache = CanAttachNativeSetProp(obj, id, cache.value(), cache.needsTypeBarrier(), + canCache = CanAttachNativeSetProp(nobj, id, cache.value(), cache.needsTypeBarrier(), &holder, &shape, &checkTypeset); if (canCache == SetPropertyIC::CanAttachSetSlot) { - if (!cache.attachSetSlot(ncx, ion, obj, shape, checkTypeset)) + if (!cache.attachSetSlot(ncx, ion, nobj, shape, checkTypeset)) return cx->setPendingAbortFatal(ParallelBailoutOutOfMemory); attachedStub = true; } @@ -2959,11 +2962,11 @@ SetPropertyParIC::update(ForkJoinContext *cx, size_t cacheIndex, HandleObject ob } } - uint32_t oldSlots = obj->numDynamicSlots(); - RootedShape oldShape(cx, obj->lastProperty()); - RootedTypeObject oldType(cx, obj->type()); + uint32_t oldSlots = nobj->numDynamicSlots(); + RootedShape oldShape(cx, nobj->lastProperty()); + RootedTypeObject oldType(cx, nobj->type()); - if (!baseops::SetPropertyHelper(cx, obj, obj, id, baseops::Qualified, &v, + if (!baseops::SetPropertyHelper(cx, nobj, nobj, id, baseops::Qualified, &v, cache.strict())) { return false; @@ -2971,11 +2974,12 @@ SetPropertyParIC::update(ForkJoinContext *cx, size_t cacheIndex, HandleObject ob bool checkTypeset; if (!attachedStub && canCache == SetPropertyIC::MaybeCanAttachAddSlot && - IsPropertyAddInlineable(obj, id, cache.value(), oldSlots, oldShape, cache.needsTypeBarrier(), + IsPropertyAddInlineable(nobj, id, + cache.value(), oldSlots, oldShape, cache.needsTypeBarrier(), &checkTypeset)) { LockedJSContext ncx(cx); - if (cache.canAttachStub() && !cache.attachAddSlot(ncx, ion, obj, oldShape, oldType, checkTypeset)) + if (cache.canAttachStub() && !cache.attachAddSlot(ncx, ion, nobj, oldShape, oldType, checkTypeset)) return cx->setPendingAbortFatal(ParallelBailoutOutOfMemory); } @@ -2983,7 +2987,7 @@ SetPropertyParIC::update(ForkJoinContext *cx, size_t 
cacheIndex, HandleObject ob } bool -SetPropertyParIC::attachSetSlot(LockedJSContext &cx, IonScript *ion, HandleObject obj, +SetPropertyParIC::attachSetSlot(LockedJSContext &cx, IonScript *ion, HandleNativeObject obj, HandleShape shape, bool checkTypeset) { MacroAssembler masm(cx, ion); @@ -2994,7 +2998,7 @@ SetPropertyParIC::attachSetSlot(LockedJSContext &cx, IonScript *ion, HandleObjec } bool -SetPropertyParIC::attachAddSlot(LockedJSContext &cx, IonScript *ion, HandleObject obj, +SetPropertyParIC::attachAddSlot(LockedJSContext &cx, IonScript *ion, HandleNativeObject obj, HandleShape oldShape, HandleTypeObject oldType, bool checkTypeset) { MOZ_ASSERT_IF(!needsTypeBarrier(), !checkTypeset); @@ -3038,7 +3042,7 @@ GetElementIC::attachGetProp(JSContext *cx, HandleScript outerScript, IonScript * { MOZ_ASSERT(index().reg().hasValue()); - RootedObject holder(cx); + RootedNativeObject holder(cx); RootedShape shape(cx); GetPropertyIC::NativeGetPropCacheability canCache = @@ -3165,7 +3169,7 @@ GenerateDenseElement(JSContext *cx, MacroAssembler &masm, IonCache::StubAttacher // Load elements vector. masm.push(object); - masm.loadPtr(Address(object, JSObject::offsetOfElements()), object); + masm.loadPtr(Address(object, NativeObject::offsetOfElements()), object); Label hole; @@ -3673,7 +3677,7 @@ GenerateSetDenseElement(JSContext *cx, MacroAssembler &masm, IonCache::StubAttac { // Load obj->elements. Register elements = temp; - masm.loadPtr(Address(object, JSObject::offsetOfElements()), elements); + masm.loadPtr(Address(object, NativeObject::offsetOfElements()), elements); // Compute the location of the element. 
BaseIndex target(elements, index, TimesEight); @@ -3964,7 +3968,7 @@ SetElementParIC::update(ForkJoinContext *cx, size_t cacheIndex, HandleObject obj bool GetElementParIC::attachReadSlot(LockedJSContext &cx, IonScript *ion, HandleObject obj, - const Value &idval, HandlePropertyName name, HandleObject holder, + const Value &idval, HandlePropertyName name, HandleNativeObject holder, HandleShape shape) { MacroAssembler masm(cx, ion); @@ -4040,7 +4044,7 @@ GetElementParIC::update(ForkJoinContext *cx, size_t cacheIndex, HandleObject obj GetElementIC::canAttachGetProp(obj, idval, id)) { RootedShape shape(ncx); - RootedObject holder(ncx); + RootedNativeObject holder(ncx); RootedPropertyName name(ncx, JSID_TO_ATOM(id)->asPropertyName()); GetPropertyIC::NativeGetPropCacheability canCache = @@ -4245,7 +4249,7 @@ BindNameIC::update(JSContext *cx, size_t cacheIndex, HandleObject scopeChain) bool NameIC::attachReadSlot(JSContext *cx, HandleScript outerScript, IonScript *ion, HandleObject scopeChain, HandleObject holderBase, - HandleObject holder, HandleShape shape) + HandleNativeObject holder, HandleShape shape) { MacroAssembler masm(cx, ion, outerScript, profilerLeavePc_); Label failures; @@ -4381,8 +4385,11 @@ NameIC::update(JSContext *cx, size_t cacheIndex, HandleObject scopeChain, if (cache.canAttachStub()) { if (IsCacheableNameReadSlot(scopeChain, obj, holder, shape, pc, cache.outputReg())) { - if (!cache.attachReadSlot(cx, outerScript, ion, scopeChain, obj, holder, shape)) + if (!cache.attachReadSlot(cx, outerScript, ion, scopeChain, obj, + holder.as(), shape)) + { return false; + } } else if (IsCacheableNameCallGetter(scopeChain, obj, holder, shape)) { if (!cache.attachCallGetter(cx, outerScript, ion, scopeChain, obj, holder, shape, returnAddr)) return false; diff --git a/js/src/jit/IonCaches.h b/js/src/jit/IonCaches.h index e5485c7ffc6..3e02b335862 100644 --- a/js/src/jit/IonCaches.h +++ b/js/src/jit/IonCaches.h @@ -738,14 +738,14 @@ class SetPropertyIC : public 
RepatchIonCache }; bool attachSetSlot(JSContext *cx, HandleScript outerScript, IonScript *ion, - HandleObject obj, HandleShape shape, bool checkTypeset); + HandleNativeObject obj, HandleShape shape, bool checkTypeset); bool attachCallSetter(JSContext *cx, HandleScript outerScript, IonScript *ion, HandleObject obj, HandleObject holder, HandleShape shape, void *returnAddr); bool attachAddSlot(JSContext *cx, HandleScript outerScript, IonScript *ion, - HandleObject obj, HandleShape oldShape, HandleTypeObject oldType, + HandleNativeObject obj, HandleShape oldShape, HandleTypeObject oldType, bool checkTypeset); bool attachGenericProxy(JSContext *cx, HandleScript outerScript, IonScript *ion, @@ -1029,7 +1029,7 @@ class NameIC : public RepatchIonCache bool attachReadSlot(JSContext *cx, HandleScript outerScript, IonScript *ion, HandleObject scopeChain, HandleObject holderBase, - HandleObject holder, HandleShape shape); + HandleNativeObject holder, HandleShape shape); bool attachCallGetter(JSContext *cx, HandleScript outerScript, IonScript *ion, HandleObject scopeChain, HandleObject obj, HandleObject holder, @@ -1156,7 +1156,8 @@ class GetPropertyParIC : public ParallelIonCache bool allowGetters() const { return false; } bool allowArrayLength(Context, HandleObject) const { return true; } - bool attachReadSlot(LockedJSContext &cx, IonScript *ion, HandleObject obj, HandleObject holder, + bool attachReadSlot(LockedJSContext &cx, IonScript *ion, + HandleObject obj, HandleNativeObject holder, HandleShape shape); bool attachArrayLength(LockedJSContext &cx, IonScript *ion, HandleObject obj); bool attachTypedArrayLength(LockedJSContext &cx, IonScript *ion, HandleObject obj); @@ -1217,7 +1218,7 @@ class GetElementParIC : public ParallelIonCache bool allowArrayLength(Context, HandleObject) const { return false; } bool attachReadSlot(LockedJSContext &cx, IonScript *ion, HandleObject obj, const Value &idval, - HandlePropertyName name, HandleObject holder, HandleShape shape); + 
HandlePropertyName name, HandleNativeObject holder, HandleShape shape); bool attachDenseElement(LockedJSContext &cx, IonScript *ion, HandleObject obj, const Value &idval); bool attachTypedArrayElement(LockedJSContext &cx, IonScript *ion, HandleObject tarr, @@ -1272,9 +1273,9 @@ class SetPropertyParIC : public ParallelIonCache return needsTypeBarrier_; } - bool attachSetSlot(LockedJSContext &cx, IonScript *ion, HandleObject obj, HandleShape shape, + bool attachSetSlot(LockedJSContext &cx, IonScript *ion, HandleNativeObject obj, HandleShape shape, bool checkTypeset); - bool attachAddSlot(LockedJSContext &cx, IonScript *ion, HandleObject obj, + bool attachAddSlot(LockedJSContext &cx, IonScript *ion, HandleNativeObject obj, HandleShape oldShape, HandleTypeObject oldType, bool checkTypeset); static bool update(ForkJoinContext *cx, size_t cacheIndex, HandleObject obj, diff --git a/js/src/jit/IonMacroAssembler.cpp b/js/src/jit/IonMacroAssembler.cpp index 7e8eefe5a5f..4980d42e782 100644 --- a/js/src/jit/IonMacroAssembler.cpp +++ b/js/src/jit/IonMacroAssembler.cpp @@ -587,7 +587,7 @@ MacroAssembler::allocateObject(Register result, Register slots, gc::AllocKind al } void -MacroAssembler::newGCThing(Register result, Register temp, JSObject *templateObj, +MacroAssembler::newGCThing(Register result, Register temp, NativeObject *templateObj, gc::InitialHeap initialHeap, Label *fail) { // This method does not initialize the object: if external slots get @@ -602,7 +602,7 @@ MacroAssembler::newGCThing(Register result, Register temp, JSObject *templateObj } void -MacroAssembler::createGCObject(Register obj, Register temp, JSObject *templateObj, +MacroAssembler::createGCObject(Register obj, Register temp, NativeObject *templateObj, gc::InitialHeap initialHeap, Label *fail, bool initFixedSlots) { uint32_t nDynamicSlots = templateObj->numDynamicSlots(); @@ -734,7 +734,7 @@ MacroAssembler::newGCTenuredThingPar(Register result, Register cx, void MacroAssembler::newGCThingPar(Register 
result, Register cx, Register tempReg1, Register tempReg2, - JSObject *templateObject, Label *fail) + NativeObject *templateObject, Label *fail) { gc::AllocKind allocKind = templateObject->asTenured().getAllocKind(); MOZ_ASSERT(allocKind >= gc::FINALIZE_OBJECT0 && allocKind <= gc::FINALIZE_OBJECT_LAST); @@ -758,12 +758,12 @@ MacroAssembler::newGCFatInlineStringPar(Register result, Register cx, Register t } void -MacroAssembler::copySlotsFromTemplate(Register obj, const JSObject *templateObj, +MacroAssembler::copySlotsFromTemplate(Register obj, const NativeObject *templateObj, uint32_t start, uint32_t end) { uint32_t nfixed = Min(templateObj->numFixedSlots(), end); for (unsigned i = start; i < nfixed; i++) - storeValue(templateObj->getFixedSlot(i), Address(obj, JSObject::getFixedSlotOffset(i))); + storeValue(templateObj->getFixedSlot(i), Address(obj, NativeObject::getFixedSlotOffset(i))); } void @@ -791,7 +791,7 @@ MacroAssembler::fillSlotsWithUndefined(Address base, Register temp, uint32_t sta } static uint32_t -FindStartOfUndefinedSlots(JSObject *templateObj, uint32_t nslots) +FindStartOfUndefinedSlots(NativeObject *templateObj, uint32_t nslots) { MOZ_ASSERT(nslots == templateObj->lastProperty()->slotSpan(templateObj->getClass())); MOZ_ASSERT(nslots > 0); @@ -803,7 +803,7 @@ FindStartOfUndefinedSlots(JSObject *templateObj, uint32_t nslots) } void -MacroAssembler::initGCSlots(Register obj, Register slots, JSObject *templateObj, +MacroAssembler::initGCSlots(Register obj, Register slots, NativeObject *templateObj, bool initFixedSlots) { // Slots of non-array objects are required to be initialized. @@ -830,7 +830,7 @@ MacroAssembler::initGCSlots(Register obj, Register slots, JSObject *templateObj, // Fill the rest of the fixed slots with undefined. 
if (initFixedSlots) { - fillSlotsWithUndefined(Address(obj, JSObject::getFixedSlotOffset(startOfUndefined)), slots, + fillSlotsWithUndefined(Address(obj, NativeObject::getFixedSlotOffset(startOfUndefined)), slots, startOfUndefined, nfixed); } @@ -838,14 +838,14 @@ MacroAssembler::initGCSlots(Register obj, Register slots, JSObject *templateObj, // We are short one register to do this elegantly. Borrow the obj // register briefly for our slots base address. push(obj); - loadPtr(Address(obj, JSObject::offsetOfSlots()), obj); + loadPtr(Address(obj, NativeObject::offsetOfSlots()), obj); fillSlotsWithUndefined(Address(obj, 0), slots, 0, ndynamic); pop(obj); } } void -MacroAssembler::initGCThing(Register obj, Register slots, JSObject *templateObj, +MacroAssembler::initGCThing(Register obj, Register slots, NativeObject *templateObj, bool initFixedSlots) { // Fast initialization of an empty object returned by allocateObject(). @@ -855,21 +855,21 @@ MacroAssembler::initGCThing(Register obj, Register slots, JSObject *templateObj, storePtr(ImmGCPtr(templateObj->lastProperty()), Address(obj, JSObject::offsetOfShape())); storePtr(ImmGCPtr(templateObj->type()), Address(obj, JSObject::offsetOfType())); if (templateObj->hasDynamicSlots()) - storePtr(slots, Address(obj, JSObject::offsetOfSlots())); + storePtr(slots, Address(obj, NativeObject::offsetOfSlots())); else - storePtr(ImmPtr(nullptr), Address(obj, JSObject::offsetOfSlots())); + storePtr(ImmPtr(nullptr), Address(obj, NativeObject::offsetOfSlots())); if (templateObj->denseElementsAreCopyOnWrite()) { storePtr(ImmPtr((const Value *) templateObj->getDenseElements()), - Address(obj, JSObject::offsetOfElements())); + Address(obj, NativeObject::offsetOfElements())); } else if (templateObj->is()) { Register temp = slots; MOZ_ASSERT(!templateObj->getDenseInitializedLength()); - int elementsOffset = JSObject::offsetOfFixedElements(); + int elementsOffset = NativeObject::offsetOfFixedElements(); computeEffectiveAddress(Address(obj, 
elementsOffset), temp); - storePtr(temp, Address(obj, JSObject::offsetOfElements())); + storePtr(temp, Address(obj, NativeObject::offsetOfElements())); // Fill in the elements header. store32(Imm32(templateObj->getDenseCapacity()), @@ -884,14 +884,14 @@ MacroAssembler::initGCThing(Register obj, Register slots, JSObject *templateObj, Address(obj, elementsOffset + ObjectElements::offsetOfFlags())); MOZ_ASSERT(!templateObj->hasPrivate()); } else { - storePtr(ImmPtr(emptyObjectElements), Address(obj, JSObject::offsetOfElements())); + storePtr(ImmPtr(emptyObjectElements), Address(obj, NativeObject::offsetOfElements())); initGCSlots(obj, slots, templateObj, initFixedSlots); if (templateObj->hasPrivate()) { uint32_t nfixed = templateObj->numFixedSlots(); storePtr(ImmPtr(templateObj->getPrivate()), - Address(obj, JSObject::getPrivateDataOffset(nfixed))); + Address(obj, NativeObject::getPrivateDataOffset(nfixed))); } } diff --git a/js/src/jit/IonMacroAssembler.h b/js/src/jit/IonMacroAssembler.h index 67deac321f4..18359d14b14 100644 --- a/js/src/jit/IonMacroAssembler.h +++ b/js/src/jit/IonMacroAssembler.h @@ -364,7 +364,7 @@ class MacroAssembler : public MacroAssemblerSpecific } void loadObjPrivate(Register obj, uint32_t nfixed, Register dest) { - loadPtr(Address(obj, JSObject::getPrivateDataOffset(nfixed)), dest); + loadPtr(Address(obj, NativeObject::getPrivateDataOffset(nfixed)), dest); } void loadObjProto(Register obj, Register dest) { @@ -811,20 +811,20 @@ class MacroAssembler : public MacroAssemblerSpecific void allocateObject(Register result, Register slots, gc::AllocKind allocKind, uint32_t nDynamicSlots, gc::InitialHeap initialHeap, Label *fail); void allocateNonObject(Register result, Register temp, gc::AllocKind allocKind, Label *fail); - void copySlotsFromTemplate(Register obj, const JSObject *templateObj, + void copySlotsFromTemplate(Register obj, const NativeObject *templateObj, uint32_t start, uint32_t end); void fillSlotsWithUndefined(Address addr, Register 
temp, uint32_t start, uint32_t end); - void initGCSlots(Register obj, Register temp, JSObject *templateObj, bool initFixedSlots); + void initGCSlots(Register obj, Register temp, NativeObject *templateObj, bool initFixedSlots); public: void callMallocStub(size_t nbytes, Register result, Label *fail); void callFreeStub(Register slots); - void createGCObject(Register result, Register temp, JSObject *templateObj, + void createGCObject(Register result, Register temp, NativeObject *templateObj, gc::InitialHeap initialHeap, Label *fail, bool initFixedSlots = true); - void newGCThing(Register result, Register temp, JSObject *templateObj, + void newGCThing(Register result, Register temp, NativeObject *templateObj, gc::InitialHeap initialHeap, Label *fail); - void initGCThing(Register obj, Register temp, JSObject *templateObj, + void initGCThing(Register obj, Register temp, NativeObject *templateObj, bool initFixedSlots = true); void newGCString(Register result, Register temp, Label *fail); @@ -839,7 +839,7 @@ class MacroAssembler : public MacroAssemblerSpecific void newGCTenuredThingPar(Register result, Register cx, Register tempReg1, Register tempReg2, gc::AllocKind allocKind, Label *fail); void newGCThingPar(Register result, Register cx, Register tempReg1, Register tempReg2, - JSObject *templateObject, Label *fail); + NativeObject *templateObject, Label *fail); void newGCStringPar(Register result, Register cx, Register tempReg1, Register tempReg2, Label *fail); void newGCFatInlineStringPar(Register result, Register cx, Register tempReg1, Register tempReg2, diff --git a/js/src/jit/MCallOptimize.cpp b/js/src/jit/MCallOptimize.cpp index c1b2182611e..4884c365f4e 100644 --- a/js/src/jit/MCallOptimize.cpp +++ b/js/src/jit/MCallOptimize.cpp @@ -17,6 +17,7 @@ #include "jsscriptinlines.h" +#include "vm/ObjectImpl-inl.h" #include "vm/StringObject-inl.h" using mozilla::ArrayLength; @@ -301,7 +302,7 @@ IonBuilder::inlineArray(CallInfo &callInfo) uint32_t initLength = 0; 
AllocatingBehaviour allocating = NewArray_Unallocating; - JSObject *templateObject = inspector->getTemplateObjectForNative(pc, js_Array); + NativeObject *templateObject = inspector->getTemplateObjectForNative(pc, js_Array); if (!templateObject) return InliningStatus_NotInlined; MOZ_ASSERT(templateObject->is()); @@ -335,7 +336,7 @@ IonBuilder::inlineArray(CallInfo &callInfo) // Negative lengths generate a RangeError, unhandled by the inline path. initLength = arg->toConstant()->value().toInt32(); - if (initLength >= JSObject::NELEMENTS_LIMIT) + if (initLength >= NativeObject::NELEMENTS_LIMIT) return InliningStatus_NotInlined; // Make sure initLength matches the template object's length. This is @@ -672,7 +673,7 @@ IonBuilder::inlineArrayConcat(CallInfo &callInfo) } // Inline the call. - JSObject *templateObj = inspector->getTemplateObjectForNative(pc, js::array_concat); + NativeObject *templateObj = inspector->getTemplateObjectForNative(pc, js::array_concat); if (!templateObj || templateObj->type() != baseThisType) return InliningStatus_NotInlined; MOZ_ASSERT(templateObj->is()); @@ -680,7 +681,8 @@ IonBuilder::inlineArrayConcat(CallInfo &callInfo) callInfo.setImplicitlyUsedUnchecked(); MArrayConcat *ins = MArrayConcat::New(alloc(), constraints(), callInfo.thisArg(), callInfo.getArg(0), - templateObj, templateObj->type()->initialHeap(constraints())); + &templateObj->as(), + templateObj->type()->initialHeap(constraints())); current->add(ins); current->push(ins); @@ -1748,7 +1750,7 @@ IonBuilder::inlineNewDenseArrayForParallelExecution(CallInfo &callInfo) return InliningStatus_NotInlined; types::TypeObject *typeObject = returnTypes->getTypeObject(0); - JSObject *templateObject = inspector->getTemplateObjectForNative(pc, intrinsic_NewDenseArray); + NativeObject *templateObject = inspector->getTemplateObjectForNative(pc, intrinsic_NewDenseArray); if (!templateObject || templateObject->type() != typeObject) return InliningStatus_NotInlined; @@ -1757,7 +1759,7 @@ 
IonBuilder::inlineNewDenseArrayForParallelExecution(CallInfo &callInfo) MNewDenseArrayPar *newObject = MNewDenseArrayPar::New(alloc(), graph().forkJoinContext(), callInfo.getArg(0), - templateObject); + &templateObject->as()); current->add(newObject); current->push(newObject); diff --git a/js/src/jit/MIR.cpp b/js/src/jit/MIR.cpp index 3ae47f113a0..5fadc1ec105 100644 --- a/js/src/jit/MIR.cpp +++ b/js/src/jit/MIR.cpp @@ -3180,7 +3180,7 @@ MBeta::printOpcode(FILE *fp) const bool MNewObject::shouldUseVM() const { - JSObject *obj = templateObject(); + NativeObject *obj = templateObject(); return obj->hasSingletonType() || obj->hasDynamicSlots(); } @@ -3197,7 +3197,7 @@ MObjectState::MObjectState(MDefinition *obj) // This instruction is only used as a summary for bailout paths. setResultType(MIRType_Object); setRecoveredOnBailout(); - JSObject *templateObject = nullptr; + NativeObject *templateObject = nullptr; if (obj->isNewObject()) templateObject = obj->toNewObject()->templateObject(); else @@ -3241,7 +3241,7 @@ MObjectState::Copy(TempAllocator &alloc, MObjectState *state) bool MNewArray::shouldUseVM() const { - MOZ_ASSERT(count() < JSObject::NELEMENTS_LIMIT); + MOZ_ASSERT(count() < NativeObject::NELEMENTS_LIMIT); size_t arraySlots = gc::GetGCKindSlots(templateObject()->asTenured().getAllocKind()) - ObjectElements::VALUES_PER_HEADER; diff --git a/js/src/jit/MIR.h b/js/src/jit/MIR.h index 08e39e9c26f..22687ed1c5e 100644 --- a/js/src/jit/MIR.h +++ b/js/src/jit/MIR.h @@ -22,6 +22,7 @@ #include "jit/MOpcodes.h" #include "jit/TypedObjectPrediction.h" #include "jit/TypePolicy.h" +#include "vm/ArrayObject.h" #include "vm/ScopeObject.h" #include "vm/TypedArrayCommon.h" @@ -2269,6 +2270,7 @@ class AlwaysTenured }; typedef AlwaysTenured AlwaysTenuredObject; +typedef AlwaysTenured AlwaysTenuredNativeObject; typedef AlwaysTenured AlwaysTenuredFunction; typedef AlwaysTenured AlwaysTenuredScript; typedef AlwaysTenured AlwaysTenuredPropertyName; @@ -2292,7 +2294,7 @@ class MNewArray 
: public MUnaryInstruction initialHeap_(initialHeap), allocating_(allocating) { - JSObject *obj = templateObject(); + ArrayObject *obj = templateObject(); setResultType(MIRType_Object); if (!obj->hasSingletonType()) setResultTypeSet(MakeSingletonTypeSet(constraints, obj)); @@ -2312,8 +2314,8 @@ class MNewArray : public MUnaryInstruction return count_; } - JSObject *templateObject() const { - return &getOperand(0)->toConstant()->value().toObject(); + ArrayObject *templateObject() const { + return &getOperand(0)->toConstant()->value().toObject().as(); } gc::InitialHeap initialHeap() const { @@ -2348,10 +2350,10 @@ class MNewArray : public MUnaryInstruction class MNewArrayCopyOnWrite : public MNullaryInstruction { - AlwaysTenuredObject templateObject_; + AlwaysTenured templateObject_; gc::InitialHeap initialHeap_; - MNewArrayCopyOnWrite(types::CompilerConstraintList *constraints, JSObject *templateObject, + MNewArrayCopyOnWrite(types::CompilerConstraintList *constraints, ArrayObject *templateObject, gc::InitialHeap initialHeap) : templateObject_(templateObject), initialHeap_(initialHeap) @@ -2366,13 +2368,13 @@ class MNewArrayCopyOnWrite : public MNullaryInstruction static MNewArrayCopyOnWrite *New(TempAllocator &alloc, types::CompilerConstraintList *constraints, - JSObject *templateObject, + ArrayObject *templateObject, gc::InitialHeap initialHeap) { return new(alloc) MNewArrayCopyOnWrite(constraints, templateObject, initialHeap); } - JSObject *templateObject() const { + ArrayObject *templateObject() const { return templateObject_; } @@ -2429,8 +2431,8 @@ class MNewObject : public MUnaryInstruction return templateObjectIsClassPrototype_; } - JSObject *templateObject() const { - return &getOperand(0)->toConstant()->value().toObject(); + NativeObject *templateObject() const { + return &getOperand(0)->toConstant()->value().toObject().as(); } gc::InitialHeap initialHeap() const { @@ -2448,9 +2450,9 @@ class MNewObject : public MUnaryInstruction // Could be allocating 
either a new array or a new object. class MNewPar : public MUnaryInstruction { - AlwaysTenuredObject templateObject_; + AlwaysTenuredNativeObject templateObject_; - MNewPar(MDefinition *cx, JSObject *templateObject) + MNewPar(MDefinition *cx, NativeObject *templateObject) : MUnaryInstruction(cx), templateObject_(templateObject) { @@ -2460,7 +2462,7 @@ class MNewPar : public MUnaryInstruction public: INSTRUCTION_HEADER(NewPar); - static MNewPar *New(TempAllocator &alloc, MDefinition *cx, JSObject *templateObject) { + static MNewPar *New(TempAllocator &alloc, MDefinition *cx, NativeObject *templateObject) { return new(alloc) MNewPar(cx, templateObject); } @@ -2468,7 +2470,7 @@ class MNewPar : public MUnaryInstruction return getOperand(0); } - JSObject *templateObject() const { + NativeObject *templateObject() const { return templateObject_; } @@ -3717,8 +3719,8 @@ class MCreateThisWithTemplate } // Template for |this|, provided by TI. - JSObject *templateObject() const { - return &getOperand(0)->toConstant()->value().toObject(); + NativeObject *templateObject() const { + return &getOperand(0)->toConstant()->value().toObject().as(); } gc::InitialHeap initialHeap() const { @@ -7669,11 +7671,11 @@ class MArrayConcat : public MBinaryInstruction, public MixPolicy, ObjectPolicy<1> >::Data { - AlwaysTenuredObject templateObj_; + AlwaysTenured templateObj_; gc::InitialHeap initialHeap_; MArrayConcat(types::CompilerConstraintList *constraints, MDefinition *lhs, MDefinition *rhs, - JSObject *templateObj, gc::InitialHeap initialHeap) + ArrayObject *templateObj, gc::InitialHeap initialHeap) : MBinaryInstruction(lhs, rhs), templateObj_(templateObj), initialHeap_(initialHeap) @@ -7687,12 +7689,12 @@ class MArrayConcat static MArrayConcat *New(TempAllocator &alloc, types::CompilerConstraintList *constraints, MDefinition *lhs, MDefinition *rhs, - JSObject *templateObj, gc::InitialHeap initialHeap) + ArrayObject *templateObj, gc::InitialHeap initialHeap) { return new(alloc) 
MArrayConcat(constraints, lhs, rhs, templateObj, initialHeap); } - JSObject *templateObj() const { + ArrayObject *templateObj() const { return templateObj_; } @@ -10308,10 +10310,10 @@ class MSetFrameArgument class MRestCommon { unsigned numFormals_; - AlwaysTenuredObject templateObject_; + AlwaysTenured templateObject_; protected: - MRestCommon(unsigned numFormals, JSObject *templateObject) + MRestCommon(unsigned numFormals, ArrayObject *templateObject) : numFormals_(numFormals), templateObject_(templateObject) { } @@ -10320,7 +10322,7 @@ class MRestCommon unsigned numFormals() const { return numFormals_; } - JSObject *templateObject() const { + ArrayObject *templateObject() const { return templateObject_; } }; @@ -10331,7 +10333,7 @@ class MRest public IntPolicy<0>::Data { MRest(types::CompilerConstraintList *constraints, MDefinition *numActuals, unsigned numFormals, - JSObject *templateObject) + ArrayObject *templateObject) : MUnaryInstruction(numActuals), MRestCommon(numFormals, templateObject) { @@ -10344,7 +10346,7 @@ class MRest static MRest *New(TempAllocator &alloc, types::CompilerConstraintList *constraints, MDefinition *numActuals, unsigned numFormals, - JSObject *templateObject) + ArrayObject *templateObject) { return new(alloc) MRest(constraints, numActuals, numFormals, templateObject); } @@ -10367,7 +10369,7 @@ class MRestPar public IntPolicy<1>::Data { MRestPar(MDefinition *cx, MDefinition *numActuals, unsigned numFormals, - JSObject *templateObject, types::TemporaryTypeSet *resultTypes) + ArrayObject *templateObject, types::TemporaryTypeSet *resultTypes) : MBinaryInstruction(cx, numActuals), MRestCommon(numFormals, templateObject) { @@ -10607,9 +10609,9 @@ class MPostWriteBarrier : public MBinaryInstruction, public ObjectPolicy<0>::Dat class MNewDeclEnvObject : public MNullaryInstruction { - AlwaysTenuredObject templateObj_; + AlwaysTenuredNativeObject templateObj_; - explicit MNewDeclEnvObject(JSObject *templateObj) + explicit 
MNewDeclEnvObject(NativeObject *templateObj) : MNullaryInstruction(), templateObj_(templateObj) { @@ -10619,11 +10621,11 @@ class MNewDeclEnvObject : public MNullaryInstruction public: INSTRUCTION_HEADER(NewDeclEnvObject); - static MNewDeclEnvObject *New(TempAllocator &alloc, JSObject *templateObj) { + static MNewDeclEnvObject *New(TempAllocator &alloc, NativeObject *templateObj) { return new(alloc) MNewDeclEnvObject(templateObj); } - JSObject *templateObj() { + NativeObject *templateObj() { return templateObj_; } AliasSet getAliasSet() const { @@ -10633,10 +10635,10 @@ class MNewDeclEnvObject : public MNullaryInstruction class MNewCallObjectBase : public MNullaryInstruction { - AlwaysTenuredObject templateObj_; + AlwaysTenuredNativeObject templateObj_; protected: - explicit MNewCallObjectBase(JSObject *templateObj) + explicit MNewCallObjectBase(NativeObject *templateObj) : MNullaryInstruction(), templateObj_(templateObj) { @@ -10644,7 +10646,7 @@ class MNewCallObjectBase : public MNullaryInstruction } public: - JSObject *templateObject() { + NativeObject *templateObject() { return templateObj_; } AliasSet getAliasSet() const { @@ -10657,12 +10659,12 @@ class MNewCallObject : public MNewCallObjectBase public: INSTRUCTION_HEADER(NewCallObject) - explicit MNewCallObject(JSObject *templateObj) + explicit MNewCallObject(NativeObject *templateObj) : MNewCallObjectBase(templateObj) {} static MNewCallObject * - New(TempAllocator &alloc, JSObject *templateObj) + New(TempAllocator &alloc, NativeObject *templateObj) { return new(alloc) MNewCallObject(templateObj); } @@ -10673,12 +10675,12 @@ class MNewRunOnceCallObject : public MNewCallObjectBase public: INSTRUCTION_HEADER(NewRunOnceCallObject) - explicit MNewRunOnceCallObject(JSObject *templateObj) + explicit MNewRunOnceCallObject(NativeObject *templateObj) : MNewCallObjectBase(templateObj) {} static MNewRunOnceCallObject * - New(TempAllocator &alloc, JSObject *templateObj) + New(TempAllocator &alloc, NativeObject 
*templateObj) { return new(alloc) MNewRunOnceCallObject(templateObj); } @@ -10686,9 +10688,9 @@ class MNewRunOnceCallObject : public MNewCallObjectBase class MNewCallObjectPar : public MUnaryInstruction { - AlwaysTenuredObject templateObj_; + AlwaysTenuredNativeObject templateObj_; - MNewCallObjectPar(MDefinition *cx, JSObject *templateObj) + MNewCallObjectPar(MDefinition *cx, NativeObject *templateObj) : MUnaryInstruction(cx), templateObj_(templateObj) { @@ -10706,7 +10708,7 @@ class MNewCallObjectPar : public MUnaryInstruction return getOperand(0); } - JSObject *templateObj() const { + NativeObject *templateObj() const { return templateObj_; } @@ -10805,9 +10807,9 @@ class MEnclosingScope : public MLoadFixedSlot // Note: the template object should be an *empty* dense array! class MNewDenseArrayPar : public MBinaryInstruction { - AlwaysTenuredObject templateObject_; + AlwaysTenured templateObject_; - MNewDenseArrayPar(MDefinition *cx, MDefinition *length, JSObject *templateObject) + MNewDenseArrayPar(MDefinition *cx, MDefinition *length, ArrayObject *templateObject) : MBinaryInstruction(cx, length), templateObject_(templateObject) { @@ -10819,7 +10821,7 @@ class MNewDenseArrayPar : public MBinaryInstruction INSTRUCTION_HEADER(NewDenseArrayPar); static MNewDenseArrayPar *New(TempAllocator &alloc, MDefinition *cx, MDefinition *length, - JSObject *templateObject) + ArrayObject *templateObject) { return new(alloc) MNewDenseArrayPar(cx, length, templateObject); } @@ -10832,7 +10834,7 @@ class MNewDenseArrayPar : public MBinaryInstruction return getOperand(1); } - JSObject *templateObject() const { + ArrayObject *templateObject() const { return templateObject_; } diff --git a/js/src/jit/ParallelFunctions.cpp b/js/src/jit/ParallelFunctions.cpp index 92d4287ebda..ac0a8c1b447 100644 --- a/js/src/jit/ParallelFunctions.cpp +++ b/js/src/jit/ParallelFunctions.cpp @@ -15,6 +15,8 @@ #include "jsgcinlines.h" #include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" + using 
namespace js; using namespace jit; @@ -165,12 +167,12 @@ jit::InterruptCheckPar(ForkJoinContext *cx) return true; } -JSObject * -jit::ExtendArrayPar(ForkJoinContext *cx, JSObject *array, uint32_t length) +ArrayObject * +jit::ExtendArrayPar(ForkJoinContext *cx, ArrayObject *array, uint32_t length) { - JSObject::EnsureDenseResult res = + NativeObject::EnsureDenseResult res = array->ensureDenseElementsPreservePackedFlag(cx, 0, length); - if (res != JSObject::ED_OK) + if (res != NativeObject::ED_OK) return nullptr; return array; } @@ -183,9 +185,9 @@ jit::SetPropertyPar(ForkJoinContext *cx, HandleObject obj, HandlePropertyName na if (*pc == JSOP_SETALIASEDVAR) { // See comment in jit::SetProperty. - Shape *shape = obj->nativeLookupPure(name); + Shape *shape = obj->as().lookupPure(name); MOZ_ASSERT(shape && shape->hasSlot()); - return obj->nativeSetSlotIfHasType(shape, value); + return obj->as().setSlotIfHasType(shape, value); } // Fail early on hooks. @@ -194,7 +196,10 @@ jit::SetPropertyPar(ForkJoinContext *cx, HandleObject obj, HandlePropertyName na RootedValue v(cx, value); RootedId id(cx, NameToId(name)); - return baseops::SetPropertyHelper(cx, obj, obj, id, baseops::Qualified, &v, + return baseops::SetPropertyHelper(cx, + obj.as(), + obj.as(), + id, baseops::Qualified, &v, strict); } @@ -206,13 +211,19 @@ jit::SetElementPar(ForkJoinContext *cx, HandleObject obj, HandleValue index, Han if (!ValueToIdPure(index, id.address())) return false; + if (!obj->isNative()) + return false; + // SetObjectElementOperation, the sequential version, has several checks // for certain deoptimizing behaviors, such as marking having written to // holes and non-indexed element accesses. We don't do that here, as we // can't modify any TI state anyways. If we need to add a new type, we // would bail out. 
RootedValue v(cx, value); - return baseops::SetPropertyHelper(cx, obj, obj, id, baseops::Qualified, &v, + return baseops::SetPropertyHelper(cx, + obj.as(), + obj.as(), + id, baseops::Qualified, &v, strict); } @@ -589,23 +600,22 @@ jit::CallToUncompiledScriptPar(ForkJoinContext *cx, JSObject *obj) JSObject * jit::InitRestParameterPar(ForkJoinContext *cx, uint32_t length, Value *rest, - HandleObject templateObj, HandleObject res) + HandleObject templateObj, HandleArrayObject res) { // In parallel execution, we should always have succeeded in allocation // before this point. We can do the allocation here like in the sequential // path, but duplicating the initGCThing logic is too tedious. MOZ_ASSERT(res); - MOZ_ASSERT(res->is()); MOZ_ASSERT(!res->getDenseInitializedLength()); MOZ_ASSERT(res->type() == templateObj->type()); if (length > 0) { - JSObject::EnsureDenseResult edr = + NativeObject::EnsureDenseResult edr = res->ensureDenseElementsPreservePackedFlag(cx, 0, length); - if (edr != JSObject::ED_OK) + if (edr != NativeObject::ED_OK) return nullptr; res->initDenseElementsUnbarriered(0, rest, length); - res->as().setLengthInt32(length); + res->setLengthInt32(length); } return res; diff --git a/js/src/jit/ParallelFunctions.h b/js/src/jit/ParallelFunctions.h index 3cf238e0daa..a4010c690ee 100644 --- a/js/src/jit/ParallelFunctions.h +++ b/js/src/jit/ParallelFunctions.h @@ -26,7 +26,7 @@ bool InterruptCheckPar(ForkJoinContext *cx); // Extends the given array with `length` new holes. Returns nullptr on // failure or else `array`, which is convenient during code // generation. -JSObject *ExtendArrayPar(ForkJoinContext *cx, JSObject *array, uint32_t length); +ArrayObject *ExtendArrayPar(ForkJoinContext *cx, ArrayObject *array, uint32_t length); // Set properties and elements on thread local objects. 
bool SetPropertyPar(ForkJoinContext *cx, HandleObject obj, HandlePropertyName name, @@ -69,7 +69,7 @@ bool UrshValuesPar(ForkJoinContext *cx, HandleValue lhs, HandleValue rhs, Mutabl // Make a new rest parameter in parallel. JSObject *InitRestParameterPar(ForkJoinContext *cx, uint32_t length, Value *rest, - HandleObject templateObj, HandleObject res); + HandleObject templateObj, HandleArrayObject res); // Abort and debug tracing functions. void BailoutPar(BailoutStack *sp, uint8_t **entryFramePointer); diff --git a/js/src/jit/ParallelSafetyAnalysis.cpp b/js/src/jit/ParallelSafetyAnalysis.cpp index ef48782ad69..5df70fd88ba 100644 --- a/js/src/jit/ParallelSafetyAnalysis.cpp +++ b/js/src/jit/ParallelSafetyAnalysis.cpp @@ -72,7 +72,7 @@ class ParallelSafetyVisitor : public MDefinitionVisitor bool insertWriteGuard(MInstruction *writeInstruction, MDefinition *valueBeingWritten); - bool replaceWithNewPar(MInstruction *newInstruction, JSObject *templateObject); + bool replaceWithNewPar(MInstruction *newInstruction, NativeObject *templateObject); bool replace(MInstruction *oldInstruction, MInstruction *replacementInstruction); bool visitSpecializedInstruction(MInstruction *ins, MIRType spec, uint32_t flags); @@ -584,7 +584,7 @@ ParallelSafetyVisitor::visitToString(MToString *ins) bool ParallelSafetyVisitor::replaceWithNewPar(MInstruction *newInstruction, - JSObject *templateObject) + NativeObject *templateObject) { return replace(newInstruction, MNewPar::New(alloc(), ForkJoinContext(), templateObject)); } diff --git a/js/src/jit/RangeAnalysis.cpp b/js/src/jit/RangeAnalysis.cpp index 01c5001645c..9ddd17a7fe2 100644 --- a/js/src/jit/RangeAnalysis.cpp +++ b/js/src/jit/RangeAnalysis.cpp @@ -1507,7 +1507,7 @@ MArrayLength::computeRange(TempAllocator &alloc) void MInitializedLength::computeRange(TempAllocator &alloc) { - setRange(Range::NewUInt32Range(alloc, 0, JSObject::NELEMENTS_LIMIT)); + setRange(Range::NewUInt32Range(alloc, 0, NativeObject::NELEMENTS_LIMIT)); } void diff 
--git a/js/src/jit/Recover.cpp b/js/src/jit/Recover.cpp index de66b5fedcf..7c9f3745960 100644 --- a/js/src/jit/Recover.cpp +++ b/js/src/jit/Recover.cpp @@ -21,9 +21,10 @@ #include "jit/MIR.h" #include "jit/MIRGraph.h" #include "jit/VMFunctions.h" - #include "vm/Interpreter.h" + #include "vm/Interpreter-inl.h" +#include "vm/ObjectImpl-inl.h" using namespace js; using namespace js::jit; @@ -1003,7 +1004,7 @@ RNewObject::RNewObject(CompactBufferReader &reader) bool RNewObject::recover(JSContext *cx, SnapshotIterator &iter) const { - RootedObject templateObject(cx, &iter.read().toObject()); + RootedNativeObject templateObject(cx, &iter.read().toObject().as()); RootedValue result(cx); JSObject *resultObject = nullptr; @@ -1113,7 +1114,7 @@ RCreateThisWithTemplate::RCreateThisWithTemplate(CompactBufferReader &reader) bool RCreateThisWithTemplate::recover(JSContext *cx, SnapshotIterator &iter) const { - RootedObject templateObject(cx, &iter.read().toObject()); + RootedNativeObject templateObject(cx, &iter.read().toObject().as()); // Use AutoEnterAnalysis to avoid invoking the object metadata callback // while bailing out, which could try to walk the stack. @@ -1122,7 +1123,7 @@ RCreateThisWithTemplate::recover(JSContext *cx, SnapshotIterator &iter) const // See CodeGenerator::visitCreateThisWithTemplate gc::AllocKind allocKind = templateObject->asTenured().getAllocKind(); gc::InitialHeap initialHeap = tenuredHeap_ ? 
gc::TenuredHeap : gc::DefaultHeap; - JSObject *resultObject = JSObject::copy(cx, allocKind, initialHeap, templateObject); + JSObject *resultObject = NativeObject::copy(cx, allocKind, initialHeap, templateObject); if (!resultObject) return false; @@ -1149,13 +1150,13 @@ RObjectState::RObjectState(CompactBufferReader &reader) bool RObjectState::recover(JSContext *cx, SnapshotIterator &iter) const { - RootedObject object(cx, &iter.read().toObject()); + RootedNativeObject object(cx, &iter.read().toObject().as()); MOZ_ASSERT(object->slotSpan() == numSlots()); RootedValue val(cx); for (size_t i = 0; i < numSlots(); i++) { val = iter.read(); - object->nativeSetSlot(i, val); + object->setSlot(i, val); } val.setObject(*object); @@ -1181,7 +1182,7 @@ bool RArrayState::recover(JSContext *cx, SnapshotIterator &iter) const { RootedValue result(cx); - JSObject *object = &iter.read().toObject(); + ArrayObject *object = &iter.read().toObject().as(); uint32_t initLength = iter.read().toInt32(); object->setDenseInitializedLength(initLength); diff --git a/js/src/jit/VMFunctions.cpp b/js/src/jit/VMFunctions.cpp index a155547dc05..c30885f2d75 100644 --- a/js/src/jit/VMFunctions.cpp +++ b/js/src/jit/VMFunctions.cpp @@ -23,6 +23,7 @@ #include "jit/BaselineFrame-inl.h" #include "jit/IonFrames-inl.h" #include "vm/Interpreter-inl.h" +#include "vm/ObjectImpl-inl.h" #include "vm/StringObject-inl.h" using namespace js; @@ -221,7 +222,7 @@ MutatePrototype(JSContext *cx, HandleObject obj, HandleValue value) } bool -InitProp(JSContext *cx, HandleObject obj, HandlePropertyName name, HandleValue value) +InitProp(JSContext *cx, HandleNativeObject obj, HandlePropertyName name, HandleValue value) { RootedId id(cx, NameToId(name)); return DefineNativeProperty(cx, obj, id, value, nullptr, nullptr, JSPROP_ENUMERATE); @@ -294,7 +295,7 @@ template bool StringsEqual(JSContext *cx, HandleString lhs, HandleString r template bool StringsEqual(JSContext *cx, HandleString lhs, HandleString rhs, bool *res); 
JSObject* -NewInitObject(JSContext *cx, HandleObject templateObject) +NewInitObject(JSContext *cx, HandleNativeObject templateObject) { NewObjectKind newKind = templateObject->hasSingletonType() ? SingletonObject : GenericObject; if (!templateObject->hasLazyType() && templateObject->type()->shouldPreTenure()) @@ -366,21 +367,19 @@ ArrayPopDense(JSContext *cx, HandleObject obj, MutableHandleValue rval) } bool -ArrayPushDense(JSContext *cx, HandleObject obj, HandleValue v, uint32_t *length) +ArrayPushDense(JSContext *cx, HandleArrayObject obj, HandleValue v, uint32_t *length) { - MOZ_ASSERT(obj->is()); - - if (MOZ_LIKELY(obj->as().lengthIsWritable())) { - uint32_t idx = obj->as().length(); - JSObject::EnsureDenseResult result = obj->ensureDenseElements(cx, idx, 1); - if (result == JSObject::ED_FAILED) + if (MOZ_LIKELY(obj->lengthIsWritable())) { + uint32_t idx = obj->length(); + NativeObject::EnsureDenseResult result = obj->ensureDenseElements(cx, idx, 1); + if (result == NativeObject::ED_FAILED) return false; - if (result == JSObject::ED_OK) { + if (result == NativeObject::ED_OK) { obj->setDenseElement(idx, v); MOZ_ASSERT(idx < INT32_MAX); *length = idx + 1; - obj->as().setLengthInt32(*length); + obj->setLengthInt32(*length); return true; } } @@ -511,15 +510,15 @@ SetProperty(JSContext *cx, HandleObject obj, HandlePropertyName name, HandleValu if (op == JSOP_SETALIASEDVAR) { // Aliased var assigns ignore readonly attributes on the property, as // required for initializing 'const' closure variables. - Shape *shape = obj->nativeLookup(cx, name); + Shape *shape = obj->as().lookup(cx, name); MOZ_ASSERT(shape && shape->hasSlot()); - obj->nativeSetSlotWithType(cx, shape, value); + obj->as().setSlotWithType(cx, shape, value); return true; } if (MOZ_LIKELY(!obj->getOps()->setProperty)) { return baseops::SetPropertyHelper( - cx, obj, obj, id, + cx, obj.as(), obj.as(), id, (op == JSOP_SETNAME || op == JSOP_SETGNAME) ? 
baseops::Unqualified : baseops::Qualified, @@ -1074,17 +1073,15 @@ Recompile(JSContext *cx) } bool -SetDenseElement(JSContext *cx, HandleObject obj, int32_t index, HandleValue value, +SetDenseElement(JSContext *cx, HandleNativeObject obj, int32_t index, HandleValue value, bool strict) { // This function is called from Ion code for StoreElementHole's OOL path. // In this case we know the object is native, has no indexed properties // and we can use setDenseElement instead of setDenseElementWithType. - - MOZ_ASSERT(obj->isNative()); MOZ_ASSERT(!obj->isIndexed()); - JSObject::EnsureDenseResult result = JSObject::ED_SPARSE; + NativeObject::EnsureDenseResult result = NativeObject::ED_SPARSE; do { if (index < 0) break; @@ -1093,7 +1090,7 @@ SetDenseElement(JSContext *cx, HandleObject obj, int32_t index, HandleValue valu break; uint32_t idx = uint32_t(index); result = obj->ensureDenseElements(cx, idx, 1); - if (result != JSObject::ED_OK) + if (result != NativeObject::ED_OK) break; if (isArray) { ArrayObject &arr = obj->as(); @@ -1104,9 +1101,9 @@ SetDenseElement(JSContext *cx, HandleObject obj, int32_t index, HandleValue valu return true; } while (false); - if (result == JSObject::ED_FAILED) + if (result == NativeObject::ED_FAILED) return false; - MOZ_ASSERT(result == JSObject::ED_SPARSE); + MOZ_ASSERT(result == NativeObject::ED_SPARSE); RootedValue indexVal(cx, Int32Value(index)); return SetObjectElement(cx, obj, indexVal, value, strict); diff --git a/js/src/jit/VMFunctions.h b/js/src/jit/VMFunctions.h index ffb504d8d58..583ff69937c 100644 --- a/js/src/jit/VMFunctions.h +++ b/js/src/jit/VMFunctions.h @@ -288,6 +288,7 @@ struct VMFunctionsModal template struct TypeToDataType { /* Unexpected return type for a VMFunction. 
*/ }; template <> struct TypeToDataType { static const DataType result = Type_Bool; }; template <> struct TypeToDataType { static const DataType result = Type_Object; }; +template <> struct TypeToDataType { static const DataType result = Type_Object; }; template <> struct TypeToDataType { static const DataType result = Type_Object; }; template <> struct TypeToDataType { static const DataType result = Type_Object; }; template <> struct TypeToDataType { static const DataType result = Type_Object; }; @@ -296,6 +297,8 @@ template <> struct TypeToDataType { static const DataType result = template <> struct TypeToDataType { static const DataType result = Type_Handle; }; template <> struct TypeToDataType { static const DataType result = Type_Handle; }; template <> struct TypeToDataType { static const DataType result = Type_Handle; }; +template <> struct TypeToDataType > { static const DataType result = Type_Handle; }; +template <> struct TypeToDataType > { static const DataType result = Type_Handle; }; template <> struct TypeToDataType > { static const DataType result = Type_Handle; }; template <> struct TypeToDataType > { static const DataType result = Type_Handle; }; template <> struct TypeToDataType { static const DataType result = Type_Handle; }; @@ -322,6 +325,12 @@ template <> struct TypeToArgProperties { template <> struct TypeToArgProperties { static const uint32_t result = TypeToArgProperties::result | VMFunction::ByRef; }; +template <> struct TypeToArgProperties > { + static const uint32_t result = TypeToArgProperties::result | VMFunction::ByRef; +}; +template <> struct TypeToArgProperties > { + static const uint32_t result = TypeToArgProperties::result | VMFunction::ByRef; +}; template <> struct TypeToArgProperties > { static const uint32_t result = TypeToArgProperties::result | VMFunction::ByRef; }; @@ -384,6 +393,12 @@ template <> struct TypeToRootType { template <> struct TypeToRootType { static const uint32_t result = VMFunction::RootCell; }; +template <> 
struct TypeToRootType > { + static const uint32_t result = VMFunction::RootObject; +}; +template <> struct TypeToRootType > { + static const uint32_t result = VMFunction::RootObject; +}; template <> struct TypeToRootType > { static const uint32_t result = VMFunction::RootObject; }; @@ -616,7 +631,7 @@ bool CheckOverRecursedWithExtra(JSContext *cx, BaselineFrame *frame, bool DefVarOrConst(JSContext *cx, HandlePropertyName dn, unsigned attrs, HandleObject scopeChain); bool SetConst(JSContext *cx, HandlePropertyName name, HandleObject scopeChain, HandleValue rval); bool MutatePrototype(JSContext *cx, HandleObject obj, HandleValue value); -bool InitProp(JSContext *cx, HandleObject obj, HandlePropertyName name, HandleValue value); +bool InitProp(JSContext *cx, HandleNativeObject obj, HandlePropertyName name, HandleValue value); template bool LooselyEqual(JSContext *cx, MutableHandleValue lhs, MutableHandleValue rhs, bool *res); @@ -634,11 +649,11 @@ bool StringsEqual(JSContext *cx, HandleString left, HandleString right, bool *re // Allocation functions for JSOP_NEWARRAY and JSOP_NEWOBJECT and parallel array inlining JSObject *NewInitParallelArray(JSContext *cx, HandleObject templateObj); -JSObject *NewInitObject(JSContext *cx, HandleObject templateObject); +JSObject *NewInitObject(JSContext *cx, HandleNativeObject templateObject); JSObject *NewInitObjectWithClassPrototype(JSContext *cx, HandleObject templateObject); bool ArrayPopDense(JSContext *cx, HandleObject obj, MutableHandleValue rval); -bool ArrayPushDense(JSContext *cx, HandleObject obj, HandleValue v, uint32_t *length); +bool ArrayPushDense(JSContext *cx, HandleArrayObject obj, HandleValue v, uint32_t *length); bool ArrayShiftDense(JSContext *cx, HandleObject obj, MutableHandleValue rval); JSObject *ArrayConcatDense(JSContext *cx, HandleObject obj1, HandleObject obj2, HandleObject res); JSString *ArrayJoin(JSContext *cx, HandleObject array, HandleString sep); @@ -714,7 +729,7 @@ JSString 
*RegExpReplace(JSContext *cx, HandleString string, HandleObject regexp, JSString *StringReplace(JSContext *cx, HandleString string, HandleString pattern, HandleString repl); -bool SetDenseElement(JSContext *cx, HandleObject obj, int32_t index, HandleValue value, +bool SetDenseElement(JSContext *cx, HandleNativeObject obj, int32_t index, HandleValue value, bool strict); #ifdef DEBUG diff --git a/js/src/jsapi-tests/testDefinePropertyIgnoredAttributes.cpp b/js/src/jsapi-tests/testDefinePropertyIgnoredAttributes.cpp index f3f30e52376..e0ac5469ccf 100644 --- a/js/src/jsapi-tests/testDefinePropertyIgnoredAttributes.cpp +++ b/js/src/jsapi-tests/testDefinePropertyIgnoredAttributes.cpp @@ -90,7 +90,7 @@ BEGIN_TEST(testDefinePropertyIgnoredAttributes) CHECK(JS_DefineProperty(cx, obj, "quox", defineValue, AllowWritable)); CHECK(JS_GetPropertyDescriptor(cx, obj, "quox", &desc)); CHECK(CheckDescriptor(desc, false, true, true)); - CHECK_SAME(ObjectValue(*obj), desc.value()); + CHECK_SAME(JS::ObjectValue(*obj), desc.value()); return true; } diff --git a/js/src/jsapi-tests/testLookup.cpp b/js/src/jsapi-tests/testLookup.cpp index 0163c5a76a5..5be8aec3174 100644 --- a/js/src/jsapi-tests/testLookup.cpp +++ b/js/src/jsapi-tests/testLookup.cpp @@ -67,7 +67,7 @@ document_resolve(JSContext *cx, JS::HandleObject obj, JS::HandleId id, JS_NewObject(cx, &DocumentAllClass, JS::NullPtr(), JS::NullPtr())); if (!docAll) return false; - JS::Rooted allValue(cx, ObjectValue(*docAll)); + JS::Rooted allValue(cx, JS::ObjectValue(*docAll)); bool ok = JS_DefinePropertyById(cx, obj, id, allValue, 0); objp.set(ok ? 
obj.get() : nullptr); return ok; diff --git a/js/src/jsapi-tests/testResolveRecursion.cpp b/js/src/jsapi-tests/testResolveRecursion.cpp index c41e1180866..90395e19a7a 100644 --- a/js/src/jsapi-tests/testResolveRecursion.cpp +++ b/js/src/jsapi-tests/testResolveRecursion.cpp @@ -38,8 +38,8 @@ BEGIN_TEST(testResolveRecursion) JS_SetPrivate(obj1, this); JS_SetPrivate(obj2, this); - JS::RootedValue obj1Val(cx, ObjectValue(*obj1)); - JS::RootedValue obj2Val(cx, ObjectValue(*obj2)); + JS::RootedValue obj1Val(cx, JS::ObjectValue(*obj1)); + JS::RootedValue obj2Val(cx, JS::ObjectValue(*obj2)); CHECK(JS_DefineProperty(cx, global, "obj1", obj1Val, 0)); CHECK(JS_DefineProperty(cx, global, "obj2", obj2Val, 0)); diff --git a/js/src/jsapi.cpp b/js/src/jsapi.cpp index e186e24ea36..6fa63b1fc68 100644 --- a/js/src/jsapi.cpp +++ b/js/src/jsapi.cpp @@ -2233,14 +2233,14 @@ JS_PUBLIC_API(void *) JS_GetPrivate(JSObject *obj) { /* This function can be called by a finalizer. */ - return obj->getPrivate(); + return obj->fakeNativeGetPrivate(); } JS_PUBLIC_API(void) JS_SetPrivate(JSObject *obj, void *data) { /* This function can be called by a finalizer. */ - obj->setPrivate(data); + obj->fakeNativeSetPrivate(data); } JS_PUBLIC_API(void *) @@ -2248,7 +2248,7 @@ JS_GetInstancePrivate(JSContext *cx, HandleObject obj, const JSClass *clasp, Cal { if (!JS_InstanceOf(cx, obj, clasp, args)) return nullptr; - return obj->getPrivate(); + return obj->fakeNativeGetPrivate(); } JS_PUBLIC_API(bool) @@ -2566,8 +2566,8 @@ JS_DeepFreezeObject(JSContext *cx, HandleObject obj) return false; /* Walk slots in obj and if any value is a non-null object, seal it. 
*/ - for (uint32_t i = 0, n = obj->slotSpan(); i < n; ++i) { - const Value &v = obj->getSlot(i); + for (uint32_t i = 0, n = obj->fakeNativeSlotSpan(); i < n; ++i) { + const Value &v = obj->fakeNativeGetSlot(i); if (v.isPrimitive()) continue; RootedObject obj(cx, &v.toObject()); @@ -2612,12 +2612,12 @@ LookupResult(JSContext *cx, HandleObject obj, HandleObject obj2, HandleId id, } } } else if (IsImplicitDenseOrTypedArrayElement(shape)) { - vp.set(obj2->getDenseOrTypedArrayElement(JSID_TO_INT(id))); + vp.set(obj2->as().getDenseOrTypedArrayElement(JSID_TO_INT(id))); return true; } else { /* Peek at the native property's slot value, without doing a Get. */ if (shape->hasSlot()) { - vp.set(obj2->nativeGetSlot(shape->slot())); + vp.set(obj2->as().getSlot(shape->slot())); return true; } } @@ -2735,7 +2735,7 @@ JS_AlreadyHasOwnPropertyById(JSContext *cx, HandleObject obj, HandleId id, bool if (JSID_IS_INT(id)) { uint32_t index = JSID_TO_INT(id); - if (obj->containsDenseElement(index)) { + if (obj->as().containsDenseElement(index)) { *foundp = true; return true; } @@ -2746,7 +2746,7 @@ JS_AlreadyHasOwnPropertyById(JSContext *cx, HandleObject obj, HandleId id, bool } } - *foundp = obj->nativeContains(cx, id); + *foundp = obj->as().contains(cx, id); return true; } @@ -3321,14 +3321,14 @@ GetPropertyDescriptorById(JSContext *cx, HandleObject obj, HandleId id, if (obj2->isNative()) { if (IsImplicitDenseOrTypedArrayElement(shape)) { desc.setEnumerable(); - desc.value().set(obj2->getDenseOrTypedArrayElement(JSID_TO_INT(id))); + desc.value().set(obj2->as().getDenseOrTypedArrayElement(JSID_TO_INT(id))); } else { desc.setAttributes(shape->attributes()); desc.setGetter(shape->getter()); desc.setSetter(shape->setter()); MOZ_ASSERT(desc.value().isUndefined()); if (shape->hasSlot()) - desc.value().set(obj2->nativeGetSlot(shape->slot())); + desc.value().set(obj2->as().getSlot(shape->slot())); } } else { if (obj2->is()) @@ -3602,9 +3602,9 @@ JS_SetAllNonReservedSlotsToUndefined(JSContext 
*cx, JSObject *objArg) const Class *clasp = obj->getClass(); unsigned numReserved = JSCLASS_RESERVED_SLOTS(clasp); - unsigned numSlots = obj->slotSpan(); + unsigned numSlots = obj->as().slotSpan(); for (unsigned i = numReserved; i < numSlots; i++) - obj->setSlot(i, UndefinedValue()); + obj->as().setSlot(i, UndefinedValue()); } JS_PUBLIC_API(JSIdArray *) @@ -3633,11 +3633,11 @@ static const uint32_t JSSLOT_ITER_INDEX = 0; static void prop_iter_finalize(FreeOp *fop, JSObject *obj) { - void *pdata = obj->getPrivate(); + void *pdata = obj->as().getPrivate(); if (!pdata) return; - if (obj->getSlot(JSSLOT_ITER_INDEX).toInt32() >= 0) { + if (obj->as().getSlot(JSSLOT_ITER_INDEX).toInt32() >= 0) { /* Non-native case: destroy the ida enumerated when obj was created. */ JSIdArray *ida = (JSIdArray *) pdata; fop->free_(ida); @@ -3647,11 +3647,11 @@ prop_iter_finalize(FreeOp *fop, JSObject *obj) static void prop_iter_trace(JSTracer *trc, JSObject *obj) { - void *pdata = obj->getPrivate(); + void *pdata = obj->as().getPrivate(); if (!pdata) return; - if (obj->getSlot(JSSLOT_ITER_INDEX).toInt32() < 0) { + if (obj->as().getSlot(JSSLOT_ITER_INDEX).toInt32() < 0) { /* * Native case: just mark the next property to visit. We don't need a * barrier here because the pointer is updated via setPrivate, which @@ -3659,7 +3659,7 @@ prop_iter_trace(JSTracer *trc, JSObject *obj) */ Shape *tmp = static_cast(pdata); MarkShapeUnbarriered(trc, &tmp, "prop iter shape"); - obj->setPrivateUnbarriered(tmp); + obj->as().setPrivateUnbarriered(tmp); } else { /* Non-native case: mark each id in the JSIdArray private. 
*/ JSIdArray *ida = (JSIdArray *) pdata; @@ -3691,7 +3691,7 @@ JS_NewPropertyIterator(JSContext *cx, HandleObject obj) CHECK_REQUEST(cx); assertSameCompartment(cx, obj); - RootedObject iterobj(cx, NewObjectWithClassProto(cx, &prop_iter_class, nullptr, obj)); + NativeObject *iterobj = NewNativeObjectWithClassProto(cx, &prop_iter_class, nullptr, obj); if (!iterobj) return nullptr; @@ -3720,11 +3720,11 @@ JS_NextProperty(JSContext *cx, HandleObject iterobj, MutableHandleId idp) AssertHeapIsIdle(cx); CHECK_REQUEST(cx); assertSameCompartment(cx, iterobj); - int32_t i = iterobj->getSlot(JSSLOT_ITER_INDEX).toInt32(); + int32_t i = iterobj->as().getSlot(JSSLOT_ITER_INDEX).toInt32(); if (i < 0) { /* Native case: private data is a property tree node pointer. */ MOZ_ASSERT(iterobj->getParent()->isNative()); - Shape *shape = static_cast(iterobj->getPrivate()); + Shape *shape = static_cast(iterobj->as().getPrivate()); while (shape->previous() && !shape->enumerable()) shape = shape->previous(); @@ -3733,19 +3733,19 @@ JS_NextProperty(JSContext *cx, HandleObject iterobj, MutableHandleId idp) MOZ_ASSERT(shape->isEmptyShape()); idp.set(JSID_VOID); } else { - iterobj->setPrivateGCThing(const_cast(shape->previous().get())); + iterobj->as().setPrivateGCThing(const_cast(shape->previous().get())); idp.set(shape->propid()); } } else { /* Non-native case: use the ida enumerated when iterobj was created. 
*/ - JSIdArray *ida = (JSIdArray *) iterobj->getPrivate(); + JSIdArray *ida = (JSIdArray *) iterobj->as().getPrivate(); MOZ_ASSERT(i <= ida->length); STATIC_ASSUME(i <= ida->length); if (i == 0) { idp.set(JSID_VOID); } else { idp.set(ida->vector[--i]); - iterobj->setSlot(JSSLOT_ITER_INDEX, Int32Value(i)); + iterobj->as().setSlot(JSSLOT_ITER_INDEX, Int32Value(i)); } } return true; @@ -3754,13 +3754,13 @@ JS_NextProperty(JSContext *cx, HandleObject iterobj, MutableHandleId idp) JS_PUBLIC_API(jsval) JS_GetReservedSlot(JSObject *obj, uint32_t index) { - return obj->getReservedSlot(index); + return obj->fakeNativeGetReservedSlot(index); } JS_PUBLIC_API(void) JS_SetReservedSlot(JSObject *obj, uint32_t index, Value value) { - obj->setReservedSlot(index, value); + obj->fakeNativeSetReservedSlot(index, value); } JS_PUBLIC_API(JSObject *) @@ -6476,7 +6476,7 @@ JS_PUBLIC_API(void *) JS_EncodeInterpretedFunction(JSContext *cx, HandleObject funobjArg, uint32_t *lengthp) { XDREncoder encoder(cx); - RootedObject funobj(cx, funobjArg); + RootedFunction funobj(cx, &funobjArg->as()); if (!encoder.codeFunction(&funobj)) return nullptr; return encoder.forgetData(lengthp); @@ -6496,7 +6496,7 @@ JS_PUBLIC_API(JSObject *) JS_DecodeInterpretedFunction(JSContext *cx, const void *data, uint32_t length) { XDRDecoder decoder(cx, data, length); - RootedObject funobj(cx); + RootedFunction funobj(cx); if (!decoder.codeFunction(&funobj)) return nullptr; return funobj; diff --git a/js/src/jsarray.cpp b/js/src/jsarray.cpp index 7046ae2d40f..8832fb0292b 100644 --- a/js/src/jsarray.cpp +++ b/js/src/jsarray.cpp @@ -39,6 +39,7 @@ #include "vm/ArgumentsObject-inl.h" #include "vm/ArrayObject-inl.h" #include "vm/Interpreter-inl.h" +#include "vm/ObjectImpl-inl.h" #include "vm/Runtime-inl.h" using namespace js; @@ -213,8 +214,8 @@ GetElement(JSContext *cx, HandleObject obj, HandleObject receiver, IndexType index, bool *hole, MutableHandleValue vp) { AssertGreaterThanZero(index); - if (obj->isNative() && 
index < obj->getDenseInitializedLength()) { - vp.set(obj->getDenseElement(uint32_t(index))); + if (obj->isNative() && index < obj->as().getDenseInitializedLength()) { + vp.set(obj->as().getDenseElement(uint32_t(index))); if (!vp.isMagic(JS_ELEMENTS_HOLE)) { *hole = false; return true; @@ -251,11 +252,12 @@ GetElementsSlow(JSContext *cx, HandleObject aobj, uint32_t length, Value *vp) bool js::GetElements(JSContext *cx, HandleObject aobj, uint32_t length, Value *vp) { - if (aobj->is() && length <= aobj->getDenseInitializedLength() && + if (aobj->is() && + length <= aobj->as().getDenseInitializedLength() && !ObjectMayHaveExtraIndexedProperties(aobj)) { /* No other indexed properties so hole = undefined */ - const Value *srcbeg = aobj->getDenseElements(); + const Value *srcbeg = aobj->as().getDenseElements(); const Value *srcend = srcbeg + length; const Value *src = srcbeg; for (Value *dst = vp; src < srcend; ++dst, ++src) @@ -285,7 +287,7 @@ SetArrayElement(JSContext *cx, HandleObject obj, double index, HandleValue v) if (obj->is() && !obj->isIndexed()) { Rooted arr(cx, &obj->as()); /* Predicted/prefetched code should favor the remains-dense case. 
*/ - JSObject::EnsureDenseResult result = JSObject::ED_SPARSE; + NativeObject::EnsureDenseResult result = NativeObject::ED_SPARSE; do { if (index > uint32_t(-1)) break; @@ -296,7 +298,7 @@ SetArrayElement(JSContext *cx, HandleObject obj, double index, HandleValue v) return false; } result = arr->ensureDenseElements(cx, idx, 1); - if (result != JSObject::ED_OK) + if (result != NativeObject::ED_OK) break; if (idx >= arr->length()) arr->setLengthInt32(idx + 1); @@ -304,9 +306,9 @@ SetArrayElement(JSContext *cx, HandleObject obj, double index, HandleValue v) return true; } while (false); - if (result == JSObject::ED_FAILED) + if (result == NativeObject::ED_FAILED) return false; - MOZ_ASSERT(result == JSObject::ED_SPARSE); + MOZ_ASSERT(result == NativeObject::ED_SPARSE); } RootedId id(cx); @@ -336,16 +338,17 @@ DeleteArrayElement(JSContext *cx, HandleObject obj, double index, bool *succeede MOZ_ASSERT(floor(index) == index); if (obj->is() && !obj->isIndexed()) { + ArrayObject *aobj = &obj->as(); if (index <= UINT32_MAX) { uint32_t idx = uint32_t(index); - if (idx < obj->getDenseInitializedLength()) { - if (!obj->maybeCopyElementsForWrite(cx)) + if (idx < aobj->getDenseInitializedLength()) { + if (!aobj->maybeCopyElementsForWrite(cx)) return false; - if (idx+1 == obj->getDenseInitializedLength()) { - obj->setDenseInitializedLength(idx); + if (idx+1 == aobj->getDenseInitializedLength()) { + aobj->setDenseInitializedLength(idx); } else { - obj->markDenseElementsNotPacked(cx); - obj->setDenseElement(idx, MagicValue(JS_ELEMENTS_HOLE)); + aobj->markDenseElementsNotPacked(cx); + aobj->setDenseElement(idx, MagicValue(JS_ELEMENTS_HOLE)); } if (!SuppressDeletedElement(cx, obj, idx)) return false; @@ -514,7 +517,7 @@ js::ArraySetLength(typename ExecutionModeTraits::ContextType cxArg, bool lengthIsWritable = arr->lengthIsWritable(); #ifdef DEBUG { - RootedShape lengthShape(cxArg, arr->nativeLookupPure(id)); + RootedShape lengthShape(cxArg, arr->lookupPure(id)); 
MOZ_ASSERT(lengthShape); MOZ_ASSERT(lengthShape->writable() == lengthIsWritable); } @@ -692,11 +695,11 @@ js::ArraySetLength(typename ExecutionModeTraits::ContextType cxArg, // the long run, with accessors replacing them both internally and at the // API level, just run with this. RootedShape lengthShape(cxArg, mode == ParallelExecution - ? arr->nativeLookupPure(id) - : arr->nativeLookup(cxArg->asJSContext(), id)); - if (!JSObject::changeProperty(cxArg, arr, lengthShape, attrs, - JSPROP_PERMANENT | JSPROP_READONLY | JSPROP_SHARED, - array_length_getter, array_length_setter)) + ? arr->lookupPure(id) + : arr->lookup(cxArg->asJSContext(), id)); + if (!NativeObject::changeProperty(cxArg, arr, lengthShape, attrs, + JSPROP_PERMANENT | JSPROP_READONLY | JSPROP_SHARED, + array_length_getter, array_length_setter)) { return false; } @@ -847,7 +850,7 @@ js::ObjectMayHaveExtraIndexedProperties(JSObject *obj) return true; if (obj->isIndexed()) return true; - if (obj->getDenseInitializedLength() > 0) + if (obj->as().getDenseInitializedLength() > 0) return true; if (IsAnyTypedArray(obj)) return true; @@ -857,7 +860,7 @@ js::ObjectMayHaveExtraIndexedProperties(JSObject *obj) } static bool -AddLengthProperty(ExclusiveContext *cx, HandleObject obj) +AddLengthProperty(ExclusiveContext *cx, HandleArrayObject obj) { /* * Add the 'length' property for a newly created array, @@ -867,11 +870,11 @@ AddLengthProperty(ExclusiveContext *cx, HandleObject obj) */ RootedId lengthId(cx, NameToId(cx->names().length)); - MOZ_ASSERT(!obj->nativeLookup(cx, lengthId)); + MOZ_ASSERT(!obj->lookup(cx, lengthId)); - return JSObject::addProperty(cx, obj, lengthId, array_length_getter, array_length_setter, - SHAPE_INVALID_SLOT, JSPROP_PERMANENT | JSPROP_SHARED, 0, - /* allowDictionary = */ false); + return NativeObject::addProperty(cx, obj, lengthId, array_length_getter, array_length_setter, + SHAPE_INVALID_SLOT, JSPROP_PERMANENT | JSPROP_SHARED, 0, + /* allowDictionary = */ false); } #if JS_HAS_TOSOURCE @@ 
-988,12 +991,12 @@ ArrayJoinKernel(JSContext *cx, SeparatorOp sepOp, HandleObject obj, uint32_t len // This loop handles all elements up to initializedLength. If // length > initLength we rely on the second loop to add the // other elements. - uint32_t initLength = obj->getDenseInitializedLength(); + uint32_t initLength = obj->as().getDenseInitializedLength(); while (i < initLength) { if (!CheckForInterrupt(cx)) return false; - const Value &elem = obj->getDenseElement(i); + const Value &elem = obj->as().getDenseElement(i); if (elem.isString()) { if (!sb.append(elem.toString())) @@ -1069,9 +1072,9 @@ js::ArrayJoin(JSContext *cx, HandleObject obj, HandleLinearString sepstr, uint32 // the 0th element is a string, ToString() of that element is a no-op and // so it can be immediately returned as the result. if (length == 1 && !Locale && obj->is() && - obj->getDenseInitializedLength() == 1) + obj->as().getDenseInitializedLength() == 1) { - const Value &elem0 = obj->getDenseElement(0); + const Value &elem0 = obj->as().getDenseElement(0); if (elem0.isString()) { return elem0.toString(); } @@ -1270,19 +1273,19 @@ InitArrayElements(JSContext *cx, HandleObject obj, uint32_t start, uint32_t coun if (ObjectMayHaveExtraIndexedProperties(obj)) break; - if (obj->shouldConvertDoubleElements()) - break; + HandleArrayObject arr = obj.as(); - Rooted arr(cx, &obj->as()); + if (arr->shouldConvertDoubleElements()) + break; if (!arr->lengthIsWritable() && start + count > arr->length()) break; - JSObject::EnsureDenseResult result = arr->ensureDenseElements(cx, start, count); - if (result != JSObject::ED_OK) { - if (result == JSObject::ED_FAILED) + NativeObject::EnsureDenseResult result = arr->ensureDenseElements(cx, start, count); + if (result != NativeObject::ED_OK) { + if (result == NativeObject::ED_FAILED) return false; - MOZ_ASSERT(result == JSObject::ED_SPARSE); + MOZ_ASSERT(result == NativeObject::ED_SPARSE); break; } @@ -1344,8 +1347,10 @@ array_reverse(JSContext *cx, unsigned argc, 
Value *vp) if (ObjectMayHaveExtraIndexedProperties(obj)) break; + HandleArrayObject arr = obj.as(); + /* An empty array or an array with no elements is already reversed. */ - if (len == 0 || obj->getDenseCapacity() == 0) { + if (len == 0 || arr->getDenseCapacity() == 0) { args.rval().setObject(*obj); return true; } @@ -1359,32 +1364,33 @@ array_reverse(JSContext *cx, unsigned argc, Value *vp) * holes in the array at its start) and ensure that the capacity is * sufficient to hold all the elements in the array if it were full. */ - JSObject::EnsureDenseResult result = obj->ensureDenseElements(cx, len, 0); - if (result != JSObject::ED_OK) { - if (result == JSObject::ED_FAILED) + NativeObject::EnsureDenseResult result = + arr->ensureDenseElements(cx, len, 0); + if (result != NativeObject::ED_OK) { + if (result == NativeObject::ED_FAILED) return false; - MOZ_ASSERT(result == JSObject::ED_SPARSE); + MOZ_ASSERT(result == NativeObject::ED_SPARSE); break; } /* Fill out the array's initialized length to its proper length. 
*/ - obj->ensureDenseInitializedLength(cx, len, 0); + arr->ensureDenseInitializedLength(cx, len, 0); RootedValue origlo(cx), orighi(cx); uint32_t lo = 0, hi = len - 1; for (; lo < hi; lo++, hi--) { - origlo = obj->getDenseElement(lo); - orighi = obj->getDenseElement(hi); - obj->setDenseElement(lo, orighi); + origlo = arr->getDenseElement(lo); + orighi = arr->getDenseElement(hi); + arr->setDenseElement(lo, orighi); if (orighi.isMagic(JS_ELEMENTS_HOLE) && - !SuppressDeletedProperty(cx, obj, INT_TO_JSID(lo))) + !SuppressDeletedProperty(cx, arr, INT_TO_JSID(lo))) { return false; } - obj->setDenseElement(hi, origlo); + arr->setDenseElement(hi, origlo); if (origlo.isMagic(JS_ELEMENTS_HOLE) && - !SuppressDeletedProperty(cx, obj, INT_TO_JSID(hi))) + !SuppressDeletedProperty(cx, arr, INT_TO_JSID(hi))) { return false; } @@ -1395,7 +1401,7 @@ array_reverse(JSContext *cx, unsigned argc, Value *vp) * array has trailing holes (and thus the original array began with * holes). */ - args.rval().setObject(*obj); + args.rval().setObject(*arr); return true; } while (false); @@ -2084,13 +2090,14 @@ js::array_push(JSContext *cx, unsigned argc, Value *vp) break; uint32_t argCount = args.length(); - JSObject::EnsureDenseResult result = obj->ensureDenseElements(cx, length, argCount); - if (result == JSObject::ED_FAILED) + NativeObject::EnsureDenseResult result = + obj->as().ensureDenseElements(cx, length, argCount); + if (result == NativeObject::ED_FAILED) return false; - if (result == JSObject::ED_OK) { + if (result == NativeObject::ED_OK) { for (uint32_t i = 0, index = length; i < argCount; index++, i++) - obj->setDenseElementWithType(cx, index, args[i]); + obj->as().setDenseElementWithType(cx, index, args[i]); uint32_t newlength = length + argCount; args.rval().setNumber(newlength); if (obj->is()) { @@ -2100,7 +2107,7 @@ js::array_push(JSContext *cx, unsigned argc, Value *vp) return SetLengthProperty(cx, obj, newlength); } - MOZ_ASSERT(result == JSObject::ED_SPARSE); + MOZ_ASSERT(result 
== NativeObject::ED_SPARSE); } while (false); /* Steps 4-5. */ @@ -2152,10 +2159,9 @@ js::array_pop(JSContext *cx, unsigned argc, Value *vp) } void -js::ArrayShiftMoveElements(JSObject *obj) +js::ArrayShiftMoveElements(ArrayObject *obj) { - MOZ_ASSERT(obj->is()); - MOZ_ASSERT(obj->as().lengthIsWritable()); + MOZ_ASSERT(obj->lengthIsWritable()); /* * At this point the length and initialized length have already been @@ -2196,25 +2202,27 @@ js::array_shift(JSContext *cx, unsigned argc, Value *vp) uint32_t newlen = len - 1; /* Fast paths. */ - if (obj->is() && - obj->getDenseInitializedLength() > 0 && - newlen < obj->getDenseCapacity() && - !ObjectMayHaveExtraIndexedProperties(obj)) - { - args.rval().set(obj->getDenseElement(0)); - if (args.rval().isMagic(JS_ELEMENTS_HOLE)) - args.rval().setUndefined(); + if (obj->is()) { + ArrayObject *aobj = &obj->as(); + if (aobj->getDenseInitializedLength() > 0 && + newlen < aobj->getDenseCapacity() && + !ObjectMayHaveExtraIndexedProperties(aobj)) + { + args.rval().set(aobj->getDenseElement(0)); + if (args.rval().isMagic(JS_ELEMENTS_HOLE)) + args.rval().setUndefined(); - if (!obj->maybeCopyElementsForWrite(cx)) - return false; + if (!aobj->maybeCopyElementsForWrite(cx)) + return false; - obj->moveDenseElements(0, 1, obj->getDenseInitializedLength() - 1); - obj->setDenseInitializedLength(obj->getDenseInitializedLength() - 1); + aobj->moveDenseElements(0, 1, aobj->getDenseInitializedLength() - 1); + aobj->setDenseInitializedLength(aobj->getDenseInitializedLength() - 1); - if (!SetLengthProperty(cx, obj, newlen)) - return false; + if (!SetLengthProperty(cx, obj, newlen)) + return false; - return SuppressDeletedProperty(cx, obj, INT_TO_JSID(newlen)); + return SuppressDeletedProperty(cx, obj, INT_TO_JSID(newlen)); + } } /* Steps 5, 10. 
*/ @@ -2268,18 +2276,20 @@ js::array_unshift(JSContext *cx, unsigned argc, Value *vp) break; if (ObjectMayHaveExtraIndexedProperties(obj)) break; - if (!obj->as().lengthIsWritable()) + ArrayObject *aobj = &obj->as(); + if (!aobj->lengthIsWritable()) break; - JSObject::EnsureDenseResult result = obj->ensureDenseElements(cx, length, args.length()); - if (result != JSObject::ED_OK) { - if (result == JSObject::ED_FAILED) + NativeObject::EnsureDenseResult result = + aobj->ensureDenseElements(cx, length, args.length()); + if (result != NativeObject::ED_OK) { + if (result == NativeObject::ED_FAILED) return false; - MOZ_ASSERT(result == JSObject::ED_SPARSE); + MOZ_ASSERT(result == NativeObject::ED_SPARSE); break; } - obj->moveDenseElements(args.length(), 0, length); + aobj->moveDenseElements(args.length(), 0, length); for (uint32_t i = 0; i < args.length(); i++) - obj->setDenseElement(i, MagicValue(JS_ELEMENTS_HOLE)); + aobj->setDenseElement(i, MagicValue(JS_ELEMENTS_HOLE)); optimized = true; } while (false); @@ -2374,7 +2384,7 @@ CanOptimizeForDenseStorage(HandleObject arr, uint32_t startingIndex, uint32_t co * is subsumed by the initializedLength comparison.) */ return !ObjectMayHaveExtraIndexedProperties(arr) && - startingIndex + count <= arr->getDenseInitializedLength(); + startingIndex + count <= arr->as().getDenseInitializedLength(); } /* ES5 15.4.4.12. 
*/ @@ -2433,7 +2443,7 @@ js::array_splice_impl(JSContext *cx, unsigned argc, Value *vp, bool returnValueI Rooted arr(cx); if (CanOptimizeForDenseStorage(obj, actualStart, actualDeleteCount, cx)) { if (returnValueIsUsed) { - arr = NewDenseCopiedArray(cx, actualDeleteCount, obj, actualStart); + arr = NewDenseCopiedArray(cx, actualDeleteCount, obj.as(), actualStart); if (!arr) return false; TryReuseArrayType(obj, arr); @@ -2466,20 +2476,22 @@ js::array_splice_impl(JSContext *cx, unsigned argc, Value *vp, bool returnValueI uint32_t finalLength = len - actualDeleteCount + itemCount; if (CanOptimizeForDenseStorage(obj, 0, len, cx)) { - if (!obj->maybeCopyElementsForWrite(cx)) + ArrayObject *aobj = &obj->as(); + + if (!aobj->maybeCopyElementsForWrite(cx)) return false; /* Steps 12(a)-(b). */ - obj->moveDenseElements(targetIndex, sourceIndex, len - sourceIndex); + aobj->moveDenseElements(targetIndex, sourceIndex, len - sourceIndex); /* * Update the initialized length. Do so before shrinking so that we * can apply the write barrier to the old slots. */ - obj->setDenseInitializedLength(finalLength); + aobj->setDenseInitializedLength(finalLength); /* Steps 12(c)-(d). */ - obj->shrinkElements(cx, finalLength); + aobj->shrinkElements(cx, finalLength); } else { /* * This is all very slow if the length is very large. 
We don't yet @@ -2542,20 +2554,21 @@ js::array_splice_impl(JSContext *cx, unsigned argc, Value *vp, bool returnValueI if (obj->is()) { Rooted arr(cx, &obj->as()); if (arr->lengthIsWritable()) { - JSObject::EnsureDenseResult res = + NativeObject::EnsureDenseResult res = arr->ensureDenseElements(cx, arr->length(), itemCount - actualDeleteCount); - if (res == JSObject::ED_FAILED) + if (res == NativeObject::ED_FAILED) return false; } } if (CanOptimizeForDenseStorage(obj, len, itemCount - actualDeleteCount, cx)) { - if (!obj->maybeCopyElementsForWrite(cx)) + ArrayObject *aobj = &obj->as(); + if (!aobj->maybeCopyElementsForWrite(cx)) return false; - obj->moveDenseElements(actualStart + itemCount, - actualStart + actualDeleteCount, - len - (actualStart + actualDeleteCount)); - obj->setDenseInitializedLength(len + itemCount - actualDeleteCount); + aobj->moveDenseElements(actualStart + itemCount, + actualStart + actualDeleteCount, + len - (actualStart + actualDeleteCount)); + aobj->setDenseInitializedLength(len + itemCount - actualDeleteCount); } else { RootedValue fromValue(cx); for (double k = len - actualDeleteCount; k > actualStart; k--) { @@ -2646,8 +2659,8 @@ js::array_concat(JSContext *cx, unsigned argc, Value *vp) uint32_t length; if (aobj->is() && !aobj->isIndexed()) { length = aobj->as().length(); - uint32_t initlen = aobj->getDenseInitializedLength(); - narr = NewDenseCopiedArray(cx, initlen, aobj, 0); + uint32_t initlen = aobj->as().getDenseInitializedLength(); + narr = NewDenseCopiedArray(cx, initlen, aobj.as(), 0); if (!narr) return false; TryReuseArrayType(aobj, narr); @@ -2754,11 +2767,12 @@ js::array_slice(JSContext *cx, unsigned argc, Value *vp) TryReuseArrayType(obj, narr); if (obj->is() && !ObjectMayHaveExtraIndexedProperties(obj)) { - if (obj->getDenseInitializedLength() > begin) { - uint32_t numSourceElements = obj->getDenseInitializedLength() - begin; + ArrayObject *aobj = &obj->as(); + if (aobj->getDenseInitializedLength() > begin) { + uint32_t 
numSourceElements = aobj->getDenseInitializedLength() - begin; uint32_t initLength = Min(numSourceElements, end - begin); narr->setDenseInitializedLength(initLength); - narr->initDenseElements(0, &obj->getDenseElement(begin), initLength); + narr->initDenseElements(0, &aobj->getDenseElement(begin), initLength); } args.rval().setObject(*narr); return true; @@ -2766,11 +2780,11 @@ js::array_slice(JSContext *cx, unsigned argc, Value *vp) if (js::SliceOp op = obj->getOps()->slice) { // Ensure that we have dense elements, so that DOM can use js::UnsafeDefineElement. - JSObject::EnsureDenseResult result = narr->ensureDenseElements(cx, 0, end - begin); - if (result == JSObject::ED_FAILED) + NativeObject::EnsureDenseResult result = narr->ensureDenseElements(cx, 0, end - begin); + if (result == NativeObject::ED_FAILED) return false; - if (result == JSObject::ED_OK) { + if (result == NativeObject::ED_OK) { if (!op(cx, obj, begin, end, narr)) return false; @@ -2779,7 +2793,7 @@ js::array_slice(JSContext *cx, unsigned argc, Value *vp) } // Fallthrough - MOZ_ASSERT(result == JSObject::ED_SPARSE); + MOZ_ASSERT(result == NativeObject::ED_SPARSE); } @@ -3114,9 +3128,14 @@ CreateArrayPrototype(JSContext *cx, JSProtoKey key) if (!shape) return nullptr; - RootedObject arrayProto(cx, JSObject::createArray(cx, gc::FINALIZE_OBJECT4, gc::TenuredHeap, shape, type, 0)); - if (!arrayProto || !JSObject::setSingletonType(cx, arrayProto) || !AddLengthProperty(cx, arrayProto)) + RootedArrayObject arrayProto(cx, ArrayObject::createArray(cx, gc::FINALIZE_OBJECT4, + gc::TenuredHeap, shape, type, 0)); + if (!arrayProto || + !JSObject::setSingletonType(cx, arrayProto) || + !AddLengthProperty(cx, arrayProto)) + { return nullptr; + } /* * The default 'new' type of Array.prototype is required by type inference @@ -3158,7 +3177,7 @@ const Class ArrayObject::class_ = { */ static inline bool -EnsureNewArrayElements(ExclusiveContext *cx, JSObject *obj, uint32_t length) 
+EnsureNewArrayElements(ExclusiveContext *cx, ArrayObject *obj, uint32_t length) { /* * If ensureElements creates dynamically allocated slots, then having @@ -3237,7 +3256,7 @@ NewArray(ExclusiveContext *cxArg, uint32_t length, if (!shape) return nullptr; - Rooted arr(cxArg, JSObject::createArray(cxArg, allocKind, + RootedArrayObject arr(cxArg, ArrayObject::createArray(cxArg, allocKind, GetInitialHeap(newKind, &ArrayObject::class_), shape, type, length)); if (!arr) @@ -3277,7 +3296,7 @@ js::NewDenseFullyAllocatedArray(ExclusiveContext *cx, uint32_t length, JSObject *proto /* = nullptr */, NewObjectKind newKind /* = GenericObject */) { - return NewArray(cx, length, proto, newKind); + return NewArray(cx, length, proto, newKind); } ArrayObject * JS_FASTCALL @@ -3327,12 +3346,12 @@ js::NewDenseArray(ExclusiveContext *cx, uint32_t length, HandleTypeObject type, } ArrayObject * -js::NewDenseCopiedArray(JSContext *cx, uint32_t length, HandleObject src, uint32_t elementOffset, - JSObject *proto /* = nullptr */) +js::NewDenseCopiedArray(JSContext *cx, uint32_t length, HandleArrayObject src, + uint32_t elementOffset, JSObject *proto /* = nullptr */) { MOZ_ASSERT(!src->isIndexed()); - ArrayObject* arr = NewArray(cx, length, proto); + ArrayObject *arr = NewArray(cx, length, proto); if (!arr) return nullptr; @@ -3352,7 +3371,7 @@ ArrayObject * js::NewDenseCopiedArray(JSContext *cx, uint32_t length, const Value *values, JSObject *proto /* = nullptr */, NewObjectKind newKind /* = GenericObject */) { - ArrayObject* arr = NewArray(cx, length, proto); + ArrayObject *arr = NewArray(cx, length, proto); if (!arr) return nullptr; @@ -3377,7 +3396,8 @@ js::NewDenseFullyAllocatedArrayWithTemplate(JSContext *cx, uint32_t length, JSOb RootedShape shape(cx, templateObject->lastProperty()); gc::InitialHeap heap = GetInitialHeap(GenericObject, &ArrayObject::class_); - Rooted arr(cx, JSObject::createArray(cx, allocKind, heap, shape, type, length)); + Rooted arr(cx, ArrayObject::createArray(cx, 
allocKind, + heap, shape, type, length)); if (!arr) return nullptr; @@ -3390,7 +3410,7 @@ js::NewDenseFullyAllocatedArrayWithTemplate(JSContext *cx, uint32_t length, JSOb } JSObject * -js::NewDenseCopyOnWriteArray(JSContext *cx, HandleObject templateObject, gc::InitialHeap heap) +js::NewDenseCopyOnWriteArray(JSContext *cx, HandleNativeObject templateObject, gc::InitialHeap heap) { RootedShape shape(cx, templateObject->lastProperty()); @@ -3405,7 +3425,7 @@ js::NewDenseCopyOnWriteArray(JSContext *cx, HandleObject templateObject, gc::Ini return nullptr; } - Rooted arr(cx, JSObject::createCopyOnWriteArray(cx, heap, shape, templateObject)); + ArrayObject *arr = ArrayObject::createCopyOnWriteArray(cx, heap, shape, templateObject); if (!arr) return nullptr; @@ -3433,7 +3453,7 @@ js_ArrayInfo(JSContext *cx, unsigned argc, Value *vp) continue; } fprintf(stderr, "%s: (len %u", bytes, obj->as().length()); - fprintf(stderr, ", capacity %u", obj->getDenseCapacity()); + fprintf(stderr, ", capacity %u", obj->as().getDenseCapacity()); fputs(")\n", stderr); js_free(bytes); } diff --git a/js/src/jsarray.h b/js/src/jsarray.h index 762e74e55e1..efb79d9d3bf 100644 --- a/js/src/jsarray.h +++ b/js/src/jsarray.h @@ -85,7 +85,8 @@ NewDenseArray(ExclusiveContext *cx, uint32_t length, HandleTypeObject type, /* Create a dense array with a copy of the dense array elements in src. */ extern ArrayObject * -NewDenseCopiedArray(JSContext *cx, uint32_t length, HandleObject src, uint32_t elementOffset, JSObject *proto = nullptr); +NewDenseCopiedArray(JSContext *cx, uint32_t length, HandleArrayObject src, + uint32_t elementOffset, JSObject *proto = nullptr); /* Create a dense array from the given array values, which must be rooted */ extern ArrayObject * @@ -98,7 +99,7 @@ NewDenseFullyAllocatedArrayWithTemplate(JSContext *cx, uint32_t length, JSObject /* Create a dense array with the same copy-on-write elements as another object. 
*/ extern JSObject * -NewDenseCopyOnWriteArray(JSContext *cx, HandleObject templateObject, gc::InitialHeap heap); +NewDenseCopyOnWriteArray(JSContext *cx, HandleNativeObject templateObject, gc::InitialHeap heap); /* * Determines whether a write to the given element on |obj| should fail because @@ -174,7 +175,7 @@ extern JSString * array_join_impl(JSContext *cx, HandleValue array, HandleString sep); extern void -ArrayShiftMoveElements(JSObject *obj); +ArrayShiftMoveElements(ArrayObject *obj); extern bool array_shift(JSContext *cx, unsigned argc, js::Value *vp); diff --git a/js/src/jsbool.cpp b/js/src/jsbool.cpp index 266ba60ec5d..9c52d81d90c 100644 --- a/js/src/jsbool.cpp +++ b/js/src/jsbool.cpp @@ -140,7 +140,7 @@ js_InitBooleanClass(JSContext *cx, HandleObject obj) Rooted global(cx, &obj->as()); - RootedObject booleanProto (cx, global->createBlankPrototype(cx, &BooleanObject::class_)); + RootedNativeObject booleanProto(cx, global->createBlankPrototype(cx, &BooleanObject::class_)); if (!booleanProto) return nullptr; booleanProto->setFixedSlot(BooleanObject::PRIMITIVE_VALUE_SLOT, BooleanValue(false)); diff --git a/js/src/jscntxt.cpp b/js/src/jscntxt.cpp index f765d87cbc3..aa6a67adeee 100644 --- a/js/src/jscntxt.cpp +++ b/js/src/jscntxt.cpp @@ -524,7 +524,7 @@ js::ReportUsageError(JSContext *cx, HandleObject callee, const char *msg) const char *usageStr = "usage"; PropertyName *usageAtom = Atomize(cx, usageStr, strlen(usageStr))->asPropertyName(); RootedId id(cx, NameToId(usageAtom)); - DebugOnly shape = static_cast(callee->nativeLookup(cx, id)); + DebugOnly shape = static_cast(callee->as().lookup(cx, id)); MOZ_ASSERT(!shape->configurable()); MOZ_ASSERT(!shape->writable()); MOZ_ASSERT(shape->hasDefaultGetter()); diff --git a/js/src/jscntxt.h b/js/src/jscntxt.h index afd0425f90f..d044b1dca16 100644 --- a/js/src/jscntxt.h +++ b/js/src/jscntxt.h @@ -161,9 +161,9 @@ struct ThreadSafeContext : ContextFriendFields, friend class Activation; friend UnownedBaseShape 
*BaseShape::lookupUnowned(ThreadSafeContext *cx, const StackBaseShape &base); - friend Shape *JSObject::lookupChildProperty(ThreadSafeContext *cx, - JS::HandleObject obj, js::HandleShape parent, - js::StackShape &child); + friend Shape *NativeObject::lookupChildProperty(ThreadSafeContext *cx, + HandleNativeObject obj, HandleShape parent, + StackShape &child); public: enum ContextKind { diff --git a/js/src/jsdate.cpp b/js/src/jsdate.cpp index e9e18914aad..50e3824c759 100644 --- a/js/src/jsdate.cpp +++ b/js/src/jsdate.cpp @@ -3009,8 +3009,8 @@ FinishDateClassInit(JSContext *cx, HandleObject ctor, HandleObject proto) RootedValue toUTCStringFun(cx); RootedId toUTCStringId(cx, NameToId(cx->names().toUTCString)); RootedId toGMTStringId(cx, NameToId(cx->names().toGMTString)); - return baseops::GetProperty(cx, proto, toUTCStringId, &toUTCStringFun) && - baseops::DefineGeneric(cx, proto, toGMTStringId, toUTCStringFun, + return baseops::GetProperty(cx, proto.as(), toUTCStringId, &toUTCStringFun) && + baseops::DefineGeneric(cx, proto.as(), toGMTStringId, toUTCStringFun, JS_PropertyStub, JS_StrictPropertyStub, 0); } diff --git a/js/src/jsfriendapi.cpp b/js/src/jsfriendapi.cpp index e042fa3a642..507f1c1d15c 100644 --- a/js/src/jsfriendapi.cpp +++ b/js/src/jsfriendapi.cpp @@ -29,6 +29,7 @@ #include "jsobjinlines.h" #include "jsscriptinlines.h" +#include "vm/ObjectImpl-inl.h" #include "vm/ScopeObject-inl.h" using namespace js; @@ -434,7 +435,7 @@ js::NotifyAnimationActivity(JSObject *obj) JS_FRIEND_API(uint32_t) js::GetObjectSlotSpan(JSObject *obj) { - return obj->slotSpan(); + return obj->fakeNativeSlotSpan(); } JS_FRIEND_API(bool) @@ -556,7 +557,7 @@ js::GetOriginalEval(JSContext *cx, HandleObject scope, MutableHandleObject eval) JS_FRIEND_API(void) js::SetReservedSlotWithBarrier(JSObject *obj, size_t slot, const js::Value &value) { - obj->setSlot(slot, value); + obj->fakeNativeSetSlot(slot, value); } JS_FRIEND_API(bool) @@ -1384,8 +1385,8 @@ JS_FRIEND_API(void) 
js::UnsafeDefineElement(JSContext *cx, JS::HandleObject obj, uint32_t index, JS::HandleValue value) { MOZ_ASSERT(obj->isNative()); - MOZ_ASSERT(index < obj->getDenseInitializedLength()); - obj->setDenseElementWithType(cx, index, value); + MOZ_ASSERT(index < obj->as().getDenseInitializedLength()); + obj->as().setDenseElementWithType(cx, index, value); } JS_FRIEND_API(bool) diff --git a/js/src/jsfun.cpp b/js/src/jsfun.cpp index c971f05a393..89c9c23ad64 100644 --- a/js/src/jsfun.cpp +++ b/js/src/jsfun.cpp @@ -508,7 +508,7 @@ js::fun_resolve(JSContext *cx, HandleObject obj, HandleId id, MutableHandleObjec template bool js::XDRInterpretedFunction(XDRState *xdr, HandleObject enclosingScope, HandleScript enclosingScript, - MutableHandleObject objp) + MutableHandleFunction objp) { enum FirstWordFlag { HasAtom = 0x1, @@ -528,7 +528,7 @@ js::XDRInterpretedFunction(XDRState *xdr, HandleObject enclosingScope, Han Rooted lazy(cx); if (mode == XDR_ENCODE) { - fun = &objp->as(); + fun = objp; if (!fun->isInterpreted()) { JSAutoByteString funNameBytes; if (const char *name = GetFunctionNameBytes(cx, fun, &funNameBytes)) { @@ -629,10 +629,10 @@ js::XDRInterpretedFunction(XDRState *xdr, HandleObject enclosingScope, Han } template bool -js::XDRInterpretedFunction(XDRState *, HandleObject, HandleScript, MutableHandleObject); +js::XDRInterpretedFunction(XDRState *, HandleObject, HandleScript, MutableHandleFunction); template bool -js::XDRInterpretedFunction(XDRState *, HandleObject, HandleScript, MutableHandleObject); +js::XDRInterpretedFunction(XDRState *, HandleObject, HandleScript, MutableHandleFunction); JSObject * js::CloneFunctionAndScript(JSContext *cx, HandleObject enclosingScope, HandleFunction srcFun) @@ -1329,7 +1329,7 @@ JSFunction::initBoundFunction(JSContext *cx, HandleValue thisArg, if (!self->setFlag(cx, BaseShape::BOUND_FUNCTION)) return false; - if (!JSObject::setSlotSpan(cx, self, BOUND_FUNCTION_RESERVED_SLOTS + argslen)) + if (!NativeObject::setSlotSpan(cx, self, 
BOUND_FUNCTION_RESERVED_SLOTS + argslen)) return false; self->setSlot(JSSLOT_BOUND_FUNCTION_THIS, thisArg); diff --git a/js/src/jsfun.h b/js/src/jsfun.h index ce4207f30d8..82360b3fac1 100644 --- a/js/src/jsfun.h +++ b/js/src/jsfun.h @@ -25,7 +25,7 @@ typedef JSThreadSafeNative ThreadSafeNative; struct JSAtomState; -class JSFunction : public JSObject +class JSFunction : public js::NativeObject { public: static const js::Class class_; @@ -631,7 +631,7 @@ JSString *FunctionToString(JSContext *cx, HandleFunction fun, bool bodyOnly, boo template bool XDRInterpretedFunction(XDRState *xdr, HandleObject enclosingScope, - HandleScript enclosingScript, MutableHandleObject objp); + HandleScript enclosingScript, MutableHandleFunction objp); extern JSObject * CloneFunctionAndScript(JSContext *cx, HandleObject enclosingScope, HandleFunction fun); diff --git a/js/src/jsgc.cpp b/js/src/jsgc.cpp index 2ef3d8faec4..ed9dde2ddf8 100644 --- a/js/src/jsgc.cpp +++ b/js/src/jsgc.cpp @@ -4379,7 +4379,7 @@ static void AssertNotOnGrayList(JSObject *obj) { MOZ_ASSERT_IF(IsGrayListObject(obj), - obj->getReservedSlot(ProxyObject::grayLinkSlot(obj)).isUndefined()); + obj->fakeNativeGetReservedSlot(ProxyObject::grayLinkSlot(obj)).isUndefined()); } #endif @@ -4394,11 +4394,11 @@ static JSObject * NextIncomingCrossCompartmentPointer(JSObject *prev, bool unlink) { unsigned slot = ProxyObject::grayLinkSlot(prev); - JSObject *next = prev->getReservedSlot(slot).toObjectOrNull(); + JSObject *next = prev->fakeNativeGetReservedSlot(slot).toObjectOrNull(); MOZ_ASSERT_IF(next, IsGrayListObject(next)); if (unlink) - prev->setSlot(slot, UndefinedValue()); + prev->fakeNativeSetSlot(slot, UndefinedValue()); return next; } @@ -4413,11 +4413,11 @@ js::DelayCrossCompartmentGrayMarking(JSObject *src) JSObject *dest = CrossCompartmentPointerReferent(src); JSCompartment *comp = dest->compartment(); - if (src->getReservedSlot(slot).isUndefined()) { - src->setCrossCompartmentSlot(slot, 
ObjectOrNullValue(comp->gcIncomingGrayPointers)); + if (src->fakeNativeGetReservedSlot(slot).isUndefined()) { + src->fakeNativeSetCrossCompartmentSlot(slot, ObjectOrNullValue(comp->gcIncomingGrayPointers)); comp->gcIncomingGrayPointers = src; } else { - MOZ_ASSERT(src->getReservedSlot(slot).isObjectOrNull()); + MOZ_ASSERT(src->fakeNativeGetReservedSlot(slot).isObjectOrNull()); } #ifdef DEBUG @@ -4487,11 +4487,11 @@ RemoveFromGrayList(JSObject *wrapper) return false; unsigned slot = ProxyObject::grayLinkSlot(wrapper); - if (wrapper->getReservedSlot(slot).isUndefined()) + if (wrapper->fakeNativeGetReservedSlot(slot).isUndefined()) return false; /* Not on our list. */ - JSObject *tail = wrapper->getReservedSlot(slot).toObjectOrNull(); - wrapper->setReservedSlot(slot, UndefinedValue()); + JSObject *tail = wrapper->fakeNativeGetReservedSlot(slot).toObjectOrNull(); + wrapper->fakeNativeSetReservedSlot(slot, UndefinedValue()); JSCompartment *comp = CrossCompartmentPointerReferent(wrapper)->compartment(); JSObject *obj = comp->gcIncomingGrayPointers; @@ -4502,9 +4502,9 @@ RemoveFromGrayList(JSObject *wrapper) while (obj) { unsigned slot = ProxyObject::grayLinkSlot(obj); - JSObject *next = obj->getReservedSlot(slot).toObjectOrNull(); + JSObject *next = obj->fakeNativeGetReservedSlot(slot).toObjectOrNull(); if (next == wrapper) { - obj->setCrossCompartmentSlot(slot, ObjectOrNullValue(tail)); + obj->fakeNativeSetCrossCompartmentSlot(slot, ObjectOrNullValue(tail)); return true; } obj = next; diff --git a/js/src/jsgc.h b/js/src/jsgc.h index 040c5793c23..abd69e03f70 100644 --- a/js/src/jsgc.h +++ b/js/src/jsgc.h @@ -15,12 +15,13 @@ #include "mozilla/TypeTraits.h" #include "jslock.h" -#include "jsobj.h" #include "js/GCAPI.h" #include "js/SliceBudget.h" #include "js/Vector.h" +#include "vm/ObjectImpl.h" + namespace js { namespace gc { @@ -215,7 +216,7 @@ GetGCArrayKind(size_t numSlots) * unused. 
*/ JS_STATIC_ASSERT(ObjectElements::VALUES_PER_HEADER == 2); - if (numSlots > JSObject::NELEMENTS_LIMIT || numSlots + 2 >= SLOTS_TO_THING_KIND_LIMIT) + if (numSlots > NativeObject::NELEMENTS_LIMIT || numSlots + 2 >= SLOTS_TO_THING_KIND_LIMIT) return FINALIZE_OBJECT2; return slotsToThingKind[numSlots + 2]; } @@ -1187,7 +1188,7 @@ class RelocationOverlay static const uintptr_t Relocated = uintptr_t(0xbad0bad1); // Putting the magic value after the forwarding pointer is a terrible hack - // to make ObjectImpl::zone() work on forwarded objects. + // to make JSObject::zone() work on forwarded objects. /* The location |this| was moved to. */ Cell *newLocation_; @@ -1214,7 +1215,7 @@ class RelocationOverlay void forwardTo(Cell *cell) { MOZ_ASSERT(!isForwarded()); - MOZ_ASSERT(ObjectImpl::offsetOfShape() == offsetof(RelocationOverlay, newLocation_)); + MOZ_ASSERT(JSObject::offsetOfShape() == offsetof(RelocationOverlay, newLocation_)); newLocation_ = cell; magic_ = Relocated; next_ = nullptr; diff --git a/js/src/jsgcinlines.h b/js/src/jsgcinlines.h index c32d7771eb5..b0733ee0327 100644 --- a/js/src/jsgcinlines.h +++ b/js/src/jsgcinlines.h @@ -606,7 +606,7 @@ AllocateObject(ThreadSafeContext *cx, AllocKind kind, size_t nDynamicSlots, Init obj = static_cast(js::gc::ArenaLists::refillFreeList(cx, kind)); if (obj) - obj->setInitialSlots(slots); + obj->fakeNativeSetInitialSlots(slots); else js_free(slots); diff --git a/js/src/jsinfer.cpp b/js/src/jsinfer.cpp index 2ba42ff1f9f..af06d97f9a2 100644 --- a/js/src/jsinfer.cpp +++ b/js/src/jsinfer.cpp @@ -36,6 +36,7 @@ #include "jsscriptinlines.h" #include "jit/ExecutionMode-inl.h" +#include "vm/ObjectImpl-inl.h" using namespace js; using namespace js::gc; @@ -1036,7 +1037,7 @@ TypeObjectKey::proto() } bool -ObjectImpl::hasTenuredProto() const +JSObject::hasTenuredProto() const { return type_->hasTenuredProto(); } @@ -1102,7 +1103,7 @@ TypeObjectKey::ensureTrackedProperty(JSContext *cx, jsid id) if (!JSID_IS_VOID(id) && 
!JSID_IS_EMPTY(id)) { MOZ_ASSERT(CurrentThreadCanAccessRuntime(cx->runtime())); if (JSObject *obj = singleton()) { - if (obj->isNative() && obj->nativeLookupPure(id)) + if (obj->isNative() && obj->as().containsPure(id)) EnsureTrackPropertyTypes(cx, obj, id); } } @@ -1822,11 +1823,11 @@ HeapTypeSetKey::constant(CompilerConstraintList *constraints, Value *valOut) return false; // Get the current value of the property. - Shape *shape = obj->nativeLookupPure(id()); + Shape *shape = obj->as().lookupPure(id()); if (!shape || !shape->hasDefaultGetter() || !shape->hasSlot() || shape->hadOverwrite()) return false; - Value val = obj->nativeGetSlot(shape->slot()); + Value val = obj->as().getSlot(shape->slot()); // If the value is a pointer to an object in the nursery, don't optimize. if (val.isGCThing() && IsInsideNursery(val.toGCThing())) @@ -2615,7 +2616,7 @@ TypeCompartment::setTypeToHomogenousArray(ExclusiveContext *cx, } void -TypeCompartment::fixArrayType(ExclusiveContext *cx, JSObject *obj) +TypeCompartment::fixArrayType(ExclusiveContext *cx, ArrayObject *obj) { AutoEnterAnalysis enter(cx); @@ -2625,7 +2626,6 @@ TypeCompartment::fixArrayType(ExclusiveContext *cx, JSObject *obj) * If the array is heterogenous, keep the existing type object, which has * unknown properties. 
*/ - MOZ_ASSERT(obj->is()); unsigned len = obj->getDenseInitializedLength(); if (len == 0) @@ -2647,13 +2647,13 @@ TypeCompartment::fixArrayType(ExclusiveContext *cx, JSObject *obj) } void -types::FixRestArgumentsType(ExclusiveContext *cx, JSObject *obj) +types::FixRestArgumentsType(ExclusiveContext *cx, ArrayObject *obj) { cx->compartment()->types.fixRestArgumentsType(cx, obj); } void -TypeCompartment::fixRestArgumentsType(ExclusiveContext *cx, JSObject *obj) +TypeCompartment::fixRestArgumentsType(ExclusiveContext *cx, ArrayObject *obj) { AutoEnterAnalysis enter(cx); @@ -2661,8 +2661,6 @@ TypeCompartment::fixRestArgumentsType(ExclusiveContext *cx, JSObject *obj) * Tracking element types for rest argument arrays is not worth it, but we * still want it to be known that it's a dense array. */ - MOZ_ASSERT(obj->is()); - setTypeToHomogenousArray(cx, obj, Type::UnknownType()); } @@ -2740,7 +2738,7 @@ UpdateObjectTableEntryTypes(ExclusiveContext *cx, ObjectTableEntry &entry, } void -TypeCompartment::fixObjectType(ExclusiveContext *cx, JSObject *obj) +TypeCompartment::fixObjectType(ExclusiveContext *cx, NativeObject *obj) { AutoEnterAnalysis enter(cx); @@ -2874,7 +2872,7 @@ TypeCompartment::newTypedObject(JSContext *cx, IdValuePair *properties, size_t n if (!p) return nullptr; - RootedObject obj(cx, NewBuiltinClassInstance(cx, &JSObject::class_, allocKind)); + RootedNativeObject obj(cx, NewNativeBuiltinClassInstance(cx, &JSObject::class_, allocKind)); if (!obj) { cx->clearPendingException(); return nullptr; @@ -2882,7 +2880,7 @@ TypeCompartment::newTypedObject(JSContext *cx, IdValuePair *properties, size_t n MOZ_ASSERT(obj->getProto() == p->value().object->proto().toObject()); RootedShape shape(cx, p->value().shape); - if (!JSObject::setLastProperty(cx, obj, shape)) { + if (!NativeObject::setLastProperty(cx, obj, shape)) { cx->clearPendingException(); return nullptr; } @@ -2912,7 +2910,7 @@ TypeObject::setProto(JSContext *cx, TaggedProto proto) } static inline void 
-UpdatePropertyType(ExclusiveContext *cx, HeapTypeSet *types, JSObject *obj, Shape *shape, +UpdatePropertyType(ExclusiveContext *cx, HeapTypeSet *types, NativeObject *obj, Shape *shape, bool indexed) { MOZ_ASSERT(obj->hasSingletonType() && !obj->hasLazyType()); @@ -2927,7 +2925,7 @@ UpdatePropertyType(ExclusiveContext *cx, HeapTypeSet *types, JSObject *obj, Shap if (!indexed && types->canSetDefinite(shape->slot())) types->setDefinite(shape->slot()); - const Value &value = obj->nativeGetSlot(shape->slot()); + const Value &value = obj->getSlot(shape->slot()); /* * Don't add initial undefined types for properties of global objects @@ -2961,6 +2959,8 @@ TypeObject::updateNewPropertyTypes(ExclusiveContext *cx, jsid id, HeapTypeSet *t return; } + NativeObject *obj = &singleton()->as(); + /* * Fill the property in with any type the object already has in an own * property. We are only interested in plain native properties and @@ -2970,16 +2970,16 @@ TypeObject::updateNewPropertyTypes(ExclusiveContext *cx, jsid id, HeapTypeSet *t if (JSID_IS_VOID(id)) { /* Go through all shapes on the object to get integer-valued properties. */ - RootedShape shape(cx, singleton()->lastProperty()); + RootedShape shape(cx, obj->lastProperty()); while (!shape->isEmptyShape()) { if (JSID_IS_VOID(IdToTypeId(shape->propid()))) - UpdatePropertyType(cx, types, singleton(), shape, true); + UpdatePropertyType(cx, types, obj, shape, true); shape = shape->previous(); } /* Also get values of any dense elements in the object. 
*/ - for (size_t i = 0; i < singleton()->getDenseInitializedLength(); i++) { - const Value &value = singleton()->getDenseElement(i); + for (size_t i = 0; i < obj->getDenseInitializedLength(); i++) { + const Value &value = obj->getDenseElement(i); if (!value.isMagic(JS_ELEMENTS_HOLE)) { Type type = GetValueType(value); types->TypeSet::addType(type, &cx->typeLifoAlloc()); @@ -2987,12 +2987,12 @@ TypeObject::updateNewPropertyTypes(ExclusiveContext *cx, jsid id, HeapTypeSet *t } } else if (!JSID_IS_EMPTY(id)) { RootedId rootedId(cx, id); - Shape *shape = singleton()->nativeLookup(cx, rootedId); + Shape *shape = obj->lookup(cx, rootedId); if (shape) - UpdatePropertyType(cx, types, singleton(), shape, false); + UpdatePropertyType(cx, types, obj, shape, false); } - if (singleton()->watched()) { + if (obj->watched()) { /* * Mark the property as non-data, to inhibit optimizations on it * and avoid bypassing the watchpoint handler. @@ -3014,7 +3014,7 @@ TypeObject::addDefiniteProperties(ExclusiveContext *cx, Shape *shape) jsid id = IdToTypeId(shape->propid()); if (!JSID_IS_VOID(id)) { MOZ_ASSERT_IF(shape->slot() >= shape->numFixedSlots(), - shape->numFixedSlots() == JSObject::MAX_FIXED_SLOTS); + shape->numFixedSlots() == NativeObject::MAX_FIXED_SLOTS); TypeSet *types = getProperty(cx, id); if (!types) return false; @@ -3322,7 +3322,8 @@ TypeObject::print() if (newScript()) { if (newScript()->analyzed()) { - fprintf(stderr, "\n newScript %d properties", (int) newScript()->templateObject()->slotSpan()); + fprintf(stderr, "\n newScript %d properties", + (int) newScript()->templateObject()->slotSpan()); if (newScript()->initializedType()) { fprintf(stderr, " initializedType %p with %d properties", newScript()->initializedType(), (int) newScript()->initializedShape()->slotSpan()); @@ -3530,13 +3531,12 @@ types::FillBytecodeTypeMap(JSScript *script, uint32_t *bytecodeMap) MOZ_ASSERT(added == script->nTypeSets()); } -JSObject * +ArrayObject * 
types::GetOrFixupCopyOnWriteObject(JSContext *cx, HandleScript script, jsbytecode *pc) { // Make sure that the template object for script/pc has a type indicating // that the object and its copies have copy on write elements. - RootedObject obj(cx, script->getObject(GET_UINT32_INDEX(pc))); - MOZ_ASSERT(obj->is()); + RootedArrayObject obj(cx, &script->getObject(GET_UINT32_INDEX(pc))->as()); MOZ_ASSERT(obj->denseElementsAreCopyOnWrite()); if (obj->type()->fromAllocationSite()) { @@ -3561,7 +3561,7 @@ types::GetOrFixupCopyOnWriteObject(JSContext *cx, HandleScript script, jsbytecod return obj; } -JSObject * +ArrayObject * types::GetCopyOnWriteObject(JSScript *script, jsbytecode *pc) { // GetOrFixupCopyOnWriteObject should already have been called for @@ -3569,8 +3569,7 @@ types::GetCopyOnWriteObject(JSScript *script, jsbytecode *pc) // COPY_ON_WRITE flag. We don't assert this here, due to a corner case // where this property doesn't hold. See jsop_newarray_copyonwrite in // IonBuilder. - JSObject *obj = script->getObject(GET_UINT32_INDEX(pc)); - MOZ_ASSERT(obj->is()); + ArrayObject *obj = &script->getObject(GET_UINT32_INDEX(pc))->as(); MOZ_ASSERT(obj->denseElementsAreCopyOnWrite()); return obj; @@ -3739,7 +3738,8 @@ TypeNewScript::make(JSContext *cx, TypeObject *type, JSFunction *fun) newScript->fun = fun; - JSObject **preliminaryObjects = type->zone()->pod_calloc(PRELIMINARY_OBJECT_COUNT); + NativeObject **preliminaryObjects = + type->zone()->pod_calloc(PRELIMINARY_OBJECT_COUNT); if (!preliminaryObjects) return; @@ -3750,7 +3750,7 @@ TypeNewScript::make(JSContext *cx, TypeObject *type, JSFunction *fun) } void -TypeNewScript::registerNewObject(JSObject *res) +TypeNewScript::registerNewObject(NativeObject *res) { MOZ_ASSERT(!analyzed()); @@ -3762,7 +3762,7 @@ TypeNewScript::registerNewObject(JSObject *res) // New script objects must have the maximum number of fixed slots, so that // we can adjust their shape later to match the number of fixed slots used // by the 
template object we eventually create. - MOZ_ASSERT(res->numFixedSlots() == JSObject::MAX_FIXED_SLOTS); + MOZ_ASSERT(res->numFixedSlots() == NativeObject::MAX_FIXED_SLOTS); for (size_t i = 0; i < PRELIMINARY_OBJECT_COUNT; i++) { if (!preliminaryObjects[i]) { @@ -3775,7 +3775,7 @@ TypeNewScript::registerNewObject(JSObject *res) } void -TypeNewScript::unregisterNewObject(JSObject *res) +TypeNewScript::unregisterNewObject(NativeObject *res) { MOZ_ASSERT(!analyzed()); @@ -3833,7 +3833,7 @@ CommonPrefix(Shape *first, Shape *second) } static bool -ChangeObjectFixedSlotCount(JSContext *cx, JSObject *obj, gc::AllocKind allocKind) +ChangeObjectFixedSlotCount(JSContext *cx, NativeObject *obj, gc::AllocKind allocKind) { MOZ_ASSERT(OnlyHasDataProperties(obj->lastProperty())); @@ -3901,7 +3901,7 @@ TypeNewScript::maybeAnalyze(JSContext *cx, TypeObject *type, bool *regenerate, b Shape *prefixShape = nullptr; size_t maxSlotSpan = 0; for (size_t i = 0; i < PRELIMINARY_OBJECT_COUNT; i++) { - JSObject *obj = preliminaryObjects[i]; + NativeObject *obj = preliminaryObjects[i]; if (!obj) continue; @@ -3929,7 +3929,7 @@ TypeNewScript::maybeAnalyze(JSContext *cx, TypeObject *type, bool *regenerate, b gc::AllocKind kind = gc::GetGCObjectKind(maxSlotSpan); - if (kind != gc::GetGCObjectKind(JSObject::MAX_FIXED_SLOTS)) { + if (kind != gc::GetGCObjectKind(NativeObject::MAX_FIXED_SLOTS)) { // The template object will have a different allocation kind from the // preliminary objects that have already been constructed. Optimizing // definite property accesses requires both that the property is @@ -3940,7 +3940,7 @@ TypeNewScript::maybeAnalyze(JSContext *cx, TypeObject *type, bool *regenerate, b // old number of fixed slots. 
Shape *newPrefixShape = nullptr; for (size_t i = 0; i < PRELIMINARY_OBJECT_COUNT; i++) { - JSObject *obj = preliminaryObjects[i]; + NativeObject *obj = preliminaryObjects[i]; if (!obj) continue; if (!ChangeObjectFixedSlotCount(cx, obj, kind)) @@ -3957,13 +3957,13 @@ TypeNewScript::maybeAnalyze(JSContext *cx, TypeObject *type, bool *regenerate, b } RootedTypeObject typeRoot(cx, type); - templateObject_ = NewObjectWithType(cx, typeRoot, cx->global(), kind, MaybeSingletonObject); + templateObject_ = NewNativeObjectWithType(cx, typeRoot, cx->global(), kind, MaybeSingletonObject); if (!templateObject_) return false; Vector initializerVector(cx); - RootedObject templateRoot(cx, templateObject()); + RootedNativeObject templateRoot(cx, templateObject()); if (!jit::AnalyzeNewScriptDefiniteProperties(cx, fun, type, templateRoot, &initializerVector)) return false; @@ -4097,7 +4097,7 @@ TypeNewScript::rollbackPartiallyInitializedObjects(JSContext *cx, TypeObject *ty } // Found a matching frame. - RootedObject obj(cx, &thisv.toObject()); + RootedNativeObject obj(cx, &thisv.toObject().as()); // Whether all identified 'new' properties have been initialized. bool finished = false; @@ -4149,7 +4149,7 @@ TypeNewScript::rollbackPartiallyInitializedObjects(JSContext *cx, TypeObject *ty } if (!finished) - (void) JSObject::rollbackProperties(cx, obj, numProperties); + (void) NativeObject::rollbackProperties(cx, obj, numProperties); } } @@ -4175,7 +4175,7 @@ TypeNewScript::sweep(FreeOp *fop) // are about to be destroyed. if (preliminaryObjects) { for (size_t i = 0; i < PRELIMINARY_OBJECT_COUNT; i++) { - JSObject **ptr = &preliminaryObjects[i]; + NativeObject **ptr = &preliminaryObjects[i]; if (*ptr && IsObjectAboutToBeFinalized(ptr)) *ptr = nullptr; } diff --git a/js/src/jsinfer.h b/js/src/jsinfer.h index 507b4037d9f..2cf97b394e9 100644 --- a/js/src/jsinfer.h +++ b/js/src/jsinfer.h @@ -904,13 +904,13 @@ class TypeNewScript // analyses are performed and this array is cleared. 
The pointers in this // array are weak. static const uint32_t PRELIMINARY_OBJECT_COUNT = 20; - JSObject **preliminaryObjects; + NativeObject **preliminaryObjects; // After the new script properties analyses have been performed, a template // object to use for newly constructed objects. The shape of this object // reflects all definite properties the object will have, and the // allocation kind to use. - HeapPtrObject templateObject_; + HeapPtrNativeObject templateObject_; // Order in which definite properties become initialized. We need this in // case the definite properties are invalidated (such as by adding a setter @@ -955,7 +955,7 @@ class TypeNewScript return true; } - JSObject *templateObject() const { + NativeObject *templateObject() const { return templateObject_; } @@ -974,8 +974,8 @@ class TypeNewScript void fixupAfterMovingGC(); #endif - void registerNewObject(JSObject *res); - void unregisterNewObject(JSObject *res); + void registerNewObject(NativeObject *res); + void unregisterNewObject(NativeObject *res); bool maybeAnalyze(JSContext *cx, TypeObject *type, bool *regenerate, bool force = false); void rollbackPartiallyInitializedObjects(JSContext *cx, TypeObject *type); @@ -1402,10 +1402,10 @@ class TypeScript void FillBytecodeTypeMap(JSScript *script, uint32_t *bytecodeMap); -JSObject * +ArrayObject * GetOrFixupCopyOnWriteObject(JSContext *cx, HandleScript script, jsbytecode *pc); -JSObject * +ArrayObject * GetCopyOnWriteObject(JSScript *script, jsbytecode *pc); class RecompileInfo; @@ -1629,9 +1629,9 @@ struct TypeCompartment void setTypeToHomogenousArray(ExclusiveContext *cx, JSObject *obj, Type type); public: - void fixArrayType(ExclusiveContext *cx, JSObject *obj); - void fixObjectType(ExclusiveContext *cx, JSObject *obj); - void fixRestArgumentsType(ExclusiveContext *cx, JSObject *obj); + void fixArrayType(ExclusiveContext *cx, ArrayObject *obj); + void fixObjectType(ExclusiveContext *cx, NativeObject *obj); + void 
fixRestArgumentsType(ExclusiveContext *cx, ArrayObject *obj); JSObject *newTypedObject(JSContext *cx, IdValuePair *properties, size_t nproperties); @@ -1668,7 +1668,7 @@ struct TypeCompartment size_t *objectTypeTables); }; -void FixRestArgumentsType(ExclusiveContext *cxArg, JSObject *obj); +void FixRestArgumentsType(ExclusiveContext *cxArg, ArrayObject *obj); struct TypeZone { diff --git a/js/src/jsinferinlines.h b/js/src/jsinferinlines.h index d3b54a38d19..c04fc4dc45e 100644 --- a/js/src/jsinferinlines.h +++ b/js/src/jsinferinlines.h @@ -547,13 +547,13 @@ MarkObjectStateChange(ExclusiveContext *cx, JSObject *obj) */ inline void -FixArrayType(ExclusiveContext *cx, HandleObject obj) +FixArrayType(ExclusiveContext *cx, ArrayObject *obj) { cx->compartment()->types.fixArrayType(cx, obj); } inline void -FixObjectType(ExclusiveContext *cx, HandleObject obj) +FixObjectType(ExclusiveContext *cx, NativeObject *obj) { cx->compartment()->types.fixObjectType(cx, obj); } diff --git a/js/src/jsiter.cpp b/js/src/jsiter.cpp index fb3a7cf2839..d91be5e73ee 100644 --- a/js/src/jsiter.cpp +++ b/js/src/jsiter.cpp @@ -36,6 +36,7 @@ #include "jsobjinlines.h" #include "jsscriptinlines.h" +#include "vm/ObjectImpl-inl.h" #include "vm/Stack-inl.h" #include "vm/String-inl.h" @@ -131,7 +132,7 @@ Enumerate(JSContext *cx, HandleObject pobj, jsid id, } static bool -EnumerateNativeProperties(JSContext *cx, HandleObject pobj, unsigned flags, IdSet &ht, +EnumerateNativeProperties(JSContext *cx, HandleNativeObject pobj, unsigned flags, IdSet &ht, AutoIdVector *props) { bool enumerateSymbols; @@ -284,9 +285,9 @@ Snapshot(JSContext *cx, JSObject *pobj_, unsigned flags, AutoIdVector *props) !pobj->getOps()->enumerate && !(clasp->flags & JSCLASS_NEW_ENUMERATE)) { - if (!clasp->enumerate(cx, pobj)) + if (!clasp->enumerate(cx, pobj.as())) return false; - if (!EnumerateNativeProperties(cx, pobj, flags, ht, props)) + if (!EnumerateNativeProperties(cx, pobj.as(), flags, ht, props)) return false; } else { if 
(pobj->is()) { @@ -322,7 +323,7 @@ Snapshot(JSContext *cx, JSObject *pobj_, unsigned flags, AutoIdVector *props) if (!JSObject::enumerate(cx, pobj, op, &state, &id)) return false; if (state.isMagic(JS_NATIVE_ENUMERATE)) { - if (!EnumerateNativeProperties(cx, pobj, flags, ht, props)) + if (!EnumerateNativeProperties(cx, pobj.as(), flags, ht, props)) return false; } else { while (true) { @@ -480,8 +481,9 @@ NewPropertyIteratorObject(JSContext *cx, unsigned flags) if (!shape) return nullptr; - JSObject *obj = JSObject::create(cx, ITERATOR_FINALIZE_KIND, - GetInitialHeap(GenericObject, clasp), shape, type); + NativeObject *obj = + MaybeNativeObject(JSObject::create(cx, ITERATOR_FINALIZE_KIND, + GetInitialHeap(GenericObject, clasp), shape, type)); if (!obj) return nullptr; @@ -684,12 +686,12 @@ js::GetIterator(JSContext *cx, HandleObject obj, unsigned flags, MutableHandleVa NativeIterator *lastni = last->getNativeIterator(); if (!(lastni->flags & (JSITER_ACTIVE|JSITER_UNREUSABLE)) && obj->isNative() && - obj->hasEmptyElements() && + obj->as().hasEmptyElements() && obj->lastProperty() == lastni->shapes_array[0]) { JSObject *proto = obj->getProto(); if (proto->isNative() && - proto->hasEmptyElements() && + proto->as().hasEmptyElements() && proto->lastProperty() == lastni->shapes_array[1] && !proto->getProto()) { @@ -711,12 +713,12 @@ js::GetIterator(JSContext *cx, HandleObject obj, unsigned flags, MutableHandleVa JSObject *pobj = obj; do { if (!pobj->isNative() || - !pobj->hasEmptyElements() || + !pobj->as().hasEmptyElements() || IsAnyTypedArray(pobj) || pobj->hasUncacheableProto() || pobj->getOps()->enumerate || pobj->getClass()->enumerate != JS_EnumerateStub || - pobj->nativeContainsPure(cx->names().iteratorIntrinsic)) + pobj->as().containsPure(cx->names().iteratorIntrinsic)) { shapes.clear(); goto miss; @@ -1343,7 +1345,7 @@ ForOfIterator::init(HandleValue iterable, NonIterableBehavior nonIterableBehavio return false; bool optimized; - if 
(!stubChain->tryOptimizeArray(cx, iterableObj, &optimized)) + if (!stubChain->tryOptimizeArray(cx, iterableObj.as(), &optimized)) return false; if (optimized) { @@ -1412,10 +1414,9 @@ ForOfIterator::nextFromOptimizedArray(MutableHandleValue vp, bool *done) if (!CheckForInterrupt(cx_)) return false; - MOZ_ASSERT(iterator->isNative()); - MOZ_ASSERT(iterator->is()); + ArrayObject *arr = &iterator->as(); - if (index >= iterator->as().length()) { + if (index >= arr->length()) { vp.setUndefined(); *done = true; return true; @@ -1423,8 +1424,8 @@ ForOfIterator::nextFromOptimizedArray(MutableHandleValue vp, bool *done) *done = false; // Try to get array element via direct access. - if (index < iterator->getDenseInitializedLength()) { - vp.set(iterator->getDenseElement(index)); + if (index < arr->getDenseInitializedLength()) { + vp.set(arr->getDenseElement(index)); if (!vp.isMagic(JS_ELEMENTS_HOLE)) { ++index; return true; @@ -1704,7 +1705,7 @@ js_NewGenerator(JSContext *cx, const InterpreterRegs &stackRegs) MOZ_ASSERT(stackfp->script()->isGenerator()); Rooted global(cx, &stackfp->global()); - RootedObject obj(cx); + RootedNativeObject obj(cx); if (stackfp->script()->isStarGenerator()) { RootedValue pval(cx); RootedObject fun(cx, stackfp->fun()); @@ -1718,13 +1719,13 @@ js_NewGenerator(JSContext *cx, const InterpreterRegs &stackRegs) if (!proto) return nullptr; } - obj = NewObjectWithGivenProto(cx, &StarGeneratorObject::class_, proto, global); + obj = NewNativeObjectWithGivenProto(cx, &StarGeneratorObject::class_, proto, global); } else { MOZ_ASSERT(stackfp->script()->isLegacyGenerator()); JSObject *proto = GlobalObject::getOrCreateLegacyGeneratorObjectPrototype(cx, global); if (!proto) return nullptr; - obj = NewObjectWithGivenProto(cx, &LegacyGeneratorObject::class_, proto, global); + obj = NewNativeObjectWithGivenProto(cx, &LegacyGeneratorObject::class_, proto, global); } if (!obj) return nullptr; diff --git a/js/src/jsiter.h b/js/src/jsiter.h index 
bec133cfe84..402cef8a5a9 100644 --- a/js/src/jsiter.h +++ b/js/src/jsiter.h @@ -114,7 +114,7 @@ struct NativeIterator } }; -class PropertyIteratorObject : public JSObject +class PropertyIteratorObject : public NativeObject { public: static const Class class_; diff --git a/js/src/jsobj.cpp b/js/src/jsobj.cpp index b62324c37d8..1b4080802f3 100644 --- a/js/src/jsobj.cpp +++ b/js/src/jsobj.cpp @@ -75,7 +75,8 @@ using mozilla::DebugOnly; using mozilla::Maybe; using mozilla::RoundUpPow2; -JS_STATIC_ASSERT(int32_t((JSObject::NELEMENTS_LIMIT - 1) * sizeof(Value)) == int64_t((JSObject::NELEMENTS_LIMIT - 1) * sizeof(Value))); +JS_STATIC_ASSERT(int32_t((NativeObject::NELEMENTS_LIMIT - 1) * sizeof(Value)) == + int64_t((NativeObject::NELEMENTS_LIMIT - 1) * sizeof(Value))); static JSObject * CreateObjectConstructor(JSContext *cx, JSProtoKey key) @@ -757,7 +758,7 @@ js::CheckDefineProperty(JSContext *cx, HandleObject obj, HandleId id, HandleValu } static bool -DefinePropertyOnObject(JSContext *cx, HandleObject obj, HandleId id, const PropDesc &desc, +DefinePropertyOnObject(JSContext *cx, HandleNativeObject obj, HandleId id, const PropDesc &desc, bool throwError, bool *rval) { /* 8.12.9 step 1. 
*/ @@ -870,7 +871,7 @@ DefinePropertyOnObject(JSContext *cx, HandleObject obj, HandleId id, const PropD return Reject(cx, JSMSG_CANT_REDEFINE_PROP, throwError, id, rval); } - if (!NativeGet(cx, obj, obj2, shape, &v)) + if (!NativeGet(cx, obj, obj2.as(), shape, &v)) return false; } @@ -1091,7 +1092,7 @@ DefinePropertyOnArray(JSContext *cx, Handle arr, HandleId id, cons if (desc.isAccessorDescriptor()) return Reject(cx, id, JSMSG_CANT_REDEFINE_PROP, throwError, rval); - unsigned attrs = arr->nativeLookup(cx, id)->attributes(); + unsigned attrs = arr->lookup(cx, id)->attributes(); if (!arr->lengthIsWritable()) { if (desc.hasWritable() && desc.writable()) return Reject(cx, id, JSMSG_CANT_REDEFINE_PROP, throwError, rval); @@ -1140,7 +1141,7 @@ js::DefineProperty(JSContext *cx, HandleObject obj, HandleId id, const PropDesc return Reject(cx, obj, JSMSG_OBJECT_NOT_EXTENSIBLE, throwError, rval); } - return DefinePropertyOnObject(cx, obj, id, desc, throwError, rval); + return DefinePropertyOnObject(cx, obj.as(), id, desc, throwError, rval); } bool @@ -1230,7 +1231,7 @@ js::DefineProperties(JSContext *cx, HandleObject obj, HandleObject props) bool dummy; for (size_t i = 0, len = ids.length(); i < len; i++) { - if (!DefinePropertyOnObject(cx, obj, ids[i], descs[i], true, &dummy)) + if (!DefinePropertyOnObject(cx, obj.as(), ids[i], descs[i], true, &dummy)) return false; } @@ -1278,9 +1279,11 @@ JSObject::sealOrFreeze(JSContext *cx, HandleObject obj, ImmutabilityType it) return false; /* preventExtensions must sparsify dense objects, so we can assign to holes without checks. 
*/ - MOZ_ASSERT_IF(obj->isNative(), obj->getDenseCapacity() == 0); + MOZ_ASSERT_IF(obj->isNative(), obj->as().getDenseCapacity() == 0); + + if (obj->isNative() && !obj->as().inDictionaryMode() && !IsAnyTypedArray(obj)) { + HandleNativeObject nobj = obj.as(); - if (obj->isNative() && !obj->inDictionaryMode() && !IsAnyTypedArray(obj)) { /* * Seal/freeze non-dictionary objects by constructing a new shape * hierarchy mirroring the original one, which can be shared if many @@ -1288,18 +1291,18 @@ JSObject::sealOrFreeze(JSContext *cx, HandleObject obj, ImmutabilityType it) * generic path below then any non-empty object will be converted to * dictionary mode. */ - RootedShape last(cx, EmptyShape::getInitialShape(cx, obj->getClass(), - obj->getTaggedProto(), - obj->getParent(), - obj->getMetadata(), - obj->numFixedSlots(), - obj->lastProperty()->getObjectFlags())); + RootedShape last(cx, EmptyShape::getInitialShape(cx, nobj->getClass(), + nobj->getTaggedProto(), + nobj->getParent(), + nobj->getMetadata(), + nobj->numFixedSlots(), + nobj->lastProperty()->getObjectFlags())); if (!last) return false; /* Get an in order list of the shapes in this object. 
*/ AutoShapeVector shapes(cx); - for (Shape::Range r(obj->lastProperty()); !r.empty(); r.popFront()) { + for (Shape::Range r(nobj->lastProperty()); !r.empty(); r.popFront()) { if (!shapes.append(&r.front())) return false; } @@ -1311,15 +1314,15 @@ JSObject::sealOrFreeze(JSContext *cx, HandleObject obj, ImmutabilityType it) child->attrs |= getSealedOrFrozenAttributes(child->attrs, it); if (!JSID_IS_EMPTY(child->propid) && it == FREEZE) - MarkTypePropertyNonWritable(cx, obj, child->propid); + MarkTypePropertyNonWritable(cx, nobj, child->propid); last = cx->compartment()->propertyTree.getChild(cx, last, *child); if (!last) return false; } - MOZ_ASSERT(obj->lastProperty()->slotSpan() == last->slotSpan()); - JS_ALWAYS_TRUE(setLastProperty(cx, obj, last)); + MOZ_ASSERT(nobj->lastProperty()->slotSpan() == last->slotSpan()); + JS_ALWAYS_TRUE(NativeObject::setLastProperty(cx, nobj, last)); } else { RootedId id(cx); for (size_t i = 0; i < props.length(); i++) { @@ -1351,9 +1354,9 @@ JSObject::sealOrFreeze(JSContext *cx, HandleObject obj, ImmutabilityType it) // for that, because capacity was zeroed out by preventExtensions. (See // the assertion before the if-else above.) 
if (it == FREEZE && obj->is()) { - if (!obj->maybeCopyElementsForWrite(cx)) + if (!obj->as().maybeCopyElementsForWrite(cx)) return false; - obj->getElementsHeader()->setNonwritableArrayLength(); + obj->as().getElementsHeader()->setNonwritableArrayLength(); } return true; @@ -1573,7 +1576,7 @@ js::NewObjectWithGivenProto(ExclusiveContext *cxArg, const js::Class *clasp, if (!obj) return nullptr; - if (entry != -1 && !obj->hasDynamicSlots() && + if (entry != -1 && !obj->fakeNativeHasDynamicSlots() && cxArg->asJSContext()->runtime()->gc.gcNumber() == gcNumber) { cxArg->asJSContext()->runtime()->newObjectCache.fillProto(entry, clasp, @@ -1658,7 +1661,7 @@ js::NewObjectWithClassProtoCommon(ExclusiveContext *cxArg, if (!obj) return nullptr; - if (entry != -1 && !obj->hasDynamicSlots()) { + if (entry != -1 && !obj->fakeNativeHasDynamicSlots()) { cxArg->asJSContext()->runtime()->newObjectCache.fillGlobal(entry, clasp, &parent->as(), allocKind, obj); @@ -1703,7 +1706,7 @@ js::NewObjectWithType(JSContext *cx, HandleTypeObject type, JSObject *parent, gc if (!obj) return nullptr; - if (entry != -1 && !obj->hasDynamicSlots()) + if (entry != -1 && !obj->fakeNativeHasDynamicSlots()) cache.fillType(entry, type, allocKind, obj); return obj; @@ -1745,7 +1748,7 @@ js::CreateThis(JSContext *cx, const Class *newclasp, HandleObject callee) return NewObjectWithClassProto(cx, newclasp, proto, parent, kind); } -static inline JSObject * +static inline NativeObject * CreateThisForFunctionWithType(JSContext *cx, HandleTypeObject type, JSObject *parent, NewObjectKind newKind) { @@ -1754,10 +1757,10 @@ CreateThisForFunctionWithType(JSContext *cx, HandleTypeObject type, JSObject *pa // The definite properties analysis has been performed for this // type, so get the shape and finalize kind to use from the // TypeNewScript's template. 
- RootedObject templateObject(cx, newScript->templateObject()); + RootedNativeObject templateObject(cx, newScript->templateObject()); MOZ_ASSERT(templateObject->type() == type); - RootedObject res(cx, CopyInitializerObject(cx, templateObject, newKind)); + RootedNativeObject res(cx, CopyInitializerObject(cx, templateObject, newKind)); if (!res) return nullptr; @@ -1779,8 +1782,8 @@ CreateThisForFunctionWithType(JSContext *cx, HandleTypeObject type, JSObject *pa // Not enough objects with this type have been created yet, so make a // plain object and register it with the type. Use the maximum number // of fixed slots, as is also required by the TypeNewScript. - gc::AllocKind allocKind = GuessObjectGCKind(JSObject::MAX_FIXED_SLOTS); - JSObject *res = NewObjectWithType(cx, type, parent, allocKind, newKind); + gc::AllocKind allocKind = GuessObjectGCKind(NativeObject::MAX_FIXED_SLOTS); + NativeObject *res = NewNativeObjectWithType(cx, type, parent, allocKind, newKind); if (!res) return nullptr; @@ -1791,14 +1794,14 @@ CreateThisForFunctionWithType(JSContext *cx, HandleTypeObject type, JSObject *pa } gc::AllocKind allocKind = NewObjectGCKind(&JSObject::class_); - return NewObjectWithType(cx, type, parent, allocKind, newKind); + return NewNativeObjectWithType(cx, type, parent, allocKind, newKind); } -JSObject * +NativeObject * js::CreateThisForFunctionWithProto(JSContext *cx, HandleObject callee, JSObject *proto, NewObjectKind newKind /* = GenericObject */) { - RootedObject res(cx); + RootedNativeObject res(cx); if (proto) { RootedTypeObject type(cx, cx->getNewType(&JSObject::class_, TaggedProto(proto), &callee->as())); @@ -1820,7 +1823,8 @@ js::CreateThisForFunctionWithProto(JSContext *cx, HandleObject callee, JSObject res = CreateThisForFunctionWithType(cx, type, callee->getParent(), newKind); } else { gc::AllocKind allocKind = NewObjectGCKind(&JSObject::class_); - res = NewObjectWithClassProto(cx, &JSObject::class_, proto, callee->getParent(), allocKind, newKind); + res 
= NewNativeObjectWithClassProto(cx, &JSObject::class_, proto, + callee->getParent(), allocKind, newKind); } if (res) { @@ -1833,7 +1837,7 @@ js::CreateThisForFunctionWithProto(JSContext *cx, HandleObject callee, JSObject return res; } -JSObject * +NativeObject * js::CreateThisForFunction(JSContext *cx, HandleObject callee, NewObjectKind newKind) { RootedValue protov(cx); @@ -1844,13 +1848,13 @@ js::CreateThisForFunction(JSContext *cx, HandleObject callee, NewObjectKind newK proto = &protov.toObject(); else proto = nullptr; - JSObject *obj = CreateThisForFunctionWithProto(cx, callee, proto, newKind); + NativeObject *obj = CreateThisForFunctionWithProto(cx, callee, proto, newKind); if (obj && newKind == SingletonObject) { - RootedObject nobj(cx, obj); + RootedNativeObject nobj(cx, obj); /* Reshape the singleton before passing it as the 'this' value. */ - JSObject::clear(cx, nobj); + NativeObject::clear(cx, nobj); JSScript *calleeScript = callee->as().nonLazyScript(); TypeScript::SetThis(cx, calleeScript, types::Type::ObjectType(nobj)); @@ -1989,18 +1993,18 @@ CopySlots(JSContext *cx, HandleObject from, HandleObject to) if (from->is() && (Wrapper::wrapperHandler(from)->flags() & Wrapper::CROSS_COMPARTMENT)) { - to->setSlot(0, from->getSlot(0)); - to->setSlot(1, from->getSlot(1)); + to->fakeNativeSetSlot(0, from->fakeNativeGetSlot(0)); + to->fakeNativeSetSlot(1, from->fakeNativeGetSlot(1)); n = 2; } size_t span = JSCLASS_RESERVED_SLOTS(from->getClass()); RootedValue v(cx); for (; n < span; ++n) { - v = from->getSlot(n); + v = from->fakeNativeGetSlot(n); if (!cx->compartment()->wrap(cx, &v)) return false; - to->setSlot(n, v); + to->fakeNativeSetSlot(n, v); } return true; } @@ -2024,8 +2028,8 @@ js::CloneObject(JSContext *cx, HandleObject obj, Handle proto, return nullptr; } - if (obj->hasPrivate()) - clone->setPrivate(obj->getPrivate()); + if (obj->fakeNativeHasPrivate()) + clone->fakeNativeSetPrivate(obj->fakeNativeGetPrivate()); } else { MOZ_ASSERT(obj->is()); if 
(!CopySlots(cx, obj, clone)) @@ -2035,8 +2039,8 @@ js::CloneObject(JSContext *cx, HandleObject obj, Handle proto, return clone; } -JSObject * -js::DeepCloneObjectLiteral(JSContext *cx, HandleObject obj, NewObjectKind newKind) +NativeObject * +js::DeepCloneObjectLiteral(JSContext *cx, HandleNativeObject obj, NewObjectKind newKind) { /* NB: Keep this in sync with XDRObjectLiteral. */ MOZ_ASSERT_IF(obj->hasSingletonType(), @@ -2044,11 +2048,11 @@ js::DeepCloneObjectLiteral(JSContext *cx, HandleObject obj, NewObjectKind newKin MOZ_ASSERT(obj->is() || obj->is()); // Result of the clone function. - RootedObject clone(cx); + RootedNativeObject clone(cx); // Temporary element/slot which would be stored in the cloned object. RootedValue v(cx); - RootedObject deepObj(cx); + RootedNativeObject deepObj(cx); if (obj->is()) { clone = NewDenseUnallocatedArray(cx, obj->as().length(), nullptr, newKind); @@ -2060,8 +2064,9 @@ js::DeepCloneObjectLiteral(JSContext *cx, HandleObject obj, NewObjectKind newKin if (!typeObj) return nullptr; RootedObject parent(cx, obj->getParent()); - clone = NewObjectWithGivenProto(cx, &JSObject::class_, TaggedProto(typeObj->proto().toObject()), - parent, kind, newKind); + clone = NewNativeObjectWithGivenProto(cx, &JSObject::class_, + TaggedProto(typeObj->proto().toObject()), + parent, kind, newKind); } // Allocate the same number of slots. 
@@ -2077,7 +2082,7 @@ js::DeepCloneObjectLiteral(JSContext *cx, HandleObject obj, NewObjectKind newKin for (uint32_t i = 0; i < initialized; ++i) { v = obj->getDenseElement(i); if (v.isObject()) { - deepObj = &v.toObject(); + deepObj = &v.toObject().as(); deepObj = js::DeepCloneObjectLiteral(cx, deepObj, newKind); if (!deepObj) { JS_ReportOutOfMemory(cx); @@ -2096,7 +2101,7 @@ js::DeepCloneObjectLiteral(JSContext *cx, HandleObject obj, NewObjectKind newKin for (size_t i = 0; i < span; i++) { v = obj->getSlot(i); if (v.isObject()) { - deepObj = &v.toObject(); + deepObj = &v.toObject().as(); deepObj = js::DeepCloneObjectLiteral(cx, deepObj, newKind); if (!deepObj) return nullptr; @@ -2108,8 +2113,8 @@ js::DeepCloneObjectLiteral(JSContext *cx, HandleObject obj, NewObjectKind newKin if (obj->hasSingletonType()) { if (!JSObject::setSingletonType(cx, clone)) return nullptr; - } else if (obj->getClass() == &ArrayObject::class_) { - FixArrayType(cx, clone); + } else if (obj->is()) { + FixArrayType(cx, &clone->as()); } else { FixObjectType(cx, clone); } @@ -2124,7 +2129,7 @@ js::DeepCloneObjectLiteral(JSContext *cx, HandleObject obj, NewObjectKind newKin template bool -js::XDRObjectLiteral(XDRState *xdr, MutableHandleObject obj) +js::XDRObjectLiteral(XDRState *xdr, MutableHandleNativeObject obj) { /* NB: Keep this in sync with DeepCloneObjectLiteral. 
*/ @@ -2169,8 +2174,12 @@ js::XDRObjectLiteral(XDRState *xdr, MutableHandleObject obj) if (!xdr->codeEnum32(&kind)) return false; - if (mode == XDR_DECODE) - obj.set(NewBuiltinClassInstance(cx, &JSObject::class_, kind, js::MaybeSingletonObject)); + if (mode == XDR_DECODE) { + obj.set(NewNativeBuiltinClassInstance(cx, &JSObject::class_, kind, + MaybeSingletonObject)); + if (!obj) + return false; + } } } @@ -2313,7 +2322,7 @@ js::XDRObjectLiteral(XDRState *xdr, MutableHandleObject obj) if (!JSObject::setSingletonType(cx, obj)) return false; } else if (isArray) { - FixArrayType(cx, obj); + FixArrayType(cx, &obj->as()); } else { FixObjectType(cx, obj); } @@ -2351,16 +2360,16 @@ js::XDRObjectLiteral(XDRState *xdr, MutableHandleObject obj) } template bool -js::XDRObjectLiteral(XDRState *xdr, MutableHandleObject obj); +js::XDRObjectLiteral(XDRState *xdr, MutableHandleNativeObject obj); template bool -js::XDRObjectLiteral(XDRState *xdr, MutableHandleObject obj); +js::XDRObjectLiteral(XDRState *xdr, MutableHandleNativeObject obj); JSObject * js::CloneObjectLiteral(JSContext *cx, HandleObject parent, HandleObject srcObj) { if (srcObj->getClass() == &JSObject::class_) { - AllocKind kind = GetBackgroundAllocKind(GuessObjectGCKind(srcObj->numFixedSlots())); + AllocKind kind = GetBackgroundAllocKind(GuessObjectGCKind(srcObj->as().numFixedSlots())); MOZ_ASSERT_IF(srcObj->isTenured(), kind == srcObj->asTenured().getAllocKind()); JSObject *proto = cx->global()->getOrCreateObjectPrototype(cx); @@ -2374,12 +2383,12 @@ js::CloneObjectLiteral(JSContext *cx, HandleObject parent, HandleObject srcObj) return NewReshapedObject(cx, typeObj, parent, kind, shape); } - MOZ_ASSERT(srcObj->is()); - MOZ_ASSERT(srcObj->denseElementsAreCopyOnWrite()); - MOZ_ASSERT(srcObj->getElementsHeader()->ownerObject() == srcObj); + RootedArrayObject srcArray(cx, &srcObj->as()); + MOZ_ASSERT(srcArray->denseElementsAreCopyOnWrite()); + MOZ_ASSERT(srcArray->getElementsHeader()->ownerObject() == srcObj); - size_t 
length = srcObj->as().length(); - RootedObject res(cx, NewDenseFullyAllocatedArray(cx, length, nullptr, MaybeSingletonObject)); + size_t length = srcArray->as().length(); + RootedArrayObject res(cx, NewDenseFullyAllocatedArray(cx, length, nullptr, MaybeSingletonObject)); if (!res) return nullptr; @@ -2388,7 +2397,7 @@ js::CloneObjectLiteral(JSContext *cx, HandleObject parent, HandleObject srcObj) for (size_t i = 0; i < length; i++) { // The only markable values in copy on write arrays are atoms, which // can be freely copied between compartments. - value = srcObj->getDenseElement(i); + value = srcArray->getDenseElement(i); MOZ_ASSERT_IF(value.isMarkable(), value.toGCThing()->isTenured() && cx->runtime()->isAtomsZone(value.toGCThing()->asTenured().zone())); @@ -2472,7 +2481,7 @@ JSObject::ReserveForTradeGuts(JSContext *cx, JSObject *aArg, JSObject *bArg, * Non-native objects need to be reshaped according to the new count. */ if (a->isNative()) { - if (!a->generateOwnShape(cx)) + if (!a->as().generateOwnShape(cx)) MOZ_CRASH(); } else { reserved.newbshape = EmptyShape::getInitialShape(cx, aClass, aProto, a->getParent(), a->getMetadata(), @@ -2481,7 +2490,7 @@ JSObject::ReserveForTradeGuts(JSContext *cx, JSObject *aArg, JSObject *bArg, MOZ_CRASH(); } if (b->isNative()) { - if (!b->generateOwnShape(cx)) + if (!b->as().generateOwnShape(cx)) MOZ_CRASH(); } else { reserved.newashape = EmptyShape::getInitialShape(cx, bClass, bProto, b->getParent(), b->getMetadata(), @@ -2492,9 +2501,9 @@ JSObject::ReserveForTradeGuts(JSContext *cx, JSObject *aArg, JSObject *bArg, /* The avals/bvals vectors hold all original values from the objects. 
*/ - if (!reserved.avals.reserve(a->slotSpan())) + if (!reserved.avals.reserve(a->fakeNativeSlotSpan())) MOZ_CRASH(); - if (!reserved.bvals.reserve(b->slotSpan())) + if (!reserved.bvals.reserve(b->fakeNativeSlotSpan())) MOZ_CRASH(); /* @@ -2503,8 +2512,8 @@ JSObject::ReserveForTradeGuts(JSContext *cx, JSObject *aArg, JSObject *bArg, * use their last fixed slot for storing private data. */ - reserved.newafixed = a->numFixedSlots(); - reserved.newbfixed = b->numFixedSlots(); + reserved.newafixed = a->fakeNativeNumFixedSlots(); + reserved.newbfixed = b->fakeNativeNumFixedSlots(); if (aClass->hasPrivate()) { reserved.newafixed++; @@ -2524,8 +2533,12 @@ JSObject::ReserveForTradeGuts(JSContext *cx, JSObject *aArg, JSObject *bArg, * other object. */ - unsigned adynamic = dynamicSlotsCount(reserved.newafixed, b->slotSpan(), b->getClass()); - unsigned bdynamic = dynamicSlotsCount(reserved.newbfixed, a->slotSpan(), a->getClass()); + unsigned adynamic = NativeObject::dynamicSlotsCount(reserved.newafixed, + b->fakeNativeSlotSpan(), + b->getClass()); + unsigned bdynamic = NativeObject::dynamicSlotsCount(reserved.newbfixed, + a->fakeNativeSlotSpan(), + a->getClass()); if (adynamic) { reserved.newaslots = a->zone()->pod_malloc(adynamic); @@ -2608,23 +2621,23 @@ JSObject::TradeGuts(JSContext *cx, JSObject *a, JSObject *b, TradeGutsReserved & * the new layout for the other object. */ - uint32_t acap = a->slotSpan(); - uint32_t bcap = b->slotSpan(); + uint32_t acap = a->fakeNativeSlotSpan(); + uint32_t bcap = b->fakeNativeSlotSpan(); for (size_t i = 0; i < acap; i++) - reserved.avals.infallibleAppend(a->getSlot(i)); + reserved.avals.infallibleAppend(a->fakeNativeGetSlot(i)); for (size_t i = 0; i < bcap; i++) - reserved.bvals.infallibleAppend(b->getSlot(i)); + reserved.bvals.infallibleAppend(b->fakeNativeGetSlot(i)); /* Done with the dynamic slots. 
*/ - if (a->hasDynamicSlots()) - js_free(a->slots); - if (b->hasDynamicSlots()) - js_free(b->slots); + if (a->fakeNativeHasDynamicSlots()) + js_free(a->fakeNativeSlots()); + if (b->fakeNativeHasDynamicSlots()) + js_free(b->fakeNativeSlots()); - void *apriv = a->hasPrivate() ? a->getPrivate() : nullptr; - void *bpriv = b->hasPrivate() ? b->getPrivate() : nullptr; + void *apriv = a->fakeNativeHasPrivate() ? a->fakeNativeGetPrivate() : nullptr; + void *bpriv = b->fakeNativeHasPrivate() ? b->fakeNativeGetPrivate() : nullptr; char tmp[sizeof(JSObject)]; js_memcpy(&tmp, a, sizeof tmp); @@ -2636,29 +2649,29 @@ JSObject::TradeGuts(JSContext *cx, JSObject *a, JSObject *b, TradeGutsReserved & else a->shape_ = reserved.newashape; - a->slots = reserved.newaslots; - a->initSlotRange(0, reserved.bvals.begin(), bcap); - if (a->hasPrivate()) - a->initPrivate(bpriv); + a->fakeNativeSlots() = reserved.newaslots; + a->fakeNativeInitSlotRange(0, reserved.bvals.begin(), bcap); + if (a->fakeNativeHasPrivate()) + a->fakeNativeInitPrivate(bpriv); if (b->isNative()) b->shape_->setNumFixedSlots(reserved.newbfixed); else b->shape_ = reserved.newbshape; - b->slots = reserved.newbslots; - b->initSlotRange(0, reserved.avals.begin(), acap); - if (b->hasPrivate()) - b->initPrivate(apriv); + b->fakeNativeSlots() = reserved.newbslots; + b->fakeNativeInitSlotRange(0, reserved.avals.begin(), acap); + if (b->fakeNativeHasPrivate()) + b->fakeNativeInitPrivate(apriv); /* Make sure the destructor for reserved doesn't free the slots. 
*/ reserved.newaslots = nullptr; reserved.newbslots = nullptr; } - if (a->inDictionaryMode()) + if (a->isNative() && a->as().inDictionaryMode()) a->lastProperty()->listp = &a->shape_; - if (b->inDictionaryMode()) + if (b->isNative() && b->as().inDictionaryMode()) b->lastProperty()->listp = &b->shape_; #ifdef JSGC_INCREMENTAL @@ -2714,14 +2727,13 @@ DefineStandardSlot(JSContext *cx, HandleObject obj, JSProtoKey key, JSAtom *atom * property is not yet present, force it into a new one bound to a * reserved slot. Otherwise, go through the normal property path. */ - MOZ_ASSERT(obj->is()); - MOZ_ASSERT(obj->isNative()); + Rooted global(cx, &obj->as()); - if (!obj->nativeLookup(cx, id)) { - obj->as().setConstructorPropertySlot(key, v); + if (!global->lookup(cx, id)) { + global->setConstructorPropertySlot(key, v); uint32_t slot = GlobalObject::constructorPropertySlot(key); - if (!JSObject::addProperty(cx, obj, id, JS_PropertyStub, JS_StrictPropertyStub, slot, attrs, 0)) + if (!NativeObject::addProperty(cx, global, id, JS_PropertyStub, JS_StrictPropertyStub, slot, attrs, 0)) return false; named = true; @@ -2756,13 +2768,13 @@ ClearClassObject(JSObject *obj, JSProtoKey key) obj->as().setPrototype(key, UndefinedValue()); } -static JSObject * +static NativeObject * DefineConstructorAndPrototype(JSContext *cx, HandleObject obj, JSProtoKey key, HandleAtom atom, JSObject *protoProto, const Class *clasp, Native constructor, unsigned nargs, const JSPropertySpec *ps, const JSFunctionSpec *fs, const JSPropertySpec *static_ps, const JSFunctionSpec *static_fs, - JSObject **ctorp, AllocKind ctorKind) + NativeObject **ctorp, AllocKind ctorKind) { /* * Create a prototype object for this class. @@ -2793,12 +2805,12 @@ DefineConstructorAndPrototype(JSContext *cx, HandleObject obj, JSProtoKey key, H * [which already needs to happen for bug 638316], figure out nicer * semantics for null-protoProto, and use createBlankPrototype.) 
*/ - RootedObject proto(cx, NewObjectWithClassProto(cx, clasp, protoProto, obj, SingletonObject)); + RootedNativeObject proto(cx, NewNativeObjectWithClassProto(cx, clasp, protoProto, obj, SingletonObject)); if (!proto) return nullptr; /* After this point, control must exit via label bad or out. */ - RootedObject ctor(cx); + RootedNativeObject ctor(cx); bool named = false; bool cached = false; if (!constructor) { @@ -2886,12 +2898,12 @@ bad: return nullptr; } -JSObject * +NativeObject * js_InitClass(JSContext *cx, HandleObject obj, JSObject *protoProto_, const Class *clasp, Native constructor, unsigned nargs, const JSPropertySpec *ps, const JSFunctionSpec *fs, const JSPropertySpec *static_ps, const JSFunctionSpec *static_fs, - JSObject **ctorp, AllocKind ctorKind) + NativeObject **ctorp, AllocKind ctorKind) { RootedObject protoProto(cx, protoProto_); @@ -2930,8 +2942,8 @@ js_InitClass(JSContext *cx, HandleObject obj, JSObject *protoProto_, } /* static */ inline bool -JSObject::updateSlotsForSpan(ThreadSafeContext *cx, - HandleObject obj, size_t oldSpan, size_t newSpan) +NativeObject::updateSlotsForSpan(ThreadSafeContext *cx, + HandleNativeObject obj, size_t oldSpan, size_t newSpan) { MOZ_ASSERT(cx->isThreadLocal(obj)); MOZ_ASSERT(oldSpan != newSpan); @@ -2940,7 +2952,7 @@ JSObject::updateSlotsForSpan(ThreadSafeContext *cx, size_t newCount = dynamicSlotsCount(obj->numFixedSlots(), newSpan, obj->getClass()); if (oldSpan < newSpan) { - if (oldCount < newCount && !JSObject::growSlots(cx, obj, oldCount, newCount)) + if (oldCount < newCount && !growSlots(cx, obj, oldCount, newCount)) return false; if (newSpan == oldSpan + 1) @@ -2953,14 +2965,14 @@ JSObject::updateSlotsForSpan(ThreadSafeContext *cx, obj->invalidateSlotRange(newSpan, oldSpan - newSpan); if (oldCount > newCount) - JSObject::shrinkSlots(cx, obj, oldCount, newCount); + shrinkSlots(cx, obj, oldCount, newCount); } return true; } /* static */ bool -JSObject::setLastProperty(ThreadSafeContext *cx, HandleObject 
obj, HandleShape shape) +NativeObject::setLastProperty(ThreadSafeContext *cx, HandleNativeObject obj, HandleShape shape) { MOZ_ASSERT(cx->isThreadLocal(obj)); MOZ_ASSERT(!obj->inDictionaryMode()); @@ -2984,7 +2996,7 @@ JSObject::setLastProperty(ThreadSafeContext *cx, HandleObject obj, HandleShape s } void -JSObject::setLastPropertyShrinkFixedSlots(Shape *shape) +NativeObject::setLastPropertyShrinkFixedSlots(Shape *shape) { MOZ_ASSERT(!inDictionaryMode()); MOZ_ASSERT(!shape->inDictionary()); @@ -3003,7 +3015,7 @@ JSObject::setLastPropertyShrinkFixedSlots(Shape *shape) } /* static */ bool -JSObject::setSlotSpan(ThreadSafeContext *cx, HandleObject obj, uint32_t span) +NativeObject::setSlotSpan(ThreadSafeContext *cx, HandleNativeObject obj, uint32_t span) { MOZ_ASSERT(cx->isThreadLocal(obj)); MOZ_ASSERT(obj->inDictionaryMode()); @@ -3012,7 +3024,7 @@ JSObject::setSlotSpan(ThreadSafeContext *cx, HandleObject obj, uint32_t span) if (oldSpan == span) return true; - if (!JSObject::updateSlotsForSpan(cx, obj, oldSpan, span)) + if (!updateSlotsForSpan(cx, obj, oldSpan, span)) return false; obj->lastProperty()->base()->setSlotSpan(span); @@ -3059,7 +3071,7 @@ ReallocateSlots(ThreadSafeContext *cx, JSObject *obj, HeapSlot *oldSlots, } /* static */ bool -JSObject::growSlots(ThreadSafeContext *cx, HandleObject obj, uint32_t oldCount, uint32_t newCount) +NativeObject::growSlots(ThreadSafeContext *cx, HandleNativeObject obj, uint32_t oldCount, uint32_t newCount) { MOZ_ASSERT(cx->isThreadLocal(obj)); MOZ_ASSERT(newCount > oldCount); @@ -3107,7 +3119,8 @@ FreeSlots(ThreadSafeContext *cx, HeapSlot *slots) } /* static */ void -JSObject::shrinkSlots(ThreadSafeContext *cx, HandleObject obj, uint32_t oldCount, uint32_t newCount) +NativeObject::shrinkSlots(ThreadSafeContext *cx, HandleNativeObject obj, + uint32_t oldCount, uint32_t newCount) { MOZ_ASSERT(cx->isThreadLocal(obj)); MOZ_ASSERT(newCount < oldCount); @@ -3128,7 +3141,7 @@ JSObject::shrinkSlots(ThreadSafeContext *cx, 
HandleObject obj, uint32_t oldCount } /* static */ bool -JSObject::sparsifyDenseElement(ExclusiveContext *cx, HandleObject obj, uint32_t index) +NativeObject::sparsifyDenseElement(ExclusiveContext *cx, HandleNativeObject obj, uint32_t index) { if (!obj->maybeCopyElementsForWrite(cx)) return false; @@ -3136,7 +3149,7 @@ JSObject::sparsifyDenseElement(ExclusiveContext *cx, HandleObject obj, uint32_t RootedValue value(cx, obj->getDenseElement(index)); MOZ_ASSERT(!value.isMagic(JS_ELEMENTS_HOLE)); - JSObject::removeDenseElementForSparseIndex(cx, obj, index); + removeDenseElementForSparseIndex(cx, obj, index); uint32_t slot = obj->slotSpan(); if (!obj->addDataProperty(cx, INT_TO_JSID(index), slot, JSPROP_ENUMERATE)) { @@ -3151,7 +3164,7 @@ JSObject::sparsifyDenseElement(ExclusiveContext *cx, HandleObject obj, uint32_t } /* static */ bool -JSObject::sparsifyDenseElements(js::ExclusiveContext *cx, HandleObject obj) +NativeObject::sparsifyDenseElements(js::ExclusiveContext *cx, HandleNativeObject obj) { if (!obj->maybeCopyElementsForWrite(cx)) return false; @@ -3184,7 +3197,7 @@ JSObject::sparsifyDenseElements(js::ExclusiveContext *cx, HandleObject obj) } bool -JSObject::willBeSparseElements(uint32_t requiredCapacity, uint32_t newElementsHint) +NativeObject::willBeSparseElements(uint32_t requiredCapacity, uint32_t newElementsHint) { MOZ_ASSERT(isNative()); MOZ_ASSERT(requiredCapacity > MIN_SPARSE_INDEX); @@ -3212,8 +3225,8 @@ JSObject::willBeSparseElements(uint32_t requiredCapacity, uint32_t newElementsHi return true; } -/* static */ JSObject::EnsureDenseResult -JSObject::maybeDensifySparseElements(js::ExclusiveContext *cx, HandleObject obj) +/* static */ NativeObject::EnsureDenseResult +NativeObject::maybeDensifySparseElements(js::ExclusiveContext *cx, HandleNativeObject obj) { /* * Wait until after the object goes into dictionary mode, which must happen @@ -3393,7 +3406,7 @@ ReallocateElements(ThreadSafeContext *cx, JSObject *obj, ObjectElements *oldHead // usual 
doubling. // /* static */ uint32_t -JSObject::goodAllocated(uint32_t reqAllocated, uint32_t length = 0) +NativeObject::goodAllocated(uint32_t reqAllocated, uint32_t length = 0) { static const uint32_t Mebi = 1024 * 1024; @@ -3430,8 +3443,8 @@ JSObject::goodAllocated(uint32_t reqAllocated, uint32_t length = 0) if (length >= reqCapacity && goodCapacity > (length / 3) * 2) goodAllocated = length + ObjectElements::VALUES_PER_HEADER; - if (goodAllocated < JSObject::SLOT_CAPACITY_MIN) - goodAllocated = JSObject::SLOT_CAPACITY_MIN; + if (goodAllocated < SLOT_CAPACITY_MIN) + goodAllocated = SLOT_CAPACITY_MIN; } else { uint32_t i = 0; @@ -3454,7 +3467,7 @@ JSObject::goodAllocated(uint32_t reqAllocated, uint32_t length = 0) } bool -JSObject::growElements(ThreadSafeContext *cx, uint32_t reqCapacity) +NativeObject::growElements(ThreadSafeContext *cx, uint32_t reqCapacity) { MOZ_ASSERT(nonProxyIsExtensible()); MOZ_ASSERT(canHaveNonEmptyElements()); @@ -3518,7 +3531,7 @@ JSObject::growElements(ThreadSafeContext *cx, uint32_t reqCapacity) } void -JSObject::shrinkElements(ThreadSafeContext *cx, uint32_t reqCapacity) +NativeObject::shrinkElements(ThreadSafeContext *cx, uint32_t reqCapacity) { MOZ_ASSERT(cx->isThreadLocal(this)); MOZ_ASSERT(canHaveNonEmptyElements()); @@ -3552,7 +3565,7 @@ JSObject::shrinkElements(ThreadSafeContext *cx, uint32_t reqCapacity) } /* static */ bool -JSObject::CopyElementsForWrite(ThreadSafeContext *cx, JSObject *obj) +NativeObject::CopyElementsForWrite(ThreadSafeContext *cx, NativeObject *obj) { MOZ_ASSERT(obj->denseElementsAreCopyOnWrite()); @@ -3593,13 +3606,13 @@ JSObject::fixupAfterMovingGC() * elements' pointer back to the owner object, and the elements pointer * itself if it points to inline elements in another object. 
*/ - if (hasDynamicElements()) { - ObjectElements *header = getElementsHeader(); + if (fakeNativeHasDynamicElements()) { + ObjectElements *header = fakeNativeGetElementsHeader(); if (header->isCopyOnWrite()) { - HeapPtrObject &owner = header->ownerObject(); + HeapPtrNativeObject &owner = header->ownerObject(); if (IsForwarded(owner.get())) owner = Forwarded(owner.get()); - elements = owner->getElementsHeader()->elements(); + fakeNativeElements() = owner->getElementsHeader()->elements(); } } } @@ -3631,7 +3644,7 @@ js::SetClassAndProto(JSContext *cx, HandleObject obj, RootedObject oldproto(cx, obj); while (oldproto && oldproto->isNative()) { if (oldproto->hasSingletonType()) { - if (!oldproto->generateOwnShape(cx)) { + if (!oldproto->as().generateOwnShape(cx)) { if (crashOnFailure) MOZ_CRASH(); return false; @@ -3806,7 +3819,7 @@ js::FindClassObject(ExclusiveContext *cx, MutableHandleObject protop, const Clas RootedValue v(cx); if (shape && pobj->isNative()) { if (shape->hasSlot()) - v = pobj->nativeGetSlot(shape->slot()); + v = pobj->as().getSlot(shape->slot()); } if (v.isObject()) protop.set(&v.toObject()); @@ -3864,7 +3877,7 @@ JSObject::constructHook() const } /* static */ bool -JSObject::allocSlot(ThreadSafeContext *cx, HandleObject obj, uint32_t *slotp) +NativeObject::allocSlot(ThreadSafeContext *cx, HandleNativeObject obj, uint32_t *slotp) { MOZ_ASSERT(cx->isThreadLocal(obj)); @@ -3908,7 +3921,7 @@ JSObject::allocSlot(ThreadSafeContext *cx, HandleObject obj, uint32_t *slotp) } void -JSObject::freeSlot(uint32_t slot) +NativeObject::freeSlot(uint32_t slot) { MOZ_ASSERT(slot < slotSpan()); @@ -3944,14 +3957,10 @@ PurgeProtoChain(ExclusiveContext *cx, JSObject *objArg, HandleId id) if (!obj->isNative()) break; - shape = obj->nativeLookup(cx, id); - if (shape) { - if (!obj->shadowingShapeChange(cx, *shape)) - return false; + shape = obj->as().lookup(cx, id); + if (shape) + return obj->as().shadowingShapeChange(cx, *shape); - obj->shadowingShapeChange(cx, *shape); 
- return true; - } obj = obj->getProto(); } @@ -4004,7 +4013,7 @@ PurgeScopeChain(ExclusiveContext *cx, JS::HandleObject obj, JS::HandleId id) } bool -baseops::DefineGeneric(ExclusiveContext *cx, HandleObject obj, HandleId id, HandleValue value, +baseops::DefineGeneric(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, HandleValue value, PropertyOp getter, StrictPropertyOp setter, unsigned attrs) { return DefineNativeProperty(cx, obj, id, value, getter, setter, attrs); @@ -4022,7 +4031,7 @@ JSObject::defineGeneric(ExclusiveContext *cx, HandleObject obj, return false; return op(cx->asJSContext(), obj, id, value, getter, setter, attrs); } - return baseops::DefineGeneric(cx, obj, id, value, getter, setter, attrs); + return baseops::DefineGeneric(cx, obj.as(), id, value, getter, setter, attrs); } /* static */ bool @@ -4035,7 +4044,7 @@ JSObject::defineProperty(ExclusiveContext *cx, HandleObject obj, } bool -baseops::DefineElement(ExclusiveContext *cx, HandleObject obj, uint32_t index, HandleValue value, +baseops::DefineElement(ExclusiveContext *cx, HandleNativeObject obj, uint32_t index, HandleValue value, PropertyOp getter, StrictPropertyOp setter, unsigned attrs) { RootedId id(cx); @@ -4063,24 +4072,24 @@ JSObject::defineElement(ExclusiveContext *cx, HandleObject obj, return false; return op(cx->asJSContext(), obj, index, value, getter, setter, attrs); } - return baseops::DefineElement(cx, obj, index, value, getter, setter, attrs); + return baseops::DefineElement(cx, obj.as(), index, value, getter, setter, attrs); } Shape * -JSObject::addDataProperty(ExclusiveContext *cx, jsid idArg, uint32_t slot, unsigned attrs) +NativeObject::addDataProperty(ExclusiveContext *cx, jsid idArg, uint32_t slot, unsigned attrs) { MOZ_ASSERT(!(attrs & (JSPROP_GETTER | JSPROP_SETTER))); - RootedObject self(cx, this); + RootedNativeObject self(cx, this); RootedId id(cx, idArg); return addProperty(cx, self, id, nullptr, nullptr, slot, attrs, 0); } Shape * 
-JSObject::addDataProperty(ExclusiveContext *cx, HandlePropertyName name, +NativeObject::addDataProperty(ExclusiveContext *cx, HandlePropertyName name, uint32_t slot, unsigned attrs) { MOZ_ASSERT(!(attrs & (JSPROP_GETTER | JSPROP_SETTER))); - RootedObject self(cx, this); + RootedNativeObject self(cx, this); RootedId id(cx, NameToId(name)); return addProperty(cx, self, id, nullptr, nullptr, slot, attrs, 0); } @@ -4094,7 +4103,7 @@ JSObject::addDataProperty(ExclusiveContext *cx, HandlePropertyName name, template static inline bool CallAddPropertyHook(typename ExecutionModeTraits::ExclusiveContextType cxArg, - const Class *clasp, HandleObject obj, HandleShape shape, + const Class *clasp, HandleNativeObject obj, HandleShape shape, HandleValue nominal) { if (clasp->addProperty != JS_PropertyStub) { @@ -4115,7 +4124,7 @@ CallAddPropertyHook(typename ExecutionModeTraits::ExclusiveContextType cxA } if (value.get() != nominal) { if (shape->hasSlot()) - obj->nativeSetSlotWithType(cx, shape, value); + obj->setSlotWithType(cx, shape, value); } } return true; @@ -4124,7 +4133,7 @@ CallAddPropertyHook(typename ExecutionModeTraits::ExclusiveContextType cxA template static inline bool CallAddPropertyHookDense(typename ExecutionModeTraits::ExclusiveContextType cxArg, - const Class *clasp, HandleObject obj, uint32_t index, + const Class *clasp, HandleNativeObject obj, uint32_t index, HandleValue nominal) { /* Inline addProperty for array objects. 
*/ @@ -4173,15 +4182,15 @@ CallAddPropertyHookDense(typename ExecutionModeTraits::ExclusiveContextTyp template static bool UpdateShapeTypeAndValue(typename ExecutionModeTraits::ExclusiveContextType cx, - JSObject *obj, Shape *shape, const Value &value) + NativeObject *obj, Shape *shape, const Value &value) { jsid id = shape->propid(); if (shape->hasSlot()) { if (mode == ParallelExecution) { - if (!obj->nativeSetSlotIfHasType(shape, value, /* overwriting = */ false)) + if (!obj->setSlotIfHasType(shape, value, /* overwriting = */ false)) return false; } else { - obj->nativeSetSlotWithType(cx->asExclusiveContext(), shape, value, /* overwriting = */ false); + obj->setSlotWithType(cx->asExclusiveContext(), shape, value, /* overwriting = */ false); } // Per the acquired properties analysis, when the shape of a partially @@ -4214,7 +4223,7 @@ UpdateShapeTypeAndValue(typename ExecutionModeTraits::ExclusiveContextType template static inline bool DefinePropertyOrElement(typename ExecutionModeTraits::ExclusiveContextType cx, - HandleObject obj, HandleId id, + HandleNativeObject obj, HandleId id, PropertyOp getter, StrictPropertyOp setter, unsigned attrs, HandleValue value, bool callSetterAfterwards, bool setterIsStrict) @@ -4224,7 +4233,7 @@ DefinePropertyOrElement(typename ExecutionModeTraits::ExclusiveContextType getter == JS_PropertyStub && setter == JS_StrictPropertyStub && attrs == JSPROP_ENUMERATE && - (!obj->isIndexed() || !obj->nativeContainsPure(id)) && + (!obj->isIndexed() || !obj->containsPure(id)) && !IsAnyTypedArray(obj)) { uint32_t index = JSID_TO_INT(id); @@ -4234,7 +4243,7 @@ DefinePropertyOrElement(typename ExecutionModeTraits::ExclusiveContextType if (definesPast) return true; - JSObject::EnsureDenseResult result; + NativeObject::EnsureDenseResult result; if (mode == ParallelExecution) { if (obj->writeToIndexWouldMarkNotPacked(index)) return false; @@ -4243,9 +4252,9 @@ DefinePropertyOrElement(typename ExecutionModeTraits::ExclusiveContextType result = 
obj->ensureDenseElements(cx->asExclusiveContext(), index, 1); } - if (result == JSObject::ED_FAILED) + if (result == NativeObject::ED_FAILED) return false; - if (result == JSObject::ED_OK) { + if (result == NativeObject::ED_OK) { if (mode == ParallelExecution) { if (!obj->setDenseElementIfHasType(index, value)) return false; @@ -4284,8 +4293,8 @@ DefinePropertyOrElement(typename ExecutionModeTraits::ExclusiveContextType AutoRooterGetterSetter gsRoot(cx, attrs, &getter, &setter); - RootedShape shape(cx, JSObject::putProperty(cx, obj, id, getter, setter, - SHAPE_INVALID_SLOT, attrs, 0)); + RootedShape shape(cx, NativeObject::putProperty(cx, obj, id, getter, setter, + SHAPE_INVALID_SLOT, attrs, 0)); if (!shape) return false; @@ -4305,11 +4314,11 @@ DefinePropertyOrElement(typename ExecutionModeTraits::ExclusiveContextType ExclusiveContext *ncx = cx->asExclusiveContext(); uint32_t index = JSID_TO_INT(id); - JSObject::removeDenseElementForSparseIndex(ncx, obj, index); - JSObject::EnsureDenseResult result = JSObject::maybeDensifySparseElements(ncx, obj); - if (result == JSObject::ED_FAILED) + NativeObject::removeDenseElementForSparseIndex(ncx, obj, index); + NativeObject::EnsureDenseResult result = NativeObject::maybeDensifySparseElements(ncx, obj); + if (result == NativeObject::ED_FAILED) return false; - if (result == JSObject::ED_OK) { + if (result == NativeObject::ED_OK) { MOZ_ASSERT(setter == JS_StrictPropertyStub); return CallAddPropertyHookDense(cx, obj->getClass(), obj, index, value); } @@ -4329,11 +4338,11 @@ DefinePropertyOrElement(typename ExecutionModeTraits::ExclusiveContextType } static bool -NativeLookupOwnProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, +NativeLookupOwnProperty(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, MutableHandle shapep); bool -js::DefineNativeProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, HandleValue value, +js::DefineNativeProperty(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, 
HandleValue value, PropertyOp getter, StrictPropertyOp setter, unsigned attrs) { MOZ_ASSERT(!(attrs & JSPROP_NATIVE_ACCESSORS)); @@ -4362,20 +4371,20 @@ js::DefineNativeProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, Ha /* Ignore getter/setter properties added to typed arrays. */ return true; } - if (!JSObject::sparsifyDenseElement(cx, obj, JSID_TO_INT(id))) + if (!NativeObject::sparsifyDenseElement(cx, obj, JSID_TO_INT(id))) return false; - shape = obj->nativeLookup(cx, id); + shape = obj->lookup(cx, id); } if (shape->isAccessorDescriptor()) { attrs = ApplyOrDefaultAttributes(attrs, shape); - shape = JSObject::changeProperty(cx, obj, shape, attrs, - JSPROP_GETTER | JSPROP_SETTER, - (attrs & JSPROP_GETTER) - ? getter - : shape->getter(), - (attrs & JSPROP_SETTER) - ? setter - : shape->setter()); + shape = NativeObject::changeProperty(cx, obj, shape, attrs, + JSPROP_GETTER | JSPROP_SETTER, + (attrs & JSPROP_GETTER) + ? getter + : shape->getter(), + (attrs & JSPROP_SETTER) + ? setter + : shape->setter()); if (!shape) return false; shouldDefine = false; @@ -4392,7 +4401,7 @@ js::DefineNativeProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, Ha * by accident, but we can't use NativeLookupOwnProperty in this case, because of resolve * loops. 
*/ - shape = obj->nativeLookup(cx, id); + shape = obj->lookup(cx, id); if (shape && shape->isDataDescriptor()) attrs = ApplyOrDefaultAttributes(attrs, shape); } else { @@ -4415,17 +4424,17 @@ js::DefineNativeProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, Ha */ return true; } - if (!JSObject::sparsifyDenseElement(cx, obj, JSID_TO_INT(id))) + if (!NativeObject::sparsifyDenseElement(cx, obj, JSID_TO_INT(id))) return false; - shape = obj->nativeLookup(cx, id); + shape = obj->lookup(cx, id); } attrs = ApplyOrDefaultAttributes(attrs, shape); /* Keep everything from the shape that isn't the things we're changing */ unsigned attrMask = ~(JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT); - shape = JSObject::changeProperty(cx, obj, shape, attrs, attrMask, - shape->getter(), shape->setter()); + shape = NativeObject::changeProperty(cx, obj, shape, attrs, attrMask, + shape->getter(), shape->setter()); if (!shape) return false; if (shape->hasSlot()) @@ -4485,7 +4494,7 @@ js::DefineNativeProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, Ha * *recursedp = false and return true. 
*/ static MOZ_ALWAYS_INLINE bool -CallResolveOp(JSContext *cx, HandleObject obj, HandleId id, MutableHandleObject objp, +CallResolveOp(JSContext *cx, HandleNativeObject obj, HandleId id, MutableHandleObject objp, MutableHandleShape propp, bool *recursedp) { const Class *clasp = obj->getClass(); @@ -4538,13 +4547,15 @@ CallResolveOp(JSContext *cx, HandleObject obj, HandleId id, MutableHandleObject objp.set(obj); } - if (JSID_IS_INT(id) && objp->containsDenseElement(JSID_TO_INT(id))) { + NativeObject *nobjp = &objp->as(); + + if (JSID_IS_INT(id) && nobjp->containsDenseElement(JSID_TO_INT(id))) { MarkDenseOrTypedArrayElementFound(propp); return true; } Shape *shape; - if (!objp->nativeEmpty() && (shape = objp->nativeLookup(cx, id))) + if (!nobjp->empty() && (shape = nobjp->lookup(cx, id))) propp.set(shape); else objp.set(nullptr); @@ -4555,7 +4566,7 @@ CallResolveOp(JSContext *cx, HandleObject obj, HandleId id, MutableHandleObject template static MOZ_ALWAYS_INLINE bool LookupOwnPropertyInline(ExclusiveContext *cx, - typename MaybeRooted::HandleType obj, + typename MaybeRooted::HandleType obj, typename MaybeRooted::HandleType id, typename MaybeRooted::MutableHandleType objp, typename MaybeRooted::MutableHandleType propp, @@ -4588,7 +4599,7 @@ LookupOwnPropertyInline(ExclusiveContext *cx, } // Check for a native property. 
- if (Shape *shape = obj->nativeLookup(cx, id)) { + if (Shape *shape = obj->lookup(cx, id)) { objp.set(obj); propp.set(shape); *donep = true; @@ -4602,7 +4613,7 @@ LookupOwnPropertyInline(ExclusiveContext *cx, bool recursed; if (!CallResolveOp(cx->asJSContext(), - MaybeRooted::toHandle(obj), + MaybeRooted::toHandle(obj), MaybeRooted::toHandle(id), MaybeRooted::toMutableHandle(objp), MaybeRooted::toMutableHandle(propp), @@ -4629,7 +4640,7 @@ LookupOwnPropertyInline(ExclusiveContext *cx, } static bool -NativeLookupOwnProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, +NativeLookupOwnProperty(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, MutableHandle shapep) { RootedObject pobj(cx); @@ -4645,7 +4656,7 @@ NativeLookupOwnProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, template static MOZ_ALWAYS_INLINE bool LookupPropertyInline(ExclusiveContext *cx, - typename MaybeRooted::HandleType obj, + typename MaybeRooted::HandleType obj, typename MaybeRooted::HandleType id, typename MaybeRooted::MutableHandleType objp, typename MaybeRooted::MutableHandleType propp) @@ -4656,7 +4667,7 @@ LookupPropertyInline(ExclusiveContext *cx, */ /* Search scopes starting with obj and following the prototype link. 
*/ - typename MaybeRooted::RootType current(cx, obj); + typename MaybeRooted::RootType current(cx, obj); while (true) { bool done; @@ -4679,7 +4690,7 @@ LookupPropertyInline(ExclusiveContext *cx, MaybeRooted::toMutableHandle(propp)); } - current = proto; + current = &proto->template as(); } objp.set(nullptr); @@ -4690,7 +4701,7 @@ LookupPropertyInline(ExclusiveContext *cx, template bool baseops::LookupProperty(ExclusiveContext *cx, - typename MaybeRooted::HandleType obj, + typename MaybeRooted::HandleType obj, typename MaybeRooted::HandleType id, typename MaybeRooted::MutableHandleType objp, typename MaybeRooted::MutableHandleType propp) @@ -4699,11 +4710,11 @@ baseops::LookupProperty(ExclusiveContext *cx, } template bool -baseops::LookupProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, +baseops::LookupProperty(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, MutableHandleObject objp, MutableHandleShape propp); template bool -baseops::LookupProperty(ExclusiveContext *cx, JSObject *obj, jsid id, +baseops::LookupProperty(ExclusiveContext *cx, NativeObject *obj, jsid id, FakeMutableHandle objp, FakeMutableHandle propp); @@ -4718,11 +4729,11 @@ JSObject::lookupGeneric(JSContext *cx, HandleObject obj, js::HandleId id, LookupGenericOp op = obj->getOps()->lookupGeneric; if (op) return op(cx, obj, id, objp, propp); - return baseops::LookupProperty(cx, obj, id, objp, propp); + return baseops::LookupProperty(cx, obj.as(), id, objp, propp); } bool -baseops::LookupElement(JSContext *cx, HandleObject obj, uint32_t index, +baseops::LookupElement(JSContext *cx, HandleNativeObject obj, uint32_t index, MutableHandleObject objp, MutableHandleShape propp) { RootedId id(cx); @@ -4733,7 +4744,7 @@ baseops::LookupElement(JSContext *cx, HandleObject obj, uint32_t index, } bool -js::LookupNativeProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, +js::LookupNativeProperty(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, MutableHandleObject objp, 
MutableHandleShape propp) { return LookupPropertyInline(cx, obj, id, objp, propp); @@ -4771,7 +4782,7 @@ js::LookupNameNoGC(JSContext *cx, PropertyName *name, JSObject *scopeChain, for (JSObject *scope = scopeChain; scope; scope = scope->enclosingScope()) { if (scope->getOps()->lookupGeneric) return false; - if (!LookupPropertyInline(cx, scope, NameToId(name), pobjp, propp)) + if (!LookupPropertyInline(cx, &scope->as(), NameToId(name), pobjp, propp)) return false; if (*propp) { *objp = scope; @@ -4851,8 +4862,11 @@ js::HasOwnProperty(JSContext *cx, LookupGenericOp lookup, return false; } } else { + typename MaybeRooted::HandleType nobj = + MaybeRooted::template downcastHandle(obj); + bool done; - if (!LookupOwnPropertyInline(cx, obj, id, objp, propp, &done)) + if (!LookupOwnPropertyInline(cx, nobj, id, objp, propp, &done)) return false; if (!done) { objp.set(nullptr); @@ -4908,14 +4922,12 @@ static MOZ_ALWAYS_INLINE bool NativeGetInline(JSContext *cx, typename MaybeRooted::HandleType obj, typename MaybeRooted::HandleType receiver, - typename MaybeRooted::HandleType pobj, + typename MaybeRooted::HandleType pobj, typename MaybeRooted::HandleType shape, typename MaybeRooted::MutableHandleType vp) { - MOZ_ASSERT(pobj->isNative()); - if (shape->hasSlot()) { - vp.set(pobj->nativeGetSlot(shape->slot())); + vp.set(pobj->getSlot(shape->slot())); MOZ_ASSERT_IF(!vp.isMagic(JS_UNINITIALIZED_LEXICAL) && !pobj->hasSingletonType() && !pobj->template is() && @@ -4956,15 +4968,15 @@ NativeGetInline(JSContext *cx, } /* Update slotful shapes according to the value produced by the getter. 
*/ - if (shape->hasSlot() && pobj->nativeContains(cx, shape)) - pobj->nativeSetSlot(shape->slot(), vp); + if (shape->hasSlot() && pobj->contains(cx, shape)) + pobj->setSlot(shape->slot(), vp); return true; } bool -js::NativeGet(JSContext *cx, Handle obj, Handle pobj, Handle shape, - MutableHandle vp) +js::NativeGet(JSContext *cx, HandleObject obj, HandleNativeObject pobj, HandleShape shape, + MutableHandleValue vp) { return NativeGetInline(cx, obj, obj, pobj, shape, vp); } @@ -4972,7 +4984,7 @@ js::NativeGet(JSContext *cx, Handle obj, Handle pobj, Hand template bool js::NativeSet(typename ExecutionModeTraits::ContextType cxArg, - Handle obj, Handle receiver, + HandleNativeObject obj, Handle receiver, HandleShape shape, bool strict, MutableHandleValue vp) { MOZ_ASSERT(cxArg->isThreadLocal(obj)); @@ -4982,14 +4994,14 @@ js::NativeSet(typename ExecutionModeTraits::ContextType cxArg, /* If shape has a stub setter, just store vp. */ if (shape->hasDefaultSetter()) { if (mode == ParallelExecution) { - if (!obj->nativeSetSlotIfHasType(shape, vp)) + if (!obj->setSlotIfHasType(shape, vp)) return false; } else { // Global properties declared with 'var' will be initially // defined with an undefined value, so don't treat the initial // assignments to such properties as overwrites. 
- bool overwriting = !obj->is() || !obj->nativeGetSlot(shape->slot()).isUndefined(); - obj->nativeSetSlotWithType(cxArg->asExclusiveContext(), shape, vp, overwriting); + bool overwriting = !obj->is() || !obj->getSlot(shape->slot()).isUndefined(); + obj->setSlotWithType(cxArg->asExclusiveContext(), shape, vp, overwriting); } return true; @@ -5023,7 +5035,7 @@ js::NativeSet(typename ExecutionModeTraits::ContextType cxArg, */ if (shape->hasSlot() && (MOZ_LIKELY(cx->runtime()->propertyRemovals == sample) || - obj->nativeContains(cx, shape))) + obj->contains(cx, shape))) { obj->setSlot(shape->slot(), vp); } @@ -5033,17 +5045,17 @@ js::NativeSet(typename ExecutionModeTraits::ContextType cxArg, template bool js::NativeSet(JSContext *cx, - Handle obj, Handle receiver, + HandleNativeObject obj, HandleObject receiver, HandleShape shape, bool strict, MutableHandleValue vp); template bool js::NativeSet(ForkJoinContext *cx, - Handle obj, Handle receiver, + HandleNativeObject obj, HandleObject receiver, HandleShape shape, bool strict, MutableHandleValue vp); template static MOZ_ALWAYS_INLINE bool GetPropertyHelperInline(JSContext *cx, - typename MaybeRooted::HandleType obj, + typename MaybeRooted::HandleType obj, typename MaybeRooted::HandleType receiver, typename MaybeRooted::HandleType id, typename MaybeRooted::MutableHandleType vp) @@ -5139,39 +5151,42 @@ GetPropertyHelperInline(JSContext *cx, : JSObject::getGeneric(cx, obj2Handle, obj2Handle, idHandle, vpHandle); } + typename MaybeRooted::HandleType nobj2 = + MaybeRooted::template downcastHandle(obj2); + if (IsImplicitDenseOrTypedArrayElement(shape)) { - vp.set(obj2->getDenseOrTypedArrayElement(JSID_TO_INT(id))); + vp.set(nobj2->getDenseOrTypedArrayElement(JSID_TO_INT(id))); return true; } /* This call site is hot -- use the always-inlined variant of NativeGet(). 
*/ - if (!NativeGetInline(cx, obj, receiver, obj2, shape, vp)) + if (!NativeGetInline(cx, obj, receiver, nobj2, shape, vp)) return false; return true; } bool -baseops::GetProperty(JSContext *cx, HandleObject obj, HandleObject receiver, HandleId id, MutableHandleValue vp) +baseops::GetProperty(JSContext *cx, HandleNativeObject obj, HandleObject receiver, HandleId id, MutableHandleValue vp) { /* This call site is hot -- use the always-inlined variant of GetPropertyHelper(). */ return GetPropertyHelperInline(cx, obj, receiver, id, vp); } bool -baseops::GetPropertyNoGC(JSContext *cx, JSObject *obj, JSObject *receiver, jsid id, Value *vp) +baseops::GetPropertyNoGC(JSContext *cx, NativeObject *obj, JSObject *receiver, jsid id, Value *vp) { AutoAssertNoException nogc(cx); return GetPropertyHelperInline(cx, obj, receiver, id, vp); } static MOZ_ALWAYS_INLINE bool -LookupPropertyPureInline(JSObject *obj, jsid id, JSObject **objp, Shape **propp) +LookupPropertyPureInline(JSObject *obj, jsid id, NativeObject **objp, Shape **propp) { if (!obj->isNative()) return false; - JSObject *current = obj; + NativeObject *current = &obj->as(); while (true) { /* Search for a native dense element, typed array element, or property. 
*/ @@ -5195,7 +5210,7 @@ LookupPropertyPureInline(JSObject *obj, jsid id, JSObject **objp, Shape **propp) } } - if (Shape *shape = current->nativeLookupPure(id)) { + if (Shape *shape = current->lookupPure(id)) { *objp = current; *propp = shape; return true; @@ -5212,7 +5227,7 @@ LookupPropertyPureInline(JSObject *obj, jsid id, JSObject **objp, Shape **propp) if (!proto->isNative()) return false; - current = proto; + current = &proto->as(); } *objp = nullptr; @@ -5221,12 +5236,10 @@ LookupPropertyPureInline(JSObject *obj, jsid id, JSObject **objp, Shape **propp) } static MOZ_ALWAYS_INLINE bool -NativeGetPureInline(JSObject *pobj, Shape *shape, Value *vp) +NativeGetPureInline(NativeObject *pobj, Shape *shape, Value *vp) { - MOZ_ASSERT(pobj->isNative()); - if (shape->hasSlot()) { - *vp = pobj->nativeGetSlot(shape->slot()); + *vp = pobj->getSlot(shape->slot()); MOZ_ASSERT(!vp->isMagic()); } else { vp->setUndefined(); @@ -5237,7 +5250,7 @@ NativeGetPureInline(JSObject *pobj, Shape *shape, Value *vp) } bool -js::LookupPropertyPure(JSObject *obj, jsid id, JSObject **objp, Shape **propp) +js::LookupPropertyPure(JSObject *obj, jsid id, NativeObject **objp, Shape **propp) { return LookupPropertyPureInline(obj, id, objp, propp); } @@ -5256,7 +5269,7 @@ bool js::GetPropertyPure(ThreadSafeContext *cx, JSObject *obj, jsid id, Value *vp) { /* Deal with native objects. 
*/ - JSObject *obj2; + NativeObject *obj2; Shape *shape; if (!LookupPropertyPureInline(obj, id, &obj2, &shape)) return false; @@ -5329,7 +5342,7 @@ js::GetObjectElementOperationPure(ThreadSafeContext *cx, JSObject *obj, const Va } bool -baseops::GetElement(JSContext *cx, HandleObject obj, HandleObject receiver, uint32_t index, +baseops::GetElement(JSContext *cx, HandleNativeObject obj, HandleObject receiver, uint32_t index, MutableHandleValue vp) { RootedId id(cx); @@ -5424,7 +5437,7 @@ JSObject::callMethod(JSContext *cx, HandleId id, unsigned argc, Value *argv, Mut template bool baseops::SetPropertyHelper(typename ExecutionModeTraits::ContextType cxArg, - HandleObject obj, HandleObject receiver, HandleId id, + HandleNativeObject obj, HandleObject receiver, HandleId id, QualifiedBool qualified, MutableHandleValue vp, bool strict) { MOZ_ASSERT(cxArg->isThreadLocal(obj)); @@ -5443,8 +5456,10 @@ baseops::SetPropertyHelper(typename ExecutionModeTraits::ContextType cxArg RootedObject pobj(cxArg); RootedShape shape(cxArg); if (mode == ParallelExecution) { - if (!LookupPropertyPure(obj, id, pobj.address(), shape.address())) + NativeObject *npobj; + if (!LookupPropertyPure(obj, id, &npobj, shape.address())) return false; + pobj = npobj; } else { JSContext *cx = cxArg->asJSContext(); if (!LookupNativeProperty(cx, obj, id, &pobj, &shape)) @@ -5676,18 +5691,18 @@ baseops::SetPropertyHelper(typename ExecutionModeTraits::ContextType cxArg } template bool -baseops::SetPropertyHelper(JSContext *cx, HandleObject obj, +baseops::SetPropertyHelper(JSContext *cx, HandleNativeObject obj, HandleObject receiver, HandleId id, QualifiedBool qualified, MutableHandleValue vp, bool strict); template bool -baseops::SetPropertyHelper(ForkJoinContext *cx, HandleObject obj, +baseops::SetPropertyHelper(ForkJoinContext *cx, HandleNativeObject obj, HandleObject receiver, HandleId id, QualifiedBool qualified, MutableHandleValue vp, bool strict); bool -baseops::SetElementHelper(JSContext *cx, 
HandleObject obj, HandleObject receiver, uint32_t index, +baseops::SetElementHelper(JSContext *cx, HandleNativeObject obj, HandleObject receiver, uint32_t index, MutableHandleValue vp, bool strict) { RootedId id(cx); @@ -5698,7 +5713,7 @@ baseops::SetElementHelper(JSContext *cx, HandleObject obj, HandleObject receiver } bool -baseops::GetAttributes(JSContext *cx, HandleObject obj, HandleId id, unsigned *attrsp) +baseops::GetAttributes(JSContext *cx, HandleNativeObject obj, HandleId id, unsigned *attrsp) { RootedObject nobj(cx); RootedShape shape(cx); @@ -5716,7 +5731,7 @@ baseops::GetAttributes(JSContext *cx, HandleObject obj, HandleId id, unsigned *a } bool -baseops::SetAttributes(JSContext *cx, HandleObject obj, HandleId id, unsigned *attrsp) +baseops::SetAttributes(JSContext *cx, HandleNativeObject obj, HandleId id, unsigned *attrsp) { RootedObject nobj(cx); RootedShape shape(cx); @@ -5731,15 +5746,15 @@ baseops::SetAttributes(JSContext *cx, HandleObject obj, HandleId id, unsigned *a JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_CANT_SET_ARRAY_ATTRS); return false; } - if (!JSObject::sparsifyDenseElement(cx, nobj, JSID_TO_INT(id))) + if (!NativeObject::sparsifyDenseElement(cx, nobj.as(), JSID_TO_INT(id))) return false; - shape = obj->nativeLookup(cx, id); + shape = nobj->as().lookup(cx, id); } if (nobj->isNative()) { - if (!JSObject::changePropertyAttributes(cx, nobj, shape, *attrsp)) + if (!NativeObject::changePropertyAttributes(cx, nobj.as(), shape, *attrsp)) return false; if (*attrsp & JSPROP_READONLY) - MarkTypePropertyNonWritable(cx, obj, id); + MarkTypePropertyNonWritable(cx, nobj, id); return true; } else { return JSObject::setGenericAttributes(cx, nobj, id, attrsp); @@ -5747,7 +5762,7 @@ baseops::SetAttributes(JSContext *cx, HandleObject obj, HandleId id, unsigned *a } bool -baseops::DeleteGeneric(JSContext *cx, HandleObject obj, HandleId id, bool *succeeded) +baseops::DeleteGeneric(JSContext *cx, HandleNativeObject obj, HandleId id, bool 
*succeeded) { RootedObject proto(cx); RootedShape shape(cx); @@ -5775,10 +5790,11 @@ baseops::DeleteGeneric(JSContext *cx, HandleObject obj, HandleId id, bool *succe if (!succeeded) return true; - if (!obj->maybeCopyElementsForWrite(cx)) + NativeObject *nobj = &obj->as(); + if (!nobj->maybeCopyElementsForWrite(cx)) return false; - obj->setDenseElementHole(cx, JSID_TO_INT(id)); + nobj->setDenseElementHole(cx, JSID_TO_INT(id)); return SuppressDeletedProperty(cx, obj, id); } @@ -5803,7 +5819,7 @@ js::WatchGuts(JSContext *cx, JS::HandleObject origObj, JS::HandleId id, JS::Hand if (obj->isNative()) { // Use sparse indexes for watched objects, as dense elements can be // written to without checking the watchpoint map. - if (!JSObject::sparsifyDenseElements(cx, obj)) + if (!NativeObject::sparsifyDenseElements(cx, obj.as())) return false; types::MarkTypePropertyNonData(cx, obj, id); @@ -5852,16 +5868,16 @@ baseops::Unwatch(JSContext *cx, JS::HandleObject obj, JS::HandleId id) } bool -js::HasDataProperty(JSContext *cx, JSObject *obj, jsid id, Value *vp) +js::HasDataProperty(JSContext *cx, NativeObject *obj, jsid id, Value *vp) { if (JSID_IS_INT(id) && obj->containsDenseElement(JSID_TO_INT(id))) { *vp = obj->getDenseElement(JSID_TO_INT(id)); return true; } - if (Shape *shape = obj->nativeLookup(cx, id)) { + if (Shape *shape = obj->lookup(cx, id)) { if (shape->hasDefaultGetter() && shape->hasSlot()) { - *vp = obj->nativeGetSlot(shape->slot()); + *vp = obj->getSlot(shape->slot()); return true; } } @@ -5902,8 +5918,9 @@ js::DefaultValue(JSContext *cx, HandleObject obj, JSType hint, MutableHandleValu /* Optimize (new String(...)).toString(). 
*/ if (clasp == &StringObject::class_) { - if (ClassMethodIsNative(cx, obj, &StringObject::class_, id, js_str_toString)) { - vp.setString(obj->as().unbox()); + StringObject *nobj = &obj->as(); + if (ClassMethodIsNative(cx, nobj, &StringObject::class_, id, js_str_toString)) { + vp.setString(nobj->unbox()); return true; } } @@ -5923,8 +5940,9 @@ js::DefaultValue(JSContext *cx, HandleObject obj, JSType hint, MutableHandleValu /* Optimize new String(...).valueOf(). */ if (clasp == &StringObject::class_) { id = NameToId(cx->names().valueOf); - if (ClassMethodIsNative(cx, obj, &StringObject::class_, id, js_str_toString)) { - vp.setString(obj->as().unbox()); + StringObject *nobj = &obj->as(); + if (ClassMethodIsNative(cx, nobj, &StringObject::class_, id, js_str_toString)) { + vp.setString(nobj->unbox()); return true; } } @@ -5932,8 +5950,9 @@ js::DefaultValue(JSContext *cx, HandleObject obj, JSType hint, MutableHandleValu /* Optimize new Number(...).valueOf(). */ if (clasp == &NumberObject::class_) { id = NameToId(cx->names().valueOf); - if (ClassMethodIsNative(cx, obj, &NumberObject::class_, id, js_num_valueOf)) { - vp.setNumber(obj->as().unbox()); + NumberObject *nobj = &obj->as(); + if (ClassMethodIsNative(cx, nobj, &NumberObject::class_, id, js_num_valueOf)) { + vp.setNumber(nobj->unbox()); return true; } } @@ -6050,6 +6069,7 @@ js::FindClassPrototype(ExclusiveContext *cx, MutableHandleObject protop, const C return false; if (ctor && ctor->is()) { + JSFunction *nctor = &ctor->as(); RootedValue v(cx); if (cx->isJSContext()) { if (!JSObject::getProperty(cx->asJSContext(), @@ -6058,8 +6078,8 @@ js::FindClassPrototype(ExclusiveContext *cx, MutableHandleObject protop, const C return false; } } else { - Shape *shape = ctor->nativeLookup(cx, cx->names().prototype); - if (!shape || !NativeGetPureInline(ctor, shape, v.address())) + Shape *shape = nctor->lookup(cx, cx->names().prototype); + if (!shape || !NativeGetPureInline(nctor, shape, v.address())) return false; } if 
(v.isObject()) @@ -6265,7 +6285,7 @@ js_DumpId(jsid id) } static void -DumpProperty(JSObject *obj, Shape &shape) +DumpProperty(NativeObject *obj, Shape &shape) { jsid id = shape.propid(); uint8_t attrs = shape.attributes(); @@ -6330,20 +6350,22 @@ JSObject::dump() if (obj->hadElementsAccess()) fprintf(stderr, " had_elements_access"); if (obj->isNative()) { - if (obj->inDictionaryMode()) + NativeObject *nobj = &obj->as(); + if (nobj->inDictionaryMode()) fprintf(stderr, " inDictionaryMode"); - if (obj->hasShapeTable()) + if (nobj->hasShapeTable()) fprintf(stderr, " hasShapeTable"); } fprintf(stderr, "\n"); if (obj->isNative()) { - uint32_t slots = obj->getDenseInitializedLength(); + NativeObject *nobj = &obj->as(); + uint32_t slots = nobj->getDenseInitializedLength(); if (slots) { fprintf(stderr, "elements\n"); for (uint32_t i = 0; i < slots; i++) { fprintf(stderr, " %3d: ", i); - dumpValue(obj->getDenseElement(i)); + dumpValue(nobj->getDenseElement(i)); fprintf(stderr, "\n"); fflush(stderr); } @@ -6363,13 +6385,13 @@ JSObject::dump() fputc('\n', stderr); if (clasp->flags & JSCLASS_HAS_PRIVATE) - fprintf(stderr, "private %p\n", obj->getPrivate()); + fprintf(stderr, "private %p\n", obj->fakeNativeGetPrivate()); if (!obj->isNative()) fprintf(stderr, "not native\n"); uint32_t reservedEnd = JSCLASS_RESERVED_SLOTS(clasp); - uint32_t slots = obj->slotSpan(); + uint32_t slots = obj->fakeNativeSlotSpan(); uint32_t stop = obj->isNative() ? reservedEnd : slots; if (stop > 0) fprintf(stderr, obj->isNative() ? 
"reserved slots:\n" : "slots:\n"); @@ -6378,7 +6400,7 @@ JSObject::dump() if (i < reservedEnd) fprintf(stderr, "(reserved) "); fprintf(stderr, "= "); - dumpValue(obj->getSlot(i)); + dumpValue(obj->fakeNativeGetSlot(i)); fputc('\n', stderr); } @@ -6388,7 +6410,7 @@ JSObject::dump() for (Shape::Range r(obj->lastProperty()); !r.empty(); r.popFront()) props.append(&r.front()); for (size_t i = props.length(); i-- != 0;) - DumpProperty(obj, *props[i]); + DumpProperty(&obj->as(), *props[i]); } fputc('\n', stderr); } @@ -6504,11 +6526,11 @@ js_DumpBacktrace(JSContext *cx) void JSObject::addSizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf, JS::ClassInfo *info) { - if (hasDynamicSlots()) - info->objectsMallocHeapSlots += mallocSizeOf(slots); + if (fakeNativeHasDynamicSlots()) + info->objectsMallocHeapSlots += mallocSizeOf(fakeNativeSlots()); - if (hasDynamicElements()) { - js::ObjectElements *elements = getElementsHeader(); + if (fakeNativeHasDynamicElements()) { + js::ObjectElements *elements = fakeNativeGetElementsHeader(); if (!elements->isCopyOnWrite() || elements->ownerObject() == this) info->objectsMallocHeapElementsNonAsmJS += mallocSizeOf(elements); } diff --git a/js/src/jsobj.h b/js/src/jsobj.h index 82499bef1e7..84758fbd3ee 100644 --- a/js/src/jsobj.h +++ b/js/src/jsobj.h @@ -22,8 +22,8 @@ #include "gc/Marking.h" #include "js/GCAPI.h" #include "js/HeapAPI.h" -#include "vm/ObjectImpl.h" #include "vm/Shape.h" +#include "vm/String.h" #include "vm/Xdr.h" namespace JS { @@ -66,99 +66,6 @@ CastAsObjectJsval(StrictPropertyOp op) typedef Vector PropDescArray; -/* - * The baseops namespace encapsulates the default behavior when performing - * various operations on an object, irrespective of hooks installed in the - * object's class. In general, instance methods on the object itself should be - * called instead of calling these methods directly. 
- */ -namespace baseops { - -/* - * On success, and if id was found, return true with *objp non-null and with a - * property of *objp stored in *propp. If successful but id was not found, - * return true with both *objp and *propp null. - */ -template -extern bool -LookupProperty(ExclusiveContext *cx, - typename MaybeRooted::HandleType obj, - typename MaybeRooted::HandleType id, - typename MaybeRooted::MutableHandleType objp, - typename MaybeRooted::MutableHandleType propp); - -extern bool -LookupElement(JSContext *cx, HandleObject obj, uint32_t index, - MutableHandleObject objp, MutableHandleShape propp); - -extern bool -DefineGeneric(ExclusiveContext *cx, HandleObject obj, HandleId id, HandleValue value, - JSPropertyOp getter, JSStrictPropertyOp setter, unsigned attrs); - -extern bool -DefineElement(ExclusiveContext *cx, HandleObject obj, uint32_t index, HandleValue value, - JSPropertyOp getter, JSStrictPropertyOp setter, unsigned attrs); - -extern bool -GetProperty(JSContext *cx, HandleObject obj, HandleObject receiver, HandleId id, MutableHandleValue vp); - -extern bool -GetPropertyNoGC(JSContext *cx, JSObject *obj, JSObject *receiver, jsid id, Value *vp); - -extern bool -GetElement(JSContext *cx, HandleObject obj, HandleObject receiver, uint32_t index, MutableHandleValue vp); - -inline bool -GetProperty(JSContext *cx, HandleObject obj, HandleId id, MutableHandleValue vp) -{ - return GetProperty(cx, obj, obj, id, vp); -} - -inline bool -GetElement(JSContext *cx, HandleObject obj, uint32_t index, MutableHandleValue vp) -{ - return GetElement(cx, obj, obj, index, vp); -} - -/* - * Indicates whether an assignment operation is qualified (`x.y = 0`) or - * unqualified (`y = 0`). In strict mode, the latter is an error if no such - * variable already exists. - * - * Used as an argument to baseops::SetPropertyHelper. 
- */ -enum QualifiedBool { - Unqualified = 0, - Qualified = 1 -}; - -template -extern bool -SetPropertyHelper(typename ExecutionModeTraits::ContextType cx, HandleObject obj, - HandleObject receiver, HandleId id, QualifiedBool qualified, - MutableHandleValue vp, bool strict); - -extern bool -SetElementHelper(JSContext *cx, HandleObject obj, HandleObject Receiver, uint32_t index, - MutableHandleValue vp, bool strict); - -extern bool -GetAttributes(JSContext *cx, HandleObject obj, HandleId id, unsigned *attrsp); - -extern bool -SetAttributes(JSContext *cx, HandleObject obj, HandleId id, unsigned *attrsp); - -extern bool -DeleteGeneric(JSContext *cx, HandleObject obj, HandleId id, bool *succeeded); - -extern bool -Watch(JSContext *cx, JS::HandleObject obj, JS::HandleId id, JS::HandleObject callable); - -extern bool -Unwatch(JSContext *cx, JS::HandleObject obj, JS::HandleId id); - -} /* namespace js::baseops */ - extern const Class IntlClass; extern const Class JSONClass; extern const Class MathClass; @@ -194,17 +101,76 @@ class ForkJoinNursery; } /* namespace js */ /* - * The public interface for an object. + * A JavaScript object. The members common to all objects are as follows: * - * Implementation of the underlying structure occurs in ObjectImpl, from which - * this struct inherits. This inheritance is currently public, but it will - * eventually be made protected. For full details, see vm/ObjectImpl.{h,cpp}. + * - The |shape_| member stores the shape of the object, which includes the + * object's class and the layout of all its properties. * - * The JSFunction struct is an extension of this struct allocated from a larger - * GC size-class. + * - The |type_| member stores the type of the object, which contains its + * prototype object and the possible types of its properties. + * + * Subclasses of JSObject --- mainly NativeObject and JSFunction --- add more + * members. 
*/ -class JSObject : public js::ObjectImpl +class JSObject : public js::gc::Cell { + protected: + /* + * Shape of the object, encodes the layout of the object's properties and + * all other information about its structure. See vm/Shape.h. + */ + js::HeapPtrShape shape_; + + /* + * The object's type and prototype. For objects with the LAZY_TYPE flag + * set, this is the prototype's default 'new' type and can only be used + * to get that prototype. + */ + js::HeapPtrTypeObject type_; + + // FIXME bug 1073842: this is temporary until these members are no longer + // accessed by non-native objects. + js::HeapSlot *slots; /* Slots for object properties. */ + js::HeapSlot *elements; /* Slots for object elements. */ + + public: + // Methods for accessing slots/elements storage in objects that might be + // non-native. These will be removed soon as part of bug 1073842. + inline void *fakeNativeGetPrivate() const; + inline void *fakeNativeGetPrivate(uint32_t nfixed) const; + inline void fakeNativeSetPrivate(void *data); + inline bool fakeNativeHasPrivate() const; + inline void fakeNativeInitPrivate(void *data); + inline void *&fakeNativePrivateRef(uint32_t nfixed) const; + inline uint32_t fakeNativeSlotSpan(); + inline const js::Value &fakeNativeGetSlot(uint32_t slot); + inline void fakeNativeSetSlot(uint32_t slot, const js::Value &value); + inline js::HeapSlot &fakeNativeGetSlotRef(uint32_t slot); + inline const js::Value &fakeNativeGetReservedSlot(uint32_t slot) const; + inline js::HeapSlot &fakeNativeGetReservedSlotRef(uint32_t slot); + inline void fakeNativeSetReservedSlot(uint32_t slot, const js::Value &value); + inline void fakeNativeInitReservedSlot(uint32_t slot, const js::Value &value); + inline void fakeNativeSetCrossCompartmentSlot(uint32_t slot, const js::Value &value); + inline void fakeNativeInitCrossCompartmentSlot(uint32_t slot, const js::Value &value); + inline void fakeNativeSetInitialSlots(js::HeapSlot *newSlots); + inline bool fakeNativeHasDynamicSlots() 
const; + inline uint32_t fakeNativeNumFixedSlots() const; + inline uint32_t fakeNativeNumDynamicSlots() const; + inline js::HeapSlot *&fakeNativeSlots(); + inline void fakeNativeInitSlot(uint32_t slot, const js::Value &value); + inline void fakeNativeInitSlotRange(uint32_t start, const js::Value *vector, uint32_t length); + inline void fakeNativeInitializeSlotRange(uint32_t start, uint32_t count); + inline bool fakeNativeHasDynamicElements() const; + inline bool fakeNativeHasEmptyElements() const; + inline js::HeapSlotArray fakeNativeGetDenseElements(); + inline bool fakeNativeDenseElementsAreCopyOnWrite(); + inline js::ObjectElements *fakeNativeGetElementsHeader() const; + inline js::HeapSlot *&fakeNativeElements(); + inline uint8_t *fakeNativeFixedData(size_t nslots) const; + inline const js::Value &fakeNativeGetDenseElement(uint32_t idx); + inline uint32_t fakeNativeGetDenseInitializedLength(); + inline const js::HeapSlot *fakeNativeGetSlotAddressUnchecked(uint32_t slot) const; + private: friend class js::Shape; friend class js::GCMarker; @@ -218,18 +184,56 @@ class JSObject : public js::ObjectImpl public: static const js::Class class_; - /* - * Update the last property, keeping the number of allocated slots in sync - * with the object's new slot span. - */ - static bool setLastProperty(js::ThreadSafeContext *cx, - JS::HandleObject obj, js::HandleShape shape); + js::Shape * lastProperty() const { + MOZ_ASSERT(shape_); + return shape_; + } - // As for setLastProperty(), but allows the number of fixed slots to - // change. This can only be used when fixed slots are being erased from the - // object, and only when the object will not require dynamic slots to cover - // the new properties. 
- void setLastPropertyShrinkFixedSlots(js::Shape *shape); + bool isNative() const { + return lastProperty()->isNative(); + } + + const js::Class *getClass() const { + return type_->clasp(); + } + const JSClass *getJSClass() const { + return Jsvalify(getClass()); + } + bool hasClass(const js::Class *c) const { + return getClass() == c; + } + const js::ObjectOps *getOps() const { + return &getClass()->ops; + } + + js::types::TypeObject *type() const { + MOZ_ASSERT(!hasLazyType()); + return typeRaw(); + } + + js::types::TypeObject *typeRaw() const { + return type_; + } + + /* + * Whether this is the only object which has its specified type. This + * object will have its type constructed lazily as needed by analysis. + */ + bool hasSingletonType() const { + return !!type_->singleton(); + } + + /* + * Whether the object's type has not been constructed yet. If an object + * might have a lazy type, use getType() below, otherwise type(). + */ + bool hasLazyType() const { + return type_->lazy(); + } + + JSCompartment *compartment() const { + return lastProperty()->base()->compartment(); + } /* * Make a non-array object with the specified initial state. This method @@ -241,60 +245,29 @@ class JSObject : public js::ObjectImpl js::HandleShape shape, js::HandleTypeObject type); - static inline JSObject *copy(js::ExclusiveContext *cx, - js::gc::AllocKind kind, - js::gc::InitialHeap heap, - js::HandleObject templateObject); + protected: + enum GenerateShape { + GENERATE_NONE, + GENERATE_SHAPE + }; - /* Make an array object with the specified initial state. */ - static inline js::ArrayObject *createArray(js::ExclusiveContext *cx, - js::gc::AllocKind kind, - js::gc::InitialHeap heap, - js::HandleShape shape, - js::HandleTypeObject type, - uint32_t length); - - /* Make an array object with the specified initial state and elements. 
*/ - static inline js::ArrayObject *createArray(js::ExclusiveContext *cx, - js::gc::InitialHeap heap, - js::HandleShape shape, - js::HandleTypeObject type, - js::HeapSlot *elements); - - /* Make an copy-on-write array object which shares the elements of an existing object. */ - static inline js::ArrayObject *createCopyOnWriteArray(js::ExclusiveContext *cx, - js::gc::InitialHeap heap, - js::HandleShape shape, - js::HandleObject sharedElementsOwner); - - private: - // Helper for the above two methods. - static inline JSObject * - createArrayInternal(js::ExclusiveContext *cx, js::gc::AllocKind kind, js::gc::InitialHeap heap, - js::HandleShape shape, js::HandleTypeObject type); - - static inline js::ArrayObject *finishCreateArray(JSObject *obj, - js::HandleShape shape); - public: - - /* - * Remove the last property of an object, provided that it is safe to do so - * (the shape and previous shape do not carry conflicting information about - * the object itself). - */ - inline void removeLastProperty(js::ExclusiveContext *cx); - inline bool canRemoveLastProperty(); - - /* - * Update the slot span directly for a dictionary object, and allocate - * slots to cover the new span if necessary. - */ - static bool setSlotSpan(js::ThreadSafeContext *cx, JS::HandleObject obj, uint32_t span); - - /* Upper bound on the number of elements in an object. */ - static const uint32_t NELEMENTS_LIMIT = JS_BIT(28); + bool setFlag(js::ExclusiveContext *cx, /*BaseShape::Flag*/ uint32_t flag, + GenerateShape generateShape = GENERATE_NONE); public: + /* + * An object is a delegate if it is on another object's prototype or scope + * chain, and therefore the delegate might be asked implicitly to get or + * set a property on behalf of another object. Delegates may be accessed + * directly too, as may any object, but only those objects linked after the + * head of any prototype or scope chain are flagged as delegates. 
This + * definition helps to optimize shape-based property cache invalidation + * (see Purge{Scope,Proto}Chain in jsobj.cpp). + */ + bool isDelegate() const { + return lastProperty()->hasObjectFlag(js::BaseShape::DELEGATE); + } + bool setDelegate(js::ExclusiveContext *cx) { return setFlag(cx, js::BaseShape::DELEGATE, GENERATE_SHAPE); } @@ -347,13 +320,6 @@ class JSObject : public js::ObjectImpl return setFlag(cx, js::BaseShape::HAD_ELEMENTS_ACCESS); } - public: - bool nativeEmpty() const { - return lastProperty()->isEmptyShape(); - } - - bool shadowingShapeChange(js::ExclusiveContext *cx, const js::Shape &shape); - /* * Whether there may be indexed properties on this object, excluding any in * the object's elements. @@ -370,101 +336,43 @@ class JSObject : public js::ObjectImpl return lastProperty()->hasTable(); } + /* GC support. */ + + void markChildren(JSTracer *trc); + + void fixupAfterMovingGC(); + + static js::ThingRootKind rootKind() { return js::THING_ROOT_OBJECT; } + static const size_t MaxTagBits = 3; + static bool isNullLike(const JSObject *obj) { return uintptr_t(obj) < (1 << MaxTagBits); } + + MOZ_ALWAYS_INLINE JS::Zone *zone() const { + return shape_->zone(); + } + MOZ_ALWAYS_INLINE JS::shadow::Zone *shadowZone() const { + return JS::shadow::Zone::asShadowZone(zone()); + } + MOZ_ALWAYS_INLINE JS::Zone *zoneFromAnyThread() const { + return shape_->zoneFromAnyThread(); + } + MOZ_ALWAYS_INLINE JS::shadow::Zone *shadowZoneFromAnyThread() const { + return JS::shadow::Zone::asShadowZone(zoneFromAnyThread()); + } + static MOZ_ALWAYS_INLINE void readBarrier(JSObject *obj); + static MOZ_ALWAYS_INLINE void writeBarrierPre(JSObject *obj); + static MOZ_ALWAYS_INLINE void writeBarrierPost(JSObject *obj, void *cellp); + static MOZ_ALWAYS_INLINE void writeBarrierPostRelocate(JSObject *obj, void *cellp); + static MOZ_ALWAYS_INLINE void writeBarrierPostRemove(JSObject *obj, void *cellp); + + size_t tenuredSizeOfThis() const { + MOZ_ASSERT(isTenured()); + return 
js::gc::Arena::thingSize(asTenured().getAllocKind()); + } + void addSizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf, JS::ClassInfo *info); bool hasIdempotentProtoChain() const; - // MAX_FIXED_SLOTS is the biggest number of fixed slots our GC - // size classes will give an object. - static const uint32_t MAX_FIXED_SLOTS = 16; - - public: - - /* Accessors for properties. */ - - /* Whether a slot is at a fixed offset from this object. */ - bool isFixedSlot(size_t slot) { - return slot < numFixedSlots(); - } - - /* Index into the dynamic slots array to use for a dynamic slot. */ - size_t dynamicSlotIndex(size_t slot) { - MOZ_ASSERT(slot >= numFixedSlots()); - return slot - numFixedSlots(); - } - - /* - * Grow or shrink slots immediately before changing the slot span. - * The number of allocated slots is not stored explicitly, and changes to - * the slots must track changes in the slot span. - */ - static bool growSlots(js::ThreadSafeContext *cx, js::HandleObject obj, uint32_t oldCount, - uint32_t newCount); - static void shrinkSlots(js::ThreadSafeContext *cx, js::HandleObject obj, uint32_t oldCount, - uint32_t newCount); - - bool hasDynamicSlots() const { return !!slots; } - - protected: - static inline bool updateSlotsForSpan(js::ThreadSafeContext *cx, - js::HandleObject obj, size_t oldSpan, size_t newSpan); - - public: - /* - * Trigger the write barrier on a range of slots that will no longer be - * reachable. 
- */ - void prepareSlotRangeForOverwrite(size_t start, size_t end) { - for (size_t i = start; i < end; i++) - getSlotAddressUnchecked(i)->js::HeapSlot::~HeapSlot(); - } - - void prepareElementRangeForOverwrite(size_t start, size_t end) { - MOZ_ASSERT(end <= getDenseInitializedLength()); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - for (size_t i = start; i < end; i++) - elements[i].js::HeapSlot::~HeapSlot(); - } - - static bool rollbackProperties(js::ExclusiveContext *cx, js::HandleObject obj, - uint32_t slotSpan); - - void nativeSetSlot(uint32_t slot, const js::Value &value) { - MOZ_ASSERT(isNative()); - MOZ_ASSERT(slot < slotSpan()); - return setSlot(slot, value); - } - - inline bool nativeSetSlotIfHasType(js::Shape *shape, const js::Value &value, - bool overwriting = true); - inline void nativeSetSlotWithType(js::ExclusiveContext *cx, js::Shape *shape, - const js::Value &value, bool overwriting = true); - - inline const js::Value &getReservedSlot(uint32_t index) const { - MOZ_ASSERT(index < JSSLOT_FREE(getClass())); - return getSlot(index); - } - - const js::HeapSlot &getReservedSlotRef(uint32_t index) const { - MOZ_ASSERT(index < JSSLOT_FREE(getClass())); - return getSlotRef(index); - } - - js::HeapSlot &getReservedSlotRef(uint32_t index) { - MOZ_ASSERT(index < JSSLOT_FREE(getClass())); - return getSlotRef(index); - } - - void initReservedSlot(uint32_t index, const js::Value &v) { - MOZ_ASSERT(index < JSSLOT_FREE(getClass())); - initSlot(index, v); - } - - void setReservedSlot(uint32_t index, const js::Value &v) { - MOZ_ASSERT(index < JSSLOT_FREE(getClass())); - setSlot(index, v); - } - /* * Marks this object as having a singleton type, and leave the type lazy. * Constructs a new, unique shape for the object. @@ -492,6 +400,12 @@ class JSObject : public js::ObjectImpl * If obj is a proxy and the proto is lazy, this code may allocate or * GC in order to compute the proto. Currently, it will not run JS code. 
*/ + + js::TaggedProto getTaggedProto() const { + return type_->proto(); + } + bool hasTenuredProto() const; + bool uninlinedIsProxy() const; JSObject *getProto() const { MOZ_ASSERT(!uninlinedIsProxy()); @@ -597,6 +511,26 @@ class JSObject : public js::ObjectImpl * ES5 meta-object properties and operations. */ + public: + static inline bool + isExtensible(js::ExclusiveContext *cx, js::HandleObject obj, bool *extensible); + + // Indicates whether a non-proxy is extensible. Don't call on proxies! + // This method really shouldn't exist -- but there are a few internal + // places that want it (JITs and the like), and it'd be a pain to mark them + // all as friends. + bool nonProxyIsExtensible() const { + MOZ_ASSERT(!uninlinedIsProxy()); + + // [[Extensible]] for ordinary non-proxy objects is an object flag. + return !lastProperty()->hasObjectFlag(js::BaseShape::NOT_EXTENSIBLE); + } + + // Attempt to change the [[Extensible]] bit on |obj| to false. Callers + // must ensure that |obj| is currently extensible before calling this! + static bool + preventExtensions(JSContext *cx, js::HandleObject obj); + private: enum ImmutabilityType { SEAL, FREEZE }; @@ -628,224 +562,6 @@ class JSObject : public js::ObjectImpl /* toString support. */ static const char *className(JSContext *cx, js::HandleObject obj); - /* Accessors for elements. 
*/ - bool ensureElements(js::ThreadSafeContext *cx, uint32_t capacity) { - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - if (capacity > getDenseCapacity()) - return growElements(cx, capacity); - return true; - } - - static uint32_t goodAllocated(uint32_t n, uint32_t length); - bool growElements(js::ThreadSafeContext *cx, uint32_t newcap); - void shrinkElements(js::ThreadSafeContext *cx, uint32_t cap); - void setDynamicElements(js::ObjectElements *header) { - MOZ_ASSERT(!hasDynamicElements()); - elements = header->elements(); - MOZ_ASSERT(hasDynamicElements()); - } - - uint32_t getDenseCapacity() { - MOZ_ASSERT(isNative()); - MOZ_ASSERT(getElementsHeader()->capacity >= getElementsHeader()->initializedLength); - return getElementsHeader()->capacity; - } - - static bool CopyElementsForWrite(js::ThreadSafeContext *cx, JSObject *obj); - - bool maybeCopyElementsForWrite(js::ThreadSafeContext *cx) { - if (denseElementsAreCopyOnWrite()) - return CopyElementsForWrite(cx, this); - return true; - } - - private: - inline void ensureDenseInitializedLengthNoPackedCheck(js::ThreadSafeContext *cx, - uint32_t index, uint32_t extra); - - public: - void setDenseInitializedLength(uint32_t length) { - MOZ_ASSERT(isNative()); - MOZ_ASSERT(length <= getDenseCapacity()); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - prepareElementRangeForOverwrite(length, getElementsHeader()->initializedLength); - getElementsHeader()->initializedLength = length; - } - - inline void ensureDenseInitializedLength(js::ExclusiveContext *cx, - uint32_t index, uint32_t extra); - inline void ensureDenseInitializedLengthPreservePackedFlag(js::ThreadSafeContext *cx, - uint32_t index, uint32_t extra); - void setDenseElement(uint32_t index, const js::Value &val) { - MOZ_ASSERT(isNative() && index < getDenseInitializedLength()); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - elements[index].set(this, js::HeapSlot::Element, index, val); - } - - void initDenseElement(uint32_t index, const js::Value &val) { - 
MOZ_ASSERT(isNative() && index < getDenseInitializedLength()); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - elements[index].init(this, js::HeapSlot::Element, index, val); - } - - void setDenseElementMaybeConvertDouble(uint32_t index, const js::Value &val) { - if (val.isInt32() && shouldConvertDoubleElements()) - setDenseElement(index, js::DoubleValue(val.toInt32())); - else - setDenseElement(index, val); - } - - inline bool setDenseElementIfHasType(uint32_t index, const js::Value &val); - inline void setDenseElementWithType(js::ExclusiveContext *cx, uint32_t index, - const js::Value &val); - inline void initDenseElementWithType(js::ExclusiveContext *cx, uint32_t index, - const js::Value &val); - inline void setDenseElementHole(js::ExclusiveContext *cx, uint32_t index); - static inline void removeDenseElementForSparseIndex(js::ExclusiveContext *cx, - js::HandleObject obj, uint32_t index); - - inline js::Value getDenseOrTypedArrayElement(uint32_t idx); - - void copyDenseElements(uint32_t dstStart, const js::Value *src, uint32_t count) { - MOZ_ASSERT(dstStart + count <= getDenseCapacity()); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - JSRuntime *rt = runtimeFromMainThread(); - if (JS::IsIncrementalBarrierNeeded(rt)) { - JS::Zone *zone = this->zone(); - for (uint32_t i = 0; i < count; ++i) - elements[dstStart + i].set(zone, this, js::HeapSlot::Element, dstStart + i, src[i]); - } else { - memcpy(&elements[dstStart], src, count * sizeof(js::HeapSlot)); - DenseRangeWriteBarrierPost(rt, this, dstStart, count); - } - } - - void initDenseElements(uint32_t dstStart, const js::Value *src, uint32_t count) { - MOZ_ASSERT(dstStart + count <= getDenseCapacity()); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - memcpy(&elements[dstStart], src, count * sizeof(js::HeapSlot)); - DenseRangeWriteBarrierPost(runtimeFromMainThread(), this, dstStart, count); - } - - void initDenseElementsUnbarriered(uint32_t dstStart, const js::Value *src, uint32_t count); - - void 
moveDenseElements(uint32_t dstStart, uint32_t srcStart, uint32_t count) { - MOZ_ASSERT(dstStart + count <= getDenseCapacity()); - MOZ_ASSERT(srcStart + count <= getDenseInitializedLength()); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - - /* - * Using memmove here would skip write barriers. Also, we need to consider - * an array containing [A, B, C], in the following situation: - * - * 1. Incremental GC marks slot 0 of array (i.e., A), then returns to JS code. - * 2. JS code moves slots 1..2 into slots 0..1, so it contains [B, C, C]. - * 3. Incremental GC finishes by marking slots 1 and 2 (i.e., C). - * - * Since normal marking never happens on B, it is very important that the - * write barrier is invoked here on B, despite the fact that it exists in - * the array before and after the move. - */ - JS::Zone *zone = this->zone(); - JS::shadow::Zone *shadowZone = JS::shadow::Zone::asShadowZone(zone); - if (shadowZone->needsIncrementalBarrier()) { - if (dstStart < srcStart) { - js::HeapSlot *dst = elements + dstStart; - js::HeapSlot *src = elements + srcStart; - for (uint32_t i = 0; i < count; i++, dst++, src++) - dst->set(zone, this, js::HeapSlot::Element, dst - elements, *src); - } else { - js::HeapSlot *dst = elements + dstStart + count - 1; - js::HeapSlot *src = elements + srcStart + count - 1; - for (uint32_t i = 0; i < count; i++, dst--, src--) - dst->set(zone, this, js::HeapSlot::Element, dst - elements, *src); - } - } else { - memmove(elements + dstStart, elements + srcStart, count * sizeof(js::HeapSlot)); - DenseRangeWriteBarrierPost(runtimeFromMainThread(), this, dstStart, count); - } - } - - void moveDenseElementsNoPreBarrier(uint32_t dstStart, uint32_t srcStart, uint32_t count) { - MOZ_ASSERT(!shadowZone()->needsIncrementalBarrier()); - - MOZ_ASSERT(dstStart + count <= getDenseCapacity()); - MOZ_ASSERT(srcStart + count <= getDenseCapacity()); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - - memmove(elements + dstStart, elements + srcStart, count * 
sizeof(js::Value)); - DenseRangeWriteBarrierPost(runtimeFromMainThread(), this, dstStart, count); - } - - bool shouldConvertDoubleElements() { - MOZ_ASSERT(getClass()->isNative()); - return getElementsHeader()->shouldConvertDoubleElements(); - } - - inline void setShouldConvertDoubleElements(); - inline void clearShouldConvertDoubleElements(); - - bool denseElementsAreCopyOnWrite() { - MOZ_ASSERT(isNative()); - return getElementsHeader()->isCopyOnWrite(); - } - - void fixupAfterMovingGC(); - - /* Packed information for this object's elements. */ - inline bool writeToIndexWouldMarkNotPacked(uint32_t index); - inline void markDenseElementsNotPacked(js::ExclusiveContext *cx); - - /* - * ensureDenseElements ensures that the object can hold at least - * index + extra elements. It returns ED_OK on success, ED_FAILED on - * failure to grow the array, ED_SPARSE when the object is too sparse to - * grow (this includes the case of index + extra overflow). In the last - * two cases the object is kept intact. - */ - enum EnsureDenseResult { ED_OK, ED_FAILED, ED_SPARSE }; - - private: - inline EnsureDenseResult ensureDenseElementsNoPackedCheck(js::ThreadSafeContext *cx, - uint32_t index, uint32_t extra); - - public: - inline EnsureDenseResult ensureDenseElements(js::ExclusiveContext *cx, - uint32_t index, uint32_t extra); - inline EnsureDenseResult ensureDenseElementsPreservePackedFlag(js::ThreadSafeContext *cx, - uint32_t index, uint32_t extra); - - inline EnsureDenseResult extendDenseElements(js::ThreadSafeContext *cx, - uint32_t requiredCapacity, uint32_t extra); - - /* Convert a single dense element to a sparse property. */ - static bool sparsifyDenseElement(js::ExclusiveContext *cx, - js::HandleObject obj, uint32_t index); - - /* Convert all dense elements to sparse properties. */ - static bool sparsifyDenseElements(js::ExclusiveContext *cx, js::HandleObject obj); - - /* Small objects are dense, no matter what. 
*/ - static const uint32_t MIN_SPARSE_INDEX = 1000; - - /* - * Element storage for an object will be sparse if fewer than 1/8 indexes - * are filled in. - */ - static const unsigned SPARSE_DENSITY_RATIO = 8; - - /* - * Check if after growing the object's elements will be too sparse. - * newElementsHint is an estimated number of elements to be added. - */ - bool willBeSparseElements(uint32_t requiredCapacity, uint32_t newElementsHint); - - /* - * After adding a sparse index to obj, see if it should be converted to use - * dense elements. - */ - static EnsureDenseResult maybeDensifySparseElements(js::ExclusiveContext *cx, js::HandleObject obj); - public: /* * Iterator-specific getters and setters. @@ -867,16 +583,6 @@ class JSObject : public js::ObjectImpl static inline bool hasProperty(JSContext *cx, js::HandleObject obj, js::HandleId id, bool *foundp); - /* - * Allocate and free an object slot. - * - * FIXME: bug 593129 -- slot allocation should be done by object methods - * after calling object-parameter-free shape methods, avoiding coupling - * logic across the object vs. shape module wall. 
- */ - static bool allocSlot(js::ThreadSafeContext *cx, JS::HandleObject obj, uint32_t *slotp); - void freeSlot(uint32_t slot); - public: static bool reportReadOnly(js::ThreadSafeContext *cx, jsid id, unsigned report = JSREPORT_ERROR); bool reportNotConfigurable(js::ThreadSafeContext *cx, jsid id, unsigned report = JSREPORT_ERROR); @@ -891,46 +597,6 @@ class JSObject : public js::ObjectImpl bool callMethod(JSContext *cx, js::HandleId id, unsigned argc, js::Value *argv, js::MutableHandleValue vp); - private: - static js::Shape *getChildPropertyOnDictionary(js::ThreadSafeContext *cx, JS::HandleObject obj, - js::HandleShape parent, js::StackShape &child); - static js::Shape *getChildProperty(js::ExclusiveContext *cx, JS::HandleObject obj, - js::HandleShape parent, js::StackShape &child); - template - static inline js::Shape * - getOrLookupChildProperty(typename js::ExecutionModeTraits::ExclusiveContextType cx, - JS::HandleObject obj, js::HandleShape parent, js::StackShape &child) - { - if (mode == js::ParallelExecution) - return lookupChildProperty(cx, obj, parent, child); - return getChildProperty(cx->asExclusiveContext(), obj, parent, child); - } - - public: - /* - * XXX: This should be private, but is public because it needs to be a - * friend of ThreadSafeContext to get to the propertyTree on cx->compartment_. - */ - static js::Shape *lookupChildProperty(js::ThreadSafeContext *cx, JS::HandleObject obj, - js::HandleShape parent, js::StackShape &child); - - - protected: - /* - * Internal helper that adds a shape not yet mapped by this object. - * - * Notes: - * 1. getter and setter must be normalized based on flags (see jsscope.cpp). - * 2. Checks for non-extensibility must be done by callers. 
- */ - template - static js::Shape * - addPropertyInternal(typename js::ExecutionModeTraits::ExclusiveContextType cx, - JS::HandleObject obj, JS::HandleId id, - JSPropertyOp getter, JSStrictPropertyOp setter, - uint32_t slot, unsigned attrs, unsigned flags, js::Shape **spp, - bool allowDictionary); - private: struct TradeGutsReserved; static bool ReserveForTradeGuts(JSContext *cx, JSObject *a, JSObject *b, @@ -940,50 +606,6 @@ class JSObject : public js::ObjectImpl TradeGutsReserved &reserved); public: - /* Add a property whose id is not yet in this scope. */ - static js::Shape *addProperty(js::ExclusiveContext *cx, JS::HandleObject, JS::HandleId id, - JSPropertyOp getter, JSStrictPropertyOp setter, - uint32_t slot, unsigned attrs, unsigned flags, - bool allowDictionary = true); - - /* Add a data property whose id is not yet in this scope. */ - js::Shape *addDataProperty(js::ExclusiveContext *cx, - jsid id_, uint32_t slot, unsigned attrs); - js::Shape *addDataProperty(js::ExclusiveContext *cx, js::HandlePropertyName name, - uint32_t slot, unsigned attrs); - - /* Add or overwrite a property for id in this scope. */ - template - static js::Shape * - putProperty(typename js::ExecutionModeTraits::ExclusiveContextType cx, - JS::HandleObject obj, JS::HandleId id, - JSPropertyOp getter, JSStrictPropertyOp setter, - uint32_t slot, unsigned attrs, - unsigned flags); - template - static inline js::Shape * - putProperty(typename js::ExecutionModeTraits::ExclusiveContextType cx, - JS::HandleObject obj, js::PropertyName *name, - JSPropertyOp getter, JSStrictPropertyOp setter, - uint32_t slot, unsigned attrs, - unsigned flags); - - /* Change the given property into a sibling with the same id in this scope. 
*/ - template - static js::Shape * - changeProperty(typename js::ExecutionModeTraits::ExclusiveContextType cx, - js::HandleObject obj, js::HandleShape shape, unsigned attrs, unsigned mask, - JSPropertyOp getter, JSStrictPropertyOp setter); - - static inline bool changePropertyAttributes(JSContext *cx, js::HandleObject obj, - js::HandleShape shape, unsigned attrs); - - /* Remove the property named by id from this object. */ - bool removeProperty(js::ExclusiveContext *cx, jsid id); - - /* Clear the scope, making it empty. */ - static void clear(JSContext *cx, js::HandleObject obj); - static bool lookupGeneric(JSContext *cx, js::HandleObject obj, js::HandleId id, js::MutableHandleObject objp, js::MutableHandleShape propp); @@ -994,12 +616,8 @@ class JSObject : public js::ObjectImpl return lookupGeneric(cx, obj, id, objp, propp); } - static bool lookupElement(JSContext *cx, js::HandleObject obj, uint32_t index, - js::MutableHandleObject objp, js::MutableHandleShape propp) - { - js::LookupElementOp op = obj->getOps()->lookupElement; - return (op ? 
op : js::baseops::LookupElement)(cx, obj, index, objp, propp); - } + static inline bool lookupElement(JSContext *cx, js::HandleObject obj, uint32_t index, + js::MutableHandleObject objp, js::MutableHandleShape propp); static bool defineGeneric(js::ExclusiveContext *cx, js::HandleObject obj, js::HandleId id, js::HandleValue value, @@ -1019,29 +637,11 @@ class JSObject : public js::ObjectImpl JSStrictPropertyOp setter = JS_StrictPropertyStub, unsigned attrs = JSPROP_ENUMERATE); - static bool getGeneric(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, - js::HandleId id, js::MutableHandleValue vp) - { - MOZ_ASSERT(!!obj->getOps()->getGeneric == !!obj->getOps()->getProperty); - js::GenericIdOp op = obj->getOps()->getGeneric; - if (op) { - if (!op(cx, obj, receiver, id, vp)) - return false; - } else { - if (!js::baseops::GetProperty(cx, obj, receiver, id, vp)) - return false; - } - return true; - } + static inline bool getGeneric(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, + js::HandleId id, js::MutableHandleValue vp); - static bool getGenericNoGC(JSContext *cx, JSObject *obj, JSObject *receiver, - jsid id, js::Value *vp) - { - js::GenericIdOp op = obj->getOps()->getGeneric; - if (op) - return false; - return js::baseops::GetPropertyNoGC(cx, obj, receiver, id, vp); - } + static inline bool getGenericNoGC(JSContext *cx, JSObject *obj, JSObject *receiver, + jsid id, js::Value *vp); static bool getProperty(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, js::PropertyName *name, js::MutableHandleValue vp) @@ -1061,14 +661,8 @@ class JSObject : public js::ObjectImpl static inline bool getElementNoGC(JSContext *cx, JSObject *obj, JSObject *receiver, uint32_t index, js::Value *vp); - static bool setGeneric(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, - js::HandleId id, js::MutableHandleValue vp, bool strict) - { - if (obj->getOps()->setGeneric) - return nonNativeSetProperty(cx, obj, id, vp, strict); - return 
js::baseops::SetPropertyHelper( - cx, obj, receiver, id, js::baseops::Qualified, vp, strict); - } + static inline bool setGeneric(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, + js::HandleId id, js::MutableHandleValue vp, bool strict); static bool setProperty(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, js::PropertyName *name, @@ -1078,25 +672,16 @@ class JSObject : public js::ObjectImpl return setGeneric(cx, obj, receiver, id, vp, strict); } - static bool setElement(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, - uint32_t index, js::MutableHandleValue vp, bool strict) - { - if (obj->getOps()->setElement) - return nonNativeSetElement(cx, obj, index, vp, strict); - return js::baseops::SetElementHelper(cx, obj, receiver, index, vp, strict); - } + static inline bool setElement(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, + uint32_t index, js::MutableHandleValue vp, bool strict); static bool nonNativeSetProperty(JSContext *cx, js::HandleObject obj, js::HandleId id, js::MutableHandleValue vp, bool strict); static bool nonNativeSetElement(JSContext *cx, js::HandleObject obj, uint32_t index, js::MutableHandleValue vp, bool strict); - static bool getGenericAttributes(JSContext *cx, js::HandleObject obj, - js::HandleId id, unsigned *attrsp) - { - js::GenericAttributesOp op = obj->getOps()->getGenericAttributes; - return (op ? 
op : js::baseops::GetAttributes)(cx, obj, id, attrsp); - } + static inline bool getGenericAttributes(JSContext *cx, js::HandleObject obj, + js::HandleId id, unsigned *attrsp); static inline bool setGenericAttributes(JSContext *cx, js::HandleObject obj, js::HandleId id, unsigned *attrsp); @@ -1181,25 +766,19 @@ class JSObject : public js::ObjectImpl return *static_cast(this); } - static inline js::ThingRootKind rootKind() { return js::THING_ROOT_OBJECT; } - #ifdef DEBUG void dump(); #endif - private: - static void staticAsserts() { - static_assert(sizeof(JSObject) == sizeof(js::shadow::Object), - "shadow interface must match actual interface"); - static_assert(sizeof(JSObject) == sizeof(js::ObjectImpl), - "JSObject itself must not have any fields"); - static_assert(sizeof(JSObject) % sizeof(js::Value) == 0, - "fixed slots after an object must be aligned"); - static_assert(js::shadow::Object::MAX_FIXED_SLOTS == MAX_FIXED_SLOTS, - "We shouldn't be confused about our actual maximum " - "number of fixed slots"); - } + /* JIT Accessors */ + static size_t offsetOfShape() { return offsetof(JSObject, shape_); } + js::HeapPtrShape *addressOfShape() { return &shape_; } + + static size_t offsetOfType() { return offsetof(JSObject, type_); } + js::HeapPtrTypeObject *addressOfType() { return &type_; } + + private: JSObject() MOZ_DELETE; JSObject(const JSObject &other) MOZ_DELETE; void operator=(const JSObject &other) MOZ_DELETE; @@ -1214,6 +793,15 @@ js::RootedBase::as() const return Handle::fromMarkedLocation(reinterpret_cast(self.address())); } +template +MOZ_ALWAYS_INLINE JS::Handle +js::HandleBase::as() const +{ + const JS::Handle &self = *static_cast*>(this); + MOZ_ASSERT(self->is()); + return Handle::fromMarkedLocation(reinterpret_cast(self.address())); +} + /* * The only sensible way to compare JSObject with == is by identity. We use * const& instead of * as a syntactic way to assert non-null. 
This leads to an @@ -1237,6 +825,59 @@ struct JSObject_Slots8 : JSObject { js::Value fslots[8]; }; struct JSObject_Slots12 : JSObject { js::Value fslots[12]; }; struct JSObject_Slots16 : JSObject { js::Value fslots[16]; }; +/* static */ MOZ_ALWAYS_INLINE void +JSObject::readBarrier(JSObject *obj) +{ + if (!isNullLike(obj) && obj->isTenured()) + obj->asTenured().readBarrier(&obj->asTenured()); +} + +/* static */ MOZ_ALWAYS_INLINE void +JSObject::writeBarrierPre(JSObject *obj) +{ + if (!isNullLike(obj) && obj->isTenured()) + obj->asTenured().writeBarrierPre(&obj->asTenured()); +} + +/* static */ MOZ_ALWAYS_INLINE void +JSObject::writeBarrierPost(JSObject *obj, void *cellp) +{ + MOZ_ASSERT(cellp); +#ifdef JSGC_GENERATIONAL + if (IsNullTaggedPointer(obj)) + return; + MOZ_ASSERT(obj == *static_cast(cellp)); + js::gc::StoreBuffer *storeBuffer = obj->storeBuffer(); + if (storeBuffer) + storeBuffer->putCellFromAnyThread(static_cast(cellp)); +#endif +} + +/* static */ MOZ_ALWAYS_INLINE void +JSObject::writeBarrierPostRelocate(JSObject *obj, void *cellp) +{ + MOZ_ASSERT(cellp); + MOZ_ASSERT(obj); + MOZ_ASSERT(obj == *static_cast(cellp)); +#ifdef JSGC_GENERATIONAL + js::gc::StoreBuffer *storeBuffer = obj->storeBuffer(); + if (storeBuffer) + storeBuffer->putRelocatableCellFromAnyThread(static_cast(cellp)); +#endif +} + +/* static */ MOZ_ALWAYS_INLINE void +JSObject::writeBarrierPostRemove(JSObject *obj, void *cellp) +{ + MOZ_ASSERT(cellp); + MOZ_ASSERT(obj); + MOZ_ASSERT(obj == *static_cast(cellp)); +#ifdef JSGC_GENERATIONAL + obj->shadowRuntimeFromAnyThread()->gcStoreBufferPtr()->removeRelocatableCellFromAnyThread( + static_cast(cellp)); +#endif +} + namespace js { inline bool @@ -1402,12 +1043,12 @@ GetInitialHeap(NewObjectKind newKind, const Class *clasp) // Specialized call for constructing |this| with a known function callee, // and a known prototype. 
-extern JSObject * +extern NativeObject * CreateThisForFunctionWithProto(JSContext *cx, js::HandleObject callee, JSObject *proto, NewObjectKind newKind = GenericObject); // Specialized call for constructing |this| with a known function callee. -extern JSObject * +extern NativeObject * CreateThisForFunction(JSContext *cx, js::HandleObject callee, NewObjectKind newKind); // Generic call for constructing |this|. @@ -1417,18 +1058,18 @@ CreateThis(JSContext *cx, const js::Class *clasp, js::HandleObject callee); extern JSObject * CloneObject(JSContext *cx, HandleObject obj, Handle proto, HandleObject parent); -extern JSObject * -DeepCloneObjectLiteral(JSContext *cx, HandleObject obj, NewObjectKind newKind = GenericObject); +extern NativeObject * +DeepCloneObjectLiteral(JSContext *cx, HandleNativeObject obj, NewObjectKind newKind = GenericObject); /* * Return successfully added or changed shape or nullptr on error. */ extern bool -DefineNativeProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, HandleValue value, +DefineNativeProperty(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, HandleValue value, PropertyOp getter, StrictPropertyOp setter, unsigned attrs); extern bool -LookupNativeProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, +LookupNativeProperty(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, js::MutableHandleObject objp, js::MutableHandleShape propp); /* @@ -1496,17 +1137,17 @@ js_FindVariableScope(JSContext *cx, JSFunction **funp); namespace js { bool -NativeGet(JSContext *cx, js::Handle obj, js::Handle pobj, - js::Handle shape, js::MutableHandle vp); +NativeGet(JSContext *cx, HandleObject obj, HandleNativeObject pobj, + HandleShape shape, MutableHandle vp); -template +template bool -NativeSet(typename js::ExecutionModeTraits::ContextType cx, - js::Handle obj, js::Handle receiver, - js::Handle shape, bool strict, js::MutableHandleValue vp); +NativeSet(typename ExecutionModeTraits::ContextType cx, + HandleNativeObject 
obj, HandleObject receiver, + HandleShape shape, bool strict, MutableHandleValue vp); bool -LookupPropertyPure(JSObject *obj, jsid id, JSObject **objp, Shape **propp); +LookupPropertyPure(JSObject *obj, jsid id, NativeObject **objp, Shape **propp); bool GetPropertyPure(ThreadSafeContext *cx, JSObject *obj, jsid id, Value *vp); @@ -1532,10 +1173,10 @@ NewPropertyDescriptorObject(JSContext *cx, Handle desc, Muta * store the property value in *vp. */ extern bool -HasDataProperty(JSContext *cx, JSObject *obj, jsid id, Value *vp); +HasDataProperty(JSContext *cx, NativeObject *obj, jsid id, Value *vp); inline bool -HasDataProperty(JSContext *cx, JSObject *obj, PropertyName *name, Value *vp) +HasDataProperty(JSContext *cx, NativeObject *obj, PropertyName *name, Value *vp) { return HasDataProperty(cx, obj, NameToId(name), vp); } @@ -1587,7 +1228,7 @@ ToObjectFromStack(JSContext *cx, HandleValue vp) template bool -XDRObjectLiteral(XDRState *xdr, MutableHandleObject obj); +XDRObjectLiteral(XDRState *xdr, MutableHandleNativeObject obj); extern JSObject * CloneObjectLiteral(JSContext *cx, HandleObject parent, HandleObject srcObj); diff --git a/js/src/jsobjinlines.h b/js/src/jsobjinlines.h index eb5bc4982ed..074c6796f0c 100644 --- a/js/src/jsobjinlines.h +++ b/js/src/jsobjinlines.h @@ -25,7 +25,6 @@ #include "jsinferinlines.h" #include "gc/ForkJoinNursery-inl.h" -#include "vm/ObjectImpl-inl.h" /* static */ inline bool JSObject::setGenericAttributes(JSContext *cx, js::HandleObject obj, @@ -33,15 +32,9 @@ JSObject::setGenericAttributes(JSContext *cx, js::HandleObject obj, { js::types::MarkTypePropertyNonData(cx, obj, id); js::GenericAttributesOp op = obj->getOps()->setGenericAttributes; - return (op ? 
op : js::baseops::SetAttributes)(cx, obj, id, attrsp); -} - -/* static */ inline bool -JSObject::changePropertyAttributes(JSContext *cx, js::HandleObject obj, - js::HandleShape shape, unsigned attrs) -{ - return !!changeProperty(cx, obj, shape, attrs, 0, - shape->getter(), shape->setter()); + if (op) + return op(cx, obj, id, attrsp); + return js::baseops::SetAttributes(cx, obj.as(), id, attrsp); } /* static */ inline bool @@ -50,7 +43,9 @@ JSObject::deleteGeneric(JSContext *cx, js::HandleObject obj, js::HandleId id, { js::types::MarkTypePropertyNonData(cx, obj, id); js::DeleteGenericOp op = obj->getOps()->deleteGeneric; - return (op ? op : js::baseops::DeleteGeneric)(cx, obj, id, succeeded); + if (op) + return op(cx, obj, id, succeeded); + return js::baseops::DeleteGeneric(cx, obj.as(), id, succeeded); } /* static */ inline bool @@ -96,281 +91,6 @@ JSObject::finalize(js::FreeOp *fop) finish(fop); } -inline void -JSObject::removeLastProperty(js::ExclusiveContext *cx) -{ - MOZ_ASSERT(canRemoveLastProperty()); - JS::RootedObject self(cx, this); - js::RootedShape prev(cx, lastProperty()->previous()); - JS_ALWAYS_TRUE(setLastProperty(cx, self, prev)); -} - -inline bool -JSObject::canRemoveLastProperty() -{ - /* - * Check that the information about the object stored in the last - * property's base shape is consistent with that stored in the previous - * shape. If not consistent, then the last property cannot be removed as it - * will induce a change in the object itself, and the object must be - * converted to dictionary mode instead. 
See BaseShape comment in jsscope.h - */ - MOZ_ASSERT(!inDictionaryMode()); - js::Shape *previous = lastProperty()->previous().get(); - return previous->getObjectParent() == lastProperty()->getObjectParent() - && previous->getObjectMetadata() == lastProperty()->getObjectMetadata() - && previous->getObjectFlags() == lastProperty()->getObjectFlags(); -} - -inline void -JSObject::setShouldConvertDoubleElements() -{ - MOZ_ASSERT(is() && !hasEmptyElements()); - getElementsHeader()->setShouldConvertDoubleElements(); -} - -inline void -JSObject::clearShouldConvertDoubleElements() -{ - MOZ_ASSERT(is() && !hasEmptyElements()); - getElementsHeader()->clearShouldConvertDoubleElements(); -} - -inline bool -JSObject::setDenseElementIfHasType(uint32_t index, const js::Value &val) -{ - if (!js::types::HasTypePropertyId(this, JSID_VOID, val)) - return false; - setDenseElementMaybeConvertDouble(index, val); - return true; -} - -inline void -JSObject::setDenseElementWithType(js::ExclusiveContext *cx, uint32_t index, - const js::Value &val) -{ - // Avoid a slow AddTypePropertyId call if the type is the same as the type - // of the previous element. 
- js::types::Type thisType = js::types::GetValueType(val); - if (index == 0 || js::types::GetValueType(elements[index - 1]) != thisType) - js::types::AddTypePropertyId(cx, this, JSID_VOID, thisType); - setDenseElementMaybeConvertDouble(index, val); -} - -inline void -JSObject::initDenseElementWithType(js::ExclusiveContext *cx, uint32_t index, - const js::Value &val) -{ - MOZ_ASSERT(!shouldConvertDoubleElements()); - js::types::AddTypePropertyId(cx, this, JSID_VOID, val); - initDenseElement(index, val); -} - -inline void -JSObject::setDenseElementHole(js::ExclusiveContext *cx, uint32_t index) -{ - js::types::MarkTypeObjectFlags(cx, this, js::types::OBJECT_FLAG_NON_PACKED); - setDenseElement(index, js::MagicValue(JS_ELEMENTS_HOLE)); -} - -/* static */ inline void -JSObject::removeDenseElementForSparseIndex(js::ExclusiveContext *cx, - js::HandleObject obj, uint32_t index) -{ - js::types::MarkTypeObjectFlags(cx, obj, - js::types::OBJECT_FLAG_NON_PACKED | - js::types::OBJECT_FLAG_SPARSE_INDEXES); - if (obj->containsDenseElement(index)) - obj->setDenseElement(index, js::MagicValue(JS_ELEMENTS_HOLE)); -} - -inline bool -JSObject::writeToIndexWouldMarkNotPacked(uint32_t index) -{ - return getElementsHeader()->initializedLength < index; -} - -inline void -JSObject::markDenseElementsNotPacked(js::ExclusiveContext *cx) -{ - MOZ_ASSERT(isNative()); - MarkTypeObjectFlags(cx, this, js::types::OBJECT_FLAG_NON_PACKED); -} - -inline void -JSObject::ensureDenseInitializedLengthNoPackedCheck(js::ThreadSafeContext *cx, uint32_t index, - uint32_t extra) -{ - MOZ_ASSERT(cx->isThreadLocal(this)); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - - /* - * Ensure that the array's contents have been initialized up to index, and - * mark the elements through 'index + extra' as initialized in preparation - * for a write. 
- */ - MOZ_ASSERT(index + extra <= getDenseCapacity()); - uint32_t &initlen = getElementsHeader()->initializedLength; - - if (initlen < index + extra) { - size_t offset = initlen; - for (js::HeapSlot *sp = elements + initlen; - sp != elements + (index + extra); - sp++, offset++) - { - sp->init(this, js::HeapSlot::Element, offset, js::MagicValue(JS_ELEMENTS_HOLE)); - } - initlen = index + extra; - } -} - -inline void -JSObject::ensureDenseInitializedLength(js::ExclusiveContext *cx, uint32_t index, uint32_t extra) -{ - if (writeToIndexWouldMarkNotPacked(index)) - markDenseElementsNotPacked(cx); - ensureDenseInitializedLengthNoPackedCheck(cx, index, extra); -} - -inline void -JSObject::ensureDenseInitializedLengthPreservePackedFlag(js::ThreadSafeContext *cx, - uint32_t index, uint32_t extra) -{ - MOZ_ASSERT(!writeToIndexWouldMarkNotPacked(index)); - ensureDenseInitializedLengthNoPackedCheck(cx, index, extra); -} - -JSObject::EnsureDenseResult -JSObject::extendDenseElements(js::ThreadSafeContext *cx, - uint32_t requiredCapacity, uint32_t extra) -{ - MOZ_ASSERT(cx->isThreadLocal(this)); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); - - /* - * Don't grow elements for non-extensible objects or watched objects. Dense - * elements can be added/written with no extensible or watchpoint checks as - * long as there is capacity for them. - */ - if (!nonProxyIsExtensible() || watched()) { - MOZ_ASSERT(getDenseCapacity() == 0); - return ED_SPARSE; - } - - /* - * Don't grow elements for objects which already have sparse indexes. - * This avoids needing to count non-hole elements in willBeSparseElements - * every time a new index is added. - */ - if (isIndexed()) - return ED_SPARSE; - - /* - * We use the extra argument also as a hint about number of non-hole - * elements to be inserted. 
- */ - if (requiredCapacity > MIN_SPARSE_INDEX && - willBeSparseElements(requiredCapacity, extra)) { - return ED_SPARSE; - } - - if (!growElements(cx, requiredCapacity)) - return ED_FAILED; - - return ED_OK; -} - -inline JSObject::EnsureDenseResult -JSObject::ensureDenseElementsNoPackedCheck(js::ThreadSafeContext *cx, uint32_t index, uint32_t extra) -{ - MOZ_ASSERT(isNative()); - - if (!maybeCopyElementsForWrite(cx)) - return ED_FAILED; - - uint32_t currentCapacity = getDenseCapacity(); - - uint32_t requiredCapacity; - if (extra == 1) { - /* Optimize for the common case. */ - if (index < currentCapacity) { - ensureDenseInitializedLengthNoPackedCheck(cx, index, 1); - return ED_OK; - } - requiredCapacity = index + 1; - if (requiredCapacity == 0) { - /* Overflow. */ - return ED_SPARSE; - } - } else { - requiredCapacity = index + extra; - if (requiredCapacity < index) { - /* Overflow. */ - return ED_SPARSE; - } - if (requiredCapacity <= currentCapacity) { - ensureDenseInitializedLengthNoPackedCheck(cx, index, extra); - return ED_OK; - } - } - - EnsureDenseResult edr = extendDenseElements(cx, requiredCapacity, extra); - if (edr != ED_OK) - return edr; - - ensureDenseInitializedLengthNoPackedCheck(cx, index, extra); - return ED_OK; -} - -inline JSObject::EnsureDenseResult -JSObject::ensureDenseElements(js::ExclusiveContext *cx, uint32_t index, uint32_t extra) -{ - if (writeToIndexWouldMarkNotPacked(index)) - markDenseElementsNotPacked(cx); - return ensureDenseElementsNoPackedCheck(cx, index, extra); -} - -inline JSObject::EnsureDenseResult -JSObject::ensureDenseElementsPreservePackedFlag(js::ThreadSafeContext *cx, uint32_t index, - uint32_t extra) -{ - MOZ_ASSERT(!writeToIndexWouldMarkNotPacked(index)); - return ensureDenseElementsNoPackedCheck(cx, index, extra); -} - -inline js::Value -JSObject::getDenseOrTypedArrayElement(uint32_t idx) -{ - if (is()) - return as().getElement(idx); - if (is()) - return as().getElement(idx); - return getDenseElement(idx); -} - -inline 
void -JSObject::initDenseElementsUnbarriered(uint32_t dstStart, const js::Value *src, uint32_t count) { - /* - * For use by parallel threads, which since they cannot see nursery - * things do not require a barrier. - */ - MOZ_ASSERT(dstStart + count <= getDenseCapacity()); - MOZ_ASSERT(!denseElementsAreCopyOnWrite()); -#if defined(DEBUG) && defined(JSGC_GENERATIONAL) - /* - * This asserts a global invariant: parallel code does not - * observe objects inside the generational GC's nursery. - */ - MOZ_ASSERT(!js::gc::IsInsideGGCNursery(this)); - for (uint32_t index = 0; index < count; ++index) { - const JS::Value& value = src[index]; - if (value.isMarkable()) - MOZ_ASSERT(!js::gc::IsInsideGGCNursery(static_cast(value.toGCThing()))); - } -#endif - memcpy(&elements[dstStart], src, count * sizeof(js::HeapSlot)); -} - /* static */ inline bool JSObject::setSingletonType(js::ExclusiveContext *cx, js::HandleObject obj) { @@ -496,6 +216,19 @@ JSObject::setProto(JSContext *cx, JS::HandleObject obj, JS::HandleObject proto, return *succeeded; } +/* static */ inline bool +JSObject::isExtensible(js::ExclusiveContext *cx, js::HandleObject obj, bool *extensible) +{ + if (obj->is()) { + if (!cx->shouldBeJSContext()) + return false; + return js::Proxy::isExtensible(cx->asJSContext(), obj, extensible); + } + + *extensible = obj->nonProxyIsExtensible(); + return true; +} + inline bool JSObject::isQualifiedVarObj() { @@ -512,6 +245,19 @@ JSObject::isUnqualifiedVarObj() return lastProperty()->hasObjectFlag(js::BaseShape::UNQUALIFIED_VAROBJ); } +inline bool +ClassCanHaveFixedData(const js::Class *clasp) +{ + // Normally, the number of fixed slots given an object is the maximum + // permitted for its size class. For array buffers and non-shared typed + // arrays we only use enough to cover the class reserved slots, so that + // the remaining space in the object's allocation is available for the + // buffer's data. 
+ return clasp == &js::ArrayBufferObject::class_ + || clasp == &js::InlineOpaqueTypedObject::class_ + || js::IsTypedArrayClass(clasp); +} + /* static */ inline JSObject * JSObject::create(js::ExclusiveContext *cx, js::gc::AllocKind kind, js::gc::InitialHeap heap, js::HandleShape shape, js::HandleTypeObject type) @@ -525,7 +271,8 @@ JSObject::create(js::ExclusiveContext *cx, js::gc::AllocKind kind, js::gc::Initi MOZ_ASSERT_IF(type->clasp()->finalize, heap == js::gc::TenuredHeap); const js::Class *clasp = type->clasp(); - size_t nDynamicSlots = dynamicSlotsCount(shape->numFixedSlots(), shape->slotSpan(), clasp); + size_t nDynamicSlots = + js::NativeObject::dynamicSlotsCount(shape->numFixedSlots(), shape->slotSpan(), clasp); JSObject *obj = js::NewGCObject(cx, kind, nDynamicSlots, heap); if (!obj) @@ -534,157 +281,32 @@ JSObject::create(js::ExclusiveContext *cx, js::gc::AllocKind kind, js::gc::Initi obj->shape_.init(shape); obj->type_.init(type); // Note: slots are created and assigned internally by NewGCObject. - obj->elements = js::emptyObjectElements; + obj->fakeNativeElements() = js::emptyObjectElements; if (clasp->hasPrivate()) - obj->privateRef(shape->numFixedSlots()) = nullptr; + obj->fakeNativePrivateRef(shape->numFixedSlots()) = nullptr; size_t span = shape->slotSpan(); if (span) - obj->initializeSlotRange(0, span); + obj->fakeNativeInitializeSlotRange(0, span); // JSFunction's fixed slots expect POD-style initialization. 
if (type->clasp()->isJSFunction()) - memset(obj->fixedSlots(), 0, sizeof(js::HeapSlot) * GetGCKindSlots(kind)); + memset(obj->as().fixedSlots(), 0, sizeof(js::HeapSlot) * GetGCKindSlots(kind)); js::gc::TraceCreateObject(obj); return obj; } -/* static */ inline JSObject * -JSObject::copy(js::ExclusiveContext *cx, js::gc::AllocKind kind, js::gc::InitialHeap heap, - js::HandleObject templateObject) -{ - js::RootedShape shape(cx, templateObject->lastProperty()); - js::RootedTypeObject type(cx, templateObject->type()); - MOZ_ASSERT(!templateObject->denseElementsAreCopyOnWrite()); - - JSObject *obj = create(cx, kind, heap, shape, type); - if (!obj) - return nullptr; - - size_t span = shape->slotSpan(); - if (span) { - uint32_t numFixed = templateObject->numFixedSlots(); - const js::Value *fixed = &templateObject->getSlot(0); - MOZ_ASSERT(numFixed <= span); - obj->copySlotRange(0, fixed, numFixed); - - if (numFixed < span) { - uint32_t numSlots = span - numFixed; - const js::Value *slots = &templateObject->getSlot(numFixed); - obj->copySlotRange(numFixed, slots, numSlots); - } - } - - return obj; -} - -/* static */ inline JSObject * -JSObject::createArrayInternal(js::ExclusiveContext *cx, js::gc::AllocKind kind, js::gc::InitialHeap heap, - js::HandleShape shape, js::HandleTypeObject type) -{ - // Create a new array and initialize everything except for its elements. - MOZ_ASSERT(shape && type); - MOZ_ASSERT(type->clasp() == shape->getObjectClass()); - MOZ_ASSERT(type->clasp() == &js::ArrayObject::class_); - MOZ_ASSERT_IF(type->clasp()->finalize, heap == js::gc::TenuredHeap); - - // Arrays can use their fixed slots to store elements, so can't have shapes - // which allow named properties to be stored in the fixed slots. 
- MOZ_ASSERT(shape->numFixedSlots() == 0); - - size_t nDynamicSlots = dynamicSlotsCount(0, shape->slotSpan(), type->clasp()); - JSObject *obj = js::NewGCObject(cx, kind, nDynamicSlots, heap); - if (!obj) - return nullptr; - - obj->shape_.init(shape); - obj->type_.init(type); - - return obj; -} - -/* static */ inline js::ArrayObject * -JSObject::finishCreateArray(JSObject *obj, js::HandleShape shape) -{ - size_t span = shape->slotSpan(); - if (span) - obj->initializeSlotRange(0, span); - - js::gc::TraceCreateObject(obj); - - return &obj->as(); -} - -/* static */ inline js::ArrayObject * -JSObject::createArray(js::ExclusiveContext *cx, js::gc::AllocKind kind, js::gc::InitialHeap heap, - js::HandleShape shape, js::HandleTypeObject type, - uint32_t length) -{ - JSObject *obj = createArrayInternal(cx, kind, heap, shape, type); - if (!obj) - return nullptr; - - uint32_t capacity = js::gc::GetGCKindSlots(kind) - js::ObjectElements::VALUES_PER_HEADER; - - obj->setFixedElements(); - new (obj->getElementsHeader()) js::ObjectElements(capacity, length); - - return finishCreateArray(obj, shape); -} - -/* static */ inline js::ArrayObject * -JSObject::createArray(js::ExclusiveContext *cx, js::gc::InitialHeap heap, - js::HandleShape shape, js::HandleTypeObject type, - js::HeapSlot *elements) -{ - // Use the smallest allocation kind for the array, as it can't have any - // fixed slots (see the assert in createArrayInternal) and will not be using - // its fixed elements. 
- js::gc::AllocKind kind = js::gc::FINALIZE_OBJECT0_BACKGROUND; - - JSObject *obj = createArrayInternal(cx, kind, heap, shape, type); - if (!obj) - return nullptr; - - obj->elements = elements; - - return finishCreateArray(obj, shape); -} - -/* static */ inline js::ArrayObject * -JSObject::createCopyOnWriteArray(js::ExclusiveContext *cx, js::gc::InitialHeap heap, - js::HandleShape shape, - js::HandleObject sharedElementsOwner) -{ - MOZ_ASSERT(sharedElementsOwner->getElementsHeader()->isCopyOnWrite()); - MOZ_ASSERT(sharedElementsOwner->getElementsHeader()->ownerObject() == sharedElementsOwner); - - // Use the smallest allocation kind for the array, as it can't have any - // fixed slots (see the assert in createArrayInternal) and will not be using - // its fixed elements. - js::gc::AllocKind kind = js::gc::FINALIZE_OBJECT0_BACKGROUND; - - js::RootedTypeObject type(cx, sharedElementsOwner->type()); - JSObject *obj = createArrayInternal(cx, kind, heap, shape, type); - if (!obj) - return nullptr; - - obj->elements = sharedElementsOwner->getDenseElementsAllowCopyOnWrite(); - - return finishCreateArray(obj, shape); -} - inline void JSObject::finish(js::FreeOp *fop) { - if (hasDynamicSlots()) - fop->free_(slots); + if (fakeNativeHasDynamicSlots()) + fop->free_(fakeNativeSlots()); - if (hasDynamicElements()) { - js::ObjectElements *elements = getElementsHeader(); + if (fakeNativeHasDynamicElements()) { + js::ObjectElements *elements = fakeNativeGetElementsHeader(); if (elements->isCopyOnWrite()) { if (elements->ownerObject() == this) { // Don't free the elements until object finalization finishes, @@ -718,31 +340,6 @@ JSObject::hasProperty(JSContext *cx, js::HandleObject obj, return true; } -inline bool -JSObject::nativeSetSlotIfHasType(js::Shape *shape, const js::Value &value, bool overwriting) -{ - if (!js::types::HasTypePropertyId(this, shape->propid(), value)) - return false; - nativeSetSlot(shape->slot(), value); - - if (overwriting) - shape->setOverwritten(); - - 
return true; -} - -inline void -JSObject::nativeSetSlotWithType(js::ExclusiveContext *cx, js::Shape *shape, - const js::Value &value, bool overwriting) -{ - nativeSetSlot(shape->slot(), value); - - if (overwriting) - shape->setOverwritten(); - - js::types::AddTypePropertyId(cx, this, shape->propid(), value); -} - /* static */ inline bool JSObject::getElement(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, uint32_t index, js::MutableHandleValue vp) @@ -855,15 +452,14 @@ IsNativeFunction(const js::Value &v, JSNative native) * TODO: a per-thread shape-based cache would be faster and simpler. */ static MOZ_ALWAYS_INLINE bool -ClassMethodIsNative(JSContext *cx, JSObject *obj, const Class *clasp, jsid methodid, JSNative native) +ClassMethodIsNative(JSContext *cx, NativeObject *obj, const Class *clasp, jsid methodid, JSNative native) { - MOZ_ASSERT(!obj->is()); MOZ_ASSERT(obj->getClass() == clasp); Value v; if (!HasDataProperty(cx, obj, methodid, &v)) { JSObject *proto = obj->getProto(); - if (!proto || proto->getClass() != clasp || !HasDataProperty(cx, proto, methodid, &v)) + if (!proto || proto->getClass() != clasp || !HasDataProperty(cx, &proto->as(), methodid, &v)) return false; } @@ -882,7 +478,7 @@ HasObjectValueOf(JSObject *obj, JSContext *cx) jsid valueOf = NameToId(cx->names().valueOf); Value v; - while (!HasDataProperty(cx, obj, valueOf, &v)) { + while (!HasDataProperty(cx, &obj->as(), valueOf, &v)) { obj = obj->getProto(); if (!obj || obj->is() || !obj->isNative()) return false; @@ -903,8 +499,9 @@ ToPrimitive(JSContext *cx, MutableHandleValue vp) /* Optimize new String(...).valueOf(). 
*/ if (obj->is()) { jsid id = NameToId(cx->names().valueOf); - if (ClassMethodIsNative(cx, obj, &StringObject::class_, id, js_str_toString)) { - vp.setString(obj->as().unbox()); + StringObject *nobj = &obj->as(); + if (ClassMethodIsNative(cx, nobj, &StringObject::class_, id, js_str_toString)) { + vp.setString(nobj->unbox()); return true; } } @@ -912,8 +509,9 @@ ToPrimitive(JSContext *cx, MutableHandleValue vp) /* Optimize new Number(...).valueOf(). */ if (obj->is()) { jsid id = NameToId(cx->names().valueOf); - if (ClassMethodIsNative(cx, obj, &NumberObject::class_, id, js_num_valueOf)) { - vp.setNumber(obj->as().unbox()); + NumberObject *nobj = &obj->as(); + if (ClassMethodIsNative(cx, nobj, &NumberObject::class_, id, js_num_valueOf)) { + vp.setNumber(nobj->unbox()); return true; } } @@ -1090,31 +688,6 @@ NewBuiltinClassInstance(ExclusiveContext *cx, gc::AllocKind allocKind, NewObject bool NewObjectScriptedCall(JSContext *cx, MutableHandleObject obj); -/* Make an object with pregenerated shape from a NEWOBJECT bytecode. 
*/ -static inline JSObject * -CopyInitializerObject(JSContext *cx, HandleObject baseobj, NewObjectKind newKind = GenericObject) -{ - MOZ_ASSERT(baseobj->getClass() == &JSObject::class_); - MOZ_ASSERT(!baseobj->inDictionaryMode()); - - gc::AllocKind allocKind = gc::GetGCObjectFixedSlotsKind(baseobj->numFixedSlots()); - allocKind = gc::GetBackgroundAllocKind(allocKind); - MOZ_ASSERT_IF(baseobj->isTenured(), allocKind == baseobj->asTenured().getAllocKind()); - RootedObject obj(cx); - obj = NewBuiltinClassInstance(cx, &JSObject::class_, allocKind, newKind); - if (!obj) - return nullptr; - - RootedObject metadata(cx, obj->getMetadata()); - RootedShape lastProp(cx, baseobj->lastProperty()); - if (!JSObject::setLastProperty(cx, obj, lastProp)) - return nullptr; - if (metadata && !JSObject::setMetadata(cx, obj, metadata)) - return nullptr; - - return obj; -} - JSObject * NewObjectWithType(JSContext *cx, HandleTypeObject type, JSObject *parent, gc::AllocKind allocKind, NewObjectKind newKind = GenericObject); @@ -1225,7 +798,8 @@ NewObjectMetadata(ExclusiveContext *cxArg, JSObject **pmetadata) } inline bool -DefineNativeProperty(ExclusiveContext *cx, HandleObject obj, PropertyName *name, HandleValue value, +DefineNativeProperty(ExclusiveContext *cx, HandleNativeObject obj, + PropertyName *name, HandleValue value, PropertyOp getter, StrictPropertyOp setter, unsigned attrs) { Rooted id(cx, NameToId(name)); @@ -1235,7 +809,7 @@ DefineNativeProperty(ExclusiveContext *cx, HandleObject obj, PropertyName *name, namespace baseops { inline bool -LookupProperty(ExclusiveContext *cx, HandleObject obj, PropertyName *name, +LookupProperty(ExclusiveContext *cx, HandleNativeObject obj, PropertyName *name, MutableHandleObject objp, MutableHandleShape propp) { Rooted id(cx, NameToId(name)); @@ -1243,7 +817,7 @@ LookupProperty(ExclusiveContext *cx, HandleObject obj, PropertyName *name, } inline bool -DefineProperty(ExclusiveContext *cx, HandleObject obj, PropertyName *name, HandleValue value, 
+DefineProperty(ExclusiveContext *cx, HandleNativeObject obj, PropertyName *name, HandleValue value, JSPropertyOp getter, JSStrictPropertyOp setter, unsigned attrs) { Rooted id(cx, NameToId(name)); @@ -1254,12 +828,12 @@ DefineProperty(ExclusiveContext *cx, HandleObject obj, PropertyName *name, Handl } /* namespace js */ -extern JSObject * +extern js::NativeObject * js_InitClass(JSContext *cx, js::HandleObject obj, JSObject *parent_proto, const js::Class *clasp, JSNative constructor, unsigned nargs, const JSPropertySpec *ps, const JSFunctionSpec *fs, const JSPropertySpec *static_ps, const JSFunctionSpec *static_fs, - JSObject **ctorp = nullptr, + js::NativeObject **ctorp = nullptr, js::gc::AllocKind ctorKind = JSFunction::FinalizeKind); #endif /* jsobjinlines_h */ diff --git a/js/src/json.cpp b/js/src/json.cpp index 7a7a5fed12b..3e5f1cff8bd 100644 --- a/js/src/json.cpp +++ b/js/src/json.cpp @@ -24,7 +24,8 @@ #include "jsatominlines.h" #include "jsboolinlines.h" -#include "jsobjinlines.h" + +#include "vm/ObjectImpl-inl.h" using namespace js; using namespace js::gc; @@ -565,7 +566,7 @@ js_Stringify(JSContext *cx, MutableHandleValue vp, JSObject *replacer_, Value sp uint32_t len; JS_ALWAYS_TRUE(GetLengthProperty(cx, replacer, &len)); if (replacer->is() && !replacer->isIndexed()) - len = Min(len, replacer->getDenseInitializedLength()); + len = Min(len, replacer->as().getDenseInitializedLength()); // Cap the initial size to a moderately small value. This avoids // ridiculous over-allocation if an array with bogusly-huge length @@ -662,7 +663,7 @@ js_Stringify(JSContext *cx, MutableHandleValue vp, JSObject *replacer_, Value sp } /* Step 9. 
*/ - RootedObject wrapper(cx, NewBuiltinClassInstance(cx, &JSObject::class_)); + RootedNativeObject wrapper(cx, NewNativeBuiltinClassInstance(cx, &JSObject::class_)); if (!wrapper) return false; diff --git a/js/src/jsonparser.cpp b/js/src/jsonparser.cpp index 2a65b511114..125960e7b36 100644 --- a/js/src/jsonparser.cpp +++ b/js/src/jsonparser.cpp @@ -18,7 +18,7 @@ #include "vm/StringBuffer.h" -#include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" using namespace js; @@ -596,7 +596,7 @@ JSONParserBase::createFinishedObject(PropertyVector &properties) * shape in manually. */ gc::AllocKind allocKind = gc::GetGCObjectKind(properties.length()); - RootedObject obj(cx, NewBuiltinClassInstance(cx, &JSObject::class_, allocKind)); + RootedNativeObject obj(cx, NewNativeBuiltinClassInstance(cx, &JSObject::class_, allocKind)); if (!obj) return nullptr; @@ -643,7 +643,7 @@ JSONParserBase::finishArray(MutableHandleValue vp, ElementVector &elements) { MOZ_ASSERT(&elements == &stack.back().elements()); - JSObject *obj = NewDenseCopiedArray(cx, elements.length(), elements.begin()); + ArrayObject *obj = NewDenseCopiedArray(cx, elements.length(), elements.begin()); if (!obj) return false; diff --git a/js/src/jsscript.cpp b/js/src/jsscript.cpp index 82c773ae22c..c82c9e8d603 100644 --- a/js/src/jsscript.cpp +++ b/js/src/jsscript.cpp @@ -423,9 +423,9 @@ js::XDRScriptConst(XDRState *xdr, MutableHandleValue vp) vp.set(NullValue()); break; case SCRIPT_OBJECT: { - RootedObject obj(cx); + RootedNativeObject obj(cx); if (mode == XDR_ENCODE) - obj = &vp.toObject(); + obj = &vp.toObject().as(); if (!XDRObjectLiteral(xdr, &obj)) return false; @@ -533,7 +533,7 @@ static inline uint32_t FindScopeObjectIndex(JSScript *script, NestedScopeObject &scope) { ObjectArray *objects = script->objects(); - HeapPtrObject *vector = objects->vector; + HeapPtrNativeObject *vector = objects->vector; unsigned length = objects->length; for (unsigned i = 0; i < length; ++i) { if (vector[i] == &scope) @@ -892,7 
+892,7 @@ js::XDRScript(XDRState *xdr, HandleObject enclosingScope, HandleScript enc * after the enclosing block has been XDR'd. */ for (i = 0; i != nobjects; ++i) { - HeapPtrObject *objp = &script->objects()->vector[i]; + HeapPtrNativeObject *objp = &script->objects()->vector[i]; XDRClassKind classk; if (mode == XDR_ENCODE) { @@ -988,7 +988,9 @@ js::XDRScript(XDRState *xdr, HandleObject enclosingScope, HandleScript enc } // Code nested function and script. - RootedObject tmp(cx, *objp); + RootedFunction tmp(cx); + if (mode == XDR_ENCODE) + tmp = &(*objp)->as(); if (!XDRInterpretedFunction(xdr, funEnclosingScope, script, &tmp)) return false; *objp = tmp; @@ -997,7 +999,7 @@ js::XDRScript(XDRState *xdr, HandleObject enclosingScope, HandleScript enc case CK_JSObject: { /* Code object literal. */ - RootedObject tmp(cx, *objp); + RootedNativeObject tmp(cx, *objp); if (!XDRObjectLiteral(xdr, &tmp)) return false; *objp = tmp; @@ -1012,8 +1014,13 @@ js::XDRScript(XDRState *xdr, HandleObject enclosingScope, HandleScript enc } for (i = 0; i != nregexps; ++i) { - if (!XDRScriptRegExpObject(xdr, &script->regexps()->vector[i])) + Rooted regexp(cx); + if (mode == XDR_ENCODE) + regexp = &script->regexps()->vector[i]->as(); + if (!XDRScriptRegExpObject(xdr, ®exp)) return false; + if (mode == XDR_DECODE) + script->regexps()->vector[i] = regexp; } if (ntrynotes != 0) { @@ -1117,7 +1124,7 @@ js::XDRLazyScript(XDRState *xdr, HandleObject enclosingScope, HandleScript // Code inner functions. 
{ - RootedObject func(cx); + RootedFunction func(cx); HeapPtrFunction *innerFunctions = lazy->innerFunctions(); size_t numInnerFunctions = lazy->numInnerFunctions(); for (size_t i = 0; i < numInnerFunctions; i++) { @@ -1128,7 +1135,7 @@ js::XDRLazyScript(XDRState *xdr, HandleObject enclosingScope, HandleScript return false; if (mode == XDR_DECODE) - innerFunctions[i] = &func->as(); + innerFunctions[i] = func; } } @@ -2330,9 +2337,9 @@ ScriptDataSize(uint32_t nbindings, uint32_t nconsts, uint32_t nobjects, uint32_t if (nconsts != 0) size += sizeof(ConstArray) + nconsts * sizeof(Value); if (nobjects != 0) - size += sizeof(ObjectArray) + nobjects * sizeof(JSObject *); + size += sizeof(ObjectArray) + nobjects * sizeof(NativeObject *); if (nregexps != 0) - size += sizeof(ObjectArray) + nregexps * sizeof(JSObject *); + size += sizeof(ObjectArray) + nregexps * sizeof(NativeObject *); if (ntrynotes != 0) size += sizeof(TryNoteArray) + ntrynotes * sizeof(JSTryNote); if (nblockscopes != 0) @@ -2456,13 +2463,13 @@ JSScript::partiallyInit(ExclusiveContext *cx, HandleScript script, uint32_t ncon if (nobjects != 0) { script->objects()->length = nobjects; - script->objects()->vector = (HeapPtrObject *)cursor; + script->objects()->vector = (HeapPtrNativeObject *)cursor; cursor += nobjects * sizeof(script->objects()->vector[0]); } if (nregexps != 0) { script->regexps()->length = nregexps; - script->regexps()->vector = (HeapPtrObject *)cursor; + script->regexps()->vector = (HeapPtrNativeObject *)cursor; cursor += nregexps * sizeof(script->regexps()->vector[0]); } @@ -2943,7 +2950,7 @@ js::CloneScript(JSContext *cx, HandleObject enclosingScope, HandleFunction fun, AutoObjectVector objects(cx); if (nobjects != 0) { - HeapPtrObject *vector = src->objects()->vector; + HeapPtrNativeObject *vector = src->objects()->vector; for (unsigned i = 0; i < nobjects; i++) { RootedObject obj(cx, vector[i]); RootedObject clone(cx); @@ -2998,7 +3005,7 @@ js::CloneScript(JSContext *cx, HandleObject 
enclosingScope, HandleFunction fun, AutoObjectVector regexps(cx); for (unsigned i = 0; i < nregexps; i++) { - HeapPtrObject *vector = src->regexps()->vector; + HeapPtrNativeObject *vector = src->regexps()->vector; for (unsigned i = 0; i < nregexps; i++) { JSObject *clone = CloneScriptRegExpObject(cx, vector[i]->as()); if (!clone || !regexps.append(clone)) @@ -3093,16 +3100,16 @@ js::CloneScript(JSContext *cx, HandleObject enclosingScope, HandleFunction fun, MOZ_ASSERT_IF(vector[i].isMarkable(), vector[i].toString()->isAtom()); } if (nobjects != 0) { - HeapPtrObject *vector = Rebase(dst, src, src->objects()->vector); + HeapPtrNativeObject *vector = Rebase(dst, src, src->objects()->vector); dst->objects()->vector = vector; for (unsigned i = 0; i < nobjects; ++i) - vector[i].init(objects[i]); + vector[i].init(&objects[i]->as()); } if (nregexps != 0) { - HeapPtrObject *vector = Rebase(dst, src, src->regexps()->vector); + HeapPtrNativeObject *vector = Rebase(dst, src, src->regexps()->vector); dst->regexps()->vector = vector; for (unsigned i = 0; i < nregexps; ++i) - vector[i].init(regexps[i]); + vector[i].init(®exps[i]->as()); } if (ntrynotes != 0) dst->trynotes()->vector = Rebase(dst, src, src->trynotes()->vector); diff --git a/js/src/jsscript.h b/js/src/jsscript.h index 0a9e6bb44d1..0b0c6e8141e 100644 --- a/js/src/jsscript.h +++ b/js/src/jsscript.h @@ -15,7 +15,6 @@ #include "jsatom.h" #include "jslock.h" -#include "jsobj.h" #include "jsopcode.h" #include "jstypes.h" @@ -23,6 +22,7 @@ #include "gc/Rooting.h" #include "jit/IonCode.h" #include "js/UbiNode.h" +#include "vm/ObjectImpl.h" #include "vm/Shape.h" namespace JS { @@ -113,8 +113,8 @@ struct ConstArray { }; struct ObjectArray { - js::HeapPtrObject *vector; // Array of indexed objects. - uint32_t length; // Count of indexed objects. + js::HeapPtrNativeObject *vector; // Array of indexed objects. + uint32_t length; // Count of indexed objects. 
}; struct TryNoteArray { @@ -651,7 +651,7 @@ struct CompressedSourceHasher typedef HashSet CompressedSourceSet; -class ScriptSourceObject : public JSObject +class ScriptSourceObject : public NativeObject { public: static const Class class_; @@ -1545,7 +1545,7 @@ class JSScript : public js::gc::TenuredCell return getAtom(GET_UINT32_INDEX(pc))->asPropertyName(); } - JSObject *getObject(size_t index) { + js::NativeObject *getObject(size_t index) { js::ObjectArray *arr = objects(); MOZ_ASSERT(index < arr->length); return arr->vector[index]; @@ -1556,7 +1556,7 @@ class JSScript : public js::gc::TenuredCell return savedCallerFun() ? 1 : 0; } - JSObject *getObject(jsbytecode *pc) { + js::NativeObject *getObject(jsbytecode *pc) { MOZ_ASSERT(containsPC(pc) && containsPC(pc + sizeof(uint32_t))); return getObject(GET_UINT32_INDEX(pc)); } diff --git a/js/src/jsstr.cpp b/js/src/jsstr.cpp index 14385cbee3c..8f060f8f87f 100644 --- a/js/src/jsstr.cpp +++ b/js/src/jsstr.cpp @@ -452,9 +452,10 @@ ThisToStringForStringProto(JSContext *cx, CallReceiver call) if (call.thisv().isObject()) { RootedObject obj(cx, &call.thisv().toObject()); if (obj->is()) { + StringObject *nobj = &obj->as(); Rooted id(cx, NameToId(cx->names().toString)); - if (ClassMethodIsNative(cx, obj, &StringObject::class_, id, js_str_toString)) { - JSString *str = obj->as().unbox(); + if (ClassMethodIsNative(cx, nobj, &StringObject::class_, id, js_str_toString)) { + JSString *str = nobj->unbox(); call.setThis(StringValue(str)); return str; } @@ -2257,9 +2258,12 @@ class MOZ_STACK_CLASS StringRegExpGuard // Use a fast path for same-global RegExp objects with writable // lastIndex. 
- if (obj_->is() && obj_->nativeLookup(cx, cx->names().lastIndex)->writable()) { - obj_->as().zeroLastIndex(); - return true; + if (obj_->is()) { + RegExpObject *nobj = &obj_->as(); + if (nobj->lookup(cx, cx->names().lastIndex)->writable()) { + nobj->zeroLastIndex(); + return true; + } } // Handle everything else generically (including throwing if .lastIndex is non-writable). @@ -2591,7 +2595,7 @@ struct ReplaceData RootedString str; /* 'this' parameter object as a string */ StringRegExpGuard g; /* regexp parameter object and private data */ RootedObject lambda; /* replacement function object or null */ - RootedObject elembase; /* object for function(a){return b[a]} replace */ + RootedNativeObject elembase; /* object for function(a){return b[a]} replace */ RootedLinearString repstr; /* replacement string */ uint32_t dollarIndex; /* index of first $ in repstr, or UINT32_MAX */ int leftIndex; /* left context index in str->chars */ @@ -3501,7 +3505,7 @@ str_replace_flat_lambda(JSContext *cx, CallArgs outerArgs, ReplaceData &rdata, c * code patterns generated by such packers here. 
*/ static bool -LambdaIsGetElem(JSContext *cx, JSObject &lambda, MutableHandleObject pobj) +LambdaIsGetElem(JSContext *cx, JSObject &lambda, MutableHandleNativeObject pobj) { if (!lambda.is()) return true; @@ -3553,7 +3557,7 @@ LambdaIsGetElem(JSContext *cx, JSObject &lambda, MutableHandleObject pobj) if (!clasp->isNative() || clasp->ops.lookupProperty || clasp->ops.getProperty) return true; - pobj.set(&bobj); + pobj.set(&bobj.as()); return true; } @@ -4303,7 +4307,7 @@ static const JSFunctionSpec string_static_methods[] = { /* static */ Shape * StringObject::assignInitialShape(ExclusiveContext *cx, Handle obj) { - MOZ_ASSERT(obj->nativeEmpty()); + MOZ_ASSERT(obj->empty()); return obj->addDataProperty(cx, cx->names().length, LENGTH_SLOT, JSPROP_PERMANENT | JSPROP_READONLY); diff --git a/js/src/jswatchpoint.cpp b/js/src/jswatchpoint.cpp index 40a589e6dc7..2638563f943 100644 --- a/js/src/jswatchpoint.cpp +++ b/js/src/jswatchpoint.cpp @@ -129,9 +129,10 @@ WatchpointMap::triggerWatchpoint(JSContext *cx, HandleObject obj, HandleId id, M Value old; old.setUndefined(); if (obj->isNative()) { - if (Shape *shape = obj->nativeLookup(cx, id)) { + NativeObject *nobj = &obj->as(); + if (Shape *shape = nobj->lookup(cx, id)) { if (shape->hasSlot()) - old = obj->nativeGetSlot(shape->slot()); + old = nobj->getSlot(shape->slot()); } } diff --git a/js/src/perf/jsperf.cpp b/js/src/perf/jsperf.cpp index ab52b136481..f2afeaba6bf 100644 --- a/js/src/perf/jsperf.cpp +++ b/js/src/perf/jsperf.cpp @@ -271,7 +271,7 @@ ExtractPerfMeasurement(jsval wrapper) if (obj->getClass() != js::Valueify(&pm_class)) return 0; - return (PerfMeasurement*) obj->getPrivate(); + return (PerfMeasurement*) obj->as().getPrivate(); } } // namespace JS diff --git a/js/src/proxy/Proxy.cpp b/js/src/proxy/Proxy.cpp index e53d0a770a5..5852698f5a1 100644 --- a/js/src/proxy/Proxy.cpp +++ b/js/src/proxy/Proxy.cpp @@ -868,9 +868,9 @@ ProxyObject::renew(JSContext *cx, const BaseProxyHandler *handler, Value priv) 
MOZ_ASSERT(getTaggedProto().isLazy()); setHandler(handler); - setCrossCompartmentSlot(PRIVATE_SLOT, priv); - setSlot(EXTRA_SLOT + 0, UndefinedValue()); - setSlot(EXTRA_SLOT + 1, UndefinedValue()); + fakeNativeSetCrossCompartmentSlot(PRIVATE_SLOT, priv); + fakeNativeSetSlot(EXTRA_SLOT + 0, UndefinedValue()); + fakeNativeSetSlot(EXTRA_SLOT + 1, UndefinedValue()); } JS_FRIEND_API(JSObject *) diff --git a/js/src/proxy/ScriptedIndirectProxyHandler.cpp b/js/src/proxy/ScriptedIndirectProxyHandler.cpp index 8693e0a871d..4d499c0623f 100644 --- a/js/src/proxy/ScriptedIndirectProxyHandler.cpp +++ b/js/src/proxy/ScriptedIndirectProxyHandler.cpp @@ -346,7 +346,7 @@ ScriptedIndirectProxyHandler::fun_toString(JSContext *cx, HandleObject proxy, un "object"); return nullptr; } - RootedObject obj(cx, &proxy->as().extra(0).toObject().getReservedSlot(0).toObject()); + RootedObject obj(cx, &proxy->as().extra(0).toObject().as().getReservedSlot(0).toObject()); return fun_toStringHelper(cx, obj, indent); } @@ -358,7 +358,7 @@ CallableScriptedIndirectProxyHandler::call(JSContext *cx, HandleObject proxy, co assertEnteredPolicy(cx, proxy, JSID_VOID, CALL); RootedObject ccHolder(cx, &proxy->as().extra(0).toObject()); MOZ_ASSERT(ccHolder->getClass() == &CallConstructHolder); - RootedValue call(cx, ccHolder->getReservedSlot(0)); + RootedValue call(cx, ccHolder->as().getReservedSlot(0)); MOZ_ASSERT(call.isObject() && call.toObject().isCallable()); return Invoke(cx, args.thisv(), call, args.length(), args.array(), args.rval()); } @@ -369,7 +369,7 @@ CallableScriptedIndirectProxyHandler::construct(JSContext *cx, HandleObject prox assertEnteredPolicy(cx, proxy, JSID_VOID, CALL); RootedObject ccHolder(cx, &proxy->as().extra(0).toObject()); MOZ_ASSERT(ccHolder->getClass() == &CallConstructHolder); - RootedValue construct(cx, ccHolder->getReservedSlot(1)); + RootedValue construct(cx, ccHolder->as().getReservedSlot(1)); MOZ_ASSERT(construct.isObject() && construct.toObject().isCallable()); return 
InvokeConstructor(cx, construct, args.length(), args.array(), args.rval()); } @@ -445,8 +445,8 @@ js::proxy_createFunction(JSContext *cx, unsigned argc, Value *vp) js::NullPtr(), cx->global())); if (!ccHolder) return false; - ccHolder->setReservedSlot(0, ObjectValue(*call)); - ccHolder->setReservedSlot(1, ObjectValue(*construct)); + ccHolder->as().setReservedSlot(0, ObjectValue(*call)); + ccHolder->as().setReservedSlot(1, ObjectValue(*construct)); RootedValue priv(cx, ObjectValue(*handler)); JSObject *proxy = diff --git a/js/src/shell/js.cpp b/js/src/shell/js.cpp index 45f3fb5e1fc..646813d9239 100644 --- a/js/src/shell/js.cpp +++ b/js/src/shell/js.cpp @@ -2802,7 +2802,7 @@ ShapeOf(JSContext *cx, unsigned argc, JS::Value *vp) * non-native referent may be simplified to data properties. */ static bool -CopyProperty(JSContext *cx, HandleObject obj, HandleObject referent, HandleId id, +CopyProperty(JSContext *cx, HandleNativeObject obj, HandleObject referent, HandleId id, MutableHandleObject objp) { RootedShape shape(cx); @@ -2811,13 +2811,13 @@ CopyProperty(JSContext *cx, HandleObject obj, HandleObject referent, HandleId id objp.set(nullptr); if (referent->isNative()) { - if (!LookupNativeProperty(cx, referent, id, &obj2, &shape)) + if (!LookupNativeProperty(cx, referent.as(), id, &obj2, &shape)) return false; if (obj2 != referent) return true; if (shape->hasSlot()) { - desc.value().set(referent->nativeGetSlot(shape->slot())); + desc.value().set(referent->as().getSlot(shape->slot())); } else { desc.value().setUndefined(); } @@ -2861,7 +2861,7 @@ resolver_resolve(JSContext *cx, HandleObject obj, HandleId id, MutableHandleObje { jsval v = JS_GetReservedSlot(obj, 0); Rooted vobj(cx, &v.toObject()); - return CopyProperty(cx, obj, vobj, id, objp); + return CopyProperty(cx, obj.as(), vobj, id, objp); } static bool @@ -2875,7 +2875,7 @@ resolver_enumerate(JSContext *cx, HandleObject obj) RootedObject ignore(cx); for (size_t i = 0; ok && i < ida.length(); i++) { Rooted id(cx, 
ida[i]); - ok = CopyProperty(cx, obj, referent, id, &ignore); + ok = CopyProperty(cx, obj.as(), referent, id, &ignore); } return ok; } diff --git a/js/src/vm/ArgumentsObject.cpp b/js/src/vm/ArgumentsObject.cpp index fd3a68aa3bd..292f16ad338 100644 --- a/js/src/vm/ArgumentsObject.cpp +++ b/js/src/vm/ArgumentsObject.cpp @@ -34,7 +34,7 @@ CopyStackFrameArguments(const AbstractFramePtr frame, HeapValue *dst, unsigned t } /* static */ void -ArgumentsObject::MaybeForwardToCallObject(AbstractFramePtr frame, JSObject *obj, +ArgumentsObject::MaybeForwardToCallObject(AbstractFramePtr frame, ArgumentsObject *obj, ArgumentsData *data) { JSScript *script = frame.script(); @@ -47,7 +47,7 @@ ArgumentsObject::MaybeForwardToCallObject(AbstractFramePtr frame, JSObject *obj, /* static */ void ArgumentsObject::MaybeForwardToCallObject(jit::IonJSFrameLayout *frame, HandleObject callObj, - JSObject *obj, ArgumentsData *data) + ArgumentsObject *obj, ArgumentsData *data) { JSFunction *callee = jit::CalleeTokenToFunction(frame->calleeToken()); JSScript *script = callee->nonLazyScript(); @@ -75,7 +75,7 @@ struct CopyFrameArgs * If a call object exists and the arguments object aliases formals, the * call object is the canonical location for formals. */ - void maybeForwardToCallObject(JSObject *obj, ArgumentsData *data) { + void maybeForwardToCallObject(ArgumentsObject *obj, ArgumentsData *data) { ArgumentsObject::MaybeForwardToCallObject(frame_, obj, data); } }; @@ -114,7 +114,7 @@ struct CopyIonJSFrameArgs * If a call object exists and the arguments object aliases formals, the * call object is the canonical location for formals. */ - void maybeForwardToCallObject(JSObject *obj, ArgumentsData *data) { + void maybeForwardToCallObject(ArgumentsObject *obj, ArgumentsData *data) { ArgumentsObject::MaybeForwardToCallObject(frame_, callObj_, obj, data); } }; @@ -149,7 +149,7 @@ struct CopyScriptFrameIterArgs * Ion frames are copying every argument onto the stack, other locations are * invalid. 
*/ - void maybeForwardToCallObject(JSObject *obj, ArgumentsData *data) { + void maybeForwardToCallObject(ArgumentsObject *obj, ArgumentsData *data) { if (!iter_.isIon()) ArgumentsObject::MaybeForwardToCallObject(iter_.abstractFramePtr(), obj, data); } @@ -193,8 +193,10 @@ ArgumentsObject::create(JSContext *cx, HandleScript script, HandleFunction calle if (!data) return nullptr; - RootedObject obj(cx); - obj = JSObject::create(cx, FINALIZE_KIND, GetInitialHeap(GenericObject, clasp), shape, type); + RootedNativeObject obj(cx); + obj = MaybeNativeObject(JSObject::create(cx, FINALIZE_KIND, + GetInitialHeap(GenericObject, clasp), + shape, type)); if (!obj) { js_free(data); return nullptr; @@ -220,7 +222,7 @@ ArgumentsObject::create(JSContext *cx, HandleScript script, HandleFunction calle obj->initFixedSlot(INITIAL_LENGTH_SLOT, Int32Value(numActuals << PACKED_BITS_COUNT)); - copy.maybeForwardToCallObject(obj, data); + copy.maybeForwardToCallObject(&obj->as(), data); ArgumentsObject &argsobj = obj->as(); MOZ_ASSERT(argsobj.initialLength() == numActuals); @@ -321,20 +323,20 @@ ArgSetter(JSContext *cx, HandleObject obj, HandleId id, bool strict, MutableHand { if (!obj->is()) return true; + Handle argsobj = obj.as(); unsigned attrs; - if (!baseops::GetAttributes(cx, obj, id, &attrs)) + if (!baseops::GetAttributes(cx, argsobj, id, &attrs)) return false; MOZ_ASSERT(!(attrs & JSPROP_READONLY)); attrs &= (JSPROP_ENUMERATE | JSPROP_PERMANENT); /* only valid attributes */ - NormalArgumentsObject &argsobj = obj->as(); - RootedScript script(cx, argsobj.containingScript()); + RootedScript script(cx, argsobj->containingScript()); if (JSID_IS_INT(id)) { unsigned arg = unsigned(JSID_TO_INT(id)); - if (arg < argsobj.initialLength() && !argsobj.isElementDeleted(arg)) { - argsobj.setElement(cx, arg, vp); + if (arg < argsobj->initialLength() && !argsobj->isElementDeleted(arg)) { + argsobj->setElement(cx, arg, vp); if (arg < script->functionNonDelazifying()->nargs()) 
types::TypeScript::SetArgument(cx, script, arg, vp); return true; @@ -352,8 +354,8 @@ ArgSetter(JSContext *cx, HandleObject obj, HandleId id, bool strict, MutableHand * that has a setter for this id. */ bool succeeded; - return baseops::DeleteGeneric(cx, obj, id, &succeeded) && - baseops::DefineGeneric(cx, obj, id, vp, nullptr, nullptr, attrs); + return baseops::DeleteGeneric(cx, argsobj, id, &succeeded) && + baseops::DefineGeneric(cx, argsobj, id, vp, nullptr, nullptr, attrs); } static bool @@ -443,15 +445,14 @@ StrictArgSetter(JSContext *cx, HandleObject obj, HandleId id, bool strict, Mutab { if (!obj->is()) return true; + Handle argsobj = obj.as(); unsigned attrs; - if (!baseops::GetAttributes(cx, obj, id, &attrs)) + if (!baseops::GetAttributes(cx, argsobj, id, &attrs)) return false; MOZ_ASSERT(!(attrs & JSPROP_READONLY)); attrs &= (JSPROP_ENUMERATE | JSPROP_PERMANENT); /* only valid attributes */ - Rooted argsobj(cx, &obj->as()); - if (JSID_IS_INT(id)) { unsigned arg = unsigned(JSID_TO_INT(id)); if (arg < argsobj->initialLength()) { diff --git a/js/src/vm/ArgumentsObject.h b/js/src/vm/ArgumentsObject.h index 8c7d22ccaa9..6955fafe587 100644 --- a/js/src/vm/ArgumentsObject.h +++ b/js/src/vm/ArgumentsObject.h @@ -9,9 +9,8 @@ #include "mozilla/MemoryReporting.h" -#include "jsobj.h" - #include "gc/Barrier.h" +#include "vm/ObjectImpl.h" namespace js { @@ -108,7 +107,7 @@ static const unsigned ARGS_LENGTH_MAX = 500 * 1000; * DATA_SLOT * Stores an ArgumentsData*, described above. 
*/ -class ArgumentsObject : public JSObject +class ArgumentsObject : public NativeObject { protected: static const uint32_t INITIAL_LENGTH_SLOT = 0; @@ -271,9 +270,10 @@ class ArgumentsObject : public JSObject return getFixedSlotOffset(INITIAL_LENGTH_SLOT); } - static void MaybeForwardToCallObject(AbstractFramePtr frame, JSObject *obj, ArgumentsData *data); + static void MaybeForwardToCallObject(AbstractFramePtr frame, ArgumentsObject *obj, + ArgumentsData *data); static void MaybeForwardToCallObject(jit::IonJSFrameLayout *frame, HandleObject callObj, - JSObject *obj, ArgumentsData *data); + ArgumentsObject *obj, ArgumentsData *data); }; class NormalArgumentsObject : public ArgumentsObject diff --git a/js/src/vm/ArrayBufferObject.cpp b/js/src/vm/ArrayBufferObject.cpp index 821c548426c..ea09f7d5a83 100644 --- a/js/src/vm/ArrayBufferObject.cpp +++ b/js/src/vm/ArrayBufferObject.cpp @@ -48,6 +48,7 @@ #include "jsinferinlines.h" #include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" #include "vm/Shape-inl.h" using mozilla::DebugOnly; @@ -347,7 +348,7 @@ ArrayBufferObject::changeViewContents(JSContext *cx, ArrayBufferViewObject *view MOZ_ASSERT(newContents); ptrdiff_t offset = viewDataPointer - oldDataPointer; viewDataPointer = static_cast(newContents.data()) + offset; - view->setPrivate(viewDataPointer); + view->fakeNativeSetPrivate(viewDataPointer); } // Notify compiled jit code that the base pointer has moved. 
@@ -598,7 +599,7 @@ ArrayBufferObject::create(JSContext *cx, uint32_t nbytes, BufferContents content nAllocated = JS_ROUNDUP(nbytes, js::gc::SystemPageSize()); cx->zone()->updateMallocCounter(nAllocated); } else { - size_t usableSlots = JSObject::MAX_FIXED_SLOTS - reservedSlots; + size_t usableSlots = NativeObject::MAX_FIXED_SLOTS - reservedSlots; if (nbytes <= usableSlots * sizeof(Value)) { int newSlots = (nbytes - 1) / sizeof(Value) + 1; MOZ_ASSERT(int(nbytes) <= newSlots * int(sizeof(Value))); @@ -797,8 +798,13 @@ ArrayBufferObject::setFirstView(ArrayBufferViewObject *view) } bool -ArrayBufferObject::addView(JSContext *cx, ArrayBufferViewObject *view) +ArrayBufferObject::addView(JSContext *cx, JSObject *viewArg) { + // Note: we don't pass in an ArrayBufferViewObject as the argument due to + // tricky inheritance in the various view classes. View classes do not + // inherit from ArrayBufferViewObject so won't be upcast automatically. + ArrayBufferViewObject *view = &viewArg->as(); + if (!firstView()) { setFirstView(view); return true; @@ -965,8 +971,9 @@ InnerViewTable::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) * stores its data inline. 
*/ /* static */ void -ArrayBufferViewObject::trace(JSTracer *trc, JSObject *obj) +ArrayBufferViewObject::trace(JSTracer *trc, JSObject *objArg) { + NativeObject *obj = &objArg->as(); HeapSlot &bufSlot = obj->getReservedSlotRef(TypedArrayLayout::BUFFER_SLOT); MarkSlot(trc, &bufSlot, "typedarray.buffer"); diff --git a/js/src/vm/ArrayBufferObject.h b/js/src/vm/ArrayBufferObject.h index d4acf369189..e652c67a12d 100644 --- a/js/src/vm/ArrayBufferObject.h +++ b/js/src/vm/ArrayBufferObject.h @@ -63,7 +63,7 @@ uint32_t AnyArrayBufferByteLength(const ArrayBufferObjectMaybeShared *buf); uint8_t *AnyArrayBufferDataPointer(const ArrayBufferObjectMaybeShared *buf); ArrayBufferObjectMaybeShared &AsAnyArrayBuffer(HandleValue val); -class ArrayBufferObjectMaybeShared : public JSObject +class ArrayBufferObjectMaybeShared : public NativeObject { public: uint32_t byteLength() { @@ -224,7 +224,7 @@ class ArrayBufferObject : public ArrayBufferObjectMaybeShared // and non-incrementalized sweep time. ArrayBufferViewObject *firstView(); - bool addView(JSContext *cx, ArrayBufferViewObject *view); + bool addView(JSContext *cx, JSObject *view); void setNewOwnedData(FreeOp* fop, BufferContents newContents); void changeContents(JSContext *cx, BufferContents newContents); @@ -325,7 +325,7 @@ class ArrayBufferObject : public ArrayBufferObjectMaybeShared /* * ArrayBufferViewObject * - * Common definitions shared by all ArrayBufferViews. + * Common definitions shared by all array buffer views. */ class ArrayBufferViewObject : public JSObject @@ -355,7 +355,7 @@ PostBarrierTypedArrayObject(JSObject *obj) } inline void -InitArrayBufferViewDataPointer(ArrayBufferViewObject *obj, ArrayBufferObject *buffer, size_t byteOffset) +InitArrayBufferViewDataPointer(JSObject *obj, ArrayBufferObject *buffer, size_t byteOffset) { /* * N.B. 
The base of the array's data is stored in the object's @@ -363,7 +363,7 @@ InitArrayBufferViewDataPointer(ArrayBufferViewObject *obj, ArrayBufferObject *bu * private Values that are pointers must have the low bits clear. */ MOZ_ASSERT(buffer->dataPointer() != nullptr); - obj->initPrivate(buffer->dataPointer() + byteOffset); + obj->as().initPrivate(buffer->dataPointer() + byteOffset); PostBarrierTypedArrayObject(obj); } diff --git a/js/src/vm/ArrayObject-inl.h b/js/src/vm/ArrayObject-inl.h index c1a9f9c006b..fe6e17f350e 100644 --- a/js/src/vm/ArrayObject-inl.h +++ b/js/src/vm/ArrayObject-inl.h @@ -28,7 +28,102 @@ ArrayObject::setLength(ExclusiveContext *cx, uint32_t length) getElementsHeader()->length = length; } +/* static */ inline ArrayObject * +ArrayObject::createArrayInternal(ExclusiveContext *cx, gc::AllocKind kind, gc::InitialHeap heap, + HandleShape shape, HandleTypeObject type) +{ + // Create a new array and initialize everything except for its elements. + MOZ_ASSERT(shape && type); + MOZ_ASSERT(type->clasp() == shape->getObjectClass()); + MOZ_ASSERT(type->clasp() == &ArrayObject::class_); + MOZ_ASSERT_IF(type->clasp()->finalize, heap == gc::TenuredHeap); + + // Arrays can use their fixed slots to store elements, so can't have shapes + // which allow named properties to be stored in the fixed slots. 
+ MOZ_ASSERT(shape->numFixedSlots() == 0); + + size_t nDynamicSlots = dynamicSlotsCount(0, shape->slotSpan(), type->clasp()); + JSObject *obj = NewGCObject(cx, kind, nDynamicSlots, heap); + if (!obj) + return nullptr; + + static_cast(obj)->shape_.init(shape); + static_cast(obj)->type_.init(type); + + return &obj->as(); +} + +/* static */ inline ArrayObject * +ArrayObject::finishCreateArray(ArrayObject *obj, HandleShape shape) +{ + size_t span = shape->slotSpan(); + if (span) + obj->initializeSlotRange(0, span); + + gc::TraceCreateObject(obj); + + return obj; +} + +/* static */ inline ArrayObject * +ArrayObject::createArray(ExclusiveContext *cx, gc::AllocKind kind, gc::InitialHeap heap, + HandleShape shape, HandleTypeObject type, + uint32_t length) +{ + ArrayObject *obj = createArrayInternal(cx, kind, heap, shape, type); + if (!obj) + return nullptr; + + uint32_t capacity = gc::GetGCKindSlots(kind) - ObjectElements::VALUES_PER_HEADER; + + obj->setFixedElements(); + new (obj->getElementsHeader()) ObjectElements(capacity, length); + + return finishCreateArray(obj, shape); +} + +/* static */ inline ArrayObject * +ArrayObject::createArray(ExclusiveContext *cx, gc::InitialHeap heap, + HandleShape shape, HandleTypeObject type, + HeapSlot *elements) +{ + // Use the smallest allocation kind for the array, as it can't have any + // fixed slots (see the assert in createArrayInternal) and will not be using + // its fixed elements. 
+ gc::AllocKind kind = gc::FINALIZE_OBJECT0_BACKGROUND; + + ArrayObject *obj = createArrayInternal(cx, kind, heap, shape, type); + if (!obj) + return nullptr; + + obj->elements = elements; + + return finishCreateArray(obj, shape); +} + +/* static */ inline ArrayObject * +ArrayObject::createCopyOnWriteArray(ExclusiveContext *cx, gc::InitialHeap heap, + HandleShape shape, + HandleNativeObject sharedElementsOwner) +{ + MOZ_ASSERT(sharedElementsOwner->getElementsHeader()->isCopyOnWrite()); + MOZ_ASSERT(sharedElementsOwner->getElementsHeader()->ownerObject() == sharedElementsOwner); + + // Use the smallest allocation kind for the array, as it can't have any + // fixed slots (see the assert in createArrayInternal) and will not be using + // its fixed elements. + gc::AllocKind kind = gc::FINALIZE_OBJECT0_BACKGROUND; + + RootedTypeObject type(cx, sharedElementsOwner->type()); + ArrayObject *obj = createArrayInternal(cx, kind, heap, shape, type); + if (!obj) + return nullptr; + + obj->elements = sharedElementsOwner->getDenseElementsAllowCopyOnWrite(); + + return finishCreateArray(obj, shape); +} + } // namespace js #endif // vm_ArrayObject_inl_h - diff --git a/js/src/vm/ArrayObject.h b/js/src/vm/ArrayObject.h index b96e6c2bf0f..7a49638b795 100644 --- a/js/src/vm/ArrayObject.h +++ b/js/src/vm/ArrayObject.h @@ -7,11 +7,11 @@ #ifndef vm_ArrayObject_h #define vm_ArrayObject_h -#include "jsobj.h" +#include "vm/ObjectImpl.h" namespace js { -class ArrayObject : public JSObject +class ArrayObject : public NativeObject { public: // Array(x) eagerly allocates dense elements if x <= this value. Without @@ -37,6 +37,43 @@ class ArrayObject : public JSObject MOZ_ASSERT(length <= INT32_MAX); getElementsHeader()->length = length; } + + // Make an array object with the specified initial state. 
+ static inline ArrayObject * + createArray(ExclusiveContext *cx, + gc::AllocKind kind, + gc::InitialHeap heap, + HandleShape shape, + HandleTypeObject type, + uint32_t length); + + // Make an array object with the specified initial state and elements. + static inline ArrayObject * + createArray(ExclusiveContext *cx, + gc::InitialHeap heap, + HandleShape shape, + HandleTypeObject type, + HeapSlot *elements); + + // Make a copy-on-write array object which shares the elements of an + // existing object. + static inline ArrayObject * + createCopyOnWriteArray(ExclusiveContext *cx, + gc::InitialHeap heap, + HandleShape shape, + HandleNativeObject sharedElementsOwner); + + private: + // Helper for the above methods. + static inline ArrayObject * + createArrayInternal(ExclusiveContext *cx, + gc::AllocKind kind, + gc::InitialHeap heap, + HandleShape shape, + HandleTypeObject type); + + static inline ArrayObject * + finishCreateArray(ArrayObject *obj, HandleShape shape); }; } // namespace js diff --git a/js/src/vm/BooleanObject.h b/js/src/vm/BooleanObject.h index c113e20f973..d94ee87eac9 100644 --- a/js/src/vm/BooleanObject.h +++ b/js/src/vm/BooleanObject.h @@ -8,11 +8,12 @@ #define vm_BooleanObject_h #include "jsbool.h" -#include "jsobj.h" + +#include "vm/ObjectImpl.h" namespace js { -class BooleanObject : public JSObject +class BooleanObject : public NativeObject { /* Stores this Boolean object's [[PrimitiveValue]]. 
*/ static const unsigned PRIMITIVE_VALUE_SLOT = 0; diff --git a/js/src/vm/DateObject.h b/js/src/vm/DateObject.h index a9d5cadb669..bb1cd606221 100644 --- a/js/src/vm/DateObject.h +++ b/js/src/vm/DateObject.h @@ -15,7 +15,7 @@ namespace js { class DateTimeInfo; -class DateObject : public JSObject +class DateObject : public NativeObject { static const uint32_t UTC_TIME_SLOT = 0; static const uint32_t TZA_SLOT = 1; diff --git a/js/src/vm/Debugger-inl.h b/js/src/vm/Debugger-inl.h index 0b1434ab51e..2f5740f706f 100644 --- a/js/src/vm/Debugger-inl.h +++ b/js/src/vm/Debugger-inl.h @@ -26,7 +26,7 @@ js::Debugger::onLeaveFrame(JSContext *cx, AbstractFramePtr frame, bool ok) js::Debugger::fromJSObject(JSObject *obj) { MOZ_ASSERT(js::GetObjectClass(obj) == &jsclass); - return (Debugger *) obj->getPrivate(); + return (Debugger *) obj->as().getPrivate(); } #endif /* vm_Debugger_inl_h */ diff --git a/js/src/vm/Debugger.cpp b/js/src/vm/Debugger.cpp index 3766c8c82f1..55943af48ee 100644 --- a/js/src/vm/Debugger.cpp +++ b/js/src/vm/Debugger.cpp @@ -197,7 +197,7 @@ class Debugger::FrameRange return nextDebugger >= debuggerCount; } - JSObject *frontFrame() const { + NativeObject *frontFrame() const { MOZ_ASSERT(!empty()); return entry->value(); } @@ -342,7 +342,7 @@ Breakpoint::nextInSite() /*** Debugger hook dispatch **********************************************************************/ -Debugger::Debugger(JSContext *cx, JSObject *dbg) +Debugger::Debugger(JSContext *cx, NativeObject *dbg) : object(dbg), uncaughtExceptionHook(nullptr), enabled(true), @@ -405,7 +405,7 @@ Debugger::fromChildJSObject(JSObject *obj) obj->getClass() == &DebuggerSource_class || obj->getClass() == &DebuggerObject_class || obj->getClass() == &DebuggerEnv_class); - JSObject *dbgobj = &obj->getReservedSlot(JSSLOT_DEBUGOBJECT_OWNER).toObject(); + JSObject *dbgobj = &obj->as().getReservedSlot(JSSLOT_DEBUGOBJECT_OWNER).toObject(); return fromJSObject(dbgobj); } @@ -432,8 +432,8 @@ 
Debugger::getScriptFrameWithIter(JSContext *cx, AbstractFramePtr frame, if (!p) { /* Create and populate the Debugger.Frame object. */ JSObject *proto = &object->getReservedSlot(JSSLOT_DEBUG_FRAME_PROTO).toObject(); - JSObject *frameobj = - NewObjectWithGivenProto(cx, &DebuggerFrame_class, proto, nullptr); + NativeObject *frameobj = + NewNativeObjectWithGivenProto(cx, &DebuggerFrame_class, proto, nullptr); if (!frameobj) return false; @@ -488,7 +488,7 @@ Debugger::hasAnyLiveHooks() const } for (FrameMap::Range r = frames.all(); !r.empty(); r.popFront()) { - JSObject *frameObj = r.front().value(); + NativeObject *frameObj = r.front().value(); if (!frameObj->getReservedSlot(JSSLOT_DEBUGFRAME_ONSTEP_HANDLER).isUndefined() || !frameObj->getReservedSlot(JSSLOT_DEBUGFRAME_ONPOP_HANDLER).isUndefined()) return true; @@ -530,7 +530,7 @@ Debugger::slowPathOnEnterFrame(JSContext *cx, AbstractFramePtr frame, MutableHan static void DebuggerFrame_maybeDecrementFrameScriptStepModeCount(FreeOp *fop, AbstractFramePtr frame, - JSObject *frameobj); + NativeObject *frameobj); static void DebuggerFrame_freeScriptFrameIterData(FreeOp *fop, JSObject *obj); @@ -561,7 +561,7 @@ Debugger::slowPathOnLeaveFrame(JSContext *cx, AbstractFramePtr frame, bool frame /* For each Debugger.Frame, fire its onPop handler, if any. */ for (JSObject **p = frames.begin(); p != frames.end(); p++) { - RootedObject frameobj(cx, *p); + RootedNativeObject frameobj(cx, &(*p)->as()); Debugger *dbg = Debugger::fromChildJSObject(frameobj); if (dbg->enabled && @@ -605,7 +605,7 @@ Debugger::slowPathOnLeaveFrame(JSContext *cx, AbstractFramePtr frame, bool frame * own Debugger.Frame instance. 
*/ for (FrameRange r(frame, global); !r.empty(); r.popFront()) { - RootedObject frameobj(cx, r.frontFrame()); + RootedNativeObject frameobj(cx, r.frontFrame()); Debugger *dbg = r.frontDebugger(); MOZ_ASSERT(dbg == Debugger::fromChildJSObject(frameobj)); @@ -658,14 +658,14 @@ Debugger::wrapEnvironment(JSContext *cx, Handle env, MutableHandleValue rv */ MOZ_ASSERT(!env->is()); - JSObject *envobj; + NativeObject *envobj; DependentAddPtr p(cx, environments, env); if (p) { - envobj = p->value(); + envobj = &p->value()->as(); } else { /* Create a new Debugger.Environment for env. */ JSObject *proto = &object->getReservedSlot(JSSLOT_DEBUG_ENV_PROTO).toObject(); - envobj = NewObjectWithGivenProto(cx, &DebuggerEnv_class, proto, nullptr, TenuredObject); + envobj = NewNativeObjectWithGivenProto(cx, &DebuggerEnv_class, proto, nullptr, TenuredObject); if (!envobj) return false; envobj->setPrivateGCThing(env); @@ -706,8 +706,8 @@ Debugger::wrapDebuggeeValue(JSContext *cx, MutableHandleValue vp) } else { /* Create a new Debugger.Object for obj. 
*/ JSObject *proto = &object->getReservedSlot(JSSLOT_DEBUG_OBJECT_PROTO).toObject(); - JSObject *dobj = - NewObjectWithGivenProto(cx, &DebuggerObject_class, proto, nullptr, TenuredObject); + NativeObject *dobj = + NewNativeObjectWithGivenProto(cx, &DebuggerObject_class, proto, nullptr, TenuredObject); if (!dobj) return false; dobj->setPrivateGCThing(obj); @@ -771,8 +771,9 @@ Debugger::unwrapDebuggeeValue(JSContext *cx, MutableHandleValue vp) "Debugger", "Debugger.Object", dobj->getClass()->name); return false; } + NativeObject *ndobj = &dobj->as(); - Value owner = dobj->getReservedSlot(JSSLOT_DEBUGOBJECT_OWNER); + Value owner = ndobj->getReservedSlot(JSSLOT_DEBUGOBJECT_OWNER); if (owner.isUndefined() || &owner.toObject() != object) { JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, owner.isUndefined() @@ -781,7 +782,7 @@ Debugger::unwrapDebuggeeValue(JSContext *cx, MutableHandleValue vp) return false; } - vp.setObject(*static_cast(dobj->getPrivate())); + vp.setObject(*static_cast(ndobj->getPrivate())); } return true; } @@ -932,7 +933,7 @@ Debugger::newCompletionValue(JSContext *cx, JSTrapStatus status, Value value_, } /* Common tail for JSTRAP_RETURN and JSTRAP_THROW. 
*/ - RootedObject obj(cx, NewBuiltinClassInstance(cx, &JSObject::class_)); + RootedNativeObject obj(cx, NewNativeBuiltinClassInstance(cx, &JSObject::class_)); if (!obj || !wrapDebuggeeValue(cx, &value) || !DefineNativeProperty(cx, obj, key, value, JS_PropertyStub, JS_StrictPropertyStub, @@ -998,8 +999,10 @@ Debugger::parseResumptionValue(Maybe &ac, bool ok, const Value return handleUncaughtException(ac, vp, callHook); } + HandleNativeObject nobj = obj.as(); + RootedValue v(cx, vp.get()); - if (!NativeGet(cx, obj, obj, shape, &v) || !unwrapDebuggeeValue(cx, &v)) + if (!NativeGet(cx, obj, nobj, shape, &v) || !unwrapDebuggeeValue(cx, &v)) return handleUncaughtException(ac, &v, callHook); ac.reset(); @@ -1306,7 +1309,7 @@ Debugger::onSingleStep(JSContext *cx, MutableHandleValue vp) */ AutoObjectVector frames(cx); for (FrameRange r(iter.abstractFramePtr()); !r.empty(); r.popFront()) { - JSObject *frame = r.frontFrame(); + NativeObject *frame = r.frontFrame(); if (!frame->getReservedSlot(JSSLOT_DEBUGFRAME_ONSTEP_HANDLER).isUndefined() && !frames.append(frame)) { @@ -1333,7 +1336,7 @@ Debugger::onSingleStep(JSContext *cx, MutableHandleValue vp) Debugger *dbg = *p; for (FrameMap::Range r = dbg->frames.all(); !r.empty(); r.popFront()) { AbstractFramePtr frame = r.front().key(); - JSObject *frameobj = r.front().value(); + NativeObject *frameobj = r.front().value(); if (frame.script() == trappingScript && !frameobj->getReservedSlot(JSSLOT_DEBUGFRAME_ONSTEP_HANDLER).isUndefined()) { @@ -1351,7 +1354,7 @@ Debugger::onSingleStep(JSContext *cx, MutableHandleValue vp) /* Call all the onStep handlers we found. 
*/ for (JSObject **p = frames.begin(); p != frames.end(); p++) { - RootedObject frame(cx, *p); + RootedNativeObject frame(cx, &(*p)->as()); Debugger *dbg = Debugger::fromChildJSObject(frame); Maybe ac; @@ -1607,7 +1610,7 @@ Debugger::markAllIteratively(GCMarker *trc) * - it isn't already marked * - it actually has hooks that might be called */ - HeapPtrObject &dbgobj = dbg->toJSObjectRef(); + HeapPtrNativeObject &dbgobj = dbg->toJSObjectRef(); if (!dbgobj->zone()->isGCMarking()) continue; @@ -1662,7 +1665,7 @@ Debugger::markAll(JSTracer *trc) e.rekeyFront(global); } - HeapPtrObject &dbgobj = dbg->toJSObjectRef(); + HeapPtrNativeObject &dbgobj = dbg->toJSObjectRef(); MarkObject(trc, &dbgobj, "Debugger Object"); dbg->scripts.trace(trc); @@ -1699,7 +1702,7 @@ Debugger::trace(JSTracer *trc) * frames.) */ for (FrameMap::Range r = frames.all(); !r.empty(); r.popFront()) { - RelocatablePtrObject &frameobj = r.front().value(); + RelocatablePtrNativeObject &frameobj = r.front().value(); MOZ_ASSERT(MaybeForwarded(frameobj.get())->getPrivate()); MarkObject(trc, &frameobj, "live Debugger.Frame"); } @@ -2173,7 +2176,7 @@ bool Debugger::getDebuggees(JSContext *cx, unsigned argc, Value *vp) { THIS_DEBUGGER(cx, argc, vp, "getDebuggees", args, dbg); - RootedObject arrobj(cx, NewDenseFullyAllocatedArray(cx, dbg->debuggees.count())); + RootedArrayObject arrobj(cx, NewDenseFullyAllocatedArray(cx, dbg->debuggees.count())); if (!arrobj) return false; arrobj->ensureDenseInitializedLength(cx, 0, dbg->debuggees.count()); @@ -2244,14 +2247,14 @@ Debugger::construct(JSContext *cx, unsigned argc, Value *vp) RootedObject callee(cx, &args.callee()); if (!JSObject::getProperty(cx, callee, callee, cx->names().prototype, &v)) return false; - RootedObject proto(cx, &v.toObject()); + RootedNativeObject proto(cx, &v.toObject().as()); MOZ_ASSERT(proto->getClass() == &Debugger::jsclass); /* * Make the new Debugger object. 
Each one has a reference to * Debugger.{Frame,Object,Script,Memory}.prototype in reserved slots. The * rest of the reserved slots are for hooks; they default to undefined. */ - RootedObject obj(cx, NewObjectWithGivenProto(cx, &Debugger::jsclass, proto, nullptr)); + RootedNativeObject obj(cx, NewNativeObjectWithGivenProto(cx, &Debugger::jsclass, proto, nullptr)); if (!obj) return false; for (unsigned slot = JSSLOT_DEBUG_PROTO_START; slot < JSSLOT_DEBUG_PROTO_STOP; slot++) @@ -2405,7 +2408,7 @@ Debugger::cleanupDebuggeeGlobalBeforeRemoval(FreeOp *fop, GlobalObject *global, */ for (FrameMap::Enum e(frames); !e.empty(); e.popFront()) { AbstractFramePtr frame = e.front().key(); - JSObject *frameobj = e.front().value(); + NativeObject *frameobj = e.front().value(); if (&frame.script()->global() == global) { DebuggerFrame_freeScriptFrameIterData(fop, frameobj); DebuggerFrame_maybeDecrementFrameScriptStepModeCount(fop, frame, frameobj); @@ -2909,7 +2912,7 @@ Debugger::findScripts(JSContext *cx, unsigned argc, Value *vp) if (!query.findScripts(&scripts)) return false; - RootedObject result(cx, NewDenseFullyAllocatedArray(cx, scripts.length())); + RootedArrayObject result(cx, NewDenseFullyAllocatedArray(cx, scripts.length())); if (!result) return false; @@ -3017,7 +3020,7 @@ static inline JSScript * GetScriptReferent(JSObject *obj) { MOZ_ASSERT(obj->getClass() == &DebuggerScript_class); - return static_cast(obj->getPrivate()); + return static_cast(obj->as().getPrivate()); } static void @@ -3026,7 +3029,7 @@ DebuggerScript_trace(JSTracer *trc, JSObject *obj) /* This comes from a private pointer, so no barrier needed. 
*/ if (JSScript *script = GetScriptReferent(obj)) { MarkCrossCompartmentScriptUnbarriered(trc, obj, &script, "Debugger.Script referent"); - obj->setPrivateUnbarriered(script); + obj->as().setPrivateUnbarriered(script); } } @@ -3049,7 +3052,8 @@ Debugger::newDebuggerScript(JSContext *cx, HandleScript script) JSObject *proto = &object->getReservedSlot(JSSLOT_DEBUG_SCRIPT_PROTO).toObject(); MOZ_ASSERT(proto); - JSObject *scriptobj = NewObjectWithGivenProto(cx, &DebuggerScript_class, proto, nullptr, TenuredObject); + NativeObject *scriptobj = NewNativeObjectWithGivenProto(cx, &DebuggerScript_class, + proto, nullptr, TenuredObject); if (!scriptobj) return nullptr; scriptobj->setReservedSlot(JSSLOT_DEBUGSCRIPT_OWNER, ObjectValue(*object)); @@ -3739,7 +3743,7 @@ Debugger::replaceFrameGuts(JSContext *cx, AbstractFramePtr from, AbstractFramePt ScriptFrameIter &iter) { for (Debugger::FrameRange r(from); !r.empty(); r.popFront()) { - RootedObject frameobj(cx, r.frontFrame()); + RootedNativeObject frameobj(cx, r.frontFrame()); Debugger *dbg = r.frontDebugger(); MOZ_ASSERT(dbg == Debugger::fromChildJSObject(frameobj)); @@ -3980,7 +3984,7 @@ static inline ScriptSourceObject * GetSourceReferent(JSObject *obj) { MOZ_ASSERT(obj->getClass() == &DebuggerSource_class); - return static_cast(obj->getPrivate()); + return static_cast(obj->as().getPrivate()); } static void @@ -3992,7 +3996,7 @@ DebuggerSource_trace(JSTracer *trc, JSObject *obj) */ if (JSObject *referent = GetSourceReferent(obj)) { MarkCrossCompartmentObjectUnbarriered(trc, obj, &referent, "Debugger.Source referent"); - obj->setPrivateUnbarriered(referent); + obj->as().setPrivateUnbarriered(referent); } } @@ -4015,7 +4019,8 @@ Debugger::newDebuggerSource(JSContext *cx, HandleScriptSource source) JSObject *proto = &object->getReservedSlot(JSSLOT_DEBUG_SOURCE_PROTO).toObject(); MOZ_ASSERT(proto); - JSObject *sourceobj = NewObjectWithGivenProto(cx, &DebuggerSource_class, proto, nullptr, TenuredObject); + NativeObject 
*sourceobj = NewNativeObjectWithGivenProto(cx, &DebuggerSource_class, + proto, nullptr, TenuredObject); if (!sourceobj) return nullptr; sourceobj->setReservedSlot(JSSLOT_DEBUGSOURCE_OWNER, ObjectValue(*object)); @@ -4281,15 +4286,15 @@ UpdateFrameIterPc(FrameIter &iter) static void DebuggerFrame_freeScriptFrameIterData(FreeOp *fop, JSObject *obj) { - AbstractFramePtr frame = AbstractFramePtr::FromRaw(obj->getPrivate()); + AbstractFramePtr frame = AbstractFramePtr::FromRaw(obj->as().getPrivate()); if (frame.isScriptFrameIterData()) fop->delete_((ScriptFrameIter::Data *) frame.raw()); - obj->setPrivate(nullptr); + obj->as().setPrivate(nullptr); } static void DebuggerFrame_maybeDecrementFrameScriptStepModeCount(FreeOp *fop, AbstractFramePtr frame, - JSObject *frameobj) + NativeObject *frameobj) { /* If this frame has an onStep handler, decrement the script's count. */ if (!frameobj->getReservedSlot(JSSLOT_DEBUGFRAME_ONSTEP_HANDLER).isUndefined()) @@ -4308,7 +4313,7 @@ const Class DebuggerFrame_class = { JS_EnumerateStub, JS_ResolveStub, JS_ConvertStub, DebuggerFrame_finalize }; -static JSObject * +static NativeObject * CheckThisFrame(JSContext *cx, const CallArgs &args, const char *fnname, bool checkLive) { if (!args.thisv().isObject()) { @@ -4322,14 +4327,16 @@ CheckThisFrame(JSContext *cx, const CallArgs &args, const char *fnname, bool che return nullptr; } + NativeObject *nthisobj = &thisobj->as(); + /* * Forbid Debugger.Frame.prototype, which is of class DebuggerFrame_class * but isn't really a working Debugger.Frame object. The prototype object * is distinguished by having a nullptr private value. Also, forbid popped * frames. 
*/ - if (!thisobj->getPrivate()) { - if (thisobj->getReservedSlot(JSSLOT_DEBUGFRAME_OWNER).isUndefined()) { + if (!nthisobj->getPrivate()) { + if (nthisobj->getReservedSlot(JSSLOT_DEBUGFRAME_OWNER).isUndefined()) { JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INCOMPATIBLE_PROTO, "Debugger.Frame", fnname, "prototype object"); return nullptr; @@ -4340,7 +4347,7 @@ CheckThisFrame(JSContext *cx, const CallArgs &args, const char *fnname, bool che return nullptr; } } - return thisobj; + return nthisobj; } /* @@ -4362,7 +4369,7 @@ CheckThisFrame(JSContext *cx, const CallArgs &args, const char *fnname, bool che #define THIS_FRAME_THISOBJ(cx, argc, vp, fnname, args, thisobj) \ CallArgs args = CallArgsFromVp(argc, vp); \ - RootedObject thisobj(cx, CheckThisFrame(cx, args, fnname, true)); \ + RootedNativeObject thisobj(cx, CheckThisFrame(cx, args, fnname, true)); \ if (!thisobj) \ return false @@ -4548,7 +4555,7 @@ DebuggerArguments_getArg(JSContext *cx, unsigned argc, Value *vp) * Put the Debugger.Frame into the this-value slot, then use THIS_FRAME * to check that it is still live and get the fp. */ - args.setThis(argsobj->getReservedSlot(JSSLOT_DEBUGARGUMENTS_FRAME)); + args.setThis(argsobj->as().getReservedSlot(JSSLOT_DEBUGARGUMENTS_FRAME)); THIS_FRAME(cx, argc, vp, "get argument", ca2, thisobj, frame); /* @@ -4593,14 +4600,14 @@ DebuggerFrame_getArguments(JSContext *cx, unsigned argc, Value *vp) return true; } - RootedObject argsobj(cx); + RootedNativeObject argsobj(cx); if (frame.hasArgs()) { /* Create an arguments object. 
*/ Rooted global(cx, &args.callee().global()); JSObject *proto = GlobalObject::getOrCreateArrayPrototype(cx, global); if (!proto) return false; - argsobj = NewObjectWithGivenProto(cx, &DebuggerArguments_class, proto, global); + argsobj = NewNativeObjectWithGivenProto(cx, &DebuggerArguments_class, proto, global); if (!argsobj) return false; SetReservedSlot(argsobj, JSSLOT_DEBUGARGUMENTS_FRAME, ObjectValue(*thisobj)); @@ -4686,7 +4693,7 @@ static bool DebuggerFrame_getLive(JSContext *cx, unsigned argc, Value *vp) { CallArgs args = CallArgsFromVp(argc, vp); - JSObject *thisobj = CheckThisFrame(cx, args, "get live", false); + NativeObject *thisobj = CheckThisFrame(cx, args, "get live", false); if (!thisobj) return false; bool hasFrame = !!thisobj->getPrivate(); @@ -4922,19 +4929,21 @@ DebuggerGenericEval(JSContext *cx, const char *fullMethodName, const Value &code /* If evalWithBindings, create the inner environment. */ if (evalWithBindings) { /* TODO - This should probably be a Call object, like ES5 strict eval. */ - env = NewObjectWithGivenProto(cx, &JSObject::class_, nullptr, env); - if (!env) + RootedNativeObject nenv(cx, NewNativeObjectWithGivenProto(cx, &JSObject::class_, + nullptr, env)); + if (!nenv) return false; RootedId id(cx); for (size_t i = 0; i < keys.length(); i++) { id = keys[i]; MutableHandleValue val = values[i]; if (!cx->compartment()->wrap(cx, val) || - !DefineNativeProperty(cx, env, id, val, nullptr, nullptr, 0)) + !DefineNativeProperty(cx, nenv, id, val, nullptr, nullptr, 0)) { return false; } } + env = nenv; } /* Run the code and produce the completion value. */ @@ -5008,7 +5017,6 @@ static const JSFunctionSpec DebuggerFrame_methods[] = { JS_FS_END }; - /*** Debugger.Object *****************************************************************************/ static void @@ -5018,9 +5026,9 @@ DebuggerObject_trace(JSTracer *trc, JSObject *obj) * There is a barrier on private pointers, so the Unbarriered marking * is okay. 
*/ - if (JSObject *referent = (JSObject *) obj->getPrivate()) { + if (JSObject *referent = (JSObject *) obj->as().getPrivate()) { MarkCrossCompartmentObjectUnbarriered(trc, obj, &referent, "Debugger.Object referent"); - obj->setPrivateUnbarriered(referent); + obj->as().setPrivateUnbarriered(referent); } } @@ -5036,7 +5044,7 @@ const Class DebuggerObject_class = { DebuggerObject_trace }; -static JSObject * +static NativeObject * DebuggerObject_checkThis(JSContext *cx, const CallArgs &args, const char *fnname) { if (!args.thisv().isObject()) { @@ -5055,12 +5063,13 @@ DebuggerObject_checkThis(JSContext *cx, const CallArgs &args, const char *fnname * but isn't a real working Debugger.Object. The prototype object is * distinguished by having no referent. */ - if (!thisobj->getPrivate()) { + NativeObject *nthisobj = &thisobj->as(); + if (!nthisobj->getPrivate()) { JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INCOMPATIBLE_PROTO, "Debugger.Object", fnname, "prototype object"); return nullptr; } - return thisobj; + return nthisobj; } #define THIS_DEBUGOBJECT_REFERENT(cx, argc, vp, fnname, args, obj) \ @@ -5068,16 +5077,16 @@ DebuggerObject_checkThis(JSContext *cx, const CallArgs &args, const char *fnname RootedObject obj(cx, DebuggerObject_checkThis(cx, args, fnname)); \ if (!obj) \ return false; \ - obj = (JSObject *) obj->getPrivate(); \ + obj = (JSObject *) obj->as().getPrivate(); \ MOZ_ASSERT(obj) #define THIS_DEBUGOBJECT_OWNER_REFERENT(cx, argc, vp, fnname, args, dbg, obj) \ - CallArgs args = CallArgsFromVp(argc, vp); \ + CallArgs args = CallArgsFromVp(argc, vp); \ RootedObject obj(cx, DebuggerObject_checkThis(cx, args, fnname)); \ if (!obj) \ return false; \ Debugger *dbg = Debugger::fromChildJSObject(obj); \ - obj = (JSObject *) obj->getPrivate(); \ + obj = (JSObject *) obj->as().getPrivate(); \ MOZ_ASSERT(obj) static bool @@ -5190,7 +5199,7 @@ DebuggerObject_getParameterNames(JSContext *cx, unsigned argc, Value *vp) return true; } - RootedObject 
result(cx, NewDenseFullyAllocatedArray(cx, fun->nargs())); + RootedArrayObject result(cx, NewDenseFullyAllocatedArray(cx, fun->nargs())); if (!result) return false; result->ensureDenseInitializedLength(cx, 0, fun->nargs()); @@ -5919,9 +5928,9 @@ DebuggerEnv_trace(JSTracer *trc, JSObject *obj) * There is a barrier on private pointers, so the Unbarriered marking * is okay. */ - if (Env *referent = (JSObject *) obj->getPrivate()) { + if (Env *referent = (JSObject *) obj->as().getPrivate()) { MarkCrossCompartmentObjectUnbarriered(trc, obj, &referent, "Debugger.Environment referent"); - obj->setPrivateUnbarriered(referent); + obj->as().setPrivateUnbarriered(referent); } } @@ -5937,7 +5946,7 @@ const Class DebuggerEnv_class = { DebuggerEnv_trace }; -static JSObject * +static NativeObject * DebuggerEnv_checkThis(JSContext *cx, const CallArgs &args, const char *fnname, bool requireDebuggee = true) { @@ -5957,7 +5966,8 @@ DebuggerEnv_checkThis(JSContext *cx, const CallArgs &args, const char *fnname, * but isn't a real working Debugger.Environment. The prototype object is * distinguished by having no referent. */ - if (!thisobj->getPrivate()) { + NativeObject *nthisobj = &thisobj->as(); + if (!nthisobj->getPrivate()) { JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INCOMPATIBLE_PROTO, "Debugger.Environment", fnname, "prototype object"); return nullptr; @@ -5968,20 +5978,20 @@ DebuggerEnv_checkThis(JSContext *cx, const CallArgs &args, const char *fnname, * environments. 
*/ if (requireDebuggee) { - Rooted env(cx, static_cast(thisobj->getPrivate())); - if (!Debugger::fromChildJSObject(thisobj)->observesGlobal(&env->global())) { + Rooted env(cx, static_cast(nthisobj->getPrivate())); + if (!Debugger::fromChildJSObject(nthisobj)->observesGlobal(&env->global())) { JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_DEBUG_NOT_DEBUGGEE, "Debugger.Environment", "environment"); return nullptr; } } - return thisobj; + return nthisobj; } #define THIS_DEBUGENV(cx, argc, vp, fnname, args, envobj, env) \ CallArgs args = CallArgsFromVp(argc, vp); \ - JSObject *envobj = DebuggerEnv_checkThis(cx, args, fnname); \ + NativeObject *envobj = DebuggerEnv_checkThis(cx, args, fnname); \ if (!envobj) \ return false; \ Rooted env(cx, static_cast(envobj->getPrivate())); \ @@ -6100,7 +6110,7 @@ static bool DebuggerEnv_getInspectable(JSContext *cx, unsigned argc, Value *vp) { CallArgs args = CallArgsFromVp(argc, vp); - JSObject *envobj = DebuggerEnv_checkThis(cx, args, "get inspectable", false); + NativeObject *envobj = DebuggerEnv_checkThis(cx, args, "get inspectable", false); if (!envobj) return false; Rooted env(cx, static_cast(envobj->getPrivate())); @@ -6351,7 +6361,7 @@ Builder::newObject(JSContext *cx) extern JS_PUBLIC_API(bool) JS_DefineDebuggerObject(JSContext *cx, HandleObject obj) { - RootedObject + RootedNativeObject objProto(cx), debugCtor(cx), debugProto(cx), diff --git a/js/src/vm/Debugger.h b/js/src/vm/Debugger.h index e120c91b817..27115799d05 100644 --- a/js/src/vm/Debugger.h +++ b/js/src/vm/Debugger.h @@ -194,7 +194,7 @@ class Debugger : private mozilla::LinkedListElement JSSLOT_DEBUG_COUNT }; private: - HeapPtrObject object; /* The Debugger object. Strong reference. */ + HeapPtrNativeObject object; /* The Debugger object. Strong reference. */ GlobalObjectSet debuggees; /* Debuggee globals. Cross-compartment weak references. */ js::HeapPtrObject uncaughtExceptionHook; /* Strong reference. 
*/ bool enabled; @@ -240,7 +240,7 @@ class Debugger : private mozilla::LinkedListElement * has to be different. */ typedef HashMap, RuntimeAllocPolicy> FrameMap; FrameMap frames; @@ -417,12 +417,12 @@ class Debugger : private mozilla::LinkedListElement ScriptFrameIter &iter); public: - Debugger(JSContext *cx, JSObject *dbg); + Debugger(JSContext *cx, NativeObject *dbg); ~Debugger(); bool init(JSContext *cx); - inline const js::HeapPtrObject &toJSObject() const; - inline js::HeapPtrObject &toJSObjectRef(); + inline const js::HeapPtrNativeObject &toJSObject() const; + inline js::HeapPtrNativeObject &toJSObjectRef(); static inline Debugger *fromJSObject(JSObject *obj); static Debugger *fromChildJSObject(JSObject *obj); @@ -695,14 +695,14 @@ Debugger::fromOnNewGlobalObjectWatchersLink(JSCList *link) { return reinterpret_cast(p - offsetof(Debugger, onNewGlobalObjectWatchersLink)); } -const js::HeapPtrObject & +const js::HeapPtrNativeObject & Debugger::toJSObject() const { MOZ_ASSERT(object); return object; } -js::HeapPtrObject & +js::HeapPtrNativeObject & Debugger::toJSObjectRef() { MOZ_ASSERT(object); diff --git a/js/src/vm/DebuggerMemory.cpp b/js/src/vm/DebuggerMemory.cpp index 7ed8c244884..38cb3e358a4 100644 --- a/js/src/vm/DebuggerMemory.cpp +++ b/js/src/vm/DebuggerMemory.cpp @@ -23,6 +23,7 @@ #include "vm/SavedStacks.h" #include "vm/Debugger-inl.h" +#include "vm/ObjectImpl-inl.h" using namespace js; @@ -38,8 +39,8 @@ DebuggerMemory::create(JSContext *cx, Debugger *dbg) { Value memoryProto = dbg->object->getReservedSlot(Debugger::JSSLOT_DEBUG_MEMORY_PROTO); - RootedObject memory(cx, NewObjectWithGivenProto(cx, &class_, - &memoryProto.toObject(), nullptr)); + RootedNativeObject memory(cx, NewNativeObjectWithGivenProto(cx, &class_, + &memoryProto.toObject(), nullptr)); if (!memory) return nullptr; @@ -99,7 +100,7 @@ DebuggerMemory::checkThis(JSContext *cx, CallArgs &args, const char *fnName) // Debugger.Memory instances, however doesn't actually represent an instance 
// of Debugger.Memory. It is the only object that is() but // doesn't have a Debugger instance. - if (thisObject.getReservedSlot(JSSLOT_DEBUGGER).isUndefined()) { + if (thisObject.as().getReservedSlot(JSSLOT_DEBUGGER).isUndefined()) { JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INCOMPATIBLE_PROTO, class_.name, fnName, "prototype object"); return nullptr; @@ -192,7 +193,7 @@ DebuggerMemory::drainAllocationsLog(JSContext *cx, unsigned argc, Value *vp) size_t length = dbg->allocationsLogLength; - RootedObject result(cx, NewDenseFullyAllocatedArray(cx, length)); + RootedArrayObject result(cx, NewDenseFullyAllocatedArray(cx, length)); if (!result) return false; result->ensureDenseInitializedLength(cx, 0, length); diff --git a/js/src/vm/DebuggerMemory.h b/js/src/vm/DebuggerMemory.h index 11a74d45d73..d584a3b1748 100644 --- a/js/src/vm/DebuggerMemory.h +++ b/js/src/vm/DebuggerMemory.h @@ -15,7 +15,7 @@ namespace js { -class DebuggerMemory : public JSObject { +class DebuggerMemory : public NativeObject { friend class Debugger; static DebuggerMemory *checkThis(JSContext *cx, CallArgs &args, const char *fnName); diff --git a/js/src/vm/ErrorObject.cpp b/js/src/vm/ErrorObject.cpp index 39aef0b1813..b49b2fb60eb 100644 --- a/js/src/vm/ErrorObject.cpp +++ b/js/src/vm/ErrorObject.cpp @@ -13,6 +13,7 @@ #include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" #include "vm/Shape-inl.h" using namespace js; @@ -21,7 +22,7 @@ using mozilla::PodZero; /* static */ Shape * js::ErrorObject::assignInitialShape(ExclusiveContext *cx, Handle obj) { - MOZ_ASSERT(obj->nativeEmpty()); + MOZ_ASSERT(obj->empty()); if (!obj->addDataProperty(cx, cx->names().fileName, FILENAME_SLOT, 0)) return nullptr; @@ -56,13 +57,13 @@ js::ErrorObject::init(JSContext *cx, Handle obj, JSExnType type, MOZ_ASSERT(messageShape->slot() == MESSAGE_SLOT); } - MOZ_ASSERT(obj->nativeLookupPure(NameToId(cx->names().fileName))->slot() == FILENAME_SLOT); - 
MOZ_ASSERT(obj->nativeLookupPure(NameToId(cx->names().lineNumber))->slot() == LINENUMBER_SLOT); - MOZ_ASSERT(obj->nativeLookupPure(NameToId(cx->names().columnNumber))->slot() == + MOZ_ASSERT(obj->lookupPure(NameToId(cx->names().fileName))->slot() == FILENAME_SLOT); + MOZ_ASSERT(obj->lookupPure(NameToId(cx->names().lineNumber))->slot() == LINENUMBER_SLOT); + MOZ_ASSERT(obj->lookupPure(NameToId(cx->names().columnNumber))->slot() == COLUMNNUMBER_SLOT); - MOZ_ASSERT(obj->nativeLookupPure(NameToId(cx->names().stack))->slot() == STACK_SLOT); + MOZ_ASSERT(obj->lookupPure(NameToId(cx->names().stack))->slot() == STACK_SLOT); MOZ_ASSERT_IF(message, - obj->nativeLookupPure(NameToId(cx->names().message))->slot() == MESSAGE_SLOT); + obj->lookupPure(NameToId(cx->names().message))->slot() == MESSAGE_SLOT); MOZ_ASSERT(JSEXN_ERR <= type && type < JSEXN_LIMIT); @@ -74,7 +75,7 @@ js::ErrorObject::init(JSContext *cx, Handle obj, JSExnType type, obj->initReservedSlot(COLUMNNUMBER_SLOT, Int32Value(columnNumber)); obj->initReservedSlot(STACK_SLOT, StringValue(stack)); if (message) - obj->nativeSetSlotWithType(cx, messageShape, StringValue(message)); + obj->setSlotWithType(cx, messageShape, StringValue(message)); return true; } diff --git a/js/src/vm/ErrorObject.h b/js/src/vm/ErrorObject.h index 0023c261e5e..458c148c258 100644 --- a/js/src/vm/ErrorObject.h +++ b/js/src/vm/ErrorObject.h @@ -9,8 +9,7 @@ #include "mozilla/ArrayUtils.h" -#include "jsobj.h" - +#include "vm/ObjectImpl.h" #include "vm/Shape.h" struct JSExnPrivate; @@ -23,7 +22,7 @@ js_InitExceptionClasses(JSContext *cx, JS::HandleObject obj); namespace js { -class ErrorObject : public JSObject +class ErrorObject : public NativeObject { static JSObject * createProto(JSContext *cx, JSProtoKey key); diff --git a/js/src/vm/GeneratorObject.h b/js/src/vm/GeneratorObject.h index a4f72445cae..889933e3cce 100644 --- a/js/src/vm/GeneratorObject.h +++ b/js/src/vm/GeneratorObject.h @@ -11,7 +11,7 @@ namespace js { -class 
LegacyGeneratorObject : public JSObject +class LegacyGeneratorObject : public NativeObject { public: static const Class class_; @@ -19,7 +19,7 @@ class LegacyGeneratorObject : public JSObject JSGenerator *getGenerator() { return static_cast(getPrivate()); } }; -class StarGeneratorObject : public JSObject +class StarGeneratorObject : public NativeObject { public: static const Class class_; diff --git a/js/src/vm/GlobalObject.cpp b/js/src/vm/GlobalObject.cpp index 874d133d266..776d46f6d3f 100644 --- a/js/src/vm/GlobalObject.cpp +++ b/js/src/vm/GlobalObject.cpp @@ -29,6 +29,7 @@ #include "vm/HelperThreads.h" #include "vm/PIC.h" #include "vm/RegExpStatics.h" +#include "vm/RegExpStaticsObject.h" #include "vm/StopIterationObject.h" #include "vm/WeakMapObject.h" @@ -207,13 +208,13 @@ GlobalObject::resolveConstructor(JSContext *cx, Handle global, JS GlobalObject::initBuiltinConstructor(JSContext *cx, Handle global, JSProtoKey key, HandleObject ctor, HandleObject proto) { - MOZ_ASSERT(!global->nativeEmpty()); // reserved slots already allocated + MOZ_ASSERT(!global->empty()); // reserved slots already allocated MOZ_ASSERT(key != JSProto_Null); MOZ_ASSERT(ctor); MOZ_ASSERT(proto); RootedId id(cx, NameToId(ClassName(key, cx))); - MOZ_ASSERT(!global->nativeLookup(cx, id)); + MOZ_ASSERT(!global->lookup(cx, id)); if (!global->addDataProperty(cx, id, constructorPropertySlot(key), 0)) return false; @@ -376,19 +377,20 @@ GlobalObject::createConstructor(JSContext *cx, Native ctor, JSAtom *nameArg, uns return NewFunction(cx, NullPtr(), ctor, length, JSFunction::NATIVE_CTOR, self, name, kind); } -static JSObject * +static NativeObject * CreateBlankProto(JSContext *cx, const Class *clasp, JSObject &proto, GlobalObject &global) { MOZ_ASSERT(clasp != &JSFunction::class_); - RootedObject blankProto(cx, NewObjectWithGivenProto(cx, clasp, &proto, &global, SingletonObject)); + RootedNativeObject blankProto(cx, NewNativeObjectWithGivenProto(cx, clasp, &proto, &global, + SingletonObject)); if 
(!blankProto || !blankProto->setDelegate(cx)) return nullptr; return blankProto; } -JSObject * +NativeObject * GlobalObject::createBlankPrototype(JSContext *cx, const Class *clasp) { Rooted self(cx, this); @@ -399,7 +401,7 @@ GlobalObject::createBlankPrototype(JSContext *cx, const Class *clasp) return CreateBlankProto(cx, clasp, *objectProto, *self.get()); } -JSObject * +NativeObject * GlobalObject::createBlankPrototypeInheriting(JSContext *cx, const Class *clasp, JSObject &proto) { return CreateBlankProto(cx, clasp, proto, *this); @@ -434,7 +436,7 @@ js::DefinePropertiesAndFunctions(JSContext *cx, HandleObject obj, static void GlobalDebuggees_finalize(FreeOp *fop, JSObject *obj) { - fop->delete_((GlobalObject::DebuggerVector *) obj->getPrivate()); + fop->delete_((GlobalObject::DebuggerVector *) obj->as().getPrivate()); } static const Class @@ -451,7 +453,7 @@ GlobalObject::getDebuggers() if (debuggers.isUndefined()) return nullptr; MOZ_ASSERT(debuggers.toObject().getClass() == &GlobalDebuggees_class); - return (DebuggerVector *) debuggers.toObject().getPrivate(); + return (DebuggerVector *) debuggers.toObject().as().getPrivate(); } /* static */ GlobalObject::DebuggerVector * @@ -462,7 +464,7 @@ GlobalObject::getOrCreateDebuggers(JSContext *cx, Handle global) if (debuggers) return debuggers; - JSObject *obj = NewObjectWithGivenProto(cx, &GlobalDebuggees_class, nullptr, global); + NativeObject *obj = NewNativeObjectWithGivenProto(cx, &GlobalDebuggees_class, nullptr, global); if (!obj) return nullptr; debuggers = cx->new_(); @@ -473,11 +475,11 @@ GlobalObject::getOrCreateDebuggers(JSContext *cx, Handle global) return debuggers; } -/* static */ JSObject * +/* static */ NativeObject * GlobalObject::getOrCreateForOfPICObject(JSContext *cx, Handle global) { assertSameCompartment(cx, global); - JSObject *forOfPIC = global->getForOfPICObject(); + NativeObject *forOfPIC = global->getForOfPICObject(); if (forOfPIC) return forOfPIC; @@ -500,7 +502,7 @@ 
GlobalObject::getRegExpStatics(ExclusiveContext *cx) const MOZ_ASSERT(cx); Rooted self(cx, const_cast(this)); - JSObject *resObj = nullptr; + RegExpStaticsObject *resObj = nullptr; const Value &val = this->getSlot(REGEXP_STATICS); if (!val.isObject()) { MOZ_ASSERT(val.isUndefined()); @@ -510,7 +512,7 @@ GlobalObject::getRegExpStatics(ExclusiveContext *cx) const self->initSlot(REGEXP_STATICS, ObjectValue(*resObj)); } else { - resObj = &val.toObject(); + resObj = &val.toObject().as(); } return static_cast(resObj->getPrivate(/* nfixed = */ 1)); } @@ -520,7 +522,7 @@ GlobalObject::getAlreadyCreatedRegExpStatics() const { const Value &val = this->getSlot(REGEXP_STATICS); MOZ_ASSERT(val.isObject()); - return static_cast(val.toObject().getPrivate(/* nfixed = */ 1)); + return static_cast(val.toObject().as().getPrivate(/* nfixed = */ 1)); } bool @@ -547,7 +549,7 @@ GlobalObject::getSelfHostedFunction(JSContext *cx, HandleAtom selfHostedName, Ha bool GlobalObject::addIntrinsicValue(JSContext *cx, HandleId id, HandleValue value) { - RootedObject holder(cx, intrinsicsHolder()); + RootedNativeObject holder(cx, intrinsicsHolder()); uint32_t slot = holder->slotSpan(); RootedShape last(cx, holder->lastProperty()); @@ -558,7 +560,7 @@ GlobalObject::addIntrinsicValue(JSContext *cx, HandleId id, HandleValue value) if (!shape) return false; - if (!JSObject::setLastProperty(cx, holder, shape)) + if (!NativeObject::setLastProperty(cx, holder, shape)) return false; holder->setSlot(shape->slot(), value); diff --git a/js/src/vm/GlobalObject.h b/js/src/vm/GlobalObject.h index 38409819ff3..771fe315d63 100644 --- a/js/src/vm/GlobalObject.h +++ b/js/src/vm/GlobalObject.h @@ -57,7 +57,7 @@ class TypedObjectModuleObject; * even deletable) Object, Array, &c. properties (although a slot won't be used * again if its property is deleted and readded). */ -class GlobalObject : public JSObject +class GlobalObject : public NativeObject { /* Count of slots set aside for application use. 
*/ static const unsigned APPLICATION_SLOTS = JSCLASS_GLOBAL_APPLICATION_SLOTS; @@ -278,72 +278,72 @@ class GlobalObject : public JSObject * complete the minimal initialization to make the returned object safe to * touch. */ - JSObject *createBlankPrototype(JSContext *cx, const js::Class *clasp); + NativeObject *createBlankPrototype(JSContext *cx, const js::Class *clasp); /* * Identical to createBlankPrototype, but uses proto as the [[Prototype]] * of the returned blank prototype. */ - JSObject *createBlankPrototypeInheriting(JSContext *cx, const js::Class *clasp, JSObject &proto); + NativeObject *createBlankPrototypeInheriting(JSContext *cx, const js::Class *clasp, JSObject &proto); - JSObject *getOrCreateObjectPrototype(JSContext *cx) { + NativeObject *getOrCreateObjectPrototype(JSContext *cx) { if (functionObjectClassesInitialized()) - return &getPrototype(JSProto_Object).toObject(); + return &getPrototype(JSProto_Object).toObject().as(); Rooted self(cx, this); if (!ensureConstructor(cx, self, JSProto_Object)) return nullptr; - return &self->getPrototype(JSProto_Object).toObject(); + return &self->getPrototype(JSProto_Object).toObject().as(); } - JSObject *getOrCreateFunctionPrototype(JSContext *cx) { + NativeObject *getOrCreateFunctionPrototype(JSContext *cx) { if (functionObjectClassesInitialized()) - return &getPrototype(JSProto_Function).toObject(); + return &getPrototype(JSProto_Function).toObject().as(); Rooted self(cx, this); if (!ensureConstructor(cx, self, JSProto_Object)) return nullptr; - return &self->getPrototype(JSProto_Function).toObject(); + return &self->getPrototype(JSProto_Function).toObject().as(); } - static JSObject *getOrCreateArrayPrototype(JSContext *cx, Handle global) { + static NativeObject *getOrCreateArrayPrototype(JSContext *cx, Handle global) { if (!ensureConstructor(cx, global, JSProto_Array)) return nullptr; - return &global->getPrototype(JSProto_Array).toObject(); + return &global->getPrototype(JSProto_Array).toObject().as(); } - 
JSObject *maybeGetArrayPrototype() { + NativeObject *maybeGetArrayPrototype() { if (arrayClassInitialized()) - return &getPrototype(JSProto_Array).toObject(); + return &getPrototype(JSProto_Array).toObject().as(); return nullptr; } - static JSObject *getOrCreateBooleanPrototype(JSContext *cx, Handle global) { + static NativeObject *getOrCreateBooleanPrototype(JSContext *cx, Handle global) { if (!ensureConstructor(cx, global, JSProto_Boolean)) return nullptr; - return &global->getPrototype(JSProto_Boolean).toObject(); + return &global->getPrototype(JSProto_Boolean).toObject().as(); } - static JSObject *getOrCreateNumberPrototype(JSContext *cx, Handle global) { + static NativeObject *getOrCreateNumberPrototype(JSContext *cx, Handle global) { if (!ensureConstructor(cx, global, JSProto_Number)) return nullptr; - return &global->getPrototype(JSProto_Number).toObject(); + return &global->getPrototype(JSProto_Number).toObject().as(); } - static JSObject *getOrCreateStringPrototype(JSContext *cx, Handle global) { + static NativeObject *getOrCreateStringPrototype(JSContext *cx, Handle global) { if (!ensureConstructor(cx, global, JSProto_String)) return nullptr; - return &global->getPrototype(JSProto_String).toObject(); + return &global->getPrototype(JSProto_String).toObject().as(); } - static JSObject *getOrCreateSymbolPrototype(JSContext *cx, Handle global) { + static NativeObject *getOrCreateSymbolPrototype(JSContext *cx, Handle global) { if (!ensureConstructor(cx, global, JSProto_Symbol)) return nullptr; - return &global->getPrototype(JSProto_Symbol).toObject(); + return &global->getPrototype(JSProto_Symbol).toObject().as(); } - static JSObject *getOrCreateRegExpPrototype(JSContext *cx, Handle global) { + static NativeObject *getOrCreateRegExpPrototype(JSContext *cx, Handle global) { if (!ensureConstructor(cx, global, JSProto_RegExp)) return nullptr; - return &global->getPrototype(JSProto_RegExp).toObject(); + return &global->getPrototype(JSProto_RegExp).toObject().as(); 
} JSObject *maybeGetRegExpPrototype() { @@ -448,52 +448,54 @@ class GlobalObject : public JSObject } public: - static JSObject *getOrCreateIteratorPrototype(JSContext *cx, - Handle global) + static NativeObject *getOrCreateIteratorPrototype(JSContext *cx, + Handle global) { if (!ensureConstructor(cx, global, JSProto_Iterator)) return nullptr; - return &global->getSlot(APPLICATION_SLOTS + JSProto_LIMIT + JSProto_Iterator).toObject(); + size_t slot = APPLICATION_SLOTS + JSProto_LIMIT + JSProto_Iterator; + return &global->getSlot(slot).toObject().as(); } - static JSObject *getOrCreateArrayIteratorPrototype(JSContext *cx, - Handle global) + static NativeObject *getOrCreateArrayIteratorPrototype(JSContext *cx, + Handle global) { if (!ensureConstructor(cx, global, JSProto_Iterator)) return nullptr; - return &global->getSlot(ARRAY_ITERATOR_PROTO).toObject(); + return &global->getSlot(ARRAY_ITERATOR_PROTO).toObject().as(); } - static JSObject *getOrCreateStringIteratorPrototype(JSContext *cx, - Handle global) + static NativeObject *getOrCreateStringIteratorPrototype(JSContext *cx, + Handle global) { if (!ensureConstructor(cx, global, JSProto_Iterator)) return nullptr; - return &global->getSlot(STRING_ITERATOR_PROTO).toObject(); + return &global->getSlot(STRING_ITERATOR_PROTO).toObject().as(); } - static JSObject *getOrCreateLegacyGeneratorObjectPrototype(JSContext *cx, - Handle global) + static NativeObject *getOrCreateLegacyGeneratorObjectPrototype(JSContext *cx, + Handle global) { if (!ensureConstructor(cx, global, JSProto_Iterator)) return nullptr; - return &global->getSlot(LEGACY_GENERATOR_OBJECT_PROTO).toObject(); + return &global->getSlot(LEGACY_GENERATOR_OBJECT_PROTO).toObject().as(); } - static JSObject *getOrCreateStarGeneratorObjectPrototype(JSContext *cx, - Handle global) + static NativeObject *getOrCreateStarGeneratorObjectPrototype(JSContext *cx, + Handle global) { if (!ensureConstructor(cx, global, JSProto_Iterator)) return nullptr; - return 
&global->getSlot(STAR_GENERATOR_OBJECT_PROTO).toObject(); + return &global->getSlot(STAR_GENERATOR_OBJECT_PROTO).toObject().as(); } - static JSObject *getOrCreateStarGeneratorFunctionPrototype(JSContext *cx, - Handle global) + static NativeObject *getOrCreateStarGeneratorFunctionPrototype(JSContext *cx, + Handle global) { if (!ensureConstructor(cx, global, JSProto_Iterator)) return nullptr; - return &global->getSlot(APPLICATION_SLOTS + JSProto_LIMIT + JSProto_GeneratorFunction).toObject(); + size_t slot = APPLICATION_SLOTS + JSProto_LIMIT + JSProto_GeneratorFunction; + return &global->getSlot(slot).toObject().as(); } static JSObject *getOrCreateStarGeneratorFunction(JSContext *cx, @@ -523,15 +525,15 @@ class GlobalObject : public JSObject return &self->getPrototype(JSProto_DataView).toObject(); } - JSObject *intrinsicsHolder() { + NativeObject *intrinsicsHolder() { MOZ_ASSERT(!getSlot(INTRINSICS).isUndefined()); - return &getSlot(INTRINSICS).toObject(); + return &getSlot(INTRINSICS).toObject().as(); } bool maybeGetIntrinsicValue(jsid id, Value *vp) { - JSObject *holder = intrinsicsHolder(); + NativeObject *holder = intrinsicsHolder(); - if (Shape *shape = holder->nativeLookupPure(id)) { + if (Shape *shape = holder->lookupPure(id)) { *vp = holder->getSlot(shape->slot()); return true; } @@ -644,13 +646,13 @@ class GlobalObject : public JSObject */ static DebuggerVector *getOrCreateDebuggers(JSContext *cx, Handle global); - inline JSObject *getForOfPICObject() { + inline NativeObject *getForOfPICObject() { Value forOfPIC = getReservedSlot(FOR_OF_PIC_CHAIN); if (forOfPIC.isUndefined()) return nullptr; - return &forOfPIC.toObject(); + return &forOfPIC.toObject().as(); } - static JSObject *getOrCreateForOfPICObject(JSContext *cx, Handle global); + static NativeObject *getOrCreateForOfPICObject(JSContext *cx, Handle global); }; template<> diff --git a/js/src/vm/Interpreter-inl.h b/js/src/vm/Interpreter-inl.h index d575de9b29d..db62d9e7feb 100644 --- 
a/js/src/vm/Interpreter-inl.h +++ b/js/src/vm/Interpreter-inl.h @@ -137,8 +137,8 @@ IsUninitializedLexicalSlot(HandleObject obj, HandleShape shape) { return false; } - MOZ_ASSERT(obj->nativeContainsPure(shape)); - return IsUninitializedLexical(obj->nativeGetSlot(shape->slot())); + MOZ_ASSERT(obj->as().containsPure(shape)); + return IsUninitializedLexical(obj->as().getSlot(shape->slot())); } static inline bool @@ -243,15 +243,16 @@ FetchName(JSContext *cx, HandleObject obj, HandleObject obj2, HandlePropertyName if (!JSObject::getGeneric(cx, obj, obj, id, vp)) return false; } else { - Rooted normalized(cx, obj); + RootedObject normalized(cx, obj); if (normalized->is() && !shape->hasDefaultGetter()) normalized = &normalized->as().object(); if (shape->isDataDescriptor() && shape->hasDefaultGetter()) { /* Fast path for Object instance properties. */ MOZ_ASSERT(shape->hasSlot()); - vp.set(obj2->nativeGetSlot(shape->slot())); - } else if (!NativeGet(cx, normalized, obj2, shape, vp)) { - return false; + vp.set(obj2->as().getSlot(shape->slot())); + } else { + if (!NativeGet(cx, normalized, obj2.as(), shape, vp)) + return false; } } @@ -266,7 +267,7 @@ FetchNameNoGC(JSObject *pobj, Shape *shape, MutableHandleValue vp) if (!shape || !pobj->isNative() || !shape->isDataDescriptor() || !shape->hasDefaultGetter()) return false; - vp.set(pobj->nativeGetSlot(shape->slot())); + vp.set(pobj->as().getSlot(shape->slot())); return !IsUninitializedLexical(vp); } @@ -319,8 +320,10 @@ SetNameOperation(JSContext *cx, JSScript *script, jsbytecode *pc, HandleObject s if (scope->isUnqualifiedVarObj()) { MOZ_ASSERT(!scope->getOps()->setProperty); RootedId id(cx, NameToId(name)); - return baseops::SetPropertyHelper(cx, scope, scope, id, - baseops::Unqualified, &valCopy, + return baseops::SetPropertyHelper(cx, + scope.as(), + scope.as(), + id, baseops::Unqualified, &valCopy, strict); } diff --git a/js/src/vm/Interpreter.cpp b/js/src/vm/Interpreter.cpp index ecf8813c21f..268a0563daf 100644 --- 
a/js/src/vm/Interpreter.cpp +++ b/js/src/vm/Interpreter.cpp @@ -47,6 +47,7 @@ #include "jsscriptinlines.h" #include "jit/IonFrames-inl.h" +#include "vm/ObjectImpl-inl.h" #include "vm/Probes-inl.h" #include "vm/ScopeObject-inl.h" #include "vm/Stack-inl.h" @@ -168,7 +169,7 @@ js::OnUnknownMethod(JSContext *cx, HandleObject obj, Value idval_, MutableHandle return false; if (value.isObject()) { - JSObject *obj = NewObjectWithClassProto(cx, &js_NoSuchMethodClass, nullptr, nullptr); + NativeObject *obj = NewNativeObjectWithClassProto(cx, &js_NoSuchMethodClass, nullptr, nullptr); if (!obj) return false; @@ -188,7 +189,7 @@ NoSuchMethod(JSContext *cx, unsigned argc, Value *vp) MOZ_ASSERT(vp[0].isObject()); MOZ_ASSERT(vp[1].isObject()); - JSObject *obj = &vp[0].toObject(); + NativeObject *obj = &vp[0].toObject().as(); MOZ_ASSERT(obj->getClass() == &js_NoSuchMethodClass); args.setCallee(obj->getReservedSlot(JSSLOT_FOUND_FUNCTION)); @@ -233,7 +234,7 @@ GetPropertyOperation(JSContext *cx, InterpreterFrame *fp, HandleScript script, j /* Optimize (.1).toString(). 
*/ if (lval.isNumber() && id == NameToId(cx->names().toString)) { - JSObject *proto = GlobalObject::getOrCreateNumberPrototype(cx, global); + NativeObject *proto = GlobalObject::getOrCreateNumberPrototype(cx, global); if (!proto) return false; if (ClassMethodIsNative(cx, proto, &NumberObject::class_, id, js_num_toString)) @@ -317,7 +318,11 @@ SetPropertyOperation(JSContext *cx, HandleScript script, jsbytecode *pc, HandleV RootedId id(cx, NameToId(script->getName(pc))); if (MOZ_LIKELY(!obj->getOps()->setProperty)) { - if (!baseops::SetPropertyHelper(cx, obj, obj, id, baseops::Qualified, + if (!baseops::SetPropertyHelper(cx, + obj.as(), + obj.as(), + id, + baseops::Qualified, &rref, script->strict())) { return false; @@ -1301,7 +1306,7 @@ SetObjectElementOperation(JSContext *cx, Handle obj, HandleId id, con types::TypeScript::MonitorAssign(cx, obj, id); if (obj->isNative() && JSID_IS_INT(id)) { - uint32_t length = obj->getDenseInitializedLength(); + uint32_t length = obj->as().getDenseInitializedLength(); int32_t i = JSID_TO_INT(id); if ((uint32_t)i >= length) { // Annotate script if provided with information (e.g. 
baseline) @@ -1464,6 +1469,7 @@ Interpret(JSContext *cx, RunState &state) RootedValue rootValue0(cx), rootValue1(cx); RootedString rootString0(cx), rootString1(cx); RootedObject rootObject0(cx), rootObject1(cx), rootObject2(cx); + RootedNativeObject rootNativeObject0(cx); RootedFunction rootFunction0(cx); RootedTypeObject rootType0(cx); RootedPropertyName rootName0(cx); @@ -2402,7 +2408,7 @@ CASE(JSOP_SETGNAME) CASE(JSOP_SETNAME) { RootedObject &scope = rootObject0; - scope = REGS.sp[-2].toObjectOrNull(); + scope = &REGS.sp[-2].toObject(); HandleValue value = REGS.stackHandleAt(-1); if (!SetNameOperation(cx, script, REGS.pc, scope, value)) @@ -2716,7 +2722,7 @@ END_CASE(JSOP_TOSTRING) CASE(JSOP_OBJECT) { - RootedObject &ref = rootObject0; + RootedNativeObject &ref = rootNativeObject0; ref = script->getObject(REGS.pc); if (JS::CompartmentOptionsRef(cx).cloneSingletons()) { JSObject *obj = js::DeepCloneObjectLiteral(cx, ref, js::MaybeSingletonObject); @@ -3100,7 +3106,7 @@ END_CASE(JSOP_NEWARRAY) CASE(JSOP_NEWARRAY_COPYONWRITE) { - RootedObject &baseobj = rootObject0; + RootedNativeObject &baseobj = rootNativeObject0; baseobj = types::GetOrFixupCopyOnWriteObject(cx, script, REGS.pc); if (!baseobj) goto error; @@ -3116,7 +3122,7 @@ END_CASE(JSOP_NEWARRAY_COPYONWRITE) CASE(JSOP_NEWOBJECT) { - RootedObject &baseobj = rootObject0; + RootedNativeObject &baseobj = rootNativeObject0; baseobj = script->getObject(REGS.pc); RootedObject &obj = rootObject1; @@ -3167,8 +3173,8 @@ CASE(JSOP_INITPROP) rval = REGS.sp[-1]; /* Load the object being initialized into lval/obj.
*/ - RootedObject &obj = rootObject0; - obj = &REGS.sp[-2].toObject(); + RootedNativeObject &obj = rootNativeObject0; + obj = &REGS.sp[-2].toObject().as(); MOZ_ASSERT(obj->is()); PropertyName *name = script->getName(REGS.pc); @@ -3961,8 +3967,8 @@ bool js::SpreadCallOperation(JSContext *cx, HandleScript script, jsbytecode *pc, HandleValue thisv, HandleValue callee, HandleValue arr, MutableHandleValue res) { - RootedObject aobj(cx, &arr.toObject()); - uint32_t length = aobj->as().length(); + RootedArrayObject aobj(cx, &arr.toObject().as()); + uint32_t length = aobj->length(); JSOp op = JSOp(*pc); if (length > ARGS_LENGTH_MAX) { diff --git a/js/src/vm/NumberObject.h b/js/src/vm/NumberObject.h index 4617b2ae87c..0e1a7ae58c1 100644 --- a/js/src/vm/NumberObject.h +++ b/js/src/vm/NumberObject.h @@ -11,7 +11,7 @@ namespace js { -class NumberObject : public JSObject +class NumberObject : public NativeObject { /* Stores this Number object's [[PrimitiveValue]]. */ static const unsigned PRIMITIVE_VALUE_SLOT = 0; diff --git a/js/src/vm/ObjectImpl-inl.h b/js/src/vm/ObjectImpl-inl.h index 03ccaf41d65..1c4c335c79b 100644 --- a/js/src/vm/ObjectImpl-inl.h +++ b/js/src/vm/ObjectImpl-inl.h @@ -16,44 +16,453 @@ #include "vm/ProxyObject.h" #include "vm/TypedArrayObject.h" +#include "jsobjinlines.h" + namespace js { -/* static */ inline bool -ObjectImpl::isExtensible(ExclusiveContext *cx, Handle obj, bool *extensible) -{ - if (obj->asObjectPtr()->is()) { - if (!cx->shouldBeJSContext()) - return false; - HandleObject h = - HandleObject::fromMarkedLocation(reinterpret_cast(obj.address())); - return Proxy::isExtensible(cx->asJSContext(), h, extensible); - } - - *extensible = obj->nonProxyIsExtensible(); - return true; -} - -inline bool -ClassCanHaveFixedData(const Class *clasp) -{ - // Normally, the number of fixed slots given an object is the maximum - // permitted for its size class.
For array buffers and non-shared typed - // arrays we only use enough to cover the class reserved slots, so that - // the remaining space in the object's allocation is available for the - // buffer's data. - return clasp == &ArrayBufferObject::class_ - || clasp == &InlineOpaqueTypedObject::class_ - || IsTypedArrayClass(clasp); -} - inline uint8_t * -ObjectImpl::fixedData(size_t nslots) const +NativeObject::fixedData(size_t nslots) const { MOZ_ASSERT(ClassCanHaveFixedData(getClass())); MOZ_ASSERT(nslots == numFixedSlots() + (hasPrivate() ? 1 : 0)); return reinterpret_cast(&fixedSlots()[nslots]); } +/* static */ inline bool +NativeObject::changePropertyAttributes(JSContext *cx, HandleNativeObject obj, + HandleShape shape, unsigned attrs) +{ + return !!changeProperty(cx, obj, shape, attrs, 0, + shape->getter(), shape->setter()); +} + +inline void +NativeObject::removeLastProperty(ExclusiveContext *cx) +{ + MOZ_ASSERT(canRemoveLastProperty()); + RootedNativeObject self(cx, this); + RootedShape prev(cx, lastProperty()->previous()); + JS_ALWAYS_TRUE(setLastProperty(cx, self, prev)); +} + +inline bool +NativeObject::canRemoveLastProperty() +{ + /* + * Check that the information about the object stored in the last + * property's base shape is consistent with that stored in the previous + * shape. If not consistent, then the last property cannot be removed as it + * will induce a change in the object itself, and the object must be + * converted to dictionary mode instead. 
See BaseShape comment in jsscope.h + */ + MOZ_ASSERT(!inDictionaryMode()); + Shape *previous = lastProperty()->previous().get(); + return previous->getObjectParent() == lastProperty()->getObjectParent() + && previous->getObjectMetadata() == lastProperty()->getObjectMetadata() + && previous->getObjectFlags() == lastProperty()->getObjectFlags(); +} + +inline void +NativeObject::setShouldConvertDoubleElements() +{ + MOZ_ASSERT(is() && !hasEmptyElements()); + getElementsHeader()->setShouldConvertDoubleElements(); +} + +inline void +NativeObject::clearShouldConvertDoubleElements() +{ + MOZ_ASSERT(is() && !hasEmptyElements()); + getElementsHeader()->clearShouldConvertDoubleElements(); +} + +inline bool +NativeObject::setDenseElementIfHasType(uint32_t index, const Value &val) +{ + if (!types::HasTypePropertyId(this, JSID_VOID, val)) + return false; + setDenseElementMaybeConvertDouble(index, val); + return true; +} + +inline void +NativeObject::setDenseElementWithType(ExclusiveContext *cx, uint32_t index, + const Value &val) +{ + // Avoid a slow AddTypePropertyId call if the type is the same as the type + // of the previous element. 
+ types::Type thisType = types::GetValueType(val); + if (index == 0 || types::GetValueType(elements[index - 1]) != thisType) + types::AddTypePropertyId(cx, this, JSID_VOID, thisType); + setDenseElementMaybeConvertDouble(index, val); +} + +inline void +NativeObject::initDenseElementWithType(ExclusiveContext *cx, uint32_t index, + const Value &val) +{ + MOZ_ASSERT(!shouldConvertDoubleElements()); + types::AddTypePropertyId(cx, this, JSID_VOID, val); + initDenseElement(index, val); +} + +inline void +NativeObject::setDenseElementHole(ExclusiveContext *cx, uint32_t index) +{ + types::MarkTypeObjectFlags(cx, this, types::OBJECT_FLAG_NON_PACKED); + setDenseElement(index, MagicValue(JS_ELEMENTS_HOLE)); +} + +/* static */ inline void +NativeObject::removeDenseElementForSparseIndex(ExclusiveContext *cx, + HandleNativeObject obj, uint32_t index) +{ + types::MarkTypeObjectFlags(cx, obj, + types::OBJECT_FLAG_NON_PACKED | + types::OBJECT_FLAG_SPARSE_INDEXES); + if (obj->containsDenseElement(index)) + obj->setDenseElement(index, MagicValue(JS_ELEMENTS_HOLE)); +} + +inline bool +NativeObject::writeToIndexWouldMarkNotPacked(uint32_t index) +{ + return getElementsHeader()->initializedLength < index; +} + +inline void +NativeObject::markDenseElementsNotPacked(ExclusiveContext *cx) +{ + MOZ_ASSERT(isNative()); + MarkTypeObjectFlags(cx, this, types::OBJECT_FLAG_NON_PACKED); +} + +inline void +NativeObject::ensureDenseInitializedLengthNoPackedCheck(ThreadSafeContext *cx, uint32_t index, + uint32_t extra) +{ + MOZ_ASSERT(cx->isThreadLocal(this)); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + + /* + * Ensure that the array's contents have been initialized up to index, and + * mark the elements through 'index + extra' as initialized in preparation + * for a write. 
+ */ + MOZ_ASSERT(index + extra <= getDenseCapacity()); + uint32_t &initlen = getElementsHeader()->initializedLength; + + if (initlen < index + extra) { + size_t offset = initlen; + for (HeapSlot *sp = elements + initlen; + sp != elements + (index + extra); + sp++, offset++) + { + sp->init(this, HeapSlot::Element, offset, MagicValue(JS_ELEMENTS_HOLE)); + } + initlen = index + extra; + } +} + +inline void +NativeObject::ensureDenseInitializedLength(ExclusiveContext *cx, uint32_t index, uint32_t extra) +{ + if (writeToIndexWouldMarkNotPacked(index)) + markDenseElementsNotPacked(cx); + ensureDenseInitializedLengthNoPackedCheck(cx, index, extra); +} + +inline void +NativeObject::ensureDenseInitializedLengthPreservePackedFlag(ThreadSafeContext *cx, + uint32_t index, uint32_t extra) +{ + MOZ_ASSERT(!writeToIndexWouldMarkNotPacked(index)); + ensureDenseInitializedLengthNoPackedCheck(cx, index, extra); +} + +NativeObject::EnsureDenseResult +NativeObject::extendDenseElements(ThreadSafeContext *cx, + uint32_t requiredCapacity, uint32_t extra) +{ + MOZ_ASSERT(cx->isThreadLocal(this)); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + + /* + * Don't grow elements for non-extensible objects or watched objects. Dense + * elements can be added/written with no extensible or watchpoint checks as + * long as there is capacity for them. + */ + if (!nonProxyIsExtensible() || watched()) { + MOZ_ASSERT(getDenseCapacity() == 0); + return ED_SPARSE; + } + + /* + * Don't grow elements for objects which already have sparse indexes. + * This avoids needing to count non-hole elements in willBeSparseElements + * every time a new index is added. + */ + if (isIndexed()) + return ED_SPARSE; + + /* + * We use the extra argument also as a hint about number of non-hole + * elements to be inserted. 
+ */ + if (requiredCapacity > MIN_SPARSE_INDEX && + willBeSparseElements(requiredCapacity, extra)) { + return ED_SPARSE; + } + + if (!growElements(cx, requiredCapacity)) + return ED_FAILED; + + return ED_OK; +} + +inline NativeObject::EnsureDenseResult +NativeObject::ensureDenseElementsNoPackedCheck(ThreadSafeContext *cx, uint32_t index, uint32_t extra) +{ + MOZ_ASSERT(isNative()); + + if (!maybeCopyElementsForWrite(cx)) + return ED_FAILED; + + uint32_t currentCapacity = getDenseCapacity(); + + uint32_t requiredCapacity; + if (extra == 1) { + /* Optimize for the common case. */ + if (index < currentCapacity) { + ensureDenseInitializedLengthNoPackedCheck(cx, index, 1); + return ED_OK; + } + requiredCapacity = index + 1; + if (requiredCapacity == 0) { + /* Overflow. */ + return ED_SPARSE; + } + } else { + requiredCapacity = index + extra; + if (requiredCapacity < index) { + /* Overflow. */ + return ED_SPARSE; + } + if (requiredCapacity <= currentCapacity) { + ensureDenseInitializedLengthNoPackedCheck(cx, index, extra); + return ED_OK; + } + } + + EnsureDenseResult edr = extendDenseElements(cx, requiredCapacity, extra); + if (edr != ED_OK) + return edr; + + ensureDenseInitializedLengthNoPackedCheck(cx, index, extra); + return ED_OK; +} + +inline NativeObject::EnsureDenseResult +NativeObject::ensureDenseElements(ExclusiveContext *cx, uint32_t index, uint32_t extra) +{ + if (writeToIndexWouldMarkNotPacked(index)) + markDenseElementsNotPacked(cx); + return ensureDenseElementsNoPackedCheck(cx, index, extra); +} + +inline NativeObject::EnsureDenseResult +NativeObject::ensureDenseElementsPreservePackedFlag(ThreadSafeContext *cx, uint32_t index, + uint32_t extra) +{ + MOZ_ASSERT(!writeToIndexWouldMarkNotPacked(index)); + return ensureDenseElementsNoPackedCheck(cx, index, extra); +} + +inline Value +NativeObject::getDenseOrTypedArrayElement(uint32_t idx) +{ + if (is()) + return as().getElement(idx); + if (is()) + return as().getElement(idx); + return getDenseElement(idx); +} 
+ +inline void +NativeObject::initDenseElementsUnbarriered(uint32_t dstStart, const Value *src, uint32_t count) { + /* + * For use by parallel threads, which since they cannot see nursery + * things do not require a barrier. + */ + MOZ_ASSERT(dstStart + count <= getDenseCapacity()); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); +#if defined(DEBUG) && defined(JSGC_GENERATIONAL) + /* + * This asserts a global invariant: parallel code does not + * observe objects inside the generational GC's nursery. + */ + MOZ_ASSERT(!gc::IsInsideGGCNursery(this)); + for (uint32_t index = 0; index < count; ++index) { + const Value& value = src[index]; + if (value.isMarkable()) + MOZ_ASSERT(!gc::IsInsideGGCNursery(static_cast(value.toGCThing()))); + } +#endif + memcpy(&elements[dstStart], src, count * sizeof(HeapSlot)); +} + +/* static */ inline NativeObject * +NativeObject::copy(ExclusiveContext *cx, gc::AllocKind kind, gc::InitialHeap heap, + HandleNativeObject templateObject) +{ + RootedShape shape(cx, templateObject->lastProperty()); + RootedTypeObject type(cx, templateObject->type()); + MOZ_ASSERT(!templateObject->denseElementsAreCopyOnWrite()); + + JSObject *baseObj = create(cx, kind, heap, shape, type); + if (!baseObj) + return nullptr; + NativeObject *obj = &baseObj->as(); + + size_t span = shape->slotSpan(); + if (span) { + uint32_t numFixed = templateObject->numFixedSlots(); + const Value *fixed = &templateObject->getSlot(0); + MOZ_ASSERT(numFixed <= span); + obj->copySlotRange(0, fixed, numFixed); + + if (numFixed < span) { + uint32_t numSlots = span - numFixed; + const Value *slots = &templateObject->getSlot(numFixed); + obj->copySlotRange(numFixed, slots, numSlots); + } + } + + return obj; +} + +inline bool +NativeObject::setSlotIfHasType(Shape *shape, const Value &value, bool overwriting) +{ + if (!types::HasTypePropertyId(this, shape->propid(), value)) + return false; + setSlot(shape->slot(), value); + + if (overwriting) + shape->setOverwritten(); + + return true; +} + 
+inline void +NativeObject::setSlotWithType(ExclusiveContext *cx, Shape *shape, + const Value &value, bool overwriting) +{ + setSlot(shape->slot(), value); + + if (overwriting) + shape->setOverwritten(); + + types::AddTypePropertyId(cx, this, shape->propid(), value); +} + +/* Make an object with pregenerated shape from a NEWOBJECT bytecode. */ +static inline NativeObject * +CopyInitializerObject(JSContext *cx, HandleNativeObject baseobj, NewObjectKind newKind = GenericObject) +{ + MOZ_ASSERT(baseobj->getClass() == &JSObject::class_); + MOZ_ASSERT(!baseobj->inDictionaryMode()); + + gc::AllocKind allocKind = gc::GetGCObjectFixedSlotsKind(baseobj->numFixedSlots()); + allocKind = gc::GetBackgroundAllocKind(allocKind); + MOZ_ASSERT_IF(baseobj->isTenured(), allocKind == baseobj->asTenured().getAllocKind()); + JSObject *baseObj = NewBuiltinClassInstance(cx, &JSObject::class_, allocKind, newKind); + if (!baseObj) + return nullptr; + RootedNativeObject obj(cx, &baseObj->as()); + + RootedObject metadata(cx, obj->getMetadata()); + RootedShape lastProp(cx, baseobj->lastProperty()); + if (!NativeObject::setLastProperty(cx, obj, lastProp)) + return nullptr; + if (metadata && !JSObject::setMetadata(cx, obj, metadata)) + return nullptr; + + return obj; +} + +inline NativeObject * +NewNativeObjectWithGivenProto(ExclusiveContext *cx, const js::Class *clasp, + TaggedProto proto, JSObject *parent, + gc::AllocKind allocKind, NewObjectKind newKind) +{ + return MaybeNativeObject(NewObjectWithGivenProto(cx, clasp, proto, parent, allocKind, newKind)); +} + +inline NativeObject * +NewNativeObjectWithGivenProto(ExclusiveContext *cx, const js::Class *clasp, + TaggedProto proto, JSObject *parent, + NewObjectKind newKind = GenericObject) +{ + return MaybeNativeObject(NewObjectWithGivenProto(cx, clasp, proto, parent, newKind)); +} + +inline NativeObject * +NewNativeObjectWithGivenProto(ExclusiveContext *cx, const js::Class *clasp, + JSObject *proto, JSObject *parent, + NewObjectKind newKind = 
GenericObject) +{ + return MaybeNativeObject(NewObjectWithGivenProto(cx, clasp, proto, parent, newKind)); +} + +inline NativeObject * +NewNativeBuiltinClassInstance(ExclusiveContext *cx, const Class *clasp, + gc::AllocKind allocKind, NewObjectKind newKind = GenericObject) +{ + return MaybeNativeObject(NewBuiltinClassInstance(cx, clasp, allocKind, newKind)); +} + +inline NativeObject * +NewNativeBuiltinClassInstance(ExclusiveContext *cx, const Class *clasp, + NewObjectKind newKind = GenericObject) +{ + return MaybeNativeObject(NewBuiltinClassInstance(cx, clasp, newKind)); +} + +inline NativeObject * +NewNativeObjectWithClassProto(ExclusiveContext *cx, const js::Class *clasp, JSObject *proto, JSObject *parent, + gc::AllocKind allocKind, NewObjectKind newKind = GenericObject) +{ + return MaybeNativeObject(NewObjectWithClassProto(cx, clasp, proto, parent, allocKind, newKind)); +} + +inline NativeObject * +NewNativeObjectWithClassProto(ExclusiveContext *cx, const js::Class *clasp, JSObject *proto, JSObject *parent, + NewObjectKind newKind = GenericObject) +{ + return MaybeNativeObject(NewObjectWithClassProto(cx, clasp, proto, parent, newKind)); +} + +inline NativeObject * +NewNativeObjectWithType(JSContext *cx, HandleTypeObject type, JSObject *parent, gc::AllocKind allocKind, + NewObjectKind newKind = GenericObject) +{ + return MaybeNativeObject(NewObjectWithType(cx, type, parent, allocKind, newKind)); +} + +inline NativeObject * +NewNativeObjectWithType(JSContext *cx, HandleTypeObject type, JSObject *parent, + NewObjectKind newKind = GenericObject) +{ + return MaybeNativeObject(NewObjectWithType(cx, type, parent, newKind)); +} + } // namespace js +inline uint8_t * +JSObject::fakeNativeFixedData(size_t nslots) const +{ + return static_cast(this)->fixedData(nslots); +} + #endif /* vm_ObjectImpl_inl_h */ diff --git a/js/src/vm/ObjectImpl.cpp b/js/src/vm/ObjectImpl.cpp index 296df68e30c..132c78e8cc4 100644 --- a/js/src/vm/ObjectImpl.cpp +++ b/js/src/vm/ObjectImpl.cpp @@ 
-76,10 +76,9 @@ HeapSlot *const js::emptyObjectElements = #ifdef DEBUG bool -ObjectImpl::canHaveNonEmptyElements() +NativeObject::canHaveNonEmptyElements() { - JSObject *obj = static_cast(this); - return isNative() && !IsAnyTypedArray(obj); + return !IsAnyTypedArray(this); } #endif // DEBUG @@ -111,7 +110,7 @@ ObjectElements::ConvertElementsToDoubles(JSContext *cx, uintptr_t elementsPtr) } /* static */ bool -ObjectElements::MakeElementsCopyOnWrite(ExclusiveContext *cx, JSObject *obj) +ObjectElements::MakeElementsCopyOnWrite(ExclusiveContext *cx, NativeObject *obj) { // Make sure there is enough room for the owner object pointer at the end // of the elements. @@ -132,7 +131,7 @@ ObjectElements::MakeElementsCopyOnWrite(ExclusiveContext *cx, JSObject *obj) #ifdef DEBUG void -js::ObjectImpl::checkShapeConsistency() +js::NativeObject::checkShapeConsistency() { static int throttle = -1; if (throttle < 0) { @@ -197,7 +196,7 @@ js::ObjectImpl::checkShapeConsistency() #endif void -js::ObjectImpl::initializeSlotRange(uint32_t start, uint32_t length) +js::NativeObject::initializeSlotRange(uint32_t start, uint32_t length) { /* * No bounds check, as this is used when the object's shape does not @@ -208,43 +207,37 @@ js::ObjectImpl::initializeSlotRange(uint32_t start, uint32_t length) uint32_t offset = start; for (HeapSlot *sp = fixedStart; sp < fixedEnd; sp++) - sp->init(this->asObjectPtr(), HeapSlot::Slot, offset++, UndefinedValue()); + sp->init(this, HeapSlot::Slot, offset++, UndefinedValue()); for (HeapSlot *sp = slotsStart; sp < slotsEnd; sp++) - sp->init(this->asObjectPtr(), HeapSlot::Slot, offset++, UndefinedValue()); + sp->init(this, HeapSlot::Slot, offset++, UndefinedValue()); } void -js::ObjectImpl::initSlotRange(uint32_t start, const Value *vector, uint32_t length) +js::NativeObject::initSlotRange(uint32_t start, const Value *vector, uint32_t length) { HeapSlot *fixedStart, *fixedEnd, *slotsStart, *slotsEnd; getSlotRange(start, length, &fixedStart, &fixedEnd, 
&slotsStart, &slotsEnd); for (HeapSlot *sp = fixedStart; sp < fixedEnd; sp++) - sp->init(this->asObjectPtr(), HeapSlot::Slot, start++, *vector++); + sp->init(this, HeapSlot::Slot, start++, *vector++); for (HeapSlot *sp = slotsStart; sp < slotsEnd; sp++) - sp->init(this->asObjectPtr(), HeapSlot::Slot, start++, *vector++); + sp->init(this, HeapSlot::Slot, start++, *vector++); } void -js::ObjectImpl::copySlotRange(uint32_t start, const Value *vector, uint32_t length) +js::NativeObject::copySlotRange(uint32_t start, const Value *vector, uint32_t length) { JS::Zone *zone = this->zone(); HeapSlot *fixedStart, *fixedEnd, *slotsStart, *slotsEnd; getSlotRange(start, length, &fixedStart, &fixedEnd, &slotsStart, &slotsEnd); for (HeapSlot *sp = fixedStart; sp < fixedEnd; sp++) - sp->set(zone, this->asObjectPtr(), HeapSlot::Slot, start++, *vector++); + sp->set(zone, this, HeapSlot::Slot, start++, *vector++); for (HeapSlot *sp = slotsStart; sp < slotsEnd; sp++) - sp->set(zone, this->asObjectPtr(), HeapSlot::Slot, start++, *vector++); + sp->set(zone, this, HeapSlot::Slot, start++, *vector++); } #ifdef DEBUG bool -js::ObjectImpl::isProxy() const -{ - return asObjectPtr()->is(); -} - -bool -js::ObjectImpl::slotInRange(uint32_t slot, SentinelAllowed sentinel) const +js::NativeObject::slotInRange(uint32_t slot, SentinelAllowed sentinel) const { uint32_t capacity = numFixedSlots() + numDynamicSlots(); if (sentinel == SENTINEL_ALLOWED) @@ -262,7 +255,7 @@ js::ObjectImpl::slotInRange(uint32_t slot, SentinelAllowed sentinel) const MOZ_NEVER_INLINE #endif Shape * -js::ObjectImpl::nativeLookup(ExclusiveContext *cx, jsid id) +js::NativeObject::lookup(ExclusiveContext *cx, jsid id) { MOZ_ASSERT(isNative()); Shape **spp; @@ -270,14 +263,14 @@ js::ObjectImpl::nativeLookup(ExclusiveContext *cx, jsid id) } Shape * -js::ObjectImpl::nativeLookupPure(jsid id) +js::NativeObject::lookupPure(jsid id) { MOZ_ASSERT(isNative()); return Shape::searchNoHashify(lastProperty(), id); } uint32_t 
-js::ObjectImpl::dynamicSlotsCount(uint32_t nfixed, uint32_t span, const Class *clasp) +js::NativeObject::dynamicSlotsCount(uint32_t nfixed, uint32_t span, const Class *clasp) { if (span <= nfixed) return 0; @@ -295,32 +288,32 @@ js::ObjectImpl::dynamicSlotsCount(uint32_t nfixed, uint32_t span, const Class *c } void -js::ObjectImpl::markChildren(JSTracer *trc) +JSObject::markChildren(JSTracer *trc) { MarkTypeObject(trc, &type_, "type"); MarkShape(trc, &shape_, "shape"); const Class *clasp = type_->clasp(); - JSObject *obj = asObjectPtr(); if (clasp->trace) - clasp->trace(trc, obj); + clasp->trace(trc, this); if (shape_->isNative()) { - MarkObjectSlots(trc, obj, 0, obj->slotSpan()); + NativeObject *nobj = &as(); + MarkObjectSlots(trc, nobj, 0, nobj->slotSpan()); do { - if (obj->denseElementsAreCopyOnWrite()) { - HeapPtrObject &owner = getElementsHeader()->ownerObject(); - if (owner != this) { + if (nobj->denseElementsAreCopyOnWrite()) { + HeapPtrNativeObject &owner = nobj->getElementsHeader()->ownerObject(); + if (owner != nobj) { MarkObject(trc, &owner, "objectElementsOwner"); break; } } gc::MarkArraySlots(trc, - obj->getDenseInitializedLength(), - obj->getDenseElementsAllowCopyOnWrite(), + nobj->getDenseInitializedLength(), + nobj->getDenseElementsAllowCopyOnWrite(), "objectElements"); } while (false); } diff --git a/js/src/vm/ObjectImpl.h b/js/src/vm/ObjectImpl.h index 6e4c8210e23..8bedc879145 100644 --- a/js/src/vm/ObjectImpl.h +++ b/js/src/vm/ObjectImpl.h @@ -14,6 +14,7 @@ #include "jsfriendapi.h" #include "jsinfer.h" +#include "jsobj.h" #include "NamespaceImports.h" #include "gc/Barrier.h" @@ -26,7 +27,6 @@ namespace js { -class ObjectImpl; class Nursery; class Shape; @@ -98,7 +98,7 @@ ArraySetLength(typename ExecutionModeTraits::ContextType cx, unsigned attrs, HandleValue value, bool setterIsStrict); /* - * Elements header used for all objects. The elements component of such objects + * Elements header used for native objects. 
The elements component of such objects * offers an efficient representation for all or some of the indexed properties * of the object, using a flat array of Values rather than a shape hierarchy * stored in the object's slots. This structure is immediately followed by an @@ -188,7 +188,7 @@ class ObjectElements private: friend class ::JSObject; - friend class ObjectImpl; + friend class NativeObject; friend class ArrayObject; friend class Nursery; friend class gc::ForkJoinNursery; @@ -257,9 +257,9 @@ class ObjectElements return reinterpret_cast(uintptr_t(elems) - sizeof(ObjectElements)); } - HeapPtrObject &ownerObject() const { + HeapPtrNativeObject &ownerObject() const { MOZ_ASSERT(isCopyOnWrite()); - return *(HeapPtrObject *)(&elements()[initializedLength]); + return *(HeapPtrNativeObject *)(&elements()[initializedLength]); } static int offsetOfFlags() { @@ -276,7 +276,7 @@ class ObjectElements } static bool ConvertElementsToDoubles(JSContext *cx, uintptr_t elements); - static bool MakeElementsCopyOnWrite(ExclusiveContext *cx, JSObject *obj); + static bool MakeElementsCopyOnWrite(ExclusiveContext *cx, NativeObject *obj); // This is enough slots to store an object of this class. See the static // assertion below. @@ -297,26 +297,17 @@ class Shape; class NewObjectCache; class TaggedProto; -inline Value -ObjectValue(ObjectImpl &obj); - #ifdef DEBUG static inline bool -IsObjectValueInCompartment(js::Value v, JSCompartment *comp); +IsObjectValueInCompartment(Value v, JSCompartment *comp); #endif /* - * ObjectImpl specifies the internal implementation of an object. (In contrast - * JSObject specifies an "external" interface, at the conceptual level of that - * exposed in ECMAScript.) + * NativeObject specifies the internal implementation of a native object. * - * The |shape_| member stores the shape of the object, which includes the - * object's class and the layout of all its properties. 
+ * Native objects extend the base implementation of an object with storage + * for the object's named properties and indexed elements. * - * The |type_| member stores the type of the object, which contains its - * prototype object and the possible types of its properties. - * - * The rest of the object stores its named properties and indexed elements. * These are stored separately from one another. Objects are followed by a * variable-sized array of values for inline storage, which may be used by * either properties of native objects (fixed slots), by elements (fixed @@ -337,126 +328,76 @@ IsObjectValueInCompartment(js::Value v, JSCompartment *comp); * (the address of the third value, to leave room for a ObjectElements header; * in this case numFixedSlots() is zero) or to a dynamically allocated array. * - * Only certain combinations of slots and elements storage are possible. - * - * - For native objects, slots and elements may both be non-empty. The - * slots may be either names or indexes; no indexed property will be in both - * the slots and elements. - * - * - For non-native objects, slots and elements are both empty. - * - * The members of this class are currently protected; in the long run this will - * will change so that some members are private, and only certain methods that - * act upon them will be protected. + * Slots and elements may both be non-empty. The slots may be either names or + * indexes; no indexed property will be in both the slots and elements. */ -class ObjectImpl : public gc::Cell +class NativeObject : public JSObject { protected: - /* - * Shape of the object, encodes the layout of the object's properties and - * all other information about its structure. See vm/Shape.h. - */ - HeapPtrShape shape_; - - /* - * The object's type and prototype. For objects with the LAZY_TYPE flag - * set, this is the prototype's default 'new' type and can only be used - * to get that prototype. 
- */ - HeapPtrTypeObject type_; - - HeapSlot *slots; /* Slots for object properties. */ - HeapSlot *elements; /* Slots for object elements. */ - friend bool ArraySetLength(JSContext *cx, Handle obj, HandleId id, unsigned attrs, HandleValue value, bool setterIsStrict); + // FIXME Bug 1073842: this is temporary until non-native objects can + // access non-slot storage. + friend class ::JSObject; + private: static void staticAsserts() { - static_assert(sizeof(ObjectImpl) == sizeof(shadow::Object), + static_assert(sizeof(NativeObject) == sizeof(shadow::Object), "shadow interface must match actual implementation"); - static_assert(sizeof(ObjectImpl) % sizeof(Value) == 0, + static_assert(sizeof(NativeObject) % sizeof(Value) == 0, "fixed slots after an object must be aligned"); - static_assert(offsetof(ObjectImpl, shape_) == offsetof(shadow::Object, shape), + static_assert(offsetof(NativeObject, shape_) == offsetof(shadow::Object, shape), "shadow shape must match actual shape"); - static_assert(offsetof(ObjectImpl, type_) == offsetof(shadow::Object, type), + static_assert(offsetof(NativeObject, type_) == offsetof(shadow::Object, type), "shadow type must match actual type"); - static_assert(offsetof(ObjectImpl, slots) == offsetof(shadow::Object, slots), + static_assert(offsetof(NativeObject, slots) == offsetof(shadow::Object, slots), "shadow slots must match actual slots"); - static_assert(offsetof(ObjectImpl, elements) == offsetof(shadow::Object, _1), + static_assert(offsetof(NativeObject, elements) == offsetof(shadow::Object, _1), "shadow placeholder must match actual elements"); + + static_assert(js::shadow::Object::MAX_FIXED_SLOTS == MAX_FIXED_SLOTS, + "We shouldn't be confused about our actual maximum " + "number of fixed slots"); } - JSObject * asObjectPtr() { return reinterpret_cast(this); } - const JSObject * asObjectPtr() const { return reinterpret_cast(this); } - - friend inline Value ObjectValue(ObjectImpl &obj); - - /* These functions are public, and they 
should remain public. */ - public: - TaggedProto getTaggedProto() const { - return type_->proto(); - } - - bool hasTenuredProto() const; - - const Class *getClass() const { - return type_->clasp(); - } - - static inline bool - isExtensible(ExclusiveContext *cx, Handle obj, bool *extensible); - - // Indicates whether a non-proxy is extensible. Don't call on proxies! - // This method really shouldn't exist -- but there are a few internal - // places that want it (JITs and the like), and it'd be a pain to mark them - // all as friends. - bool nonProxyIsExtensible() const { - MOZ_ASSERT(!isProxy()); - - // [[Extensible]] for ordinary non-proxy objects is an object flag. - return !lastProperty()->hasObjectFlag(BaseShape::NOT_EXTENSIBLE); - } - -#ifdef DEBUG - bool isProxy() const; -#endif - - // Attempt to change the [[Extensible]] bit on |obj| to false. Callers - // must ensure that |obj| is currently extensible before calling this! - static bool - preventExtensions(JSContext *cx, Handle obj); - HeapSlotArray getDenseElements() { - MOZ_ASSERT(isNative()); return HeapSlotArray(elements, !getElementsHeader()->isCopyOnWrite()); } HeapSlotArray getDenseElementsAllowCopyOnWrite() { // Backdoor allowing direct access to copy on write elements. - MOZ_ASSERT(isNative()); return HeapSlotArray(elements, true); } const Value &getDenseElement(uint32_t idx) { - MOZ_ASSERT(isNative()); MOZ_ASSERT(idx < getDenseInitializedLength()); return elements[idx]; } bool containsDenseElement(uint32_t idx) { - MOZ_ASSERT(isNative()); return idx < getDenseInitializedLength() && !elements[idx].isMagic(JS_ELEMENTS_HOLE); } uint32_t getDenseInitializedLength() { - MOZ_ASSERT(getClass()->isNative()); return getElementsHeader()->initializedLength; } uint32_t getDenseCapacity() { - MOZ_ASSERT(getClass()->isNative()); return getElementsHeader()->capacity; } + /* + * Update the last property, keeping the number of allocated slots in sync + * with the object's new slot span. 
+ */ + static bool setLastProperty(ThreadSafeContext *cx, + HandleNativeObject obj, HandleShape shape); + + // As for setLastProperty(), but allows the number of fixed slots to + // change. This can only be used when fixed slots are being erased from the + // object, and only when the object will not require dynamic slots to cover + // the new properties. + void setLastPropertyShrinkFixedSlots(Shape *shape); + protected: #ifdef DEBUG void checkShapeConsistency(); @@ -468,14 +409,19 @@ class ObjectImpl : public gc::Cell replaceWithNewEquivalentShape(ThreadSafeContext *cx, Shape *existingShape, Shape *newShape = nullptr); - enum GenerateShape { - GENERATE_NONE, - GENERATE_SHAPE - }; + /* + * Remove the last property of an object, provided that it is safe to do so + * (the shape and previous shape do not carry conflicting information about + * the object itself). + */ + inline void removeLastProperty(ExclusiveContext *cx); + inline bool canRemoveLastProperty(); - bool setFlag(ExclusiveContext *cx, /*BaseShape::Flag*/ uint32_t flag, - GenerateShape generateShape = GENERATE_NONE); - bool clearFlag(ExclusiveContext *cx, /*BaseShape::Flag*/ uint32_t flag); + /* + * Update the slot span directly for a dictionary object, and allocate + * slots to cover the new span if necessary. + */ + static bool setSlotSpan(ThreadSafeContext *cx, HandleNativeObject obj, uint32_t span); bool toDictionaryMode(ThreadSafeContext *cx); @@ -570,40 +516,16 @@ class ObjectImpl : public gc::Cell static const uint32_t SLOT_CAPACITY_MIN = 8; HeapSlot *fixedSlots() const { - return reinterpret_cast(uintptr_t(this) + sizeof(ObjectImpl)); + return reinterpret_cast(uintptr_t(this) + sizeof(NativeObject)); } - /* - * These functions are currently public for simplicity; in the long run - * it may make sense to make at least some of them private. 
- */ - public: - Shape * lastProperty() const { - MOZ_ASSERT(shape_); - return shape_; - } - - bool generateOwnShape(ThreadSafeContext *cx, js::Shape *newShape = nullptr) { + bool generateOwnShape(ThreadSafeContext *cx, Shape *newShape = nullptr) { return replaceWithNewEquivalentShape(cx, lastProperty(), newShape); } - JSCompartment *compartment() const { - return lastProperty()->base()->compartment(); - } - - bool isNative() const { - return lastProperty()->isNative(); - } - - types::TypeObject *type() const { - MOZ_ASSERT(!hasLazyType()); - return typeRaw(); - } - - types::TypeObject *typeRaw() const { - return type_; - } + bool shadowingShapeChange(ExclusiveContext *cx, const Shape &shape); + bool clearFlag(ExclusiveContext *cx, BaseShape::Flag flag); uint32_t numFixedSlots() const { return reinterpret_cast(this)->numFixedSlots(); @@ -613,93 +535,172 @@ class ObjectImpl : public gc::Cell return Min(nslots, numFixedSlots()); } - /* - * Whether this is the only object which has its specified type. This - * object will have its type constructed lazily as needed by analysis. - */ - bool hasSingletonType() const { - return !!type_->singleton(); - } - - /* - * Whether the object's type has not been constructed yet. If an object - * might have a lazy type, use getType() below, otherwise type(). - */ - bool hasLazyType() const { - return type_->lazy(); - } - uint32_t slotSpan() const { if (inDictionaryMode()) return lastProperty()->base()->slotSpan(); return lastProperty()->slotSpan(); } + /* Whether a slot is at a fixed offset from this object. */ + bool isFixedSlot(size_t slot) { + return slot < numFixedSlots(); + } + + /* Index into the dynamic slots array to use for a dynamic slot. */ + size_t dynamicSlotIndex(size_t slot) { + MOZ_ASSERT(slot >= numFixedSlots()); + return slot - numFixedSlots(); + } + + /* + * Grow or shrink slots immediately before changing the slot span. 
+ * The number of allocated slots is not stored explicitly, and changes to + * the slots must track changes in the slot span. + */ + static bool growSlots(ThreadSafeContext *cx, HandleNativeObject obj, uint32_t oldCount, + uint32_t newCount); + static void shrinkSlots(ThreadSafeContext *cx, HandleNativeObject obj, uint32_t oldCount, + uint32_t newCount); + + bool hasDynamicSlots() const { return !!slots; } + /* Compute dynamicSlotsCount() for this object. */ uint32_t numDynamicSlots() const { return dynamicSlotsCount(numFixedSlots(), slotSpan(), getClass()); } - - Shape *nativeLookup(ExclusiveContext *cx, jsid id); - Shape *nativeLookup(ExclusiveContext *cx, PropertyName *name) { - return nativeLookup(cx, NameToId(name)); + bool empty() const { + return lastProperty()->isEmptyShape(); } - bool nativeContains(ExclusiveContext *cx, jsid id) { - return nativeLookup(cx, id) != nullptr; + Shape *lookup(ExclusiveContext *cx, jsid id); + Shape *lookup(ExclusiveContext *cx, PropertyName *name) { + return lookup(cx, NameToId(name)); } - bool nativeContains(ExclusiveContext *cx, PropertyName* name) { - return nativeLookup(cx, name) != nullptr; + + bool contains(ExclusiveContext *cx, jsid id) { + return lookup(cx, id) != nullptr; } - bool nativeContains(ExclusiveContext *cx, Shape* shape) { - return nativeLookup(cx, shape->propid()) == shape; + bool contains(ExclusiveContext *cx, PropertyName* name) { + return lookup(cx, name) != nullptr; + } + bool contains(ExclusiveContext *cx, Shape* shape) { + return lookup(cx, shape->propid()) == shape; } /* Contextless; can be called from parallel code. 
*/ - Shape *nativeLookupPure(jsid id); - Shape *nativeLookupPure(PropertyName *name) { - return nativeLookupPure(NameToId(name)); + Shape *lookupPure(jsid id); + Shape *lookupPure(PropertyName *name) { + return lookupPure(NameToId(name)); } - bool nativeContainsPure(jsid id) { - return nativeLookupPure(id) != nullptr; + bool containsPure(jsid id) { + return lookupPure(id) != nullptr; } - bool nativeContainsPure(PropertyName* name) { - return nativeContainsPure(NameToId(name)); + bool containsPure(PropertyName* name) { + return containsPure(NameToId(name)); } - bool nativeContainsPure(Shape* shape) { - return nativeLookupPure(shape->propid()) == shape; - } - - const JSClass *getJSClass() const { - return Jsvalify(getClass()); - } - bool hasClass(const Class *c) const { - return getClass() == c; - } - const ObjectOps *getOps() const { - return &getClass()->ops; + bool containsPure(Shape* shape) { + return lookupPure(shape->propid()) == shape; } /* - * An object is a delegate if it is on another object's prototype or scope - * chain, and therefore the delegate might be asked implicitly to get or - * set a property on behalf of another object. Delegates may be accessed - * directly too, as may any object, but only those objects linked after the - * head of any prototype or scope chain are flagged as delegates. This - * definition helps to optimize shape-based property cache invalidation - * (see Purge{Scope,Proto}Chain in jsobj.cpp). + * Allocate and free an object slot. + * + * FIXME: bug 593129 -- slot allocation should be done by object methods + * after calling object-parameter-free shape methods, avoiding coupling + * logic across the object vs. shape module wall. 
*/ - bool isDelegate() const { - return lastProperty()->hasObjectFlag(BaseShape::DELEGATE); + static bool allocSlot(ThreadSafeContext *cx, HandleNativeObject obj, uint32_t *slotp); + void freeSlot(uint32_t slot); + + private: + static Shape *getChildPropertyOnDictionary(ThreadSafeContext *cx, HandleNativeObject obj, + HandleShape parent, StackShape &child); + static Shape *getChildProperty(ExclusiveContext *cx, HandleNativeObject obj, + HandleShape parent, StackShape &child); + template + static inline Shape * + getOrLookupChildProperty(typename ExecutionModeTraits::ExclusiveContextType cx, + HandleNativeObject obj, HandleShape parent, StackShape &child) + { + if (mode == ParallelExecution) + return lookupChildProperty(cx, obj, parent, child); + return getChildProperty(cx->asExclusiveContext(), obj, parent, child); } + public: /* - * Return true if this object is a native one that has been converted from - * shared-immutable prototype-rooted shape storage to dictionary-shapes in - * a doubly-linked list. + * XXX: This should be private, but is public because it needs to be a + * friend of ThreadSafeContext to get to the propertyTree on cx->compartment_. */ + static Shape *lookupChildProperty(ThreadSafeContext *cx, HandleNativeObject obj, + HandleShape parent, StackShape &child); + + /* Add a property whose id is not yet in this scope. */ + static Shape *addProperty(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, + JSPropertyOp getter, JSStrictPropertyOp setter, + uint32_t slot, unsigned attrs, unsigned flags, + bool allowDictionary = true); + + /* Add a data property whose id is not yet in this scope. */ + Shape *addDataProperty(ExclusiveContext *cx, + jsid id_, uint32_t slot, unsigned attrs); + Shape *addDataProperty(ExclusiveContext *cx, HandlePropertyName name, + uint32_t slot, unsigned attrs); + + /* Add or overwrite a property for id in this scope. 
*/ + template + static Shape * + putProperty(typename ExecutionModeTraits::ExclusiveContextType cx, + HandleNativeObject obj, HandleId id, + JSPropertyOp getter, JSStrictPropertyOp setter, + uint32_t slot, unsigned attrs, + unsigned flags); + template + static inline Shape * + putProperty(typename ExecutionModeTraits::ExclusiveContextType cx, + HandleObject obj, PropertyName *name, + JSPropertyOp getter, JSStrictPropertyOp setter, + uint32_t slot, unsigned attrs, + unsigned flags); + + /* Change the given property into a sibling with the same id in this scope. */ + template + static Shape * + changeProperty(typename ExecutionModeTraits::ExclusiveContextType cx, + HandleNativeObject obj, HandleShape shape, unsigned attrs, unsigned mask, + JSPropertyOp getter, JSStrictPropertyOp setter); + + static inline bool changePropertyAttributes(JSContext *cx, HandleNativeObject obj, + HandleShape shape, unsigned attrs); + + /* Remove the property named by id from this object. */ + bool removeProperty(ExclusiveContext *cx, jsid id); + + /* Clear the scope, making it empty. */ + static void clear(JSContext *cx, HandleNativeObject obj); + + protected: + /* + * Internal helper that adds a shape not yet mapped by this object. + * + * Notes: + * 1. getter and setter must be normalized based on flags (see jsscope.cpp). + * 2. Checks for non-extensibility must be done by callers. + */ + template + static Shape * + addPropertyInternal(typename ExecutionModeTraits::ExclusiveContextType cx, + HandleNativeObject obj, HandleId id, + JSPropertyOp getter, JSStrictPropertyOp setter, + uint32_t slot, unsigned attrs, unsigned flags, Shape **spp, + bool allowDictionary); + + public: + // Return true if this object has been converted from shared-immutable + // prototype-rooted shape storage to dictionary-shapes in a doubly-linked + // list. 
bool inDictionaryMode() const { return lastProperty()->inDictionary(); } @@ -720,8 +721,10 @@ class ObjectImpl : public gc::Cell } HeapSlot *getSlotAddressUnchecked(uint32_t slot) { - const ObjectImpl *obj = static_cast(this); - return const_cast(obj->getSlotAddressUnchecked(slot)); + uint32_t fixed = numFixedSlots(); + if (slot < fixed) + return fixedSlots() + slot; + return slots + (slot - fixed); } HeapSlot *getSlotAddress(uint32_t slot) { @@ -754,24 +757,15 @@ class ObjectImpl : public gc::Cell return *getSlotAddress(slot); } - HeapSlot &nativeGetSlotRef(uint32_t slot) { - MOZ_ASSERT(isNative() && slot < slotSpan()); - return getSlotRef(slot); - } - const Value &nativeGetSlot(uint32_t slot) const { - MOZ_ASSERT(isNative() && slot < slotSpan()); - return getSlot(slot); - } - void setSlot(uint32_t slot, const Value &value) { MOZ_ASSERT(slotInRange(slot)); MOZ_ASSERT(IsObjectValueInCompartment(value, compartment())); - getSlotRef(slot).set(this->asObjectPtr(), HeapSlot::Slot, slot, value); + getSlotRef(slot).set(this, HeapSlot::Slot, slot, value); } MOZ_ALWAYS_INLINE void setCrossCompartmentSlot(uint32_t slot, const Value &value) { MOZ_ASSERT(slotInRange(slot)); - getSlotRef(slot).set(this->asObjectPtr(), HeapSlot::Slot, slot, value); + getSlotRef(slot).set(this, HeapSlot::Slot, slot, value); } void initSlot(uint32_t slot, const Value &value) { @@ -788,7 +782,65 @@ class ObjectImpl : public gc::Cell } void initSlotUnchecked(uint32_t slot, const Value &value) { - getSlotAddressUnchecked(slot)->init(this->asObjectPtr(), HeapSlot::Slot, slot, value); + getSlotAddressUnchecked(slot)->init(this, HeapSlot::Slot, slot, value); + } + + // MAX_FIXED_SLOTS is the biggest number of fixed slots our GC + // size classes will give an object. 
+ static const uint32_t MAX_FIXED_SLOTS = 16; + + protected: + static inline bool updateSlotsForSpan(ThreadSafeContext *cx, + HandleNativeObject obj, size_t oldSpan, size_t newSpan); + + public: + /* + * Trigger the write barrier on a range of slots that will no longer be + * reachable. + */ + void prepareSlotRangeForOverwrite(size_t start, size_t end) { + for (size_t i = start; i < end; i++) + getSlotAddressUnchecked(i)->HeapSlot::~HeapSlot(); + } + + void prepareElementRangeForOverwrite(size_t start, size_t end) { + MOZ_ASSERT(end <= getDenseInitializedLength()); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + for (size_t i = start; i < end; i++) + elements[i].HeapSlot::~HeapSlot(); + } + + static bool rollbackProperties(ExclusiveContext *cx, HandleNativeObject obj, + uint32_t slotSpan); + + inline bool setSlotIfHasType(Shape *shape, const Value &value, + bool overwriting = true); + inline void setSlotWithType(ExclusiveContext *cx, Shape *shape, + const Value &value, bool overwriting = true); + + inline const Value &getReservedSlot(uint32_t index) const { + MOZ_ASSERT(index < JSSLOT_FREE(getClass())); + return getSlot(index); + } + + const HeapSlot &getReservedSlotRef(uint32_t index) const { + MOZ_ASSERT(index < JSSLOT_FREE(getClass())); + return getSlotRef(index); + } + + HeapSlot &getReservedSlotRef(uint32_t index) { + MOZ_ASSERT(index < JSSLOT_FREE(getClass())); + return getSlotRef(index); + } + + void initReservedSlot(uint32_t index, const Value &v) { + MOZ_ASSERT(index < JSSLOT_FREE(getClass())); + initSlot(index, v); + } + + void setReservedSlot(uint32_t index, const Value &v) { + MOZ_ASSERT(index < JSSLOT_FREE(getClass())); + setSlot(index, v); } /* For slots which are known to always be fixed, due to the way they are allocated. 
*/ @@ -805,12 +857,12 @@ class ObjectImpl : public gc::Cell void setFixedSlot(uint32_t slot, const Value &value) { MOZ_ASSERT(slot < numFixedSlots()); - fixedSlots()[slot].set(this->asObjectPtr(), HeapSlot::Slot, slot, value); + fixedSlots()[slot].set(this, HeapSlot::Slot, slot, value); } void initFixedSlot(uint32_t slot, const Value &value) { MOZ_ASSERT(slot < numFixedSlots()); - fixedSlots()[slot].init(this->asObjectPtr(), HeapSlot::Slot, slot, value); + fixedSlots()[slot].init(this, HeapSlot::Slot, slot, value); } /* @@ -821,18 +873,223 @@ class ObjectImpl : public gc::Cell */ static uint32_t dynamicSlotsCount(uint32_t nfixed, uint32_t span, const Class *clasp); - /* Memory usage functions. */ - size_t tenuredSizeOfThis() const { - MOZ_ASSERT(isTenured()); - return js::gc::Arena::thingSize(asTenured().getAllocKind()); - } - /* Elements accessors. */ + /* Upper bound on the number of elements in an object. */ + static const uint32_t NELEMENTS_LIMIT = JS_BIT(28); + ObjectElements * getElementsHeader() const { return ObjectElements::fromElements(elements); } + /* Accessors for elements. 
*/ + bool ensureElements(ThreadSafeContext *cx, uint32_t capacity) { + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + if (capacity > getDenseCapacity()) + return growElements(cx, capacity); + return true; + } + + static uint32_t goodAllocated(uint32_t n, uint32_t length); + bool growElements(ThreadSafeContext *cx, uint32_t newcap); + void shrinkElements(ThreadSafeContext *cx, uint32_t cap); + void setDynamicElements(ObjectElements *header) { + MOZ_ASSERT(!hasDynamicElements()); + elements = header->elements(); + MOZ_ASSERT(hasDynamicElements()); + } + + static bool CopyElementsForWrite(ThreadSafeContext *cx, NativeObject *obj); + + bool maybeCopyElementsForWrite(ThreadSafeContext *cx) { + if (denseElementsAreCopyOnWrite()) + return CopyElementsForWrite(cx, this); + return true; + } + + private: + inline void ensureDenseInitializedLengthNoPackedCheck(ThreadSafeContext *cx, + uint32_t index, uint32_t extra); + + public: + void setDenseInitializedLength(uint32_t length) { + MOZ_ASSERT(length <= getDenseCapacity()); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + prepareElementRangeForOverwrite(length, getElementsHeader()->initializedLength); + getElementsHeader()->initializedLength = length; + } + + inline void ensureDenseInitializedLength(ExclusiveContext *cx, + uint32_t index, uint32_t extra); + inline void ensureDenseInitializedLengthPreservePackedFlag(ThreadSafeContext *cx, + uint32_t index, uint32_t extra); + void setDenseElement(uint32_t index, const Value &val) { + MOZ_ASSERT(index < getDenseInitializedLength()); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + elements[index].set(this, HeapSlot::Element, index, val); + } + + void initDenseElement(uint32_t index, const Value &val) { + MOZ_ASSERT(index < getDenseInitializedLength()); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + elements[index].init(this, HeapSlot::Element, index, val); + } + + void setDenseElementMaybeConvertDouble(uint32_t index, const Value &val) { + if (val.isInt32() && 
shouldConvertDoubleElements()) + setDenseElement(index, DoubleValue(val.toInt32())); + else + setDenseElement(index, val); + } + + inline bool setDenseElementIfHasType(uint32_t index, const Value &val); + inline void setDenseElementWithType(ExclusiveContext *cx, uint32_t index, + const Value &val); + inline void initDenseElementWithType(ExclusiveContext *cx, uint32_t index, + const Value &val); + inline void setDenseElementHole(ExclusiveContext *cx, uint32_t index); + static inline void removeDenseElementForSparseIndex(ExclusiveContext *cx, + HandleNativeObject obj, uint32_t index); + + inline Value getDenseOrTypedArrayElement(uint32_t idx); + + void copyDenseElements(uint32_t dstStart, const Value *src, uint32_t count) { + MOZ_ASSERT(dstStart + count <= getDenseCapacity()); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + JSRuntime *rt = runtimeFromMainThread(); + if (JS::IsIncrementalBarrierNeeded(rt)) { + Zone *zone = this->zone(); + for (uint32_t i = 0; i < count; ++i) + elements[dstStart + i].set(zone, this, HeapSlot::Element, dstStart + i, src[i]); + } else { + memcpy(&elements[dstStart], src, count * sizeof(HeapSlot)); + DenseRangeWriteBarrierPost(rt, this, dstStart, count); + } + } + + void initDenseElements(uint32_t dstStart, const Value *src, uint32_t count) { + MOZ_ASSERT(dstStart + count <= getDenseCapacity()); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + memcpy(&elements[dstStart], src, count * sizeof(HeapSlot)); + DenseRangeWriteBarrierPost(runtimeFromMainThread(), this, dstStart, count); + } + + void initDenseElementsUnbarriered(uint32_t dstStart, const Value *src, uint32_t count); + + void moveDenseElements(uint32_t dstStart, uint32_t srcStart, uint32_t count) { + MOZ_ASSERT(dstStart + count <= getDenseCapacity()); + MOZ_ASSERT(srcStart + count <= getDenseInitializedLength()); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + + /* + * Using memmove here would skip write barriers. 
Also, we need to consider + * an array containing [A, B, C], in the following situation: + * + * 1. Incremental GC marks slot 0 of array (i.e., A), then returns to JS code. + * 2. JS code moves slots 1..2 into slots 0..1, so it contains [B, C, C]. + * 3. Incremental GC finishes by marking slots 1 and 2 (i.e., C). + * + * Since normal marking never happens on B, it is very important that the + * write barrier is invoked here on B, despite the fact that it exists in + * the array before and after the move. + */ + Zone *zone = this->zone(); + JS::shadow::Zone *shadowZone = JS::shadow::Zone::asShadowZone(zone); + if (shadowZone->needsIncrementalBarrier()) { + if (dstStart < srcStart) { + HeapSlot *dst = elements + dstStart; + HeapSlot *src = elements + srcStart; + for (uint32_t i = 0; i < count; i++, dst++, src++) + dst->set(zone, this, HeapSlot::Element, dst - elements, *src); + } else { + HeapSlot *dst = elements + dstStart + count - 1; + HeapSlot *src = elements + srcStart + count - 1; + for (uint32_t i = 0; i < count; i++, dst--, src--) + dst->set(zone, this, HeapSlot::Element, dst - elements, *src); + } + } else { + memmove(elements + dstStart, elements + srcStart, count * sizeof(HeapSlot)); + DenseRangeWriteBarrierPost(runtimeFromMainThread(), this, dstStart, count); + } + } + + void moveDenseElementsNoPreBarrier(uint32_t dstStart, uint32_t srcStart, uint32_t count) { + MOZ_ASSERT(!shadowZone()->needsIncrementalBarrier()); + + MOZ_ASSERT(dstStart + count <= getDenseCapacity()); + MOZ_ASSERT(srcStart + count <= getDenseCapacity()); + MOZ_ASSERT(!denseElementsAreCopyOnWrite()); + + memmove(elements + dstStart, elements + srcStart, count * sizeof(Value)); + DenseRangeWriteBarrierPost(runtimeFromMainThread(), this, dstStart, count); + } + + bool shouldConvertDoubleElements() { + return getElementsHeader()->shouldConvertDoubleElements(); + } + + inline void setShouldConvertDoubleElements(); + inline void clearShouldConvertDoubleElements(); + + bool 
denseElementsAreCopyOnWrite() { + return getElementsHeader()->isCopyOnWrite(); + } + + /* Packed information for this object's elements. */ + inline bool writeToIndexWouldMarkNotPacked(uint32_t index); + inline void markDenseElementsNotPacked(ExclusiveContext *cx); + + /* + * ensureDenseElements ensures that the object can hold at least + * index + extra elements. It returns ED_OK on success, ED_FAILED on + * failure to grow the array, ED_SPARSE when the object is too sparse to + * grow (this includes the case of index + extra overflow). In the last + * two cases the object is kept intact. + */ + enum EnsureDenseResult { ED_OK, ED_FAILED, ED_SPARSE }; + + private: + inline EnsureDenseResult ensureDenseElementsNoPackedCheck(ThreadSafeContext *cx, + uint32_t index, uint32_t extra); + + public: + inline EnsureDenseResult ensureDenseElements(ExclusiveContext *cx, + uint32_t index, uint32_t extra); + inline EnsureDenseResult ensureDenseElementsPreservePackedFlag(ThreadSafeContext *cx, + uint32_t index, uint32_t extra); + + inline EnsureDenseResult extendDenseElements(ThreadSafeContext *cx, + uint32_t requiredCapacity, uint32_t extra); + + /* Convert a single dense element to a sparse property. */ + static bool sparsifyDenseElement(ExclusiveContext *cx, + HandleNativeObject obj, uint32_t index); + + /* Convert all dense elements to sparse properties. */ + static bool sparsifyDenseElements(ExclusiveContext *cx, HandleNativeObject obj); + + /* Small objects are dense, no matter what. */ + static const uint32_t MIN_SPARSE_INDEX = 1000; + + /* + * Element storage for an object will be sparse if fewer than 1/8 indexes + * are filled in. + */ + static const unsigned SPARSE_DENSITY_RATIO = 8; + + /* + * Check if after growing the object's elements will be too sparse. + * newElementsHint is an estimated number of elements to be added. 
+ */ + bool willBeSparseElements(uint32_t requiredCapacity, uint32_t newElementsHint); + + /* + * After adding a sparse index to obj, see if it should be converted to use + * dense elements. + */ + static EnsureDenseResult maybeDensifySparseElements(ExclusiveContext *cx, + HandleNativeObject obj); + inline HeapSlot *fixedElements() const { static_assert(2 * sizeof(Value) == sizeof(ObjectElements), "when elements are stored inline, the first two " @@ -875,24 +1132,19 @@ class ObjectImpl : public gc::Cell */ inline uint8_t *fixedData(size_t nslots) const; - /* GC support. */ - static ThingRootKind rootKind() { return THING_ROOT_OBJECT; } - inline void privateWriteBarrierPre(void **oldval); void privateWriteBarrierPost(void **pprivate) { #ifdef JSGC_GENERATIONAL - js::gc::Cell **cellp = reinterpret_cast(pprivate); + gc::Cell **cellp = reinterpret_cast(pprivate); MOZ_ASSERT(cellp); MOZ_ASSERT(*cellp); - js::gc::StoreBuffer *storeBuffer = (*cellp)->storeBuffer(); + gc::StoreBuffer *storeBuffer = (*cellp)->storeBuffer(); if (storeBuffer) storeBuffer->putCellFromAnyThread(cellp); #endif } - void markChildren(JSTracer *trc); - /* Private data accessors. */ inline void *&privateRef(uint32_t nfixed) const { /* XXX should be private, not protected! 
*/ @@ -939,136 +1191,421 @@ class ObjectImpl : public gc::Cell return privateRef(nfixed); } - /* GC Accessors */ - static const size_t MaxTagBits = 3; void setInitialSlots(HeapSlot *newSlots) { slots = newSlots; } - static bool isNullLike(const ObjectImpl *obj) { return uintptr_t(obj) < (1 << MaxTagBits); } - MOZ_ALWAYS_INLINE JS::Zone *zone() const { - return shape_->zone(); - } - MOZ_ALWAYS_INLINE JS::shadow::Zone *shadowZone() const { - return JS::shadow::Zone::asShadowZone(zone()); - } - MOZ_ALWAYS_INLINE JS::Zone *zoneFromAnyThread() const { - return shape_->zoneFromAnyThread(); - } - MOZ_ALWAYS_INLINE JS::shadow::Zone *shadowZoneFromAnyThread() const { - return JS::shadow::Zone::asShadowZone(zoneFromAnyThread()); - } - static MOZ_ALWAYS_INLINE void readBarrier(ObjectImpl *obj); - static MOZ_ALWAYS_INLINE void writeBarrierPre(ObjectImpl *obj); - static MOZ_ALWAYS_INLINE void writeBarrierPost(ObjectImpl *obj, void *cellp); - static MOZ_ALWAYS_INLINE void writeBarrierPostRelocate(ObjectImpl *obj, void *cellp); - static MOZ_ALWAYS_INLINE void writeBarrierPostRemove(ObjectImpl *obj, void *cellp); + + static inline NativeObject * + copy(ExclusiveContext *cx, gc::AllocKind kind, gc::InitialHeap heap, + HandleNativeObject templateObject); /* JIT Accessors */ - static size_t offsetOfShape() { return offsetof(ObjectImpl, shape_); } - HeapPtrShape *addressOfShape() { return &shape_; } - - static size_t offsetOfType() { return offsetof(ObjectImpl, type_); } - HeapPtrTypeObject *addressOfType() { return &type_; } - - static size_t offsetOfElements() { return offsetof(ObjectImpl, elements); } + static size_t offsetOfElements() { return offsetof(NativeObject, elements); } static size_t offsetOfFixedElements() { - return sizeof(ObjectImpl) + sizeof(ObjectElements); + return sizeof(NativeObject) + sizeof(ObjectElements); } static size_t getFixedSlotOffset(size_t slot) { - return sizeof(ObjectImpl) + slot * sizeof(Value); + return sizeof(NativeObject) + slot * sizeof(Value); } 
static size_t getPrivateDataOffset(size_t nfixed) { return getFixedSlotOffset(nfixed); } - static size_t offsetOfSlots() { return offsetof(ObjectImpl, slots); } + static size_t offsetOfSlots() { return offsetof(NativeObject, slots); } }; -/* static */ MOZ_ALWAYS_INLINE void -ObjectImpl::readBarrier(ObjectImpl *obj) -{ - if (!isNullLike(obj) && obj->isTenured()) - obj->asTenured().readBarrier(&obj->asTenured()); -} - -/* static */ MOZ_ALWAYS_INLINE void -ObjectImpl::writeBarrierPre(ObjectImpl *obj) -{ - if (!isNullLike(obj) && obj->isTenured()) - obj->asTenured().writeBarrierPre(&obj->asTenured()); -} - -/* static */ MOZ_ALWAYS_INLINE void -ObjectImpl::writeBarrierPost(ObjectImpl *obj, void *cellp) -{ - MOZ_ASSERT(cellp); -#ifdef JSGC_GENERATIONAL - if (IsNullTaggedPointer(obj)) - return; - MOZ_ASSERT(obj == *static_cast(cellp)); - gc::StoreBuffer *storeBuffer = obj->storeBuffer(); - if (storeBuffer) - storeBuffer->putCellFromAnyThread(static_cast(cellp)); -#endif -} - -/* static */ MOZ_ALWAYS_INLINE void -ObjectImpl::writeBarrierPostRelocate(ObjectImpl *obj, void *cellp) -{ - MOZ_ASSERT(cellp); - MOZ_ASSERT(obj); - MOZ_ASSERT(obj == *static_cast(cellp)); -#ifdef JSGC_GENERATIONAL - gc::StoreBuffer *storeBuffer = obj->storeBuffer(); - if (storeBuffer) - storeBuffer->putRelocatableCellFromAnyThread(static_cast(cellp)); -#endif -} - -/* static */ MOZ_ALWAYS_INLINE void -ObjectImpl::writeBarrierPostRemove(ObjectImpl *obj, void *cellp) -{ - MOZ_ASSERT(cellp); - MOZ_ASSERT(obj); - MOZ_ASSERT(obj == *static_cast(cellp)); -#ifdef JSGC_GENERATIONAL - obj->shadowRuntimeFromAnyThread()->gcStoreBufferPtr()->removeRelocatableCellFromAnyThread( - static_cast(cellp)); -#endif -} - inline void -ObjectImpl::privateWriteBarrierPre(void **oldval) +NativeObject::privateWriteBarrierPre(void **oldval) { #ifdef JSGC_INCREMENTAL JS::shadow::Zone *shadowZone = this->shadowZoneFromAnyThread(); if (shadowZone->needsIncrementalBarrier()) { if (*oldval && getClass()->trace) - 
getClass()->trace(shadowZone->barrierTracer(), this->asObjectPtr()); + getClass()->trace(shadowZone->barrierTracer(), this); } #endif } -inline Value -ObjectValue(ObjectImpl &obj) -{ - Value v; - v.setObject(*obj.asObjectPtr()); - return v; -} - -inline Handle -Downcast(Handle obj) -{ - return Handle::fromMarkedLocation(reinterpret_cast(obj.address())); -} - #ifdef DEBUG static inline bool -IsObjectValueInCompartment(js::Value v, JSCompartment *comp) +IsObjectValueInCompartment(Value v, JSCompartment *comp) { if (!v.isObject()) return true; - return reinterpret_cast(&v.toObject())->compartment() == comp; + return v.toObject().compartment() == comp; } #endif +/* + * The baseops namespace encapsulates the default behavior when performing + * various operations on an object, irrespective of hooks installed in the + * object's class. In general, instance methods on the object itself should be + * called instead of calling these methods directly. + */ +namespace baseops { + +/* + * On success, and if id was found, return true with *objp non-null and with a + * property of *objp stored in *propp. If successful but id was not found, + * return true with both *objp and *propp null. 
+ */ +template +extern bool +LookupProperty(ExclusiveContext *cx, + typename MaybeRooted::HandleType obj, + typename MaybeRooted::HandleType id, + typename MaybeRooted::MutableHandleType objp, + typename MaybeRooted::MutableHandleType propp); + +extern bool +LookupElement(JSContext *cx, HandleNativeObject obj, uint32_t index, + MutableHandleObject objp, MutableHandleShape propp); + +extern bool +DefineGeneric(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, HandleValue value, + JSPropertyOp getter, JSStrictPropertyOp setter, unsigned attrs); + +extern bool +DefineElement(ExclusiveContext *cx, HandleNativeObject obj, uint32_t index, HandleValue value, + JSPropertyOp getter, JSStrictPropertyOp setter, unsigned attrs); + +extern bool +GetProperty(JSContext *cx, HandleNativeObject obj, HandleObject receiver, HandleId id, MutableHandleValue vp); + +extern bool +GetPropertyNoGC(JSContext *cx, NativeObject *obj, JSObject *receiver, jsid id, Value *vp); + +extern bool +GetElement(JSContext *cx, HandleNativeObject obj, HandleObject receiver, + uint32_t index, MutableHandleValue vp); + +inline bool +GetProperty(JSContext *cx, HandleNativeObject obj, HandleId id, MutableHandleValue vp) +{ + return GetProperty(cx, obj, obj, id, vp); +} + +inline bool +GetElement(JSContext *cx, HandleNativeObject obj, uint32_t index, MutableHandleValue vp) +{ + return GetElement(cx, obj, obj, index, vp); +} + +/* + * Indicates whether an assignment operation is qualified (`x.y = 0`) or + * unqualified (`y = 0`). In strict mode, the latter is an error if no such + * variable already exists. + * + * Used as an argument to baseops::SetPropertyHelper. 
+ */ +enum QualifiedBool { + Unqualified = 0, + Qualified = 1 +}; + +template +extern bool +SetPropertyHelper(typename ExecutionModeTraits::ContextType cx, + HandleNativeObject obj, + HandleObject receiver, HandleId id, QualifiedBool qualified, + MutableHandleValue vp, bool strict); + +extern bool +SetElementHelper(JSContext *cx, HandleNativeObject obj, HandleObject Receiver, uint32_t index, + MutableHandleValue vp, bool strict); + +extern bool +GetAttributes(JSContext *cx, HandleNativeObject obj, HandleId id, unsigned *attrsp); + +extern bool +SetAttributes(JSContext *cx, HandleNativeObject obj, HandleId id, unsigned *attrsp); + +extern bool +DeleteGeneric(JSContext *cx, HandleNativeObject obj, HandleId id, bool *succeeded); + +extern bool +Watch(JSContext *cx, JS::HandleObject obj, JS::HandleId id, JS::HandleObject callable); + +extern bool +Unwatch(JSContext *cx, JS::HandleObject obj, JS::HandleId id); + +} /* namespace js::baseops */ + } /* namespace js */ +inline void * +JSObject::fakeNativeGetPrivate() const +{ + return static_cast(this)->getPrivate(); +} + +inline void * +JSObject::fakeNativeGetPrivate(uint32_t nfixed) const +{ + return static_cast(this)->getPrivate(nfixed); +} + +inline void +JSObject::fakeNativeSetPrivate(void *data) +{ + static_cast(this)->setPrivate(data); +} + +inline bool +JSObject::fakeNativeHasPrivate() const +{ + return static_cast(this)->hasPrivate(); +} + +inline void +JSObject::fakeNativeInitPrivate(void *data) +{ + static_cast(this)->initPrivate(data); +} + +inline void *& +JSObject::fakeNativePrivateRef(uint32_t nfixed) const +{ + return static_cast(this)->privateRef(nfixed); +} + +inline uint32_t +JSObject::fakeNativeSlotSpan() +{ + return static_cast(this)->slotSpan(); +} + +inline const js::Value & +JSObject::fakeNativeGetSlot(uint32_t slot) +{ + return static_cast(this)->getSlot(slot); +} + +inline void +JSObject::fakeNativeSetSlot(uint32_t slot, const js::Value &value) +{ + static_cast(this)->setSlot(slot, value); +} + 
+inline js::HeapSlot & +JSObject::fakeNativeGetSlotRef(uint32_t slot) +{ + return static_cast(this)->getSlotRef(slot); +} + +inline const js::Value & +JSObject::fakeNativeGetReservedSlot(uint32_t slot) const +{ + return static_cast(this)->getReservedSlot(slot); +} + +inline js::HeapSlot & +JSObject::fakeNativeGetReservedSlotRef(uint32_t slot) +{ + return static_cast(this)->getReservedSlotRef(slot); +} + +inline void +JSObject::fakeNativeSetReservedSlot(uint32_t slot, const js::Value &value) +{ + static_cast(this)->setReservedSlot(slot, value); +} + +inline void +JSObject::fakeNativeInitReservedSlot(uint32_t slot, const js::Value &value) +{ + static_cast(this)->initReservedSlot(slot, value); +} + +inline void +JSObject::fakeNativeSetCrossCompartmentSlot(uint32_t slot, const js::Value &value) +{ + static_cast(this)->setCrossCompartmentSlot(slot, value); +} + +inline void +JSObject::fakeNativeInitCrossCompartmentSlot(uint32_t slot, const js::Value &value) +{ + static_cast(this)->initCrossCompartmentSlot(slot, value); +} + +inline void +JSObject::fakeNativeSetInitialSlots(js::HeapSlot *newSlots) +{ + static_cast(this)->setInitialSlots(newSlots); +} + +inline bool +JSObject::fakeNativeHasDynamicSlots() const +{ + return static_cast(this)->hasDynamicSlots(); +} + +inline uint32_t +JSObject::fakeNativeNumFixedSlots() const +{ + return static_cast(this)->numFixedSlots(); +} + +inline uint32_t +JSObject::fakeNativeNumDynamicSlots() const +{ + return static_cast(this)->numDynamicSlots(); +} + +inline js::HeapSlot *& +JSObject::fakeNativeSlots() +{ + return slots; +} + +inline void +JSObject::fakeNativeInitSlot(uint32_t slot, const js::Value &value) +{ + static_cast(this)->initSlot(slot, value); +} + +inline void +JSObject::fakeNativeInitSlotRange(uint32_t start, const js::Value *vector, uint32_t length) +{ + static_cast(this)->initSlotRange(start, vector, length); +} + +inline void +JSObject::fakeNativeInitializeSlotRange(uint32_t start, uint32_t count) +{ + 
static_cast(this)->initializeSlotRange(start, count); +} + +inline bool +JSObject::fakeNativeHasDynamicElements() const +{ + return static_cast(this)->hasDynamicElements(); +} + +inline bool +JSObject::fakeNativeHasEmptyElements() const +{ + return static_cast(this)->hasEmptyElements(); +} + +inline js::HeapSlotArray +JSObject::fakeNativeGetDenseElements() +{ + return static_cast(this)->getDenseElements(); +} + +inline bool +JSObject::fakeNativeDenseElementsAreCopyOnWrite() +{ + return static_cast(this)->denseElementsAreCopyOnWrite(); +} + +inline js::ObjectElements * +JSObject::fakeNativeGetElementsHeader() const +{ + return static_cast(this)->getElementsHeader(); +} + +inline js::HeapSlot *& +JSObject::fakeNativeElements() +{ + return elements; +} + +inline const js::Value & +JSObject::fakeNativeGetDenseElement(uint32_t idx) +{ + return static_cast(this)->getDenseElement(idx); +} + +inline uint32_t +JSObject::fakeNativeGetDenseInitializedLength() +{ + return static_cast(this)->getDenseInitializedLength(); +} + +inline const js::HeapSlot * +JSObject::fakeNativeGetSlotAddressUnchecked(uint32_t slot) const +{ + return static_cast(this)->getSlotAddressUnchecked(slot); +} + +template <> +inline bool +JSObject::is() const { return isNative(); } + +/* static */ inline bool +JSObject::lookupElement(JSContext *cx, js::HandleObject obj, uint32_t index, + js::MutableHandleObject objp, js::MutableHandleShape propp) +{ + js::LookupElementOp op = obj->getOps()->lookupElement; + if (op) + return op(cx, obj, index, objp, propp); + return js::baseops::LookupElement(cx, obj.as(), index, objp, propp); +} + +/* static */ inline bool +JSObject::getGeneric(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, + js::HandleId id, js::MutableHandleValue vp) +{ + MOZ_ASSERT(!!obj->getOps()->getGeneric == !!obj->getOps()->getProperty); + js::GenericIdOp op = obj->getOps()->getGeneric; + if (op) { + if (!op(cx, obj, receiver, id, vp)) + return false; + } else { + if 
(!js::baseops::GetProperty(cx, obj.as(), receiver, id, vp)) + return false; + } + return true; +} + +/* static */ inline bool +JSObject::getGenericNoGC(JSContext *cx, JSObject *obj, JSObject *receiver, + jsid id, js::Value *vp) +{ + js::GenericIdOp op = obj->getOps()->getGeneric; + if (op) + return false; + return js::baseops::GetPropertyNoGC(cx, &obj->as(), receiver, id, vp); +} + +/* static */ inline bool +JSObject::setGeneric(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, + js::HandleId id, js::MutableHandleValue vp, bool strict) +{ + if (obj->getOps()->setGeneric) + return nonNativeSetProperty(cx, obj, id, vp, strict); + return js::baseops::SetPropertyHelper( + cx, obj.as(), receiver, id, js::baseops::Qualified, vp, strict); +} + +/* static */ inline bool +JSObject::setElement(JSContext *cx, js::HandleObject obj, js::HandleObject receiver, + uint32_t index, js::MutableHandleValue vp, bool strict) +{ + if (obj->getOps()->setElement) + return nonNativeSetElement(cx, obj, index, vp, strict); + return js::baseops::SetElementHelper(cx, obj.as(), + receiver, index, vp, strict); +} + +/* static */ inline bool +JSObject::getGenericAttributes(JSContext *cx, js::HandleObject obj, + js::HandleId id, unsigned *attrsp) +{ + js::GenericAttributesOp op = obj->getOps()->getGenericAttributes; + if (op) + return op(cx, obj, id, attrsp); + return js::baseops::GetAttributes(cx, obj.as(), id, attrsp); +} + +namespace js { + +// Alternate to JSObject::as() that tolerates null pointers. +inline NativeObject * +MaybeNativeObject(JSObject *obj) +{ + return obj ? 
&obj->as() : nullptr; +} + +} // namespace js + #endif /* vm_ObjectImpl_h */ diff --git a/js/src/vm/PIC.cpp b/js/src/vm/PIC.cpp index 18b2057ceb4..ccf48dbca0f 100644 --- a/js/src/vm/PIC.cpp +++ b/js/src/vm/PIC.cpp @@ -24,13 +24,13 @@ js::ForOfPIC::Chain::initialize(JSContext *cx) MOZ_ASSERT(!initialized_); // Get the canonical Array.prototype - RootedObject arrayProto(cx, GlobalObject::getOrCreateArrayPrototype(cx, cx->global())); + RootedNativeObject arrayProto(cx, GlobalObject::getOrCreateArrayPrototype(cx, cx->global())); if (!arrayProto) return false; // Get the canonical ArrayIterator.prototype - RootedObject arrayIteratorProto(cx, - GlobalObject::getOrCreateArrayIteratorPrototype(cx, cx->global())); + RootedNativeObject arrayIteratorProto(cx, + GlobalObject::getOrCreateArrayIteratorPrototype(cx, cx->global())); if (!arrayIteratorProto) return false; @@ -45,7 +45,7 @@ js::ForOfPIC::Chain::initialize(JSContext *cx) disabled_ = true; // Look up '@@iterator' on Array.prototype, ensure it's a slotful shape. 
- Shape *iterShape = arrayProto->nativeLookup(cx, cx->names().std_iterator); + Shape *iterShape = arrayProto->lookup(cx, cx->names().std_iterator); if (!iterShape || !iterShape->hasSlot() || !iterShape->hasDefaultGetter()) return true; @@ -58,7 +58,7 @@ js::ForOfPIC::Chain::initialize(JSContext *cx) return true; // Look up the 'next' value on ArrayIterator.prototype - Shape *nextShape = arrayIteratorProto->nativeLookup(cx, cx->names().next); + Shape *nextShape = arrayIteratorProto->lookup(cx, cx->names().next); if (!nextShape || !nextShape->hasSlot()) return true; @@ -99,9 +99,8 @@ js::ForOfPIC::Chain::isArrayOptimized(ArrayObject *obj) } bool -js::ForOfPIC::Chain::tryOptimizeArray(JSContext *cx, HandleObject array, bool *optimized) +js::ForOfPIC::Chain::tryOptimizeArray(JSContext *cx, HandleArrayObject array, bool *optimized) { - MOZ_ASSERT(array->is()); MOZ_ASSERT(optimized); *optimized = false; @@ -145,7 +144,7 @@ js::ForOfPIC::Chain::tryOptimizeArray(JSContext *cx, HandleObject array, bool *o return true; // Ensure array doesn't define '@@iterator' directly. - if (array->nativeLookup(cx, cx->names().std_iterator)) + if (array->lookup(cx, cx->names().std_iterator)) return true; // Good to optimize now, create stub to add. 
@@ -283,14 +282,14 @@ js::ForOfPIC::Chain::sweep(FreeOp *fop) static void ForOfPIC_finalize(FreeOp *fop, JSObject *obj) { - if (ForOfPIC::Chain *chain = ForOfPIC::fromJSObject(obj)) + if (ForOfPIC::Chain *chain = ForOfPIC::fromJSObject(&obj->as())) chain->sweep(fop); } static void ForOfPIC_traceObject(JSTracer *trc, JSObject *obj) { - if (ForOfPIC::Chain *chain = ForOfPIC::fromJSObject(obj)) + if (ForOfPIC::Chain *chain = ForOfPIC::fromJSObject(&obj->as())) chain->mark(trc); } @@ -304,11 +303,11 @@ const Class ForOfPIC::jsclass = { ForOfPIC_traceObject }; -/* static */ JSObject * +/* static */ NativeObject * js::ForOfPIC::createForOfPICObject(JSContext *cx, Handle global) { assertSameCompartment(cx, global); - JSObject *obj = NewObjectWithGivenProto(cx, &ForOfPIC::jsclass, nullptr, global); + NativeObject *obj = NewNativeObjectWithGivenProto(cx, &ForOfPIC::jsclass, nullptr, global); if (!obj) return nullptr; ForOfPIC::Chain *chain = cx->new_(); @@ -323,7 +322,7 @@ js::ForOfPIC::create(JSContext *cx) { MOZ_ASSERT(!cx->global()->getForOfPICObject()); Rooted global(cx, cx->global()); - JSObject *obj = GlobalObject::getOrCreateForOfPICObject(cx, global); + NativeObject *obj = GlobalObject::getOrCreateForOfPICObject(cx, global); if (!obj) return nullptr; return fromJSObject(obj); diff --git a/js/src/vm/PIC.h b/js/src/vm/PIC.h index 63d8e466d74..6e564482777 100644 --- a/js/src/vm/PIC.h +++ b/js/src/vm/PIC.h @@ -178,8 +178,8 @@ struct ForOfPIC { private: // Pointer to canonical Array.prototype and ArrayIterator.prototype - HeapPtrObject arrayProto_; - HeapPtrObject arrayIteratorProto_; + HeapPtrNativeObject arrayProto_; + HeapPtrNativeObject arrayIteratorProto_; // Shape of matching Array.prototype object, and slot containing // the '@@iterator' for it, and the canonical value. @@ -223,7 +223,7 @@ struct ForOfPIC Stub *isArrayOptimized(ArrayObject *obj); // Try to optimize this chain for an object. 
- bool tryOptimizeArray(JSContext *cx, HandleObject array, bool *optimized); + bool tryOptimizeArray(JSContext *cx, HandleArrayObject array, bool *optimized); // Check if the global array-related objects have not been messed with // in a way that would disable this PIC. @@ -255,14 +255,14 @@ struct ForOfPIC // Class for object that holds ForOfPIC chain. static const Class jsclass; - static JSObject *createForOfPICObject(JSContext *cx, Handle global); + static NativeObject *createForOfPICObject(JSContext *cx, Handle global); - static inline Chain *fromJSObject(JSObject *obj) { + static inline Chain *fromJSObject(NativeObject *obj) { MOZ_ASSERT(js::GetObjectClass(obj) == &ForOfPIC::jsclass); return (ForOfPIC::Chain *) obj->getPrivate(); } static inline Chain *getOrCreate(JSContext *cx) { - JSObject *obj = cx->global()->getForOfPICObject(); + NativeObject *obj = cx->global()->getForOfPICObject(); if (obj) return fromJSObject(obj); return create(cx); diff --git a/js/src/vm/ProxyObject.cpp b/js/src/vm/ProxyObject.cpp index 0f492aeec6a..85a4ad521ce 100644 --- a/js/src/vm/ProxyObject.cpp +++ b/js/src/vm/ProxyObject.cpp @@ -63,25 +63,25 @@ ProxyObject::New(JSContext *cx, const BaseProxyHandler *handler, HandleValue pri void ProxyObject::initCrossCompartmentPrivate(HandleValue priv) { - initCrossCompartmentSlot(PRIVATE_SLOT, priv); + fakeNativeInitCrossCompartmentSlot(PRIVATE_SLOT, priv); } void ProxyObject::setSameCompartmentPrivate(const Value &priv) { - setSlot(PRIVATE_SLOT, priv); + fakeNativeSetSlot(PRIVATE_SLOT, priv); } void ProxyObject::initHandler(const BaseProxyHandler *handler) { - initSlot(HANDLER_SLOT, PrivateValue(const_cast(handler))); + fakeNativeInitSlot(HANDLER_SLOT, PrivateValue(const_cast(handler))); } static void NukeSlot(ProxyObject *proxy, uint32_t slot) { - proxy->setReservedSlot(slot, NullValue()); + proxy->fakeNativeSetReservedSlot(slot, NullValue()); } void diff --git a/js/src/vm/ProxyObject.h b/js/src/vm/ProxyObject.h index 
1720fe4efed..083067ab407 100644 --- a/js/src/vm/ProxyObject.h +++ b/js/src/vm/ProxyObject.h @@ -7,9 +7,10 @@ #ifndef vm_ProxyObject_h #define vm_ProxyObject_h -#include "jsobj.h" #include "jsproxy.h" +#include "vm/ObjectImpl.h" + namespace js { // This is the base class for the various kinds of proxy objects. It's never @@ -35,7 +36,7 @@ class ProxyObject : public JSObject void setSameCompartmentPrivate(const Value &priv); HeapSlot *slotOfPrivate() { - return &getReservedSlotRef(PRIVATE_SLOT); + return &fakeNativeGetReservedSlotRef(PRIVATE_SLOT); } JSObject *target() const { @@ -54,7 +55,9 @@ class ProxyObject : public JSObject } static size_t offsetOfHandler() { - return getFixedSlotOffset(HANDLER_SLOT); + // FIXME Bug 1073842: this is temporary until non-native objects can + // access non-slot storage. + return NativeObject::getFixedSlotOffset(HANDLER_SLOT); } const Value &extra(size_t n) const { @@ -70,13 +73,13 @@ class ProxyObject : public JSObject private: HeapSlot *slotOfExtra(size_t n) { MOZ_ASSERT(n == 0 || n == 1); - return &getReservedSlotRef(EXTRA_SLOT + n); + return &fakeNativeGetReservedSlotRef(EXTRA_SLOT + n); } HeapSlot *slotOfClassSpecific(size_t n) { MOZ_ASSERT(n >= PROXY_MINIMUM_SLOTS); MOZ_ASSERT(n < JSCLASS_RESERVED_SLOTS(getClass())); - return &getReservedSlotRef(n); + return &fakeNativeGetReservedSlotRef(n); } static bool isValidProxyClass(const Class *clasp) { diff --git a/js/src/vm/RegExpObject.cpp b/js/src/vm/RegExpObject.cpp index b5cac05ac53..3c48c060ac9 100644 --- a/js/src/vm/RegExpObject.cpp +++ b/js/src/vm/RegExpObject.cpp @@ -21,6 +21,7 @@ #include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" #include "vm/Shape-inl.h" using namespace js; @@ -52,7 +53,7 @@ RegExpObjectBuilder::getOrCreate() // Note: RegExp objects are always allocated in the tenured heap. This is // not strictly required, but simplifies embedding them in jitcode. 
- JSObject *obj = NewBuiltinClassInstance(cx, &RegExpObject::class_, TenuredObject); + NativeObject *obj = NewNativeBuiltinClassInstance(cx, &RegExpObject::class_, TenuredObject); if (!obj) return false; obj->initPrivate(nullptr); @@ -71,7 +72,7 @@ RegExpObjectBuilder::getOrCreateClone(HandleTypeObject type) // Note: RegExp objects are always allocated in the tenured heap. This is // not strictly required, but simplifies embedding them in jitcode. - JSObject *clone = NewObjectWithType(cx->asJSContext(), type, parent, TenuredObject); + NativeObject *clone = NewNativeObjectWithType(cx->asJSContext(), type, parent, TenuredObject); if (!clone) return false; clone->initPrivate(nullptr); @@ -254,7 +255,7 @@ RegExpObject::trace(JSTracer *trc, JSObject *obj) IS_GC_MARKING_TRACER(trc) && !obj->asTenured().zone()->isPreservingCode()) { - obj->setPrivate(nullptr); + obj->as().setPrivate(nullptr); } else { shared->trace(trc); } @@ -335,7 +336,7 @@ RegExpObject::createShared(JSContext *cx, RegExpGuard *g) Shape * RegExpObject::assignInitialShape(ExclusiveContext *cx, Handle self) { - MOZ_ASSERT(self->nativeEmpty()); + MOZ_ASSERT(self->empty()); JS_STATIC_ASSERT(LAST_INDEX_SLOT == 0); JS_STATIC_ASSERT(SOURCE_SLOT == LAST_INDEX_SLOT + 1); @@ -369,24 +370,24 @@ RegExpObject::init(ExclusiveContext *cx, HandleAtom source, RegExpFlag flags) if (!EmptyShape::ensureInitialCustomShape(cx, self)) return false; - MOZ_ASSERT(self->nativeLookup(cx, NameToId(cx->names().lastIndex))->slot() == + MOZ_ASSERT(self->lookup(cx, NameToId(cx->names().lastIndex))->slot() == LAST_INDEX_SLOT); - MOZ_ASSERT(self->nativeLookup(cx, NameToId(cx->names().source))->slot() == + MOZ_ASSERT(self->lookup(cx, NameToId(cx->names().source))->slot() == SOURCE_SLOT); - MOZ_ASSERT(self->nativeLookup(cx, NameToId(cx->names().global))->slot() == + MOZ_ASSERT(self->lookup(cx, NameToId(cx->names().global))->slot() == GLOBAL_FLAG_SLOT); - MOZ_ASSERT(self->nativeLookup(cx, NameToId(cx->names().ignoreCase))->slot() == + 
MOZ_ASSERT(self->lookup(cx, NameToId(cx->names().ignoreCase))->slot() == IGNORE_CASE_FLAG_SLOT); - MOZ_ASSERT(self->nativeLookup(cx, NameToId(cx->names().multiline))->slot() == + MOZ_ASSERT(self->lookup(cx, NameToId(cx->names().multiline))->slot() == MULTILINE_FLAG_SLOT); - MOZ_ASSERT(self->nativeLookup(cx, NameToId(cx->names().sticky))->slot() == + MOZ_ASSERT(self->lookup(cx, NameToId(cx->names().sticky))->slot() == STICKY_FLAG_SLOT); /* * If this is a re-initialization with an existing RegExpShared, 'flags' * may not match getShared()->flags, so forget the RegExpShared. */ - self->JSObject::setPrivate(nullptr); + self->NativeObject::setPrivate(nullptr); self->zeroLastIndex(); self->setSource(source); @@ -704,7 +705,7 @@ RegExpCompartment::createMatchResultTemplateObject(JSContext *cx) MOZ_ASSERT(!matchResultTemplateObject_); /* Create template array object */ - RootedObject templateObject(cx, NewDenseUnallocatedArray(cx, 0, nullptr, TenuredObject)); + RootedArrayObject templateObject(cx, NewDenseUnallocatedArray(cx, 0, nullptr, TenuredObject)); if (!templateObject) return matchResultTemplateObject_; // = nullptr @@ -932,7 +933,7 @@ js::ParseRegExpFlags(JSContext *cx, JSString *flagStr, RegExpFlag *flagsOut) template bool -js::XDRScriptRegExpObject(XDRState *xdr, HeapPtrObject *objp) +js::XDRScriptRegExpObject(XDRState *xdr, MutableHandle objp) { /* NB: Keep this in sync with CloneScriptRegExpObject. 
*/ @@ -941,7 +942,7 @@ js::XDRScriptRegExpObject(XDRState *xdr, HeapPtrObject *objp) if (mode == XDR_ENCODE) { MOZ_ASSERT(objp); - RegExpObject &reobj = (*objp)->as(); + RegExpObject &reobj = *objp; source = reobj.getSource(); flagsword = reobj.getFlags(); } @@ -954,16 +955,16 @@ js::XDRScriptRegExpObject(XDRState *xdr, HeapPtrObject *objp) if (!reobj) return false; - objp->init(reobj); + objp.set(reobj); } return true; } template bool -js::XDRScriptRegExpObject(XDRState *xdr, HeapPtrObject *objp); +js::XDRScriptRegExpObject(XDRState *xdr, MutableHandle objp); template bool -js::XDRScriptRegExpObject(XDRState *xdr, HeapPtrObject *objp); +js::XDRScriptRegExpObject(XDRState *xdr, MutableHandle objp); JSObject * js::CloneScriptRegExpObject(JSContext *cx, RegExpObject &reobj) diff --git a/js/src/vm/RegExpObject.h b/js/src/vm/RegExpObject.h index 38b0651cb72..26a3652ad9e 100644 --- a/js/src/vm/RegExpObject.h +++ b/js/src/vm/RegExpObject.h @@ -314,7 +314,7 @@ class RegExpCompartment size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); }; -class RegExpObject : public JSObject +class RegExpObject : public NativeObject { static const unsigned LAST_INDEX_SLOT = 0; static const unsigned SOURCE_SLOT = 1; @@ -407,7 +407,7 @@ class RegExpObject : public JSObject void setShared(RegExpShared &shared) { MOZ_ASSERT(!maybeShared()); - JSObject::setPrivate(&shared); + NativeObject::setPrivate(&shared); } static void trace(JSTracer *trc, JSObject *obj); @@ -436,7 +436,7 @@ class RegExpObject : public JSObject */ bool createShared(JSContext *cx, RegExpGuard *g); RegExpShared *maybeShared() const { - return static_cast(JSObject::getPrivate()); + return static_cast(NativeObject::getPrivate()); } /* Call setShared in preference to setPrivate. 
*/ @@ -463,7 +463,7 @@ RegExpToShared(JSContext *cx, HandleObject obj, RegExpGuard *g) template bool -XDRScriptRegExpObject(XDRState *xdr, HeapPtrObject *objp); +XDRScriptRegExpObject(XDRState *xdr, MutableHandle objp); extern JSObject * CloneScriptRegExpObject(JSContext *cx, RegExpObject &re); diff --git a/js/src/vm/RegExpStatics.cpp b/js/src/vm/RegExpStatics.cpp index 5c10358e48d..9dd4ee0811a 100644 --- a/js/src/vm/RegExpStatics.cpp +++ b/js/src/vm/RegExpStatics.cpp @@ -8,7 +8,7 @@ #include "vm/RegExpStaticsObject.h" -#include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" using namespace js; @@ -22,14 +22,14 @@ using namespace js; static void resc_finalize(FreeOp *fop, JSObject *obj) { - RegExpStatics *res = static_cast(obj->getPrivate()); + RegExpStatics *res = static_cast(obj->as().getPrivate()); fop->delete_(res); } static void resc_trace(JSTracer *trc, JSObject *obj) { - void *pdata = obj->getPrivate(); + void *pdata = obj->as().getPrivate(); MOZ_ASSERT(pdata); RegExpStatics *res = static_cast(pdata); res->mark(trc); @@ -52,17 +52,17 @@ const Class RegExpStaticsObject::class_ = { resc_trace }; -JSObject * +RegExpStaticsObject * RegExpStatics::create(ExclusiveContext *cx, GlobalObject *parent) { - JSObject *obj = NewObjectWithGivenProto(cx, &RegExpStaticsObject::class_, nullptr, parent); + NativeObject *obj = NewNativeObjectWithGivenProto(cx, &RegExpStaticsObject::class_, nullptr, parent); if (!obj) return nullptr; RegExpStatics *res = cx->new_(); if (!res) return nullptr; obj->setPrivate(static_cast(res)); - return obj; + return &obj->as(); } void diff --git a/js/src/vm/RegExpStatics.h b/js/src/vm/RegExpStatics.h index 9d053f48847..2bbfd596436 100644 --- a/js/src/vm/RegExpStatics.h +++ b/js/src/vm/RegExpStatics.h @@ -15,6 +15,7 @@ namespace js { class GlobalObject; +class RegExpStaticsObject; class RegExpStatics { @@ -47,7 +48,7 @@ class RegExpStatics public: RegExpStatics() : bufferLink(nullptr), copied(false) { clear(); } - static JSObject 
*create(ExclusiveContext *cx, GlobalObject *parent); + static RegExpStaticsObject *create(ExclusiveContext *cx, GlobalObject *parent); private: bool executeLazy(JSContext *cx); diff --git a/js/src/vm/RegExpStaticsObject.h b/js/src/vm/RegExpStaticsObject.h index bc584343c16..5b877e951e5 100644 --- a/js/src/vm/RegExpStaticsObject.h +++ b/js/src/vm/RegExpStaticsObject.h @@ -11,7 +11,7 @@ namespace js { -class RegExpStaticsObject : public JSObject +class RegExpStaticsObject : public NativeObject { public: static const Class class_; diff --git a/js/src/vm/Runtime.cpp b/js/src/vm/Runtime.cpp index d9cc948365e..764d97db6cb 100644 --- a/js/src/vm/Runtime.cpp +++ b/js/src/vm/Runtime.cpp @@ -451,8 +451,8 @@ NewObjectCache::clearNurseryObjects(JSRuntime *rt) Entry &e = entries[i]; JSObject *obj = reinterpret_cast(&e.templateObject); if (IsInsideNursery(e.key) || - rt->gc.nursery.isInside(obj->slots) || - rt->gc.nursery.isInside(obj->elements)) + rt->gc.nursery.isInside(obj->fakeNativeSlots()) || + rt->gc.nursery.isInside(obj->fakeNativeElements())) { PodZero(&e); } diff --git a/js/src/vm/Runtime.h b/js/src/vm/Runtime.h index b184e8a4993..8696d73a9b3 100644 --- a/js/src/vm/Runtime.h +++ b/js/src/vm/Runtime.h @@ -334,7 +334,7 @@ class NewObjectCache MOZ_ASSERT(entry_ == makeIndex(clasp, key, kind)); Entry *entry = &entries[entry_]; - MOZ_ASSERT(!obj->hasDynamicSlots() && !obj->hasDynamicElements()); + MOZ_ASSERT(!obj->fakeNativeHasDynamicSlots() && !obj->fakeNativeHasDynamicElements()); entry->clasp = clasp; entry->key = key; @@ -822,7 +822,7 @@ struct JSRuntime : public JS::shadow::Runtime, * Self-hosting state cloned on demand into other compartments. Shared with the parent * runtime if there is one. */ - JSObject *selfHostingGlobal_; + js::NativeObject *selfHostingGlobal_; /* Space for interpreter frames. 
*/ js::InterpreterStack interpreterStack_; diff --git a/js/src/vm/SavedStacks.cpp b/js/src/vm/SavedStacks.cpp index 77b4214b565..0d01238a6a8 100644 --- a/js/src/vm/SavedStacks.cpp +++ b/js/src/vm/SavedStacks.cpp @@ -25,7 +25,8 @@ #include "vm/StringBuffer.h" #include "jscntxtinlines.h" -#include "jsobjinlines.h" + +#include "vm/ObjectImpl-inl.h" using mozilla::AddToHash; using mozilla::HashString; @@ -280,7 +281,7 @@ SavedFrame::checkThis(JSContext *cx, CallArgs &args, const char *fnName) // Check for SavedFrame.prototype, which has the same class as SavedFrame // instances, however doesn't actually represent a captured stack frame. It // is the only object that is() but doesn't have a source. - if (thisObject.getReservedSlot(JSSLOT_SOURCE).isNull()) { + if (thisObject.as().getReservedSlot(JSSLOT_SOURCE).isNull()) { JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INCOMPATIBLE_PROTO, SavedFrame::class_.name, fnName, "prototype object"); return nullptr; @@ -592,9 +593,10 @@ SavedStacks::getOrCreateSavedFramePrototype(JSContext *cx) if (!global) return nullptr; - RootedObject proto(cx, NewObjectWithGivenProto(cx, &SavedFrame::class_, - global->getOrCreateObjectPrototype(cx), - global)); + RootedNativeObject proto(cx, + NewNativeObjectWithGivenProto(cx, &SavedFrame::class_, + global->getOrCreateObjectPrototype(cx), + global)); if (!proto || !JS_DefineProperties(cx, proto, SavedFrame::properties) || !JS_DefineFunctions(cx, proto, SavedFrame::methods) @@ -603,10 +605,11 @@ SavedStacks::getOrCreateSavedFramePrototype(JSContext *cx) return nullptr; } - savedFrameProto.set(proto); // The only object with the SavedFrame::class_ that doesn't have a source // should be the prototype. 
- savedFrameProto->setReservedSlot(SavedFrame::JSSLOT_SOURCE, NullValue()); + proto->setReservedSlot(SavedFrame::JSSLOT_SOURCE, NullValue()); + + savedFrameProto.set(proto); return savedFrameProto; } diff --git a/js/src/vm/SavedStacks.h b/js/src/vm/SavedStacks.h index 95dab132863..d328265983f 100644 --- a/js/src/vm/SavedStacks.h +++ b/js/src/vm/SavedStacks.h @@ -14,7 +14,7 @@ namespace js { -class SavedFrame : public JSObject { +class SavedFrame : public NativeObject { friend class SavedStacks; public: diff --git a/js/src/vm/ScopeObject-inl.h b/js/src/vm/ScopeObject-inl.h index 6084dee0e1e..8176333f3f5 100644 --- a/js/src/vm/ScopeObject-inl.h +++ b/js/src/vm/ScopeObject-inl.h @@ -28,7 +28,7 @@ ScopeObject::setAliasedVar(JSContext *cx, ScopeCoordinate sc, PropertyName *name // Keep track of properties which have ever been overwritten. if (!getSlot(sc.slot()).isUndefined()) { - Shape *shape = nativeLookup(cx, name); + Shape *shape = lookup(cx, name); shape->setOverwritten(); } } diff --git a/js/src/vm/ScopeObject.cpp b/js/src/vm/ScopeObject.cpp index 12d4408dd4a..da0414822a3 100644 --- a/js/src/vm/ScopeObject.cpp +++ b/js/src/vm/ScopeObject.cpp @@ -330,7 +330,8 @@ DeclEnvObject::createTemplateObject(JSContext *cx, HandleFunction fun, gc::Initi if (!emptyDeclEnvShape) return nullptr; - RootedObject obj(cx, JSObject::create(cx, FINALIZE_KIND, heap, emptyDeclEnvShape, type)); + RootedNativeObject obj(cx, MaybeNativeObject(JSObject::create(cx, FINALIZE_KIND, heap, + emptyDeclEnvShape, type))); if (!obj) return nullptr; @@ -338,8 +339,8 @@ DeclEnvObject::createTemplateObject(JSContext *cx, HandleFunction fun, gc::Initi Rooted id(cx, AtomToId(fun->atom())); const Class *clasp = obj->getClass(); unsigned attrs = JSPROP_ENUMERATE | JSPROP_PERMANENT | JSPROP_READONLY; - if (!JSObject::putProperty(cx, obj, id, clasp->getProperty, - clasp->setProperty, lambdaSlot(), attrs, 0)) { + if (!NativeObject::putProperty(cx, obj, id, clasp->getProperty, + clasp->setProperty, 
lambdaSlot(), attrs, 0)) { return nullptr; } @@ -350,13 +351,13 @@ DeclEnvObject::createTemplateObject(JSContext *cx, HandleFunction fun, gc::Initi DeclEnvObject * DeclEnvObject::create(JSContext *cx, HandleObject enclosing, HandleFunction callee) { - RootedObject obj(cx, createTemplateObject(cx, callee, gc::DefaultHeap)); + Rooted obj(cx, createTemplateObject(cx, callee, gc::DefaultHeap)); if (!obj) return nullptr; - obj->as().setEnclosingScope(enclosing); + obj->setEnclosingScope(enclosing); obj->setFixedSlot(lambdaSlot(), ObjectValue(*callee)); - return &obj->as(); + return obj; } template @@ -431,7 +432,8 @@ DynamicWithObject::create(JSContext *cx, HandleObject object, HandleObject enclo if (!shape) return nullptr; - RootedObject obj(cx, JSObject::create(cx, FINALIZE_KIND, gc::DefaultHeap, shape, type)); + RootedNativeObject obj(cx, MaybeNativeObject(JSObject::create(cx, FINALIZE_KIND, + gc::DefaultHeap, shape, type))); if (!obj) return nullptr; @@ -618,7 +620,8 @@ ClonedBlockObject::create(JSContext *cx, Handle block, Abst RootedShape shape(cx, block->lastProperty()); - RootedObject obj(cx, JSObject::create(cx, FINALIZE_KIND, gc::TenuredHeap, shape, type)); + RootedNativeObject obj(cx, MaybeNativeObject(JSObject::create(cx, FINALIZE_KIND, + gc::TenuredHeap, shape, type))); if (!obj) return nullptr; @@ -705,14 +708,14 @@ StaticBlockObject::addVar(ExclusiveContext *cx, Handle block * block's shape later. 
*/ uint32_t slot = JSSLOT_FREE(&BlockObject::class_) + index; - return JSObject::addPropertyInternal(cx, block, id, - /* getter = */ nullptr, - /* setter = */ nullptr, - slot, - JSPROP_ENUMERATE | JSPROP_PERMANENT, - /* attrs = */ 0, - spp, - /* allowDictionary = */ false); + return NativeObject::addPropertyInternal(cx, block, id, + /* getter = */ nullptr, + /* setter = */ nullptr, + slot, + JSPROP_ENUMERATE | JSPROP_PERMANENT, + /* attrs = */ 0, + spp, + /* allowDictionary = */ false); } const Class BlockObject::class_ = { @@ -1356,7 +1359,7 @@ class DebugScopeProxy : public BaseProxyHandler vp.set(frame.unaliasedLocal(i)); else frame.unaliasedLocal(i) = vp; - } else if (JSObject *snapshot = debugScope->maybeSnapshot()) { + } else if (NativeObject *snapshot = debugScope->maybeSnapshot()) { if (action == GET) vp.set(snapshot->getDenseElement(bindings.numArgs() + i)); else @@ -1387,7 +1390,7 @@ class DebugScopeProxy : public BaseProxyHandler else frame.unaliasedFormal(i, DONT_CHECK_ALIASING) = vp; } - } else if (JSObject *snapshot = debugScope->maybeSnapshot()) { + } else if (NativeObject *snapshot = debugScope->maybeSnapshot()) { if (action == GET) vp.set(snapshot->getDenseElement(i)); else @@ -1800,15 +1803,16 @@ DebugScopeObject::enclosingScope() const return extra(ENCLOSING_EXTRA).toObject(); } -JSObject * +ArrayObject * DebugScopeObject::maybeSnapshot() const { MOZ_ASSERT(!scope().as().isForEval()); - return extra(SNAPSHOT_EXTRA).toObjectOrNull(); + JSObject *obj = extra(SNAPSHOT_EXTRA).toObjectOrNull(); + return obj ? 
&obj->as() : nullptr; } void -DebugScopeObject::initSnapshot(JSObject &o) +DebugScopeObject::initSnapshot(ArrayObject &o) { MOZ_ASSERT(maybeSnapshot() == nullptr); setExtra(SNAPSHOT_EXTRA, ObjectValue(o)); @@ -1972,7 +1976,7 @@ DebugScopes::sweep(JSRuntime *rt) ScopeIterKey key = e.front().key(); bool needsUpdate = false; if (IsForwarded(key.cur())) { - key.updateCur(js::gc::Forwarded(key.cur())); + key.updateCur(&gc::Forwarded(key.cur())->as()); needsUpdate = true; } if (key.staticScope() && IsForwarded(key.staticScope())) { @@ -2211,7 +2215,7 @@ DebugScopes::onPopCall(AbstractFramePtr frame, JSContext *cx) * Use a dense array as storage (since proxies do not have trace * hooks). This array must not escape into the wild. */ - RootedObject snapshot(cx, NewDenseCopiedArray(cx, vec.length(), vec.begin())); + RootedArrayObject snapshot(cx, NewDenseCopiedArray(cx, vec.length(), vec.begin())); if (!snapshot) { cx->clearPendingException(); return; diff --git a/js/src/vm/ScopeObject.h b/js/src/vm/ScopeObject.h index 65395a814b6..8d86e89a2e8 100644 --- a/js/src/vm/ScopeObject.h +++ b/js/src/vm/ScopeObject.h @@ -193,7 +193,7 @@ ScopeCoordinateFunctionScript(JSScript *script, jsbytecode *pc); * See also "Debug scope objects" below. */ -class ScopeObject : public JSObject +class ScopeObject : public NativeObject { protected: static const uint32_t SCOPE_CHAIN_SLOT = 0; @@ -841,8 +841,8 @@ class DebugScopeObject : public ProxyObject JSObject &enclosingScope() const; /* May only be called for proxies to function call objects. */ - JSObject *maybeSnapshot() const; - void initSnapshot(JSObject &snapshot); + ArrayObject *maybeSnapshot() const; + void initSnapshot(ArrayObject &snapshot); /* Currently, the 'declarative' scopes are Call and Block. 
*/ bool isForDeclarative() const; diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp index 0290241aeb1..e402de21ea2 100644 --- a/js/src/vm/SelfHosting.cpp +++ b/js/src/vm/SelfHosting.cpp @@ -33,6 +33,7 @@ #include "vm/BooleanObject-inl.h" #include "vm/NumberObject-inl.h" +#include "vm/ObjectImpl-inl.h" #include "vm/StringObject-inl.h" using namespace js; @@ -412,7 +413,7 @@ js::intrinsic_NewDenseArray(JSContext *cx, unsigned argc, Value *vp) uint32_t length = args[0].toInt32(); // Make a new buffer and initialize it up to length. - RootedObject buffer(cx, NewDenseFullyAllocatedArray(cx, length)); + RootedArrayObject buffer(cx, NewDenseFullyAllocatedArray(cx, length)); if (!buffer) return false; @@ -421,18 +422,18 @@ js::intrinsic_NewDenseArray(JSContext *cx, unsigned argc, Value *vp) return false; buffer->setType(newtype); - JSObject::EnsureDenseResult edr = buffer->ensureDenseElements(cx, length, 0); + NativeObject::EnsureDenseResult edr = buffer->ensureDenseElements(cx, length, 0); switch (edr) { - case JSObject::ED_OK: + case NativeObject::ED_OK: args.rval().setObject(*buffer); return true; - case JSObject::ED_SPARSE: // shouldn't happen! + case NativeObject::ED_SPARSE: // shouldn't happen! 
MOZ_ASSERT(!"%EnsureDenseArrayElements() would yield sparse array"); JS_ReportError(cx, "%EnsureDenseArrayElements() would yield sparse array"); break; - case JSObject::ED_FAILED: + case NativeObject::ED_FAILED: break; } return false; @@ -480,8 +481,8 @@ js::intrinsic_UnsafePutElements(JSContext *cx, unsigned argc, Value *vp) if (!JSObject::setElement(cx, arrobj, arrobj, idx, &tmp, false)) return false; } else { - MOZ_ASSERT(idx < arrobj->getDenseInitializedLength()); - arrobj->setDenseElementWithType(cx, idx, args[elemi]); + MOZ_ASSERT(idx < arrobj->as().getDenseInitializedLength()); + arrobj->as().setDenseElementWithType(cx, idx, args[elemi]); } } @@ -537,7 +538,7 @@ js::intrinsic_UnsafeSetReservedSlot(JSContext *cx, unsigned argc, Value *vp) MOZ_ASSERT(args[0].isObject()); MOZ_ASSERT(args[1].isInt32()); - args[0].toObject().setReservedSlot(args[1].toPrivateUint32(), args[2]); + args[0].toObject().as().setReservedSlot(args[1].toPrivateUint32(), args[2]); args.rval().setUndefined(); return true; } @@ -550,7 +551,7 @@ js::intrinsic_UnsafeGetReservedSlot(JSContext *cx, unsigned argc, Value *vp) MOZ_ASSERT(args[0].isObject()); MOZ_ASSERT(args[1].isInt32()); - args.rval().set(args[0].toObject().getReservedSlot(args[1].toPrivateUint32())); + args.rval().set(args[0].toObject().as().getReservedSlot(args[1].toPrivateUint32())); return true; } @@ -576,7 +577,8 @@ js::intrinsic_IsPackedArray(JSContext *cx, unsigned argc, Value *vp) JSObject *obj = &args[0].toObject(); bool isPacked = obj->is() && !obj->hasLazyType() && !obj->type()->hasAllFlags(types::OBJECT_FLAG_NON_PACKED) && - obj->getDenseInitializedLength() == obj->as().length(); + obj->as().getDenseInitializedLength() == + obj->as().length(); args.rval().setBoolean(isPacked); return true; @@ -1072,10 +1074,13 @@ JSRuntime::initSelfHosting(JSContext *cx) JS::CompartmentOptions compartmentOptions; compartmentOptions.setDiscardSource(true); - if (!(selfHostingGlobal_ = JS_NewGlobalObject(cx, &self_hosting_global_class, - 
nullptr, JS::DontFireOnNewGlobalHook, - compartmentOptions))) + if (!(selfHostingGlobal_ = MaybeNativeObject(JS_NewGlobalObject(cx, &self_hosting_global_class, + nullptr, JS::DontFireOnNewGlobalHook, + compartmentOptions)))) + { return false; + } + JSAutoCompartment ac(cx, selfHostingGlobal_); Rooted shg(cx, &selfHostingGlobal_->as()); selfHostingGlobal_->compartment()->isSelfHosting = true; @@ -1144,7 +1149,8 @@ static bool CloneValue(JSContext *cx, HandleValue selfHostedValue, MutableHandleValue vp); static bool -GetUnclonedValue(JSContext *cx, HandleObject selfHostedObject, HandleId id, MutableHandleValue vp) +GetUnclonedValue(JSContext *cx, HandleNativeObject selfHostedObject, + HandleId id, MutableHandleValue vp) { vp.setUndefined(); @@ -1169,7 +1175,7 @@ GetUnclonedValue(JSContext *cx, HandleObject selfHostedObject, HandleId id, Muta JSDVG_IGNORE_STACK, value, NullPtr(), nullptr, nullptr); } - RootedShape shape(cx, selfHostedObject->nativeLookupPure(id)); + RootedShape shape(cx, selfHostedObject->lookupPure(id)); if (!shape) { RootedValue value(cx, IdToValue(id)); return js_ReportValueErrorFlags(cx, JSREPORT_ERROR, JSMSG_NO_SUCH_SELF_HOSTED_PROP, @@ -1182,7 +1188,7 @@ GetUnclonedValue(JSContext *cx, HandleObject selfHostedObject, HandleId id, Muta } static bool -CloneProperties(JSContext *cx, HandleObject selfHostedObject, HandleObject clone) +CloneProperties(JSContext *cx, HandleNativeObject selfHostedObject, HandleObject clone) { AutoIdVector ids(cx); @@ -1241,7 +1247,7 @@ CloneString(JSContext *cx, JSFlatString *selfHostedString) } static JSObject * -CloneObject(JSContext *cx, HandleObject selfHostedObject) +CloneObject(JSContext *cx, HandleNativeObject selfHostedObject) { AutoCycleDetector detect(cx, selfHostedObject); if (!detect.init()) @@ -1303,7 +1309,7 @@ static bool CloneValue(JSContext *cx, HandleValue selfHostedValue, MutableHandleValue vp) { if (selfHostedValue.isObject()) { - RootedObject selfHostedObject(cx, &selfHostedValue.toObject()); + 
RootedNativeObject selfHostedObject(cx, &selfHostedValue.toObject().as()); JSObject *clone = CloneObject(cx, selfHostedObject); if (!clone) return false; @@ -1331,7 +1337,7 @@ JSRuntime::cloneSelfHostedFunctionScript(JSContext *cx, HandlePropertyName name, { RootedId id(cx, NameToId(name)); RootedValue funVal(cx); - if (!GetUnclonedValue(cx, HandleObject::fromMarkedLocation(&selfHostingGlobal_), id, &funVal)) + if (!GetUnclonedValue(cx, HandleNativeObject::fromMarkedLocation(&selfHostingGlobal_), id, &funVal)) return false; RootedFunction sourceFun(cx, &funVal.toObject().as()); @@ -1361,7 +1367,7 @@ JSRuntime::cloneSelfHostedValue(JSContext *cx, HandlePropertyName name, MutableH { RootedId id(cx, NameToId(name)); RootedValue selfHostedValue(cx); - if (!GetUnclonedValue(cx, HandleObject::fromMarkedLocation(&selfHostingGlobal_), id, &selfHostedValue)) + if (!GetUnclonedValue(cx, HandleNativeObject::fromMarkedLocation(&selfHostingGlobal_), id, &selfHostedValue)) return false; /* diff --git a/js/src/vm/Shape-inl.h b/js/src/vm/Shape-inl.h index 3ba80c17f04..3920a00f357 100644 --- a/js/src/vm/Shape-inl.h +++ b/js/src/vm/Shape-inl.h @@ -162,14 +162,14 @@ EmptyShape::ensureInitialCustomShape(ExclusiveContext *cx, HandlenativeEmpty()) + if (!obj->empty()) return true; // If no initial shape was assigned, do so. RootedShape shape(cx, ObjectSubclass::assignInitialShape(cx, obj)); if (!shape) return false; - MOZ_ASSERT(!obj->nativeEmpty()); + MOZ_ASSERT(!obj->empty()); // If the object is a standard prototype -- |RegExp.prototype|, // |String.prototype|, |RangeError.prototype|, &c. 
-- GlobalObject.cpp's diff --git a/js/src/vm/Shape.cpp b/js/src/vm/Shape.cpp index bb75d2f4e4a..78e3df14783 100644 --- a/js/src/vm/Shape.cpp +++ b/js/src/vm/Shape.cpp @@ -69,7 +69,7 @@ ShapeTable::init(ThreadSafeContext *cx, Shape *lastProp) } void -Shape::removeFromDictionary(ObjectImpl *obj) +Shape::removeFromDictionary(NativeObject *obj) { MOZ_ASSERT(inDictionary()); MOZ_ASSERT(obj->inDictionaryMode()); @@ -346,8 +346,8 @@ Shape::replaceLastProperty(ExclusiveContext *cx, StackBaseShape &base, * one of lastProperty() or lastProperty()->parent. */ /* static */ Shape * -JSObject::getChildPropertyOnDictionary(ThreadSafeContext *cx, JS::HandleObject obj, - HandleShape parent, js::StackShape &child) +NativeObject::getChildPropertyOnDictionary(ThreadSafeContext *cx, HandleNativeObject obj, + HandleShape parent, StackShape &child) { /* * Shared properties have no slot, but slot_ will reflect that of parent. @@ -389,7 +389,7 @@ JSObject::getChildPropertyOnDictionary(ThreadSafeContext *cx, JS::HandleObject o if (!shape) return nullptr; if (childRoot->hasSlot() && childRoot->slot() >= obj->lastProperty()->base()->slotSpan()) { - if (!JSObject::setSlotSpan(cx, obj, childRoot->slot() + 1)) + if (!setSlotSpan(cx, obj, childRoot->slot() + 1)) return nullptr; } shape->initDictionaryShape(*childRoot, obj->numFixedSlots(), &obj->shape_); @@ -399,8 +399,8 @@ JSObject::getChildPropertyOnDictionary(ThreadSafeContext *cx, JS::HandleObject o } /* static */ Shape * -JSObject::getChildProperty(ExclusiveContext *cx, - HandleObject obj, HandleShape parent, StackShape &unrootedChild) +NativeObject::getChildProperty(ExclusiveContext *cx, + HandleNativeObject obj, HandleShape parent, StackShape &unrootedChild) { RootedGeneric child(cx, &unrootedChild); RootedShape shape(cx, getChildPropertyOnDictionary(cx, obj, parent, *child)); @@ -411,7 +411,7 @@ JSObject::getChildProperty(ExclusiveContext *cx, return nullptr; //MOZ_ASSERT(shape->parent == parent); //MOZ_ASSERT_IF(parent != lastProperty(), 
parent == lastProperty()->parent); - if (!JSObject::setLastProperty(cx, obj, shape)) + if (!setLastProperty(cx, obj, shape)) return nullptr; } @@ -419,8 +419,8 @@ JSObject::getChildProperty(ExclusiveContext *cx, } /* static */ Shape * -JSObject::lookupChildProperty(ThreadSafeContext *cx, - HandleObject obj, HandleShape parent, StackShape &unrootedChild) +NativeObject::lookupChildProperty(ThreadSafeContext *cx, + HandleNativeObject obj, HandleShape parent, StackShape &unrootedChild) { RootedGeneric child(cx, &unrootedChild); MOZ_ASSERT(cx->isThreadLocal(obj)); @@ -431,7 +431,7 @@ JSObject::lookupChildProperty(ThreadSafeContext *cx, shape = cx->compartment_->propertyTree.lookupChild(cx, parent, *child); if (!shape) return nullptr; - if (!JSObject::setLastProperty(cx, obj, shape)) + if (!setLastProperty(cx, obj, shape)) return nullptr; } @@ -439,7 +439,7 @@ JSObject::lookupChildProperty(ThreadSafeContext *cx, } bool -js::ObjectImpl::toDictionaryMode(ThreadSafeContext *cx) +js::NativeObject::toDictionaryMode(ThreadSafeContext *cx) { MOZ_ASSERT(!inDictionaryMode()); @@ -460,7 +460,7 @@ js::ObjectImpl::toDictionaryMode(ThreadSafeContext *cx) uint32_t span = slotSpan(); - Rooted self(cx, this); + Rooted self(cx, this); /* * Clone the shapes into a new dictionary list. 
Don't update the @@ -531,10 +531,10 @@ NormalizeGetterAndSetter(JSObject *obj, } /* static */ Shape * -JSObject::addProperty(ExclusiveContext *cx, HandleObject obj, HandleId id, - PropertyOp getter, StrictPropertyOp setter, - uint32_t slot, unsigned attrs, - unsigned flags, bool allowDictionary) +NativeObject::addProperty(ExclusiveContext *cx, HandleNativeObject obj, HandleId id, + PropertyOp getter, StrictPropertyOp setter, + uint32_t slot, unsigned attrs, + unsigned flags, bool allowDictionary) { MOZ_ASSERT(!JSID_IS_VOID(id)); @@ -581,12 +581,12 @@ GetOrLookupUnownedBaseShape(typename ExecutionModeTraits::ExclusiveContext template /* static */ Shape * -JSObject::addPropertyInternal(typename ExecutionModeTraits::ExclusiveContextType cx, - HandleObject obj, HandleId id, - PropertyOp getter, StrictPropertyOp setter, - uint32_t slot, unsigned attrs, - unsigned flags, Shape **spp, - bool allowDictionary) +NativeObject::addPropertyInternal(typename ExecutionModeTraits::ExclusiveContextType cx, + HandleNativeObject obj, HandleId id, + PropertyOp getter, StrictPropertyOp setter, + uint32_t slot, unsigned attrs, + unsigned flags, Shape **spp, + bool allowDictionary) { MOZ_ASSERT(cx->isThreadLocal(obj)); MOZ_ASSERT_IF(!allowDictionary, !obj->inDictionaryMode()); @@ -672,25 +672,25 @@ JSObject::addPropertyInternal(typename ExecutionModeTraits::ExclusiveConte } template /* static */ Shape * -JSObject::addPropertyInternal(ExclusiveContext *cx, - HandleObject obj, HandleId id, - PropertyOp getter, StrictPropertyOp setter, - uint32_t slot, unsigned attrs, - unsigned flags, Shape **spp, - bool allowDictionary); +NativeObject::addPropertyInternal(ExclusiveContext *cx, + HandleNativeObject obj, HandleId id, + PropertyOp getter, StrictPropertyOp setter, + uint32_t slot, unsigned attrs, + unsigned flags, Shape **spp, + bool allowDictionary); template /* static */ Shape * -JSObject::addPropertyInternal(ForkJoinContext *cx, - HandleObject obj, HandleId id, - PropertyOp getter, 
StrictPropertyOp setter, - uint32_t slot, unsigned attrs, - unsigned flags, Shape **spp, - bool allowDictionary); +NativeObject::addPropertyInternal(ForkJoinContext *cx, + HandleNativeObject obj, HandleId id, + PropertyOp getter, StrictPropertyOp setter, + uint32_t slot, unsigned attrs, + unsigned flags, Shape **spp, + bool allowDictionary); JSObject * js::NewReshapedObject(JSContext *cx, HandleTypeObject type, JSObject *parent, gc::AllocKind allocKind, HandleShape shape, NewObjectKind newKind) { - RootedObject res(cx, NewObjectWithType(cx, type, parent, allocKind, newKind)); + RootedNativeObject res(cx, NewNativeObjectWithType(cx, type, parent, allocKind, newKind)); if (!res) return nullptr; @@ -716,7 +716,7 @@ js::NewReshapedObject(JSContext *cx, HandleTypeObject type, JSObject *parent, RootedShape newShape(cx, res->lastProperty()); for (unsigned i = 0; i < ids.length(); i++) { id = ids[i]; - MOZ_ASSERT(!res->nativeContains(cx, id)); + MOZ_ASSERT(!res->contains(cx, id)); uint32_t index; bool indexed = js_IdIsIndex(id, &index); @@ -734,7 +734,7 @@ js::NewReshapedObject(JSContext *cx, HandleTypeObject type, JSObject *parent, newShape = cx->compartment()->propertyTree.getChild(cx, newShape, child); if (!newShape) return nullptr; - if (!JSObject::setLastProperty(cx, res, newShape)) + if (!NativeObject::setLastProperty(cx, res, newShape)) return nullptr; } @@ -769,10 +769,10 @@ CheckCanChangeAttrs(ThreadSafeContext *cx, JSObject *obj, Shape *shape, unsigned template /* static */ Shape * -JSObject::putProperty(typename ExecutionModeTraits::ExclusiveContextType cx, - HandleObject obj, HandleId id, - PropertyOp getter, StrictPropertyOp setter, - uint32_t slot, unsigned attrs, unsigned flags) +NativeObject::putProperty(typename ExecutionModeTraits::ExclusiveContextType cx, + HandleNativeObject obj, HandleId id, + PropertyOp getter, StrictPropertyOp setter, + uint32_t slot, unsigned attrs, unsigned flags) { MOZ_ASSERT(cx->isThreadLocal(obj)); MOZ_ASSERT(!JSID_IS_VOID(id)); 
@@ -959,26 +959,26 @@ JSObject::putProperty(typename ExecutionModeTraits::ExclusiveContextType c } template /* static */ Shape * -JSObject::putProperty(ExclusiveContext *cx, - HandleObject obj, HandleId id, - PropertyOp getter, StrictPropertyOp setter, - uint32_t slot, unsigned attrs, - unsigned flags); +NativeObject::putProperty(ExclusiveContext *cx, + HandleNativeObject obj, HandleId id, + PropertyOp getter, StrictPropertyOp setter, + uint32_t slot, unsigned attrs, + unsigned flags); template /* static */ Shape * -JSObject::putProperty(ForkJoinContext *cx, - HandleObject obj, HandleId id, - PropertyOp getter, StrictPropertyOp setter, - uint32_t slot, unsigned attrs, - unsigned flags); +NativeObject::putProperty(ForkJoinContext *cx, + HandleNativeObject obj, HandleId id, + PropertyOp getter, StrictPropertyOp setter, + uint32_t slot, unsigned attrs, + unsigned flags); template /* static */ Shape * -JSObject::changeProperty(typename ExecutionModeTraits::ExclusiveContextType cx, - HandleObject obj, HandleShape shape, unsigned attrs, - unsigned mask, PropertyOp getter, StrictPropertyOp setter) +NativeObject::changeProperty(typename ExecutionModeTraits::ExclusiveContextType cx, + HandleNativeObject obj, HandleShape shape, unsigned attrs, + unsigned mask, PropertyOp getter, StrictPropertyOp setter) { MOZ_ASSERT(cx->isThreadLocal(obj)); - MOZ_ASSERT(obj->nativeContainsPure(shape)); + MOZ_ASSERT(obj->containsPure(shape)); attrs |= shape->attrs & mask; MOZ_ASSERT_IF(attrs & (JSPROP_GETTER | JSPROP_SETTER), attrs & JSPROP_SHARED); @@ -1020,21 +1020,21 @@ JSObject::changeProperty(typename ExecutionModeTraits::ExclusiveContextTyp } template /* static */ Shape * -JSObject::changeProperty(ExclusiveContext *cx, - HandleObject obj, HandleShape shape, - unsigned attrs, unsigned mask, - PropertyOp getter, StrictPropertyOp setter); +NativeObject::changeProperty(ExclusiveContext *cx, + HandleNativeObject obj, HandleShape shape, + unsigned attrs, unsigned mask, + PropertyOp getter, 
StrictPropertyOp setter); template /* static */ Shape * -JSObject::changeProperty(ForkJoinContext *cx, - HandleObject obj, HandleShape shape, - unsigned attrs, unsigned mask, - PropertyOp getter, StrictPropertyOp setter); +NativeObject::changeProperty(ForkJoinContext *cx, + HandleNativeObject obj, HandleShape shape, + unsigned attrs, unsigned mask, + PropertyOp getter, StrictPropertyOp setter); bool -JSObject::removeProperty(ExclusiveContext *cx, jsid id_) +NativeObject::removeProperty(ExclusiveContext *cx, jsid id_) { RootedId id(cx, id_); - RootedObject self(cx, this); + RootedNativeObject self(cx, this); Shape **spp; RootedShape shape(cx, Shape::search(cx, lastProperty(), id, &spp)); @@ -1113,7 +1113,7 @@ JSObject::removeProperty(ExclusiveContext *cx, jsid id_) */ Shape *aprop = self->lastProperty(); for (int n = 50; --n >= 0 && aprop->parent; aprop = aprop->parent) - MOZ_ASSERT_IF(aprop != shape, self->nativeContains(cx, aprop)); + MOZ_ASSERT_IF(aprop != shape, self->contains(cx, aprop)); #endif } @@ -1149,7 +1149,7 @@ JSObject::removeProperty(ExclusiveContext *cx, jsid id_) } /* static */ void -JSObject::clear(JSContext *cx, HandleObject obj) +NativeObject::clear(JSContext *cx, HandleNativeObject obj) { RootedShape shape(cx, obj->lastProperty()); MOZ_ASSERT(obj->inDictionaryMode() == shape->inDictionary()); @@ -1163,14 +1163,14 @@ JSObject::clear(JSContext *cx, HandleObject obj) if (obj->inDictionaryMode()) shape->listp = &obj->shape_; - JS_ALWAYS_TRUE(JSObject::setLastProperty(cx, obj, shape)); + JS_ALWAYS_TRUE(setLastProperty(cx, obj, shape)); ++cx->runtime()->propertyRemovals; obj->checkShapeConsistency(); } /* static */ bool -JSObject::rollbackProperties(ExclusiveContext *cx, HandleObject obj, uint32_t slotSpan) +NativeObject::rollbackProperties(ExclusiveContext *cx, HandleNativeObject obj, uint32_t slotSpan) { /* * Remove properties from this object until it has a matching slot span. 
@@ -1196,7 +1196,7 @@ JSObject::rollbackProperties(ExclusiveContext *cx, HandleObject obj, uint32_t sl } Shape * -ObjectImpl::replaceWithNewEquivalentShape(ThreadSafeContext *cx, Shape *oldShape, Shape *newShape) +NativeObject::replaceWithNewEquivalentShape(ThreadSafeContext *cx, Shape *oldShape, Shape *newShape) { MOZ_ASSERT(cx->isThreadLocal(this)); MOZ_ASSERT(cx->isThreadLocal(oldShape)); @@ -1204,13 +1204,13 @@ ObjectImpl::replaceWithNewEquivalentShape(ThreadSafeContext *cx, Shape *oldShape MOZ_ASSERT_IF(oldShape != lastProperty(), inDictionaryMode() && ((cx->isExclusiveContext() - ? nativeLookup(cx->asExclusiveContext(), oldShape->propidRef()) - : nativeLookupPure(oldShape->propidRef())) == oldShape)); + ? lookup(cx->asExclusiveContext(), oldShape->propidRef()) + : lookupPure(oldShape->propidRef())) == oldShape)); - ObjectImpl *self = this; + NativeObject *self = this; if (!inDictionaryMode()) { - Rooted selfRoot(cx, self); + RootedNativeObject selfRoot(cx, self); RootedShape newRoot(cx, newShape); if (!toDictionaryMode(cx)) return nullptr; @@ -1220,7 +1220,7 @@ ObjectImpl::replaceWithNewEquivalentShape(ThreadSafeContext *cx, Shape *oldShape } if (!newShape) { - Rooted selfRoot(cx, self); + RootedNativeObject selfRoot(cx, self); RootedShape oldRoot(cx, oldShape); newShape = js_NewGCShape(cx); if (!newShape) @@ -1254,7 +1254,7 @@ ObjectImpl::replaceWithNewEquivalentShape(ThreadSafeContext *cx, Shape *oldShape } bool -JSObject::shadowingShapeChange(ExclusiveContext *cx, const Shape &shape) +NativeObject::shadowingShapeChange(ExclusiveContext *cx, const Shape &shape) { return generateOwnShape(cx); } @@ -1271,7 +1271,7 @@ JSObject::setParent(JSContext *cx, HandleObject obj, HandleObject parent) if (parent && !parent->setDelegate(cx)) return false; - if (obj->inDictionaryMode()) { + if (obj->isNative() && obj->as().inDictionaryMode()) { StackBaseShape base(obj->lastProperty()); base.parent = parent; UnownedBaseShape *nbase = BaseShape::getUnowned(cx, base); @@ 
-1306,7 +1306,7 @@ Shape::setObjectParent(ExclusiveContext *cx, JSObject *parent, TaggedProto proto /* static */ bool JSObject::setMetadata(JSContext *cx, HandleObject obj, HandleObject metadata) { - if (obj->inDictionaryMode()) { + if (obj->isNative() && obj->as().inDictionaryMode()) { StackBaseShape base(obj->lastProperty()); base.metadata = metadata; UnownedBaseShape *nbase = BaseShape::getUnowned(cx, base); @@ -1339,24 +1339,20 @@ Shape::setObjectMetadata(JSContext *cx, JSObject *metadata, TaggedProto proto, S } /* static */ bool -js::ObjectImpl::preventExtensions(JSContext *cx, Handle obj) +JSObject::preventExtensions(JSContext *cx, HandleObject obj) { - if (Downcast(obj)->is()) { - RootedObject object(cx, obj->asObjectPtr()); - return js::Proxy::preventExtensions(cx, object); - } + if (obj->is()) + return js::Proxy::preventExtensions(cx, obj); if (!obj->nonProxyIsExtensible()) return true; - RootedObject self(cx, obj->asObjectPtr()); - /* * Force lazy properties to be resolved by iterating over the objects' own * properties. */ AutoIdVector props(cx); - if (!js::GetPropertyNames(cx, self, JSITER_HIDDEN | JSITER_OWNONLY, &props)) + if (!js::GetPropertyNames(cx, obj, JSITER_HIDDEN | JSITER_OWNONLY, &props)) return false; /* @@ -1365,25 +1361,25 @@ js::ObjectImpl::preventExtensions(JSContext *cx, Handle obj) * new dense elements can be added without calling growElements(), which * checks isExtensible(). 
*/ - if (self->isNative() && !JSObject::sparsifyDenseElements(cx, self)) + if (obj->isNative() && !NativeObject::sparsifyDenseElements(cx, obj.as())) return false; - return self->setFlag(cx, BaseShape::NOT_EXTENSIBLE, GENERATE_SHAPE); + return obj->setFlag(cx, BaseShape::NOT_EXTENSIBLE, GENERATE_SHAPE); } bool -js::ObjectImpl::setFlag(ExclusiveContext *cx, /*BaseShape::Flag*/ uint32_t flag_, - GenerateShape generateShape) +JSObject::setFlag(ExclusiveContext *cx, /*BaseShape::Flag*/ uint32_t flag_, + GenerateShape generateShape) { BaseShape::Flag flag = (BaseShape::Flag) flag_; if (lastProperty()->getObjectFlags() & flag) return true; - Rooted self(cx, this); + RootedObject self(cx, this); - if (inDictionaryMode()) { - if (generateShape == GENERATE_SHAPE && !generateOwnShape(cx)) + if (isNative() && as().inDictionaryMode()) { + if (generateShape == GENERATE_SHAPE && !as().generateOwnShape(cx)) return false; StackBaseShape base(self->lastProperty()); base.flags |= flag; @@ -1405,12 +1401,12 @@ js::ObjectImpl::setFlag(ExclusiveContext *cx, /*BaseShape::Flag*/ uint32_t flag_ } bool -js::ObjectImpl::clearFlag(ExclusiveContext *cx, /*BaseShape::Flag*/ uint32_t flag) +NativeObject::clearFlag(ExclusiveContext *cx, BaseShape::Flag flag) { MOZ_ASSERT(inDictionaryMode()); MOZ_ASSERT(lastProperty()->getObjectFlags() & flag); - RootedObject self(cx, this->asObjectPtr()); + RootedObject self(cx, this); StackBaseShape base(self->lastProperty()); base.flags &= ~flag; diff --git a/js/src/vm/Shape.h b/js/src/vm/Shape.h index a48af2a5bff..969e06cea1e 100644 --- a/js/src/vm/Shape.h +++ b/js/src/vm/Shape.h @@ -110,7 +110,6 @@ namespace js { class Bindings; class Debugger; class Nursery; -class ObjectImpl; class StaticBlockObject; namespace gc { @@ -638,7 +637,7 @@ class Shape : public gc::TenuredCell friend class js::Bindings; friend class js::Nursery; friend class js::gc::ForkJoinNursery; - friend class js::ObjectImpl; + friend class js::NativeObject; friend class js::PropertyTree; 
friend class js::StaticBlockObject; friend struct js::StackShape; @@ -696,7 +695,7 @@ class Shape : public gc::TenuredCell Shape ***pspp, bool adding = false); static inline Shape *searchNoHashify(Shape *start, jsid id); - void removeFromDictionary(ObjectImpl *obj); + void removeFromDictionary(NativeObject *obj); void insertIntoDictionary(HeapPtrShape *dictp); void initDictionaryShape(const StackShape &child, uint32_t nfixed, HeapPtrShape *dictp) { diff --git a/js/src/vm/SharedArrayObject.cpp b/js/src/vm/SharedArrayObject.cpp index aae016e6e80..01818b21d88 100644 --- a/js/src/vm/SharedArrayObject.cpp +++ b/js/src/vm/SharedArrayObject.cpp @@ -326,7 +326,7 @@ js_InitSharedArrayBufferClass(JSContext *cx, HandleObject obj) { MOZ_ASSERT(obj->isNative()); Rooted global(cx, &obj->as()); - RootedObject proto(cx, global->createBlankPrototype(cx, &SharedArrayBufferObject::protoClass)); + RootedNativeObject proto(cx, global->createBlankPrototype(cx, &SharedArrayBufferObject::protoClass)); if (!proto) return nullptr; diff --git a/js/src/vm/SharedTypedArrayObject.cpp b/js/src/vm/SharedTypedArrayObject.cpp index c39de5b1843..1c9169b6bea 100644 --- a/js/src/vm/SharedTypedArrayObject.cpp +++ b/js/src/vm/SharedTypedArrayObject.cpp @@ -344,7 +344,7 @@ class SharedTypedArrayObjectTemplate : public SharedTypedArrayObject // Define an accessor for a read-only property that invokes a native getter static bool - DefineGetter(JSContext *cx, HandleObject proto, PropertyName *name, Native native) + DefineGetter(JSContext *cx, HandleNativeObject proto, PropertyName *name, Native native) { RootedId id(cx, NameToId(name)); unsigned attrs = JSPROP_SHARED | JSPROP_GETTER; diff --git a/js/src/vm/SharedTypedArrayObject.h b/js/src/vm/SharedTypedArrayObject.h index 8de7e957f1d..f2ed14690a3 100644 --- a/js/src/vm/SharedTypedArrayObject.h +++ b/js/src/vm/SharedTypedArrayObject.h @@ -24,7 +24,7 @@ namespace js { // same as the representation of a TypedArrayObject, see comments in // TypedArrayObject.h. 
-class SharedTypedArrayObject : public JSObject +class SharedTypedArrayObject : public NativeObject { protected: static const size_t BUFFER_SLOT = TypedArrayLayout::BUFFER_SLOT; diff --git a/js/src/vm/Stack.cpp b/js/src/vm/Stack.cpp index 242b42ee035..1aacbde495b 100644 --- a/js/src/vm/Stack.cpp +++ b/js/src/vm/Stack.cpp @@ -169,7 +169,7 @@ InterpreterFrame::createRestParameter(JSContext *cx) unsigned nformal = fun()->nargs() - 1, nactual = numActualArgs(); unsigned nrest = (nactual > nformal) ? nactual - nformal : 0; Value *restvp = argv() + nformal; - JSObject *obj = NewDenseCopiedArray(cx, nrest, restvp, nullptr); + ArrayObject *obj = NewDenseCopiedArray(cx, nrest, restvp, nullptr); if (!obj) return nullptr; types::FixRestArgumentsType(cx, obj); diff --git a/js/src/vm/StringObject-inl.h b/js/src/vm/StringObject-inl.h index baa1a7ef4a9..ac583797d6d 100644 --- a/js/src/vm/StringObject-inl.h +++ b/js/src/vm/StringObject-inl.h @@ -25,7 +25,7 @@ StringObject::init(JSContext *cx, HandleString str) if (!EmptyShape::ensureInitialCustomShape(cx, self)) return false; - MOZ_ASSERT(self->nativeLookup(cx, NameToId(cx->names().length))->slot() == LENGTH_SLOT); + MOZ_ASSERT(self->lookup(cx, NameToId(cx->names().length))->slot() == LENGTH_SLOT); self->setStringThis(str); diff --git a/js/src/vm/StringObject.h b/js/src/vm/StringObject.h index 2932a001757..959173999ca 100644 --- a/js/src/vm/StringObject.h +++ b/js/src/vm/StringObject.h @@ -14,7 +14,7 @@ namespace js { -class StringObject : public JSObject +class StringObject : public NativeObject { static const unsigned PRIMITIVE_VALUE_SLOT = 0; static const unsigned LENGTH_SLOT = 1; diff --git a/js/src/vm/TypedArrayCommon.h b/js/src/vm/TypedArrayCommon.h index f68e3554e90..aca75addb52 100644 --- a/js/src/vm/TypedArrayCommon.h +++ b/js/src/vm/TypedArrayCommon.h @@ -67,27 +67,13 @@ template<> inline Scalar::Type TypeIDOfType() { return Scalar::Float64; template<> inline Scalar::Type TypeIDOfType() { return Scalar::Uint8Clamped; } 
inline bool -IsAnyTypedArray(HandleObject obj) -{ - return obj->is() || obj->is(); -} - -inline bool -IsAnyTypedArray(const JSObject *obj) +IsAnyTypedArray(JSObject *obj) { return obj->is() || obj->is(); } inline uint32_t -AnyTypedArrayLength(HandleObject obj) -{ - if (obj->is()) - return obj->as().length(); - return obj->as().length(); -} - -inline uint32_t -AnyTypedArrayLength(const JSObject *obj) +AnyTypedArrayLength(JSObject *obj) { if (obj->is()) return obj->as().length(); @@ -95,15 +81,7 @@ AnyTypedArrayLength(const JSObject *obj) } inline Scalar::Type -AnyTypedArrayType(HandleObject obj) -{ - if (obj->is()) - return obj->as().type(); - return obj->as().type(); -} - -inline Scalar::Type -AnyTypedArrayType(const JSObject *obj) +AnyTypedArrayType(JSObject *obj) { if (obj->is()) return obj->as().type(); @@ -111,15 +89,7 @@ AnyTypedArrayType(const JSObject *obj) } inline Shape* -AnyTypedArrayShape(HandleObject obj) -{ - if (obj->is()) - return obj->as().lastProperty(); - return obj->as().lastProperty(); -} - -inline Shape* -AnyTypedArrayShape(const JSObject *obj) +AnyTypedArrayShape(JSObject *obj) { if (obj->is()) return obj->as().lastProperty(); @@ -266,14 +236,14 @@ class ElementSpecific if (source->isNative()) { // Attempt fast-path infallible conversion of dense elements up to // the first potentially side-effectful lookup or conversion. 
- uint32_t bound = Min(source->getDenseInitializedLength(), len); + uint32_t bound = Min(source->as().getDenseInitializedLength(), len); T *dest = static_cast(target->viewData()) + offset; MOZ_ASSERT(!canConvertInfallibly(MagicValue(JS_ELEMENTS_HOLE)), "the following loop must abort on holes"); - const Value *srcValues = source->getDenseElements(); + const Value *srcValues = source->as().getDenseElements(); for (; i < bound; i++) { if (!canConvertInfallibly(srcValues[i])) break; diff --git a/js/src/vm/TypedArrayObject.cpp b/js/src/vm/TypedArrayObject.cpp index d244737f60a..58fad334aca 100644 --- a/js/src/vm/TypedArrayObject.cpp +++ b/js/src/vm/TypedArrayObject.cpp @@ -42,6 +42,7 @@ #include "jsinferinlines.h" #include "jsobjinlines.h" +#include "vm/ObjectImpl-inl.h" #include "vm/Shape-inl.h" using namespace js; @@ -80,13 +81,13 @@ TypedArrayLayout::TypedArrayLayout(bool isShared, bool isNeuterable, const Class /* static */ int TypedArrayLayout::lengthOffset() { - return JSObject::getFixedSlotOffset(LENGTH_SLOT); + return NativeObject::getFixedSlotOffset(LENGTH_SLOT); } /* static */ int TypedArrayLayout::dataOffset() { - return JSObject::getPrivateDataOffset(DATA_SLOT); + return NativeObject::getPrivateDataOffset(DATA_SLOT); } void @@ -124,12 +125,13 @@ TypedArrayObject::ensureHasBuffer(JSContext *cx, Handle tarr } /* static */ void -TypedArrayObject::ObjectMoved(JSObject *obj, const JSObject *old) +TypedArrayObject::ObjectMoved(JSObject *dstArg, const JSObject *srcArg) { - const TypedArrayObject &src = old->as(); + const TypedArrayObject &src = srcArg->as(); + TypedArrayObject &dst = dstArg->as(); if (!src.hasBuffer()) { - MOZ_ASSERT(old->getPrivate() == old->fixedData(FIXED_DATA_START)); - obj->setPrivate(obj->fixedData(FIXED_DATA_START)); + MOZ_ASSERT(src.getPrivate() == src.fixedData(FIXED_DATA_START)); + dst.setPrivate(dst.fixedData(FIXED_DATA_START)); } } @@ -1820,7 +1822,7 @@ js_InitArrayBufferClass(JSContext *cx, HandleObject obj) if 
(global->isStandardClassResolved(JSProto_ArrayBuffer)) return &global->getPrototype(JSProto_ArrayBuffer).toObject(); - RootedObject arrayBufferProto(cx, global->createBlankPrototype(cx, &ArrayBufferObject::protoClass)); + RootedNativeObject arrayBufferProto(cx, global->createBlankPrototype(cx, &ArrayBufferObject::protoClass)); if (!arrayBufferProto) return nullptr; @@ -1936,7 +1938,7 @@ DataViewObject::getter(JSContext *cx, unsigned argc, Value *vp) template bool -DataViewObject::defineGetter(JSContext *cx, PropertyName *name, HandleObject proto) +DataViewObject::defineGetter(JSContext *cx, PropertyName *name, HandleNativeObject proto) { RootedId id(cx, NameToId(name)); unsigned attrs = JSPROP_SHARED | JSPROP_GETTER; @@ -1958,7 +1960,7 @@ DataViewObject::initClass(JSContext *cx) if (global->isStandardClassResolved(JSProto_DataView)) return true; - RootedObject proto(cx, global->createBlankPrototype(cx, &DataViewObject::protoClass)); + RootedNativeObject proto(cx, global->createBlankPrototype(cx, &DataViewObject::protoClass)); if (!proto) return false; diff --git a/js/src/vm/TypedArrayObject.h b/js/src/vm/TypedArrayObject.h index 03999c461da..4e48df2ac9a 100644 --- a/js/src/vm/TypedArrayObject.h +++ b/js/src/vm/TypedArrayObject.h @@ -75,7 +75,7 @@ class TypedArrayLayout "bad inlined constant in jsfriendapi.h"); }; -class TypedArrayObject : public ArrayBufferViewObject +class TypedArrayObject : public NativeObject { public: typedef TypedArrayObject AnyTypedArray; @@ -106,7 +106,7 @@ class TypedArrayObject : public ArrayBufferViewObject // For typed arrays which can store their data inline, the array buffer // object is created lazily. 
static const uint32_t INLINE_BUFFER_LIMIT = - (JSObject::MAX_FIXED_SLOTS - FIXED_DATA_START) * sizeof(Value); + (NativeObject::MAX_FIXED_SLOTS - FIXED_DATA_START) * sizeof(Value); static gc::AllocKind AllocKindForLazyBuffer(size_t nbytes) @@ -301,7 +301,7 @@ TypedArrayShift(Scalar::Type viewType) MOZ_CRASH("Unexpected array type"); } -class DataViewObject : public ArrayBufferViewObject +class DataViewObject : public NativeObject { static const size_t RESERVED_SLOTS = JS_DATAVIEW_SLOTS; static const size_t DATA_SLOT = JS_DATAVIEW_SLOT_DATA; @@ -327,7 +327,7 @@ class DataViewObject : public ArrayBufferViewObject template static bool - defineGetter(JSContext *cx, PropertyName *name, HandleObject proto); + defineGetter(JSContext *cx, PropertyName *name, HandleNativeObject proto); public: static const Class class_; diff --git a/js/src/vm/UbiNode.cpp b/js/src/vm/UbiNode.cpp index 9b4ca683be1..1b5cbce65bf 100644 --- a/js/src/vm/UbiNode.cpp +++ b/js/src/vm/UbiNode.cpp @@ -87,7 +87,7 @@ Node::exposeToJS() const JSObject &obj = *as(); if (obj.is()) { v.setUndefined(); - } else if (obj.is() && IsInternalFunctionObject(&obj)) { + } else if (obj.is() && js::IsInternalFunctionObject(&obj)) { v.setUndefined(); } else { v.setObject(obj); diff --git a/js/src/vm/WeakMapObject.h b/js/src/vm/WeakMapObject.h index c093590a15b..583ea70169d 100644 --- a/js/src/vm/WeakMapObject.h +++ b/js/src/vm/WeakMapObject.h @@ -21,7 +21,7 @@ class ObjectValueMap : public WeakMap virtual bool findZoneEdges(); }; -class WeakMapObject : public JSObject +class WeakMapObject : public NativeObject { public: static const Class class_; diff --git a/js/src/vm/WrapperObject.h b/js/src/vm/WrapperObject.h index dfb77a45616..7ce402e809f 100644 --- a/js/src/vm/WrapperObject.h +++ b/js/src/vm/WrapperObject.h @@ -32,14 +32,14 @@ template<> inline bool JSObject::is() const { - return IsWrapper(const_cast(this)); + return js::IsWrapper(const_cast(this)); } template<> inline bool JSObject::is() const { - return 
IsCrossCompartmentWrapper(const_cast(this)); + return js::IsCrossCompartmentWrapper(const_cast(this)); } #endif /* vm_WrapperObject_h */ diff --git a/js/src/vm/Xdr.cpp b/js/src/vm/Xdr.cpp index bd5c2a2c9fd..d20e22b9fbe 100644 --- a/js/src/vm/Xdr.cpp +++ b/js/src/vm/Xdr.cpp @@ -104,7 +104,7 @@ VersionCheck(XDRState *xdr) template bool -XDRState::codeFunction(MutableHandleObject objp) +XDRState::codeFunction(MutableHandleFunction objp) { if (mode == XDR_DECODE) objp.set(nullptr); diff --git a/js/src/vm/Xdr.h b/js/src/vm/Xdr.h index 9bb23153474..0fe0e150ee6 100644 --- a/js/src/vm/Xdr.h +++ b/js/src/vm/Xdr.h @@ -225,7 +225,7 @@ class XDRState { bool codeChars(const JS::Latin1Char *chars, size_t nchars); bool codeChars(char16_t *chars, size_t nchars); - bool codeFunction(JS::MutableHandleObject objp); + bool codeFunction(JS::MutableHandleFunction objp); bool codeScript(MutableHandleScript scriptp); bool codeConstValue(MutableHandleValue vp); }; From 37a17d4882c7d5413e64366e03fd93616c99c2c4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 2 Oct 2014 18:01:19 -0700 Subject: [PATCH 031/146] Bug 1074591 (part 1) - Allow JSON collections to be printed on a single line. r=froydnj. --HG-- extra : rebase_source : d576a70d2d1ff83311ab8cce849ec0b5a6b3fa80 --- mfbt/JSONWriter.h | 92 ++++++++++++++++----- mfbt/tests/TestJSONWriter.cpp | 150 ++++++++++++++++++++++++++++++++-- 2 files changed, 215 insertions(+), 27 deletions(-) diff --git a/mfbt/JSONWriter.h b/mfbt/JSONWriter.h index 1aa83043c8c..22c262f53ed 100644 --- a/mfbt/JSONWriter.h +++ b/mfbt/JSONWriter.h @@ -33,6 +33,10 @@ // pretty-printing, which are (a) correctly escaping strings, and (b) adding // appropriate indentation and commas between items. // +// By default, every property is placed on its own line. However, it is +// possible to request that objects and arrays be placed entirely on a single +// line, which can reduce output size significantly in some cases. 
+// // Strings used (for property names and string property values) are |const // char*| throughout, and can be ASCII or UTF-8. // @@ -54,6 +58,13 @@ // w.StartObjectElement(); // { // w.PointerProperty("ptr", (void*)0x12345678); +// w.StartArrayProperty("single-line array", w.SingleLineStyle); +// { +// w.IntElement(1); +// w.StartObjectElement(); // SingleLineStyle is inherited from +// w.EndObjectElement(); // above for this collection +// } +// w.EndArray(); // } // w.EndObjectElement(); // } @@ -71,7 +82,8 @@ // "array": [ // 3.4, // { -// "ptr": "0x12345678" +// "ptr": "0x12345678", +// "single-line array": [1, {}] // } // ] // } @@ -217,8 +229,21 @@ class JSONWriter } }; +public: + // Collections (objects and arrays) are printed in a multi-line style by + // default. This can be changed to a single-line style if SingleLineStyle is + // specified. If a collection is printed in single-line style, every nested + // collection within it is also printed in single-line style, even if + // multi-line style is requested. + enum CollectionStyle { + MultiLineStyle, // the default + SingleLineStyle + }; + +private: const UniquePtr mWriter; Vector mNeedComma; // do we need a comma at depth N? + Vector mNeedNewlines; // do we need newlines at depth N? size_t mDepth; // the current nesting depth void Indent() @@ -236,10 +261,12 @@ class JSONWriter if (mNeedComma[mDepth]) { mWriter->Write(","); } - if (mDepth > 0) { + if (mDepth > 0 && mNeedNewlines[mDepth]) { mWriter->Write("\n"); + Indent(); + } else if (mNeedComma[mDepth]) { + mWriter->Write(" "); } - Indent(); } void PropertyNameAndColon(const char* aName) @@ -272,15 +299,18 @@ class JSONWriter mNeedComma[mDepth] = true; } - void NewCommaEntry() + void NewVectorEntries() { - // If this tiny allocation OOMs we might as well just crash because we must - // be in serious memory trouble. + // If these tiny allocations OOM we might as well just crash because we + // must be in serious memory trouble. 
MOZ_RELEASE_ASSERT(mNeedComma.growByUninitialized(1)); + MOZ_RELEASE_ASSERT(mNeedNewlines.growByUninitialized(1)); mNeedComma[mDepth] = false; + mNeedNewlines[mDepth] = true; } - void StartCollection(const char* aMaybePropertyName, const char* aStartChar) + void StartCollection(const char* aMaybePropertyName, const char* aStartChar, + CollectionStyle aStyle = MultiLineStyle) { Separator(); if (aMaybePropertyName) { @@ -291,15 +321,21 @@ class JSONWriter mWriter->Write(aStartChar); mNeedComma[mDepth] = true; mDepth++; - NewCommaEntry(); + NewVectorEntries(); + mNeedNewlines[mDepth] = + mNeedNewlines[mDepth - 1] && aStyle == MultiLineStyle; } // Adds the whitespace and closing char necessary to end a collection. void EndCollection(const char* aEndChar) { - mDepth--; - mWriter->Write("\n"); - Indent(); + if (mNeedNewlines[mDepth]) { + mWriter->Write("\n"); + mDepth--; + Indent(); + } else { + mDepth--; + } mWriter->Write(aEndChar); } @@ -307,9 +343,10 @@ public: explicit JSONWriter(UniquePtr aWriter) : mWriter(Move(aWriter)) , mNeedComma() + , mNeedNewlines() , mDepth(0) { - NewCommaEntry(); + NewVectorEntries(); } // Returns the JSONWriteFunc passed in at creation, for temporary use. The @@ -317,15 +354,18 @@ public: JSONWriteFunc* WriteFunc() const { return mWriter.get(); } // For all the following functions, the "Prints:" comment indicates what the - // basic output looks like. However, it doesn't indicate the indentation and + // basic output looks like. However, it doesn't indicate the whitespace and // trailing commas, which are automatically added as required. // // All property names and string properties are escaped as necessary. 
// Prints: { - void Start() { StartCollection(nullptr, "{"); } + void Start(CollectionStyle aStyle = MultiLineStyle) + { + StartCollection(nullptr, "{", aStyle); + } - // Prints: }\n + // Prints: } void End() { EndCollection("}\n"); } // Prints: "": null @@ -396,19 +436,33 @@ public: void PointerElement(const void* aPtr) { PointerProperty(nullptr, aPtr); } // Prints: "": [ - void StartArrayProperty(const char* aName) { StartCollection(aName, "["); } + void StartArrayProperty(const char* aName, + CollectionStyle aStyle = MultiLineStyle) + { + StartCollection(aName, "[", aStyle); + } // Prints: [ - void StartArrayElement() { StartArrayProperty(nullptr); } + void StartArrayElement(CollectionStyle aStyle = MultiLineStyle) + { + StartArrayProperty(nullptr, aStyle); + } // Prints: ] void EndArray() { EndCollection("]"); } // Prints: "": { - void StartObjectProperty(const char* aName) { StartCollection(aName, "{"); } + void StartObjectProperty(const char* aName, + CollectionStyle aStyle = MultiLineStyle) + { + StartCollection(aName, "{", aStyle); + } // Prints: { - void StartObjectElement() { StartObjectProperty(nullptr); } + void StartObjectElement(CollectionStyle aStyle = MultiLineStyle) + { + StartObjectProperty(nullptr, aStyle); + } // Prints: } void EndObject() { EndCollection("}"); } diff --git a/mfbt/tests/TestJSONWriter.cpp b/mfbt/tests/TestJSONWriter.cpp index 87950a7c904..5bb64f77f22 100644 --- a/mfbt/tests/TestJSONWriter.cpp +++ b/mfbt/tests/TestJSONWriter.cpp @@ -73,20 +73,23 @@ void TestBasicProperties() \"ptr1\": \"0x0\",\n\ \"ptr2\": \"0xdeadbeef\",\n\ \"ptr3\": \"0xfacade\",\n\ - \"len 0 array\": [\n\ + \"len 0 array, multi-line\": [\n\ ],\n\ + \"len 0 array, single-line\": [],\n\ \"len 1 array\": [\n\ 1\n\ ],\n\ - \"len 5 array\": [\n\ + \"len 5 array, multi-line\": [\n\ 1,\n\ 2,\n\ 3,\n\ 4,\n\ 5\n\ ],\n\ - \"len 0 object\": {\n\ + \"len 3 array, single-line\": [1, [{}, 2, []], 3],\n\ + \"len 0 object, multi-line\": {\n\ },\n\ + \"len 0 object, 
single-line\": {},\n\ \"len 1 object\": {\n\ \"one\": 1\n\ },\n\ @@ -96,7 +99,8 @@ void TestBasicProperties() \"three\": 3,\n\ \"four\": 4,\n\ \"five\": 5\n\ - }\n\ + },\n\ + \"len 3 object, single-line\": {\"a\": 1, \"b\": [{}, 2, []], \"c\": 3}\n\ }\n\ "; @@ -127,7 +131,10 @@ void TestBasicProperties() w.PointerProperty("ptr2", (void*)0xdeadbeef); w.PointerProperty("ptr3", (void*)0xFaCaDe); - w.StartArrayProperty("len 0 array"); + w.StartArrayProperty("len 0 array, multi-line", w.MultiLineStyle); + w.EndArray(); + + w.StartArrayProperty("len 0 array, single-line", w.SingleLineStyle); w.EndArray(); w.StartArrayProperty("len 1 array"); @@ -136,7 +143,7 @@ void TestBasicProperties() } w.EndArray(); - w.StartArrayProperty("len 5 array"); + w.StartArrayProperty("len 5 array, multi-line", w.MultiLineStyle); { w.IntElement(1); w.IntElement(2); @@ -146,7 +153,28 @@ void TestBasicProperties() } w.EndArray(); - w.StartObjectProperty("len 0 object"); + w.StartArrayProperty("len 3 array, single-line", w.SingleLineStyle); + { + w.IntElement(1); + w.StartArrayElement(); + { + w.StartObjectElement(w.SingleLineStyle); + w.EndObject(); + + w.IntElement(2); + + w.StartArrayElement(w.MultiLineStyle); // style overridden from above + w.EndArray(); + } + w.EndArray(); + w.IntElement(3); + } + w.EndArray(); + + w.StartObjectProperty("len 0 object, multi-line"); + w.EndObject(); + + w.StartObjectProperty("len 0 object, single-line", w.SingleLineStyle); w.EndObject(); w.StartObjectProperty("len 1 object"); @@ -164,6 +192,24 @@ void TestBasicProperties() w.IntProperty("five", 5); } w.EndObject(); + + w.StartObjectProperty("len 3 object, single-line", w.SingleLineStyle); + { + w.IntProperty("a", 1); + w.StartArrayProperty("b"); + { + w.StartObjectElement(); + w.EndObject(); + + w.IntElement(2); + + w.StartArrayElement(w.SingleLineStyle); + w.EndArray(); + } + w.EndArray(); + w.IntProperty("c", 3); + } + w.EndObject(); } w.End(); @@ -194,6 +240,7 @@ void TestBasicElements() 
\"0xfacade\",\n\ [\n\ ],\n\ + [],\n\ [\n\ 1\n\ ],\n\ @@ -204,8 +251,10 @@ void TestBasicElements() 4,\n\ 5\n\ ],\n\ + [1, [{}, 2, []], 3],\n\ {\n\ },\n\ + {},\n\ {\n\ \"one\": 1\n\ },\n\ @@ -215,7 +264,8 @@ void TestBasicElements() \"three\": 3,\n\ \"four\": 4,\n\ \"five\": 5\n\ - }\n\ + },\n\ + {\"a\": 1, \"b\": [{}, 2, []], \"c\": 3}\n\ ]\n\ }\n\ "; @@ -251,6 +301,9 @@ void TestBasicElements() w.StartArrayElement(); w.EndArray(); + w.StartArrayElement(w.SingleLineStyle); + w.EndArray(); + w.StartArrayElement(); { w.IntElement(1); @@ -267,9 +320,30 @@ void TestBasicElements() } w.EndArray(); + w.StartArrayElement(w.SingleLineStyle); + { + w.IntElement(1); + w.StartArrayElement(); + { + w.StartObjectElement(w.SingleLineStyle); + w.EndObject(); + + w.IntElement(2); + + w.StartArrayElement(w.MultiLineStyle); // style overridden from above + w.EndArray(); + } + w.EndArray(); + w.IntElement(3); + } + w.EndArray(); + w.StartObjectElement(); w.EndObject(); + w.StartObjectElement(w.SingleLineStyle); + w.EndObject(); + w.StartObjectElement(); { w.IntProperty("one", 1); @@ -285,6 +359,24 @@ void TestBasicElements() w.IntProperty("five", 5); } w.EndObject(); + + w.StartObjectElement(w.SingleLineStyle); + { + w.IntProperty("a", 1); + w.StartArrayProperty("b"); + { + w.StartObjectElement(); + w.EndObject(); + + w.IntElement(2); + + w.StartArrayElement(w.SingleLineStyle); + w.EndArray(); + } + w.EndArray(); + w.IntProperty("c", 3); + } + w.EndObject(); } w.EndArray(); w.End(); @@ -292,6 +384,47 @@ void TestBasicElements() Check(w.WriteFunc(), expected); } +void TestOneLineObject() +{ + const char* expected = "\ +{\"i\": 1, \"array\": [null, [{}], {\"o\": {}}, \"s\"], \"d\": 3.33}\n\ +"; + + JSONWriter w(MakeUnique()); + + w.Start(w.SingleLineStyle); + + w.IntProperty("i", 1); + + w.StartArrayProperty("array"); + { + w.NullElement(); + + w.StartArrayElement(w.MultiLineStyle); // style overridden from above + { + w.StartObjectElement(); + w.EndObject(); + } + w.EndArray(); + + 
w.StartObjectElement(); + { + w.StartObjectProperty("o"); + w.EndObject(); + } + w.EndObject(); + + w.StringElement("s"); + } + w.EndArray(); + + w.DoubleProperty("d", 3.33); + + w.End(); + + Check(w.WriteFunc(), expected); +} + void TestStringEscaping() { // This test uses hexadecimal character escapes because UTF8 literals cause @@ -412,6 +545,7 @@ int main(void) { TestBasicProperties(); TestBasicElements(); + TestOneLineObject(); TestStringEscaping(); TestDeepNesting(); From 3d38dbb60e6a7404aba2560e3e98140832561d9e Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 2 Oct 2014 18:01:32 -0700 Subject: [PATCH 032/146] Bug 1074591 (part 2) - Remove Pointer{Element,Property}() from JSONWriter. r=froydnj. --HG-- extra : rebase_source : db07aa93df880e201eba866cb791ccaa245f0ef8 --- mfbt/JSONWriter.h | 23 ++++------------------- mfbt/tests/TestJSONWriter.cpp | 18 ++---------------- 2 files changed, 6 insertions(+), 35 deletions(-) diff --git a/mfbt/JSONWriter.h b/mfbt/JSONWriter.h index 22c262f53ed..bea29719840 100644 --- a/mfbt/JSONWriter.h +++ b/mfbt/JSONWriter.h @@ -51,13 +51,12 @@ // w.NullProperty("null"); // w.BoolProperty("bool", true); // w.IntProperty("int", 1); -// w.StringProperty("string", "hello"); // w.StartArrayProperty("array"); // { -// w.DoubleElement(3.4); +// w.StringElement("string"); // w.StartObjectElement(); // { -// w.PointerProperty("ptr", (void*)0x12345678); +// w.DoubleProperty("double", 3.4); // w.StartArrayProperty("single-line array", w.SingleLineStyle); // { // w.IntElement(1); @@ -78,11 +77,10 @@ // "null": null, // "bool": true, // "int": 1, -// "string": "hello", // "array": [ -// 3.4, +// "string", // { -// "ptr": "0x12345678", +// "double": 3.4, // "single-line array": [1, {}] // } // ] @@ -422,19 +420,6 @@ public: // Prints: "" void StringElement(const char* aStr) { StringProperty(nullptr, aStr); } - // Prints: "": "" - // The pointer is printed as a hexadecimal integer with a leading '0x'. 
- void PointerProperty(const char* aName, const void* aPtr) - { - char buf[32]; - sprintf(buf, "0x%" PRIxPTR, uintptr_t(aPtr)); - QuotedScalar(aName, buf); - } - - // Prints: "" - // The pointer is printed as a hexadecimal integer with a leading '0x'. - void PointerElement(const void* aPtr) { PointerProperty(nullptr, aPtr); } - // Prints: "": [ void StartArrayProperty(const char* aName, CollectionStyle aStyle = MultiLineStyle) diff --git a/mfbt/tests/TestJSONWriter.cpp b/mfbt/tests/TestJSONWriter.cpp index 5bb64f77f22..6c081d1705e 100644 --- a/mfbt/tests/TestJSONWriter.cpp +++ b/mfbt/tests/TestJSONWriter.cpp @@ -70,9 +70,6 @@ void TestBasicProperties() \"string2\": \"1234\",\n\ \"string3\": \"hello\",\n\ \"string4\": \"\\\" \\\\ \\u0007 \\b \\t \\n \\u000b \\f \\r\",\n\ - \"ptr1\": \"0x0\",\n\ - \"ptr2\": \"0xdeadbeef\",\n\ - \"ptr3\": \"0xfacade\",\n\ \"len 0 array, multi-line\": [\n\ ],\n\ \"len 0 array, single-line\": [],\n\ @@ -127,10 +124,6 @@ void TestBasicProperties() w.StringProperty("string3", "hello"); w.StringProperty("string4", "\" \\ \a \b \t \n \v \f \r"); - w.PointerProperty("ptr1", (void*)0x0); - w.PointerProperty("ptr2", (void*)0xdeadbeef); - w.PointerProperty("ptr3", (void*)0xFaCaDe); - w.StartArrayProperty("len 0 array, multi-line", w.MultiLineStyle); w.EndArray(); @@ -235,9 +228,6 @@ void TestBasicElements() \"1234\",\n\ \"hello\",\n\ \"\\\" \\\\ \\u0007 \\b \\t \\n \\u000b \\f \\r\",\n\ - \"0x0\",\n\ - \"0xdeadbeef\",\n\ - \"0xfacade\",\n\ [\n\ ],\n\ [],\n\ @@ -294,10 +284,6 @@ void TestBasicElements() w.StringElement("hello"); w.StringElement("\" \\ \a \b \t \n \v \f \r"); - w.PointerElement((void*)0x0); - w.PointerElement((void*)0xdeadbeef); - w.PointerElement((void*)0xFaCaDe); - w.StartArrayElement(); w.EndArray(); @@ -436,7 +422,7 @@ void TestStringEscaping() \"\xD5\xA2\xD5\xA1\xD6\x80\xD5\xA5\xD6\x82 \xD5\xB9\xD5\xAF\xD5\xA1\": -123,\n\ \"\xE4\xBD\xA0\xE5\xA5\xBD\": 1.234,\n\ \"\xCE\xB3\xCE\xB5\xCE\xB9\xCE\xB1 
\xCE\xB5\xCE\xBA\xCE\xB5\xCE\xAF\": \"\xD8\xB3\xD9\x84\xD8\xA7\xD9\x85\",\n\ - \"hall\xC3\xB3 \xC3\xBE" "arna\": \"0x1234\",\n\ + \"hall\xC3\xB3 \xC3\xBE" "arna\": 4660,\n\ \"\xE3\x81\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1\xE3\x81\xAF\": {\n\ \"\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82\": [\n\ ]\n\ @@ -462,7 +448,7 @@ void TestStringEscaping() w.IntProperty("\xD5\xA2\xD5\xA1\xD6\x80\xD5\xA5\xD6\x82 \xD5\xB9\xD5\xAF\xD5\xA1", -123); w.DoubleProperty("\xE4\xBD\xA0\xE5\xA5\xBD", 1.234); w.StringProperty("\xCE\xB3\xCE\xB5\xCE\xB9\xCE\xB1 \xCE\xB5\xCE\xBA\xCE\xB5\xCE\xAF", "\xD8\xB3\xD9\x84\xD8\xA7\xD9\x85"); - w.PointerProperty("hall\xC3\xB3 \xC3\xBE" "arna", (void*)0x1234); + w.IntProperty("hall\xC3\xB3 \xC3\xBE" "arna", 0x1234); w.StartObjectProperty("\xE3\x81\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1\xE3\x81\xAF"); { w.StartArrayProperty("\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"); From bcf57a374684557b8397841989d2247ade13eee2 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 2 Oct 2014 18:01:41 -0700 Subject: [PATCH 033/146] Bug 1074591 (part 3) - Fix unbounded growth in JSONWriter's vectors(!). r=froydnj. --HG-- extra : rebase_source : 364c0295c0b86e0f29dac3772acadbd0e1ab9d9f --- mfbt/JSONWriter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mfbt/JSONWriter.h b/mfbt/JSONWriter.h index bea29719840..ff5774fc5f6 100644 --- a/mfbt/JSONWriter.h +++ b/mfbt/JSONWriter.h @@ -301,8 +301,8 @@ private: { // If these tiny allocations OOM we might as well just crash because we // must be in serious memory trouble. 
- MOZ_RELEASE_ASSERT(mNeedComma.growByUninitialized(1)); - MOZ_RELEASE_ASSERT(mNeedNewlines.growByUninitialized(1)); + MOZ_RELEASE_ASSERT(mNeedComma.resizeUninitialized(mDepth + 1)); + MOZ_RELEASE_ASSERT(mNeedNewlines.resizeUninitialized(mDepth + 1)); mNeedComma[mDepth] = false; mNeedNewlines[mDepth] = true; } From 588349bc87cf5b5c01dd800e241bb16e23e0c85a Mon Sep 17 00:00:00 2001 From: Matt Woodrow Date: Wed, 24 Sep 2014 13:23:15 -0400 Subject: [PATCH 034/146] Bug 1075616 - Do partial uploads of non-D2D SourceSurfaces when we have a sampling rect. r=Bas --HG-- extra : rebase_source : d08b5d3d515484ebc0f8a3be774d66c3b881a8d4 --- gfx/2d/DrawTargetD2D.cpp | 11 +++++++++-- gfx/2d/HelpersD2D.h | 6 +++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/gfx/2d/DrawTargetD2D.cpp b/gfx/2d/DrawTargetD2D.cpp index 8393286f841..ecd3378586b 100644 --- a/gfx/2d/DrawTargetD2D.cpp +++ b/gfx/2d/DrawTargetD2D.cpp @@ -2367,7 +2367,9 @@ DrawTargetD2D::CreateBrushForPattern(const Pattern &aPattern, Float aAlpha) RefPtr source = pat->mSurface; - if (!pat->mSamplingRect.IsEmpty()) { + if (!pat->mSamplingRect.IsEmpty() && + (source->GetType() == SurfaceType::D2D1_BITMAP || + source->GetType() == SurfaceType::D2D1_DRAWTARGET)) { IntRect samplingRect = pat->mSamplingRect; RefPtr dt = new DrawTargetD2D(); @@ -2411,7 +2413,12 @@ DrawTargetD2D::CreateBrushForPattern(const Pattern &aPattern, Float aAlpha) return nullptr; } - bitmap = CreatePartialBitmapForSurface(dataSurf, mTransform, mSize, pat->mExtendMode, mat, mRT); + IntRect sourceRect = pat->mSamplingRect; + if (sourceRect.IsEmpty()) { + sourceRect = IntRect(0, 0, source->GetSize().width, source->GetSize().height); + } + + bitmap = CreatePartialBitmapForSurface(dataSurf, mTransform, mSize, pat->mExtendMode, mat, mRT, &sourceRect); if (!bitmap) { return nullptr; } diff --git a/gfx/2d/HelpersD2D.h b/gfx/2d/HelpersD2D.h index 4b5b98008a4..99154976f3b 100644 --- a/gfx/2d/HelpersD2D.h +++ b/gfx/2d/HelpersD2D.h @@ -513,7 +513,8 
@@ CreateStrokeStyleForOptions(const StrokeOptions &aStrokeOptions) static TemporaryRef CreatePartialBitmapForSurface(DataSourceSurface *aSurface, const Matrix &aDestinationTransform, const IntSize &aDestinationSize, ExtendMode aExtendMode, - Matrix &aSourceTransform, ID2D1RenderTarget *aRT) + Matrix &aSourceTransform, ID2D1RenderTarget *aRT, + const IntRect* aSourceRect = nullptr) { RefPtr bitmap; @@ -538,6 +539,9 @@ CreatePartialBitmapForSurface(DataSourceSurface *aSurface, const Matrix &aDestin IntSize size = aSurface->GetSize(); Rect uploadRect(0, 0, Float(size.width), Float(size.height)); + if (aSourceRect) { + uploadRect = Rect(aSourceRect->x, aSourceRect->y, aSourceRect->width, aSourceRect->height); + } // Limit the uploadRect as much as possible without supporting discontiguous uploads // From 90f77b66fda98fa92263dbe9b2c4a2134f497e55 Mon Sep 17 00:00:00 2001 From: Chris Lord Date: Fri, 3 Oct 2014 13:22:32 +1300 Subject: [PATCH 035/146] Bug 1006797 - Only apply the window render offset when actually rendering to the window. 
r=nical --HG-- extra : rebase_source : 24f2d8c50e5c0ddfa6683291040a78574d631678 --- gfx/layers/opengl/CompositingRenderTargetOGL.cpp | 3 ++- gfx/layers/opengl/CompositingRenderTargetOGL.h | 4 +++- gfx/layers/opengl/CompositorOGL.cpp | 14 +++++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/gfx/layers/opengl/CompositingRenderTargetOGL.cpp b/gfx/layers/opengl/CompositingRenderTargetOGL.cpp index f56881cb269..2f75e6649ba 100644 --- a/gfx/layers/opengl/CompositingRenderTargetOGL.cpp +++ b/gfx/layers/opengl/CompositingRenderTargetOGL.cpp @@ -92,6 +92,8 @@ CompositingRenderTargetOGL::InitializeImpl() NS_ERROR(msg.get()); } + mInitParams.mStatus = InitParams::INITIALIZED; + mCompositor->PrepareViewport(mInitParams.mSize); mGL->fScissor(0, 0, mInitParams.mSize.width, mInitParams.mSize.height); if (mInitParams.mInit == INIT_MODE_CLEAR) { @@ -99,7 +101,6 @@ CompositingRenderTargetOGL::InitializeImpl() mGL->fClear(LOCAL_GL_COLOR_BUFFER_BIT); } - mInitParams.mStatus = InitParams::INITIALIZED; } } diff --git a/gfx/layers/opengl/CompositingRenderTargetOGL.h b/gfx/layers/opengl/CompositingRenderTargetOGL.h index a08066ff414..76186da9c7c 100644 --- a/gfx/layers/opengl/CompositingRenderTargetOGL.h +++ b/gfx/layers/opengl/CompositingRenderTargetOGL.h @@ -84,7 +84,7 @@ public: const gfx::IntSize& aSize) { RefPtr result - = new CompositingRenderTargetOGL(aCompositor, gfx::IntPoint(0, 0), 0, 0); + = new CompositingRenderTargetOGL(aCompositor, gfx::IntPoint(), 0, 0); result->mInitParams = InitParams(aSize, 0, INIT_MODE_NONE); result->mInitParams.mStatus = InitParams::INITIALIZED; return result.forget(); @@ -112,6 +112,8 @@ public: */ void BindRenderTarget(); + bool IsWindow() { return GetFBO() == 0; } + GLuint GetFBO() const { MOZ_ASSERT(mInitParams.mStatus == InitParams::INITIALIZED); diff --git a/gfx/layers/opengl/CompositorOGL.cpp b/gfx/layers/opengl/CompositorOGL.cpp index 8202261bc62..6c4c09b04ee 100644 --- a/gfx/layers/opengl/CompositorOGL.cpp +++ 
b/gfx/layers/opengl/CompositorOGL.cpp @@ -593,7 +593,10 @@ CompositorOGL::PrepareViewport(const gfx::IntSize& aSize) viewMatrix.PreScale(1.0f, -1.0f); } - if (!mTarget) { + MOZ_ASSERT(mCurrentRenderTarget, "No destination"); + // If we're drawing directly to the window then we want to offset + // drawing by the render offset. + if (!mTarget && mCurrentRenderTarget->IsWindow()) { viewMatrix.PreTranslate(mRenderOffset.x, mRenderOffset.y); } @@ -660,8 +663,8 @@ CompositorOGL::SetRenderTarget(CompositingRenderTarget *aSurface) CompositingRenderTargetOGL* surface = static_cast(aSurface); if (mCurrentRenderTarget != surface) { - surface->BindRenderTarget(); mCurrentRenderTarget = surface; + surface->BindRenderTarget(); } } @@ -1019,9 +1022,14 @@ CompositorOGL::DrawQuad(const Rect& aRect, js::ProfileEntry::Category::GRAPHICS); MOZ_ASSERT(mFrameInProgress, "frame not started"); + MOZ_ASSERT(mCurrentRenderTarget, "No destination"); Rect clipRect = aClipRect; - if (!mTarget) { + // aClipRect is in destination coordinate space (after all + // transforms and offsets have been applied) so if our + // drawing is going to be shifted by mRenderOffset then we need + // to shift the clip rect by the same amount. + if (!mTarget && mCurrentRenderTarget->IsWindow()) { clipRect.MoveBy(mRenderOffset.x, mRenderOffset.y); } IntRect intClipRect; From c22ac086c3033ef497ea2760379dd1f2d96519a0 Mon Sep 17 00:00:00 2001 From: Matt Woodrow Date: Fri, 3 Oct 2014 13:49:08 +1300 Subject: [PATCH 036/146] Bug 1067588 - Don't draw images with a sampling rect if the rect covers the entire image. 
r=Bas --HG-- extra : rebase_source : 1c6aebe49a6168953413f60d97d8d53135298e77 --- gfx/thebes/gfxUtils.cpp | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/gfx/thebes/gfxUtils.cpp b/gfx/thebes/gfxUtils.cpp index 8670be50d35..fc84031fc26 100644 --- a/gfx/thebes/gfxUtils.cpp +++ b/gfx/thebes/gfxUtils.cpp @@ -593,17 +593,6 @@ gfxUtils::DrawPixelSnapped(gfxContext* aContext, imageRect.Width(), imageRect.Height(), region.Width(), region.Height()); - if (aRegion.IsRestricted() && - aContext->CurrentMatrix().HasNonIntegerTranslation() && - drawable->DrawWithSamplingRect(aContext, aRegion.Rect(), aRegion.Restriction(), - doTile, aFilter, aOpacity)) { - return; - } - - // On Mobile, we don't ever want to do this; it has the potential for - // allocating very large temporary surfaces, especially since we'll - // do full-page snapshots often (see bug 749426). -#ifndef MOZ_GFX_OPTIMIZE_MOBILE // OK now, the hard part left is to account for the subimage sampling // restriction. If all the transforms involved are just integer // translations, then we assume no resampling will occur so there's @@ -611,19 +600,29 @@ gfxUtils::DrawPixelSnapped(gfxContext* aContext, // XXX if only we had source-clipping in cairo! if (aContext->CurrentMatrix().HasNonIntegerTranslation()) { if (doTile || !aRegion.RestrictionContains(imageRect)) { + if (drawable->DrawWithSamplingRect(aContext, aRegion.Rect(), aRegion.Restriction(), + doTile, aFilter, aOpacity)) { + return; + } + + // On Mobile, we don't ever want to do this; it has the potential for + // allocating very large temporary surfaces, especially since we'll + // do full-page snapshots often (see bug 749426). 
+#ifndef MOZ_GFX_OPTIMIZE_MOBILE nsRefPtr restrictedDrawable = CreateSamplingRestrictedDrawable(aDrawable, aContext, aRegion, aFormat); if (restrictedDrawable) { drawable.swap(restrictedDrawable); } - } - // We no longer need to tile: Either we never needed to, or we already - // filled a surface with the tiled pattern; this surface can now be - // drawn without tiling. - doTile = false; - } + + // We no longer need to tile: Either we never needed to, or we already + // filled a surface with the tiled pattern; this surface can now be + // drawn without tiling. + doTile = false; #endif + } + } drawable->Draw(aContext, aRegion.Rect(), doTile, aFilter, aOpacity); } From 869f9d8a0d5df4fdbcef8708520c2fc363e8471b Mon Sep 17 00:00:00 2001 From: Randell Jesup Date: Fri, 3 Oct 2014 00:47:59 -0400 Subject: [PATCH 037/146] Bug 1061702: Stop audio sources from continuing to play garbage after being stopped r=roc * clear audio callback buffer if no streams have audio --- content/media/AudioBufferUtils.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/content/media/AudioBufferUtils.h b/content/media/AudioBufferUtils.h index 389927d7ac2..b332e034970 100644 --- a/content/media/AudioBufferUtils.h +++ b/content/media/AudioBufferUtils.h @@ -77,9 +77,18 @@ public: void BufferFilled() { // It's okay to have exactly zero samples here, it can happen we have an // audio callback driver because of a hint on MSG creation, but the - // AudioOutputStream has not been created yet. + // AudioOutputStream has not been created yet, or if all the streams have finished + // but we're still running. + // Note: it's also ok if we had data in the scratch buffer - and we usually do - and + // all the streams were ended (no mixer callback occured). + // XXX Remove this warning, or find a way to avoid it if the mixer callback + // isn't called. 
NS_WARN_IF_FALSE(Available() == 0 || mSampleWriteOffset == 0, "Audio Buffer is not full by the end of the callback."); + // Make sure the data returned is always set and not random! + if (Available()) { + PodZero(mBuffer + mSampleWriteOffset, FramesToSamples(CHANNELS, Available())); + } MOZ_ASSERT(mSamples, "Buffer not set."); mSamples = 0; mSampleWriteOffset = 0; From b4042bc9df9ab86b4441cb15c0343c3990adfca7 Mon Sep 17 00:00:00 2001 From: Nikhil Marathe Date: Thu, 24 Jul 2014 17:53:03 -0700 Subject: [PATCH 038/146] Bug 1017613 - Part 1 - Fetch Body and Request. r=baku --HG-- extra : rebase_source : 2c8b82650e3669f84ff177a32a9d9b0f845e673a --- dom/bindings/Bindings.conf | 4 + dom/fetch/Request.cpp | 113 ++++++++++++++++++ dom/fetch/Request.h | 106 ++++++++++++++++ dom/fetch/moz.build | 2 + .../mochitest/general/test_interfaces.html | 2 + dom/webidl/Fetch.webidl | 29 +++++ dom/webidl/Request.webidl | 38 ++++++ dom/webidl/moz.build | 2 + dom/workers/moz.build | 1 + dom/workers/test/fetch/mochitest.ini | 5 + dom/workers/test/fetch/moz.build | 7 ++ dom/workers/test/fetch/test_interfaces.html | 48 ++++++++ dom/workers/test/fetch/worker_interfaces.js | 10 ++ 13 files changed, 367 insertions(+) create mode 100644 dom/fetch/Request.cpp create mode 100644 dom/fetch/Request.h create mode 100644 dom/webidl/Fetch.webidl create mode 100644 dom/webidl/Request.webidl create mode 100644 dom/workers/test/fetch/mochitest.ini create mode 100644 dom/workers/test/fetch/moz.build create mode 100644 dom/workers/test/fetch/test_interfaces.html create mode 100644 dom/workers/test/fetch/worker_interfaces.js diff --git a/dom/bindings/Bindings.conf b/dom/bindings/Bindings.conf index a749bd60ef8..89718eb7c56 100644 --- a/dom/bindings/Bindings.conf +++ b/dom/bindings/Bindings.conf @@ -883,6 +883,10 @@ DOMInterfaces = { 'nativeType': 'nsDOMCSSRect', }, +'Request': { + 'binaryNames': { 'headers': 'headers_' }, +}, + 'RGBColor': { 'nativeType': 'nsDOMCSSRGBColor', }, diff --git 
a/dom/fetch/Request.cpp b/dom/fetch/Request.cpp new file mode 100644 index 00000000000..cd62210e693 --- /dev/null +++ b/dom/fetch/Request.cpp @@ -0,0 +1,113 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "Request.h" + +#include "nsDOMString.h" +#include "nsISupportsImpl.h" +#include "nsIURI.h" +#include "nsPIDOMWindow.h" + +#include "mozilla/dom/Promise.h" + +using namespace mozilla::dom; + +NS_IMPL_CYCLE_COLLECTING_ADDREF(Request) +NS_IMPL_CYCLE_COLLECTING_RELEASE(Request) +NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(Request, mOwner, mHeaders) + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(Request) + NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +Request::Request(nsISupports* aOwner) + : mOwner(aOwner) + , mHeaders(new Headers(aOwner)) +{ + SetIsDOMBinding(); +} + +Request::~Request() +{ +} + +/*static*/ already_AddRefed +Request::Constructor(const GlobalObject& global, + const RequestOrScalarValueString& aInput, + const RequestInit& aInit, ErrorResult& rv) +{ + nsRefPtr request = new Request(global.GetAsSupports()); + return request.forget(); +} + +already_AddRefed +Request::Clone() const +{ + nsRefPtr request = new Request(mOwner); + return request.forget(); +} + +already_AddRefed +Request::ArrayBuffer(ErrorResult& aRv) +{ + nsCOMPtr global = do_QueryInterface(GetParentObject()); + MOZ_ASSERT(global); + nsRefPtr promise = Promise::Create(global, aRv); + if (aRv.Failed()) { + return nullptr; + } + + promise->MaybeReject(NS_ERROR_NOT_AVAILABLE); + return promise.forget(); +} + +already_AddRefed +Request::Blob(ErrorResult& aRv) +{ + nsCOMPtr global = do_QueryInterface(GetParentObject()); + MOZ_ASSERT(global); + nsRefPtr promise = Promise::Create(global, aRv); + if 
(aRv.Failed()) { + return nullptr; + } + + promise->MaybeReject(NS_ERROR_NOT_AVAILABLE); + return promise.forget(); +} + +already_AddRefed +Request::Json(ErrorResult& aRv) +{ + nsCOMPtr global = do_QueryInterface(GetParentObject()); + MOZ_ASSERT(global); + nsRefPtr promise = Promise::Create(global, aRv); + if (aRv.Failed()) { + return nullptr; + } + + promise->MaybeReject(NS_ERROR_NOT_AVAILABLE); + return promise.forget(); +} + +already_AddRefed +Request::Text(ErrorResult& aRv) +{ + nsCOMPtr global = do_QueryInterface(GetParentObject()); + MOZ_ASSERT(global); + nsRefPtr promise = Promise::Create(global, aRv); + if (aRv.Failed()) { + return nullptr; + } + + promise->MaybeReject(NS_ERROR_NOT_AVAILABLE); + return promise.forget(); +} + +bool +Request::BodyUsed() +{ + return false; +} diff --git a/dom/fetch/Request.h b/dom/fetch/Request.h new file mode 100644 index 00000000000..49969531d6f --- /dev/null +++ b/dom/fetch/Request.h @@ -0,0 +1,106 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_Request_h +#define mozilla_dom_Request_h + +#include "nsISupportsImpl.h" +#include "nsWrapperCache.h" + +#include "mozilla/dom/RequestBinding.h" +#include "mozilla/dom/UnionTypes.h" + + +class nsPIDOMWindow; + +namespace mozilla { +namespace dom { + +class Promise; + +class Request MOZ_FINAL : public nsISupports + , public nsWrapperCache +{ + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(Request) + +public: + Request(nsISupports* aOwner); + + JSObject* + WrapObject(JSContext* aCx) + { + return RequestBinding::Wrap(aCx, this); + } + + void + GetUrl(DOMString& aUrl) const + { + aUrl.AsAString() = EmptyString(); + } + + void + GetMethod(nsCString& aMethod) const + { + aMethod = EmptyCString(); + } + + RequestMode + Mode() const + { + return RequestMode::Same_origin; + } + + RequestCredentials + Credentials() const + { + return RequestCredentials::Omit; + } + + void + GetReferrer(DOMString& aReferrer) const + { + aReferrer.AsAString() = EmptyString(); + } + + Headers* Headers_() const { return mHeaders; } + + static already_AddRefed + Constructor(const GlobalObject& aGlobal, const RequestOrScalarValueString& aInput, + const RequestInit& aInit, ErrorResult& rv); + + nsISupports* GetParentObject() const + { + return mOwner; + } + + already_AddRefed + Clone() const; + + already_AddRefed + ArrayBuffer(ErrorResult& aRv); + + already_AddRefed + Blob(ErrorResult& aRv); + + already_AddRefed + Json(ErrorResult& aRv); + + already_AddRefed + Text(ErrorResult& aRv); + + bool + BodyUsed(); +private: + ~Request(); + + nsCOMPtr mOwner; + nsRefPtr mHeaders; +}; + +} // namespace dom +} // namespace mozilla + +#endif // mozilla_dom_Request_h diff --git a/dom/fetch/moz.build b/dom/fetch/moz.build index df9e145ef48..a0670e758a7 100644 --- a/dom/fetch/moz.build +++ b/dom/fetch/moz.build @@ -6,10 +6,12 @@ EXPORTS.mozilla.dom += [ 'Headers.h', + 'Request.h', ] UNIFIED_SOURCES += [ 'Headers.cpp', + 'Request.cpp', ] LOCAL_INCLUDES 
+= [ diff --git a/dom/tests/mochitest/general/test_interfaces.html b/dom/tests/mochitest/general/test_interfaces.html index d7146a3cabd..35b0441aa43 100644 --- a/dom/tests/mochitest/general/test_interfaces.html +++ b/dom/tests/mochitest/general/test_interfaces.html @@ -862,6 +862,8 @@ var interfaceNamesInGlobalScope = "RecordErrorEvent", // IMPORTANT: Do not change this list without review from a DOM peer! "Rect", +// IMPORTANT: Do not change this list without review from a DOM peer! + {name: "Request", pref: "dom.fetch.enabled"}, // IMPORTANT: Do not change this list without review from a DOM peer! "RGBColor", // IMPORTANT: Do not change this list without review from a DOM peer! diff --git a/dom/webidl/Fetch.webidl b/dom/webidl/Fetch.webidl new file mode 100644 index 00000000000..26c9775e696 --- /dev/null +++ b/dom/webidl/Fetch.webidl @@ -0,0 +1,29 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. + * + * The origin of this IDL file is + * http://fetch.spec.whatwg.org/ + */ + +typedef object JSON; +// FIXME(nsm): Bug 1071290: Blobs can't be passed as unions in workers. +// FIXME(nsm): Bug 739173: FormData is not available in workers. +// typedef (ArrayBuffer or ArrayBufferView or Blob or FormData or ScalarValueString or URLSearchParams) BodyInit; +typedef (ArrayBuffer or ArrayBufferView or ScalarValueString or URLSearchParams) BodyInit; + +[NoInterfaceObject, Exposed=(Window,Worker)] +interface Body { + readonly attribute boolean bodyUsed; + [Throws] + Promise arrayBuffer(); + [Throws] + Promise blob(); + // FIXME(nsm): Bug 739173 FormData is not supported in workers. 
+ // Promise formData(); + [Throws] + Promise json(); + [Throws] + Promise text(); +}; diff --git a/dom/webidl/Request.webidl b/dom/webidl/Request.webidl new file mode 100644 index 00000000000..47edd845ab3 --- /dev/null +++ b/dom/webidl/Request.webidl @@ -0,0 +1,38 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. + * + * The origin of this IDL file is + * https://fetch.spec.whatwg.org/#request-class + */ + +typedef (Request or ScalarValueString) RequestInfo; + +[Constructor(RequestInfo input, optional RequestInit init), + Exposed=(Window,Worker), + Func="mozilla::dom::Headers::PrefEnabled"] +interface Request { + readonly attribute ByteString method; + readonly attribute ScalarValueString url; + readonly attribute Headers headers; + + readonly attribute DOMString referrer; + readonly attribute RequestMode mode; + readonly attribute RequestCredentials credentials; + + Request clone(); +}; + +Request implements Body; + +dictionary RequestInit { + ByteString method; + HeadersInit headers; + BodyInit body; + RequestMode mode; + RequestCredentials credentials; +}; + +enum RequestMode { "same-origin", "no-cors", "cors" }; +enum RequestCredentials { "omit", "same-origin", "include" }; diff --git a/dom/webidl/moz.build b/dom/webidl/moz.build index e627fdb863f..a2bc046306e 100644 --- a/dom/webidl/moz.build +++ b/dom/webidl/moz.build @@ -124,6 +124,7 @@ WEBIDL_FILES = [ 'EventListener.webidl', 'EventSource.webidl', 'EventTarget.webidl', + 'Fetch.webidl', 'File.webidl', 'FileList.webidl', 'FileMode.webidl', @@ -326,6 +327,7 @@ WEBIDL_FILES = [ 'RadioNodeList.webidl', 'Range.webidl', 'Rect.webidl', + 'Request.webidl', 'ResourceStats.webidl', 'ResourceStatsManager.webidl', 'RGBColor.webidl', diff --git a/dom/workers/moz.build 
b/dom/workers/moz.build index 9e4952a3ec6..02f4d08f4bb 100644 --- a/dom/workers/moz.build +++ b/dom/workers/moz.build @@ -86,6 +86,7 @@ TEST_DIRS += [ ] MOCHITEST_MANIFESTS += [ + 'test/fetch/mochitest.ini', 'test/mochitest.ini', 'test/serviceworkers/mochitest.ini', ] diff --git a/dom/workers/test/fetch/mochitest.ini b/dom/workers/test/fetch/mochitest.ini new file mode 100644 index 00000000000..0a7a7f84443 --- /dev/null +++ b/dom/workers/test/fetch/mochitest.ini @@ -0,0 +1,5 @@ +[DEFAULT] +support-files = + worker_interfaces.js + +[test_interfaces.html] diff --git a/dom/workers/test/fetch/moz.build b/dom/workers/test/fetch/moz.build new file mode 100644 index 00000000000..8421b15157a --- /dev/null +++ b/dom/workers/test/fetch/moz.build @@ -0,0 +1,7 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +MOCHITEST_MANIFESTS += ['mochitest.ini'] diff --git a/dom/workers/test/fetch/test_interfaces.html b/dom/workers/test/fetch/test_interfaces.html new file mode 100644 index 00000000000..dbbf21f6d18 --- /dev/null +++ b/dom/workers/test/fetch/test_interfaces.html @@ -0,0 +1,48 @@ + + + + + Bug 1017613 - Test fetch API interfaces + + + + +

+ +

+
+
+
+
+
diff --git a/dom/workers/test/fetch/worker_interfaces.js b/dom/workers/test/fetch/worker_interfaces.js
new file mode 100644
index 00000000000..b6b798a05d4
--- /dev/null
+++ b/dom/workers/test/fetch/worker_interfaces.js
@@ -0,0 +1,10 @@
+function ok(a, msg) {
+  dump("OK: " + !!a + "  =>  " + a + " " + msg + "\n");
+  postMessage({type: 'status', status: !!a, msg: a + ": " + msg });
+}
+
+onmessage = function() {
+  ok(typeof Headers === "function", "Headers should be defined");
+  ok(typeof Request === "function", "Request should be defined");
+  postMessage({ type: 'finish' });
+}

From a66c56397eda8bdc078e62a66bec0159c3d1e081 Mon Sep 17 00:00:00 2001
From: Nikhil Marathe 
Date: Thu, 24 Jul 2014 17:50:32 -0700
Subject: [PATCH 039/146] Bug 1017613 - Part 2 - Response IDL and stubs. r=baku

--HG--
extra : rebase_source : f2f55775ed2e9fc92af829d3ce6f7ca3553b97db
---
 dom/bindings/Bindings.conf                    |   4 +
 dom/fetch/Response.cpp                        | 134 ++++++++++++++++++
 dom/fetch/Response.h                          | 107 ++++++++++++++
 dom/fetch/moz.build                           |   2 +
 .../mochitest/general/test_interfaces.html    |   2 +
 dom/webidl/Response.webidl                    |  36 +++++
 dom/webidl/moz.build                          |   1 +
 dom/workers/test/fetch/worker_interfaces.js   |   1 +
 8 files changed, 287 insertions(+)
 create mode 100644 dom/fetch/Response.cpp
 create mode 100644 dom/fetch/Response.h
 create mode 100644 dom/webidl/Response.webidl

diff --git a/dom/bindings/Bindings.conf b/dom/bindings/Bindings.conf
index 89718eb7c56..d4265810444 100644
--- a/dom/bindings/Bindings.conf
+++ b/dom/bindings/Bindings.conf
@@ -887,6 +887,10 @@ DOMInterfaces = {
     'binaryNames': { 'headers': 'headers_' },
 },
 
+'Response': {
+    'binaryNames': { 'headers': 'headers_' },
+},
+
 'RGBColor': {
     'nativeType': 'nsDOMCSSRGBColor',
 },
diff --git a/dom/fetch/Response.cpp b/dom/fetch/Response.cpp
new file mode 100644
index 00000000000..a7b9352486d
--- /dev/null
+++ b/dom/fetch/Response.cpp
@@ -0,0 +1,134 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Response.h"
+#include "nsDOMString.h"
+#include "nsPIDOMWindow.h"
+#include "nsIURI.h"
+#include "nsISupportsImpl.h"
+
+#include "mozilla/ErrorResult.h"
+
+using namespace mozilla::dom;
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(Response)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(Response)
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(Response, mOwner, mHeaders) // every owning member must be traversed/unlinked
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(Response)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+Response::Response(nsISupports* aOwner)
+  : mOwner(aOwner)
+  , mHeaders(new Headers(aOwner))
+{
+  SetIsDOMBinding();
+}
+
+Response::~Response()
+{
+}
+
+/* static */ already_AddRefed<Response>
+Response::Error(const GlobalObject& aGlobal)
+{
+  ErrorResult result; // stub: passed to Constructor() but never inspected here
+  ResponseInit init;
+  init.mStatus = 0;
+  Optional<ArrayBufferOrArrayBufferViewOrScalarValueStringOrURLSearchParams> body; // left unset: no body
+  nsRefPtr<Response> r = Response::Constructor(aGlobal, body, init, result);
+  return r.forget();
+}
+
+/* static */ already_AddRefed<Response>
+Response::Redirect(const GlobalObject& aGlobal, const nsAString& aUrl,
+                   uint16_t aStatus)
+{
+  ErrorResult result; // stub: passed to Constructor() but never inspected here
+  ResponseInit init; // stub: aUrl and aStatus are not applied to the response yet
+  Optional<ArrayBufferOrArrayBufferViewOrScalarValueStringOrURLSearchParams> body; // left unset: no body
+  nsRefPtr<Response> r = Response::Constructor(aGlobal, body, init, result);
+  return r.forget();
+}
+
+/*static*/ already_AddRefed<Response>
+Response::Constructor(const GlobalObject& global,
+                      const Optional<ArrayBufferOrArrayBufferViewOrScalarValueStringOrURLSearchParams>& aBody,
+                      const ResponseInit& aInit, ErrorResult& rv)
+{
+  nsRefPtr<Response> response = new Response(global.GetAsSupports()); // stub: aBody and aInit are ignored for now
+  return response.forget();
+}
+
+already_AddRefed<Response>
+Response::Clone()
+{
+  nsRefPtr<Response> response = new Response(mOwner); // stub: a fresh Response sharing only the owner
+  return response.forget();
+}
+
+already_AddRefed<Promise>
+Response::ArrayBuffer(ErrorResult& aRv)
+{
+  nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(GetParentObject());
+  MOZ_ASSERT(global);
+  nsRefPtr<Promise> promise = Promise::Create(global, aRv);
+  if (aRv.Failed()) {
+    return nullptr;
+  }
+
+  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE); // stub: always rejects, body reading is not implemented
+  return promise.forget();
+}
+
+already_AddRefed<Promise>
+Response::Blob(ErrorResult& aRv)
+{
+  nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(GetParentObject());
+  MOZ_ASSERT(global);
+  nsRefPtr<Promise> promise = Promise::Create(global, aRv);
+  if (aRv.Failed()) {
+    return nullptr;
+  }
+
+  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE); // stub: always rejects, body reading is not implemented
+  return promise.forget();
+}
+
+already_AddRefed<Promise>
+Response::Json(ErrorResult& aRv)
+{
+  nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(GetParentObject());
+  MOZ_ASSERT(global);
+  nsRefPtr<Promise> promise = Promise::Create(global, aRv);
+  if (aRv.Failed()) {
+    return nullptr;
+  }
+
+  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE); // stub: always rejects, body reading is not implemented
+  return promise.forget();
+}
+
+already_AddRefed<Promise>
+Response::Text(ErrorResult& aRv)
+{
+  nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(GetParentObject());
+  MOZ_ASSERT(global);
+  nsRefPtr<Promise> promise = Promise::Create(global, aRv);
+  if (aRv.Failed()) {
+    return nullptr;
+  }
+
+  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE); // stub: always rejects, body reading is not implemented
+  return promise.forget();
+}
+
+bool
+Response::BodyUsed()
+{
+  return false;
+}
diff --git a/dom/fetch/Response.h b/dom/fetch/Response.h
new file mode 100644
index 00000000000..31e3b62b7f2
--- /dev/null
+++ b/dom/fetch/Response.h
@@ -0,0 +1,107 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_dom_Response_h
+#define mozilla_dom_Response_h
+
+#include "nsWrapperCache.h"
+#include "nsISupportsImpl.h"
+
+#include "mozilla/dom/ResponseBinding.h"
+#include "mozilla/dom/UnionTypes.h"
+
+class nsPIDOMWindow;
+
+namespace mozilla {
+namespace dom {
+
+class Headers;
+
+class Response MOZ_FINAL : public nsISupports
+                         , public nsWrapperCache
+{
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(Response)
+
+public:
+  explicit Response(nsISupports* aOwner); // explicit: no implicit nsISupports* -> Response conversion
+
+  JSObject*
+  WrapObject(JSContext* aCx)
+  {
+    return ResponseBinding::Wrap(aCx, this);
+  }
+
+  ResponseType
+  Type() const
+  {
+    return ResponseType::Error; // stub: fixed placeholder value
+  }
+
+  void
+  GetUrl(DOMString& aUrl) const
+  {
+    aUrl.AsAString() = EmptyString(); // stub: always the empty string
+  }
+
+  uint16_t
+  Status() const
+  {
+    return 400; // stub: fixed placeholder value
+  }
+
+  void
+  GetStatusText(nsCString& aStatusText) const
+  {
+    aStatusText = EmptyCString(); // stub: always the empty string
+  }
+
+  Headers*
+  Headers_() const { return mHeaders; }
+
+  static already_AddRefed<Response>
+  Error(const GlobalObject& aGlobal);
+
+  static already_AddRefed<Response>
+  Redirect(const GlobalObject& aGlobal, const nsAString& aUrl, uint16_t aStatus);
+
+  static already_AddRefed<Response>
+  Constructor(const GlobalObject& aGlobal,
+              const Optional<ArrayBufferOrArrayBufferViewOrScalarValueStringOrURLSearchParams>& aBody,
+              const ResponseInit& aInit, ErrorResult& rv);
+
+  nsISupports* GetParentObject() const
+  {
+    return mOwner;
+  }
+
+  already_AddRefed<Response>
+  Clone();
+
+  already_AddRefed<Promise>
+  ArrayBuffer(ErrorResult& aRv);
+
+  already_AddRefed<Promise>
+  Blob(ErrorResult& aRv);
+
+  already_AddRefed<Promise>
+  Json(ErrorResult& aRv);
+
+  already_AddRefed<Promise>
+  Text(ErrorResult& aRv);
+
+  bool
+  BodyUsed();
+private:
+  ~Response();
+
+  nsCOMPtr<nsISupports> mOwner;
+  nsRefPtr<Headers> mHeaders;
+};
+
+} // namespace dom
+} // namespace mozilla
+
+#endif // mozilla_dom_Response_h
diff --git a/dom/fetch/moz.build b/dom/fetch/moz.build
index a0670e758a7..7853d318053 100644
--- a/dom/fetch/moz.build
+++ b/dom/fetch/moz.build
@@ -7,11 +7,13 @@
 EXPORTS.mozilla.dom += [
     'Headers.h',
     'Request.h',
+    'Response.h',
 ]
 
 UNIFIED_SOURCES += [
     'Headers.cpp',
     'Request.cpp',
+    'Response.cpp',
 ]
 
 LOCAL_INCLUDES += [
diff --git a/dom/tests/mochitest/general/test_interfaces.html b/dom/tests/mochitest/general/test_interfaces.html
index 35b0441aa43..914217568d1 100644
--- a/dom/tests/mochitest/general/test_interfaces.html
+++ b/dom/tests/mochitest/general/test_interfaces.html
@@ -864,6 +864,8 @@ var interfaceNamesInGlobalScope =
     "Rect",
 // IMPORTANT: Do not change this list without review from a DOM peer!
     {name: "Request", pref: "dom.fetch.enabled"},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "Response", pref: "dom.fetch.enabled"},
 // IMPORTANT: Do not change this list without review from a DOM peer!
     "RGBColor",
 // IMPORTANT: Do not change this list without review from a DOM peer!
diff --git a/dom/webidl/Response.webidl b/dom/webidl/Response.webidl
new file mode 100644
index 00000000000..f14a1391ee2
--- /dev/null
+++ b/dom/webidl/Response.webidl
@@ -0,0 +1,36 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * https://fetch.spec.whatwg.org/#response-class
+ */
+
+[Constructor(optional BodyInit body, optional ResponseInit init),
+ Exposed=(Window,Worker),
+ Func="mozilla::dom::Headers::PrefEnabled"]
+interface Response {
+  static Response error();
+  static Response redirect(ScalarValueString url, optional unsigned short status = 302);
+
+  readonly attribute ResponseType type;
+
+  readonly attribute ScalarValueString url;
+  readonly attribute unsigned short status;
+  readonly attribute ByteString statusText;
+  readonly attribute Headers headers;
+
+  Response clone();
+};
+
+Response implements Body;
+
+dictionary ResponseInit {
+  unsigned short status = 200;
+  // WebIDL spec doesn't allow default values for ByteString.
+  ByteString statusText;
+  HeadersInit headers;
+};
+
+enum ResponseType { "basic", "cors", "default", "error", "opaque" };
diff --git a/dom/webidl/moz.build b/dom/webidl/moz.build
index a2bc046306e..99c173e605f 100644
--- a/dom/webidl/moz.build
+++ b/dom/webidl/moz.build
@@ -330,6 +330,7 @@ WEBIDL_FILES = [
     'Request.webidl',
     'ResourceStats.webidl',
     'ResourceStatsManager.webidl',
+    'Response.webidl',
     'RGBColor.webidl',
     'RTCConfiguration.webidl',
     'RTCIceCandidate.webidl',
diff --git a/dom/workers/test/fetch/worker_interfaces.js b/dom/workers/test/fetch/worker_interfaces.js
index b6b798a05d4..7b274980781 100644
--- a/dom/workers/test/fetch/worker_interfaces.js
+++ b/dom/workers/test/fetch/worker_interfaces.js
@@ -6,5 +6,6 @@ function ok(a, msg) {
 onmessage = function() {
   ok(typeof Headers === "function", "Headers should be defined");
   ok(typeof Request === "function", "Request should be defined");
+  ok(typeof Response === "function", "Response should be defined");
   postMessage({ type: 'finish' });
 }

From c40ba404e529653620b189a08491df13d90943d7 Mon Sep 17 00:00:00 2001
From: Nikhil Marathe 
Date: Thu, 24 Jul 2014 18:30:07 -0700
Subject: [PATCH 040/146] Bug 1017613 - Part 3 - fetch() IDL and stubs. r=baku

--HG--
extra : rebase_source : 7783234b38b5d903dd6bd6d1e25d97c2e739f78c
---
 dom/base/nsGlobalWindow.cpp                 | 9 +++++++++
 dom/base/nsGlobalWindow.h                   | 6 ++++++
 dom/webidl/Fetch.webidl                     | 7 +++++++
 dom/webidl/Window.webidl                    | 1 +
 dom/webidl/WorkerGlobalScope.webidl         | 1 +
 dom/workers/WorkerScope.cpp                 | 8 ++++++++
 dom/workers/WorkerScope.h                   | 5 +++++
 dom/workers/test/fetch/worker_interfaces.js | 1 +
 8 files changed, 38 insertions(+)

diff --git a/dom/base/nsGlobalWindow.cpp b/dom/base/nsGlobalWindow.cpp
index c676e5f1f30..3ed57bdf821 100644
--- a/dom/base/nsGlobalWindow.cpp
+++ b/dom/base/nsGlobalWindow.cpp
@@ -187,6 +187,7 @@
 #include "mozilla/dom/MessagePort.h"
 #include "mozilla/dom/MessagePortBinding.h"
 #include "mozilla/dom/indexedDB/IDBFactory.h"
+#include "mozilla/dom/Promise.h"
 
 #include "mozilla/dom/StructuredCloneTags.h"
 
@@ -6370,6 +6371,14 @@ nsGlobalWindow::Confirm(const nsAString& aString, bool* aReturn)
   return rv.ErrorCode();
 }
 
+already_AddRefed<Promise>
+nsGlobalWindow::Fetch(const RequestOrScalarValueString& aInput,
+                      const RequestInit& aInit, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);  // Stub: always fails for now.
+  return nullptr;
+}
+
 void
 nsGlobalWindow::Prompt(const nsAString& aMessage, const nsAString& aInitial,
                        nsAString& aReturn, ErrorResult& aError)
diff --git a/dom/base/nsGlobalWindow.h b/dom/base/nsGlobalWindow.h
index df3a119def2..a86310630a8 100644
--- a/dom/base/nsGlobalWindow.h
+++ b/dom/base/nsGlobalWindow.h
@@ -38,8 +38,10 @@
 #include "nsSize.h"
 #include "mozFlushType.h"
 #include "prclist.h"
+#include "mozilla/dom/RequestBinding.h"
 #include "mozilla/dom/StorageEvent.h"
 #include "mozilla/dom/StorageEventBinding.h"
+#include "mozilla/dom/UnionTypes.h"
 #include "nsFrameMessageManager.h"
 #include "mozilla/LinkedList.h"
 #include "mozilla/TimeStamp.h"
@@ -107,6 +109,7 @@ class MediaQueryList;
 class MozSelfSupport;
 class Navigator;
 class OwningExternalOrWindowProxy;
+class Promise;
 class Selection;
 class SpeechSynthesis;
 class WakeLock;
@@ -850,6 +853,9 @@ public:
   void Alert(mozilla::ErrorResult& aError);
   void Alert(const nsAString& aMessage, mozilla::ErrorResult& aError);
   bool Confirm(const nsAString& aMessage, mozilla::ErrorResult& aError);
+  already_AddRefed<mozilla::dom::Promise> Fetch(const mozilla::dom::RequestOrScalarValueString& aInput,
+                                                const mozilla::dom::RequestInit& aInit,
+                                                mozilla::ErrorResult& aRv);
   void Prompt(const nsAString& aMessage, const nsAString& aInitial,
               nsAString& aReturn, mozilla::ErrorResult& aError);
   void Print(mozilla::ErrorResult& aError);
diff --git a/dom/webidl/Fetch.webidl b/dom/webidl/Fetch.webidl
index 26c9775e696..a1c66f3579d 100644
--- a/dom/webidl/Fetch.webidl
+++ b/dom/webidl/Fetch.webidl
@@ -27,3 +27,10 @@ interface Body {
   [Throws]
   Promise text();
 };
+
+[NoInterfaceObject, Exposed=(Window,Worker)]
+interface GlobalFetch {
+  [Throws, Func="mozilla::dom::Headers::PrefEnabled"]
+  Promise<Response> fetch(RequestInfo input, optional RequestInit init);
+};
+
diff --git a/dom/webidl/Window.webidl b/dom/webidl/Window.webidl
index 7bf30c1de77..2c6e2cc7e8b 100644
--- a/dom/webidl/Window.webidl
+++ b/dom/webidl/Window.webidl
@@ -465,3 +465,4 @@ interface ChromeWindow {
 };
 
 Window implements ChromeWindow;
+Window implements GlobalFetch;
diff --git a/dom/webidl/WorkerGlobalScope.webidl b/dom/webidl/WorkerGlobalScope.webidl
index bc0b2135628..bbd41aa5ef7 100644
--- a/dom/webidl/WorkerGlobalScope.webidl
+++ b/dom/webidl/WorkerGlobalScope.webidl
@@ -38,6 +38,7 @@ partial interface WorkerGlobalScope {
 
 WorkerGlobalScope implements WindowTimers;
 WorkerGlobalScope implements WindowBase64;
+WorkerGlobalScope implements GlobalFetch;
 
 // Not implemented yet: bug 1072107.
 // WorkerGlobalScope implements FontFaceSource;
diff --git a/dom/workers/WorkerScope.cpp b/dom/workers/WorkerScope.cpp
index df34df4798e..3e63247261a 100644
--- a/dom/workers/WorkerScope.cpp
+++ b/dom/workers/WorkerScope.cpp
@@ -303,6 +303,14 @@ WorkerGlobalScope::GetPerformance()
   return mPerformance;
 }
 
+already_AddRefed<Promise>
+WorkerGlobalScope::Fetch(const RequestOrScalarValueString& aInput,
+                         const RequestInit& aInit, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);  // Stub: always fails for now.
+  return nullptr;
+}
+
 DedicatedWorkerGlobalScope::DedicatedWorkerGlobalScope(WorkerPrivate* aWorkerPrivate)
 : WorkerGlobalScope(aWorkerPrivate)
 {
diff --git a/dom/workers/WorkerScope.h b/dom/workers/WorkerScope.h
index a683e0670cc..b522bfe1a23 100644
--- a/dom/workers/WorkerScope.h
+++ b/dom/workers/WorkerScope.h
@@ -8,6 +8,8 @@
 
 #include "Workers.h"
 #include "mozilla/DOMEventTargetHelper.h"
+#include "mozilla/dom/RequestBinding.h"
+#include "mozilla/dom/UnionTypes.h"
 
 namespace mozilla {
 namespace dom {
@@ -120,6 +122,9 @@ public:
   Dump(const Optional& aString) const;
 
   Performance* GetPerformance();
+
+  already_AddRefed<Promise>
+  Fetch(const RequestOrScalarValueString& aInput, const RequestInit& aInit, ErrorResult& aRv);
 };
 
 class DedicatedWorkerGlobalScope MOZ_FINAL : public WorkerGlobalScope
diff --git a/dom/workers/test/fetch/worker_interfaces.js b/dom/workers/test/fetch/worker_interfaces.js
index 7b274980781..e3c2700fe0e 100644
--- a/dom/workers/test/fetch/worker_interfaces.js
+++ b/dom/workers/test/fetch/worker_interfaces.js
@@ -7,5 +7,6 @@ onmessage = function() {
   ok(typeof Headers === "function", "Headers should be defined");
   ok(typeof Request === "function", "Request should be defined");
   ok(typeof Response === "function", "Response should be defined");
+  ok(typeof fetch === "function", "fetch() should be defined");
   postMessage({ type: 'finish' });
 }

From cfc4bdd40a16a136788fbae64dc20562a8c4590a Mon Sep 17 00:00:00 2001
From: B2G Bumper Bot 
Date: Thu, 2 Oct 2014 08:20:47 -0700
Subject: [PATCH 041/146] Bumping gaia.json for 4 gaia revision(s) a=gaia-bump

========

https://hg.mozilla.org/integration/gaia-central/rev/9d5b6b01cf93
Author: Kevin Grandon 
Desc: Merge pull request #24496 from KevinGrandon/bug_1073608_usage_remove_extend

Bug 1073608 - [Usage] Remove Object.prototype.extend

========

https://hg.mozilla.org/integration/gaia-central/rev/473dde421e6a
Author: Kevin Grandon 
Desc: Bug 1073608 - [Usage] Remove Object.prototype.extend r=salva

========

https://hg.mozilla.org/integration/gaia-central/rev/675a6463b402
Author: Douglas Sherk 
Desc: Merge pull request #24638 from DouglasSherk/1076231-simpicker-racy-tests

Bug 1076231 - Fix racy unit tests in GaiaSimPicker. r=Rik

========

https://hg.mozilla.org/integration/gaia-central/rev/bf3f7477de5a
Author: Doug Sherk 
Desc: Bug 1076231 - Fix racy unit tests in GaiaSimPicker. r=Rik
---
 b2g/config/gaia.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/b2g/config/gaia.json b/b2g/config/gaia.json
index aac37588707..487da1a7ff7 100644
--- a/b2g/config/gaia.json
+++ b/b2g/config/gaia.json
@@ -4,6 +4,6 @@
         "remote": "", 
         "branch": ""
     }, 
-    "revision": "3e43be9b8c24802b40fdfbcf17895c4355e6d238", 
+    "revision": "9d5b6b01cf93ca34cbfce38ba8939d090efbf138", 
     "repo_path": "/integration/gaia-central"
 }

From 562a44dc30c04f727c9934c19e925c7c7e63e7a4 Mon Sep 17 00:00:00 2001
From: B2G Bumper Bot 
Date: Thu, 2 Oct 2014 08:26:54 -0700
Subject: [PATCH 042/146] Bumping manifests a=b2g-bump

---
 b2g/config/dolphin/sources.xml      | 2 +-
 b2g/config/emulator-ics/sources.xml | 2 +-
 b2g/config/emulator-jb/sources.xml  | 2 +-
 b2g/config/emulator-kk/sources.xml  | 2 +-
 b2g/config/emulator/sources.xml     | 2 +-
 b2g/config/flame-kk/sources.xml     | 2 +-
 b2g/config/flame/sources.xml        | 2 +-
 b2g/config/hamachi/sources.xml      | 2 +-
 b2g/config/helix/sources.xml        | 2 +-
 b2g/config/nexus-4/sources.xml      | 2 +-
 b2g/config/wasabi/sources.xml       | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/b2g/config/dolphin/sources.xml b/b2g/config/dolphin/sources.xml
index 141b461dd9e..cd89bee3dac 100644
--- a/b2g/config/dolphin/sources.xml
+++ b/b2g/config/dolphin/sources.xml
@@ -15,7 +15,7 @@
   
     
   
-  
+  
   
   
   
diff --git a/b2g/config/emulator-ics/sources.xml b/b2g/config/emulator-ics/sources.xml
index 23449d3592e..281a6ebe807 100644
--- a/b2g/config/emulator-ics/sources.xml
+++ b/b2g/config/emulator-ics/sources.xml
@@ -19,7 +19,7 @@
     
   
   
-  
+  
   
   
   
diff --git a/b2g/config/emulator-jb/sources.xml b/b2g/config/emulator-jb/sources.xml
index 3636b0f3dc7..ba1f92170f7 100644
--- a/b2g/config/emulator-jb/sources.xml
+++ b/b2g/config/emulator-jb/sources.xml
@@ -17,7 +17,7 @@
   
   
   
-  
+  
   
   
   
diff --git a/b2g/config/emulator-kk/sources.xml b/b2g/config/emulator-kk/sources.xml
index 776d72e86a1..7b3fc4f805a 100644
--- a/b2g/config/emulator-kk/sources.xml
+++ b/b2g/config/emulator-kk/sources.xml
@@ -15,7 +15,7 @@
   
     
   
-  
+  
   
   
   
diff --git a/b2g/config/emulator/sources.xml b/b2g/config/emulator/sources.xml
index 23449d3592e..281a6ebe807 100644
--- a/b2g/config/emulator/sources.xml
+++ b/b2g/config/emulator/sources.xml
@@ -19,7 +19,7 @@
     
   
   
-  
+  
   
   
   
diff --git a/b2g/config/flame-kk/sources.xml b/b2g/config/flame-kk/sources.xml
index d5d9babdd40..bb0f38117c1 100644
--- a/b2g/config/flame-kk/sources.xml
+++ b/b2g/config/flame-kk/sources.xml
@@ -15,7 +15,7 @@
   
     
   
-  
+  
   
   
   
diff --git a/b2g/config/flame/sources.xml b/b2g/config/flame/sources.xml
index 0c7086972d0..af0a5c8e2c8 100644
--- a/b2g/config/flame/sources.xml
+++ b/b2g/config/flame/sources.xml
@@ -17,7 +17,7 @@
   
   
   
-  
+  
   
   
   
diff --git a/b2g/config/hamachi/sources.xml b/b2g/config/hamachi/sources.xml
index a0f50f3b1dd..72cbe7fa07a 100644
--- a/b2g/config/hamachi/sources.xml
+++ b/b2g/config/hamachi/sources.xml
@@ -17,7 +17,7 @@
     
   
   
-  
+  
   
   
   
diff --git a/b2g/config/helix/sources.xml b/b2g/config/helix/sources.xml
index 6338fd5f787..0910b079515 100644
--- a/b2g/config/helix/sources.xml
+++ b/b2g/config/helix/sources.xml
@@ -15,7 +15,7 @@
     
   
   
-  
+  
   
   
   
diff --git a/b2g/config/nexus-4/sources.xml b/b2g/config/nexus-4/sources.xml
index 224cbba3ece..45807ef9325 100644
--- a/b2g/config/nexus-4/sources.xml
+++ b/b2g/config/nexus-4/sources.xml
@@ -17,7 +17,7 @@
   
   
   
-  
+  
   
   
   
diff --git a/b2g/config/wasabi/sources.xml b/b2g/config/wasabi/sources.xml
index 5d0f1ece843..5b4d90332e4 100644
--- a/b2g/config/wasabi/sources.xml
+++ b/b2g/config/wasabi/sources.xml
@@ -17,7 +17,7 @@
     
   
   
-  
+  
   
   
   

From 44b836b4c88cf205f899bce9e0833fe3e7313781 Mon Sep 17 00:00:00 2001
From: B2G Bumper Bot 
Date: Thu, 2 Oct 2014 08:35:47 -0700
Subject: [PATCH 043/146] Bumping gaia.json for 4 gaia revision(s) a=gaia-bump

========

https://hg.mozilla.org/integration/gaia-central/rev/bcc6d931a7df
Author: Kevin Grandon 
Desc: Merge pull request #24627 from KevinGrandon/reland_bug_1071445

Bug 1071445 - [Settings]Cap letter for titles on panels based on new Building Blocks

========

https://hg.mozilla.org/integration/gaia-central/rev/e91157bff2f5
Author: Kevin Grandon 
Desc: Bug 1071445 - Follow-up, update marionette test values r=kgrandon

========

https://hg.mozilla.org/integration/gaia-central/rev/9f5a7417f45b
Author: gasolin 
Desc: Bug 1071445 - [Settings]Cap letter for titles on panels based on new Building Blocks

========

https://hg.mozilla.org/integration/gaia-central/rev/70bd39380d14
Author: Jeff Muizelaar 
Desc: Bug 1042241 - Avoid encoding/decoding clip canvas r=kgrandon

We can just paint the one canvas to the other directly. This
avoids the png encoding/decoding time and will reduce peak memory.
---
 b2g/config/gaia.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/b2g/config/gaia.json b/b2g/config/gaia.json
index 487da1a7ff7..d7844b854f5 100644
--- a/b2g/config/gaia.json
+++ b/b2g/config/gaia.json
@@ -4,6 +4,6 @@
         "remote": "", 
         "branch": ""
     }, 
-    "revision": "9d5b6b01cf93ca34cbfce38ba8939d090efbf138", 
+    "revision": "bcc6d931a7df4946d9cbe31d38d7d998902424bd", 
     "repo_path": "/integration/gaia-central"
 }

From 4d3fe6fe9f6bea7312533fcffae6947477aaf9ef Mon Sep 17 00:00:00 2001
From: B2G Bumper Bot 
Date: Thu, 2 Oct 2014 08:41:55 -0700
Subject: [PATCH 044/146] Bumping manifests a=b2g-bump

---
 b2g/config/dolphin/sources.xml      | 2 +-
 b2g/config/emulator-ics/sources.xml | 2 +-
 b2g/config/emulator-jb/sources.xml  | 2 +-
 b2g/config/emulator-kk/sources.xml  | 2 +-
 b2g/config/emulator/sources.xml     | 2 +-
 b2g/config/flame-kk/sources.xml     | 2 +-
 b2g/config/flame/sources.xml        | 2 +-
 b2g/config/hamachi/sources.xml      | 2 +-
 b2g/config/helix/sources.xml        | 2 +-
 b2g/config/nexus-4/sources.xml      | 2 +-
 b2g/config/wasabi/sources.xml       | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/b2g/config/dolphin/sources.xml b/b2g/config/dolphin/sources.xml
index cd89bee3dac..86385159fe8 100644
--- a/b2g/config/dolphin/sources.xml
+++ b/b2g/config/dolphin/sources.xml
@@ -15,7 +15,7 @@
   
     
   
-  
+  
   
   
   
diff --git a/b2g/config/emulator-ics/sources.xml b/b2g/config/emulator-ics/sources.xml
index 281a6ebe807..49f523d5f93 100644
--- a/b2g/config/emulator-ics/sources.xml
+++ b/b2g/config/emulator-ics/sources.xml
@@ -19,7 +19,7 @@
     
   
   
-  
+  
   
   
   
diff --git a/b2g/config/emulator-jb/sources.xml b/b2g/config/emulator-jb/sources.xml
index ba1f92170f7..d97c1b58e79 100644
--- a/b2g/config/emulator-jb/sources.xml
+++ b/b2g/config/emulator-jb/sources.xml
@@ -17,7 +17,7 @@
   
   
   
-  
+  
   
   
   
diff --git a/b2g/config/emulator-kk/sources.xml b/b2g/config/emulator-kk/sources.xml
index 7b3fc4f805a..64aef9543ca 100644
--- a/b2g/config/emulator-kk/sources.xml
+++ b/b2g/config/emulator-kk/sources.xml
@@ -15,7 +15,7 @@
   
     
   
-  
+  
   
   
   
diff --git a/b2g/config/emulator/sources.xml b/b2g/config/emulator/sources.xml
index 281a6ebe807..49f523d5f93 100644
--- a/b2g/config/emulator/sources.xml
+++ b/b2g/config/emulator/sources.xml
@@ -19,7 +19,7 @@
     
   
   
-  
+  
   
   
   
diff --git a/b2g/config/flame-kk/sources.xml b/b2g/config/flame-kk/sources.xml
index bb0f38117c1..5ab4d6c9de5 100644
--- a/b2g/config/flame-kk/sources.xml
+++ b/b2g/config/flame-kk/sources.xml
@@ -15,7 +15,7 @@
   
     
   
-  
+  
   
   
   
diff --git a/b2g/config/flame/sources.xml b/b2g/config/flame/sources.xml
index af0a5c8e2c8..d4b4260a97b 100644
--- a/b2g/config/flame/sources.xml
+++ b/b2g/config/flame/sources.xml
@@ -17,7 +17,7 @@
   
   
   
-  
+  
   
   
   
diff --git a/b2g/config/hamachi/sources.xml b/b2g/config/hamachi/sources.xml
index 72cbe7fa07a..fd9a98cce7f 100644
--- a/b2g/config/hamachi/sources.xml
+++ b/b2g/config/hamachi/sources.xml
@@ -17,7 +17,7 @@
     
   
   
-  
+  
   
   
   
diff --git a/b2g/config/helix/sources.xml b/b2g/config/helix/sources.xml
index 0910b079515..3c6acddd928 100644
--- a/b2g/config/helix/sources.xml
+++ b/b2g/config/helix/sources.xml
@@ -15,7 +15,7 @@
     
   
   
-  
+  
   
   
   
diff --git a/b2g/config/nexus-4/sources.xml b/b2g/config/nexus-4/sources.xml
index 45807ef9325..30d5e384c8a 100644
--- a/b2g/config/nexus-4/sources.xml
+++ b/b2g/config/nexus-4/sources.xml
@@ -17,7 +17,7 @@
   
   
   
-  
+  
   
   
   
diff --git a/b2g/config/wasabi/sources.xml b/b2g/config/wasabi/sources.xml
index 5b4d90332e4..f6fb7362893 100644
--- a/b2g/config/wasabi/sources.xml
+++ b/b2g/config/wasabi/sources.xml
@@ -17,7 +17,7 @@
     
   
   
-  
+  
   
   
   

From 677c5c8d14cb19b43de2a3c28cbb01b5de20a90d Mon Sep 17 00:00:00 2001
From: George Wright 
Date: Wed, 10 Sep 2014 16:15:43 -0400
Subject: [PATCH 045/146] Bug 1042291 - Implement a better heuristic for when
 to use HW accelerated <canvas> r=snorp

---
 dom/canvas/CanvasRenderingContext2D.cpp | 21 +++++++
 dom/canvas/CanvasRenderingContext2D.h   | 74 +++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

diff --git a/dom/canvas/CanvasRenderingContext2D.cpp b/dom/canvas/CanvasRenderingContext2D.cpp
index ab3ada588fc..51c4fd54a98 100644
--- a/dom/canvas/CanvasRenderingContext2D.cpp
+++ b/dom/canvas/CanvasRenderingContext2D.cpp
@@ -724,6 +724,9 @@ public:
       static_cast(aData);
     if (self->mContext) {
       self->mContext->MarkContextClean();
+      if (self->mContext->mDrawObserver) {
+        self->mContext->mDrawObserver->FrameEnd();
+      }
     }
   }
   bool IsForContext(CanvasRenderingContext2D *aContext)
@@ -826,6 +829,7 @@ CanvasRenderingContext2D::CanvasRenderingContext2D()
   , mResetLayer(true)
   , mIPC(false)
   , mStream(nullptr)
+  , mDrawObserver(nullptr)
   , mIsEntireFrameInvalid(false)
   , mPredictManyRedrawCalls(false), mPathTransformWillUpdate(false)
   , mInvalidateCount(0)
@@ -838,6 +842,7 @@ CanvasRenderingContext2D::CanvasRenderingContext2D()
     mRenderingMode = RenderingMode::SoftwareBackendMode;
   }
 
+  mDrawObserver = new CanvasDrawObserver(this);
 }
 
 CanvasRenderingContext2D::~CanvasRenderingContext2D()
@@ -3816,6 +3821,10 @@ CanvasRenderingContext2D::DrawImage(const HTMLImageOrCanvasOrVideoElement& image
                                     uint8_t optional_argc,
                                     ErrorResult& error)
 {
+  if (mDrawObserver) {
+    mDrawObserver->DidDrawCall(CanvasDrawObserver::DrawCallType::DrawImage);
+  }
+
   MOZ_ASSERT(optional_argc == 0 || optional_argc == 2 || optional_argc == 6);
 
   RefPtr srcSurf;
@@ -4332,6 +4341,10 @@ CanvasRenderingContext2D::GetImageData(JSContext* aCx, double aSx,
                                        double aSy, double aSw,
                                        double aSh, ErrorResult& error)
 {
+  if (mDrawObserver) {
+    mDrawObserver->DidDrawCall(CanvasDrawObserver::DrawCallType::GetImageData);
+  }
+
   EnsureTarget();
   if (!IsTargetValid()) {
     error.Throw(NS_ERROR_FAILURE);
@@ -4412,6 +4425,10 @@ CanvasRenderingContext2D::GetImageDataArray(JSContext* aCx,
                                             uint32_t aHeight,
                                             JSObject** aRetval)
 {
+  if (mDrawObserver) {
+    mDrawObserver->DidDrawCall(CanvasDrawObserver::DrawCallType::GetImageData);
+  }
+
   MOZ_ASSERT(aWidth && aHeight);
 
   CheckedInt len = CheckedInt(aWidth) * aHeight * 4;
@@ -4585,6 +4602,10 @@ CanvasRenderingContext2D::PutImageData_explicit(int32_t x, int32_t y, uint32_t w
                                                 bool hasDirtyRect, int32_t dirtyX, int32_t dirtyY,
                                                 int32_t dirtyWidth, int32_t dirtyHeight)
 {
+  if (mDrawObserver) {
+    mDrawObserver->DidDrawCall(CanvasDrawObserver::DrawCallType::PutImageData);
+  }
+
   if (w == 0 || h == 0) {
     return NS_ERROR_DOM_INVALID_STATE_ERR;
   }
diff --git a/dom/canvas/CanvasRenderingContext2D.h b/dom/canvas/CanvasRenderingContext2D.h
index 99ddab8e9c0..2826e4deb81 100644
--- a/dom/canvas/CanvasRenderingContext2D.h
+++ b/dom/canvas/CanvasRenderingContext2D.h
@@ -21,6 +21,7 @@
 #include "mozilla/dom/CanvasPattern.h"
 #include "mozilla/gfx/Rect.h"
 #include "mozilla/gfx/2D.h"
+#include "mozilla/TimeStamp.h"
 #include "gfx2DGlue.h"
 #include "imgIEncoder.h"
 #include "nsLayoutUtils.h"
@@ -115,6 +116,7 @@ private:
 
 struct CanvasBidiProcessor;
 class CanvasRenderingContext2DUserData;
+class CanvasDrawObserver;
 
 /**
  ** CanvasRenderingContext2D
@@ -767,6 +769,11 @@ protected:
 
   RefPtr mStream;
 
+  // This observes our draw calls at the beginning of the canvas
+  // lifetime and switches to software or GPU mode depending on
+  // what it thinks is best
+  CanvasDrawObserver *mDrawObserver;
+
   /**
     * Flag to avoid duplicate calls to InvalidateFrame. Set to true whenever
     * Redraw is called, reset to false when Render is called.
@@ -1081,6 +1088,73 @@ protected:
   }
 
   friend struct CanvasBidiProcessor;
+  friend class CanvasDrawObserver;
+};
+
+class CanvasDrawObserver
+{
+public:
+  explicit CanvasDrawObserver(CanvasRenderingContext2D* aCanvasContext)
+    : mCanvasContext(aCanvasContext)
+    , mDisabled(false)
+    , mSoftwarePreferredCalls(0)
+    , mGPUPreferredCalls(0)
+    , mFramesRendered(0)
+    , mCreationTime(TimeStamp::NowLoRes())
+  {}
+
+  // Only enumerate draw calls that could affect the heuristic
+  enum DrawCallType {
+    PutImageData,
+    GetImageData,
+    DrawImage
+  };
+
+  void DidDrawCall(DrawCallType aType) {  // Tally one observed draw call.
+    if (mDisabled) {
+      return;
+    }
+
+    switch (aType) {
+      case PutImageData:
+      case GetImageData:
+        mSoftwarePreferredCalls++;
+        break;
+      case DrawImage:
+        mGPUPreferredCalls++;
+        break;
+    }
+  }
+
+  void FrameEnd() {  // Count one frame; pick the backend once a limit is hit.
+    if (mDisabled) {
+      return;
+    }
+
+    mFramesRendered++;
+
+    TimeDuration timeElapsed = TimeStamp::NowLoRes() - mCreationTime;
+
+    // Observe until 30 FrameEnd() calls or 5 seconds since construction, then
+    // choose GPU or CPU backing once (ties go to GPU) and stop observing
+    if (mFramesRendered >= 30 || timeElapsed.ToSeconds() >= 5.0) {
+      if (mGPUPreferredCalls >= mSoftwarePreferredCalls) {
+        mCanvasContext->SwitchRenderingMode(CanvasRenderingContext2D::RenderingMode::OpenGLBackendMode);
+      } else {
+        mCanvasContext->SwitchRenderingMode(CanvasRenderingContext2D::RenderingMode::SoftwareBackendMode);
+      }
+
+      mDisabled = true;
+    }
+  }
+
+private:
+  CanvasRenderingContext2D* mCanvasContext;  // context whose mode is switched
+  bool mDisabled;                            // set once the choice is made
+  unsigned int mSoftwarePreferredCalls;      // PutImageData/GetImageData tally
+  unsigned int mGPUPreferredCalls;           // DrawImage tally
+  unsigned int mFramesRendered;              // FrameEnd() calls while enabled
+  TimeStamp mCreationTime;                   // captured in the constructor
 };
 
 MOZ_FINISH_NESTED_ENUM_CLASS(CanvasRenderingContext2D::CanvasMultiGetterType)

From 4dbfc050822620f91242bef1e589864d6cd9cac2 Mon Sep 17 00:00:00 2001
From: "L. David Baron" 
Date: Thu, 2 Oct 2014 21:53:22 -0700
Subject: [PATCH 046/146] Bug 1074634 - Don't overwrite
 eRestyleResult_ContinueAndForceDescendants with eRestyleResult_Continue. 
 r=heycam

---
 layout/base/RestyleManager.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/layout/base/RestyleManager.cpp b/layout/base/RestyleManager.cpp
index 74b53b25281..39ebdca0c33 100644
--- a/layout/base/RestyleManager.cpp
+++ b/layout/base/RestyleManager.cpp
@@ -2746,9 +2746,7 @@ ElementRestyler::RestyleSelf(nsIFrame* aSelf,
 
   RestyleResult result;
 
-  if (aRestyleHint & eRestyle_ForceDescendants) {
-    result = eRestyleResult_ContinueAndForceDescendants;
-  } else if (aRestyleHint) {
+  if (aRestyleHint) {
     result = eRestyleResult_Continue;
   } else {
     result = ComputeRestyleResultFromFrame(aSelf);
@@ -3158,6 +3156,10 @@ ElementRestyler::RestyleSelf(nsIFrame* aSelf,
     }
   }
 
+  if (aRestyleHint & eRestyle_ForceDescendants) {
+    result = eRestyleResult_ContinueAndForceDescendants;
+  }
+
   LOG_RESTYLE("returning %s", RestyleResultToString(result).get());
 
   return result;

From b5edb04125199d3edd41654c22db5e52304f7990 Mon Sep 17 00:00:00 2001
From: "L. David Baron" 
Date: Thu, 2 Oct 2014 21:53:22 -0700
Subject: [PATCH 047/146] Bug 1075082 patch 1 - Add a helper function for the
 root of the rule tree.  r=birtles

---
 layout/style/nsRuleNode.cpp | 10 ++++++++++
 layout/style/nsRuleNode.h   |  6 ++++++
 layout/style/nsStyleSet.cpp |  7 ++-----
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/layout/style/nsRuleNode.cpp b/layout/style/nsRuleNode.cpp
index dab8e883aea..de8540690dc 100644
--- a/layout/style/nsRuleNode.cpp
+++ b/layout/style/nsRuleNode.cpp
@@ -1556,6 +1556,16 @@ nsRuleNode::Transition(nsIStyleRule* aRule, uint8_t aLevel,
   return next;
 }
 
+nsRuleNode*
+nsRuleNode::RuleTree()
+{
+  nsRuleNode* root = this;
+  while (nsRuleNode* up = root->mParent) {  // a null mParent marks the root
+    root = up;
+  }
+  return root;
+}
+
 void nsRuleNode::SetUsedDirectly()
 {
   mDependentBits |= NS_RULE_NODE_USED_DIRECTLY;
diff --git a/layout/style/nsRuleNode.h b/layout/style/nsRuleNode.h
index 3529e1be6ce..7ffa327f892 100644
--- a/layout/style/nsRuleNode.h
+++ b/layout/style/nsRuleNode.h
@@ -689,6 +689,12 @@ public:
   nsRuleNode* GetParent() const { return mParent; }
   bool IsRoot() const { return mParent == nullptr; }
 
+  // Return the root of the rule tree that this rule node is in.
+  nsRuleNode* RuleTree();
+  const nsRuleNode* RuleTree() const {  // forwards to the non-const overload
+    return const_cast<nsRuleNode*>(this)->RuleTree();
+  }
+
   // These uint8_ts are really nsStyleSet::sheetType values.
   uint8_t GetLevel() const {
     NS_ASSERTION(!IsRoot(), "can't call on root");
diff --git a/layout/style/nsStyleSet.cpp b/layout/style/nsStyleSet.cpp
index b3403162a01..05e34267430 100644
--- a/layout/style/nsStyleSet.cpp
+++ b/layout/style/nsStyleSet.cpp
@@ -233,10 +233,6 @@ nsStyleSet::EndReconstruct()
   mInReconstruct = false;
 #ifdef DEBUG
   for (int32_t i = mRoots.Length() - 1; i >= 0; --i) {
-    nsRuleNode *n = mRoots[i]->RuleNode();
-    while (n->GetParent()) {
-      n = n->GetParent();
-    }
     // Since nsStyleContext's mParent and mRuleNode are immutable, and
     // style contexts own their parents, and nsStyleContext asserts in
     // its constructor that the style context and its parent are in the
@@ -244,7 +240,8 @@ nsStyleSet::EndReconstruct()
     // mRoots; we only need to check the rule nodes of mRoots
     // themselves.
 
-    NS_ASSERTION(n == mRuleTree, "style context has old rule node");
+    NS_ASSERTION(mRoots[i]->RuleNode()->RuleTree() == mRuleTree,
+                 "style context has old rule node");
   }
 #endif
   // This *should* destroy the only element of mOldRuleTrees, but in

From 5e24ca3286e79b143976112cdf8e68c4267eb3dd Mon Sep 17 00:00:00 2001
From: "L. David Baron" 
Date: Thu, 2 Oct 2014 21:53:23 -0700
Subject: [PATCH 048/146] Bug 1075082 patch 2 - Add assertion in destructor of
 style contexts that should have been destroyed earlier.  r=birtles

---
 layout/style/nsStyleContext.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/layout/style/nsStyleContext.cpp b/layout/style/nsStyleContext.cpp
index 8ea22e8216a..5cd3df470de 100644
--- a/layout/style/nsStyleContext.cpp
+++ b/layout/style/nsStyleContext.cpp
@@ -84,6 +84,11 @@ nsStyleContext::~nsStyleContext()
   NS_ASSERTION((nullptr == mChild) && (nullptr == mEmptyChild), "destructing context with children");
 
   nsPresContext *presContext = mRuleNode->PresContext();
+  nsStyleSet* styleSet = presContext->PresShell()->StyleSet();
+
+  NS_ASSERTION(styleSet->GetRuleTree() == mRuleNode->RuleTree() ||
+               styleSet->IsInRuleTreeReconstruct(),
+               "destroying style context from old rule tree too late");
 
 #ifdef DEBUG
 #if 0
@@ -105,8 +110,7 @@ nsStyleContext::~nsStyleContext()
 
   mRuleNode->Release();
 
-  presContext->PresShell()->StyleSet()->
-    NotifyStyleContextDestroyed(presContext, this);
+  styleSet->NotifyStyleContextDestroyed(presContext, this);
 
   if (mParent) {
     mParent->RemoveChild(this);

From a84c0301dc2a5552e92c2da7bde9bc4a079a27f4 Mon Sep 17 00:00:00 2001
From: "L. David Baron" 
Date: Thu, 2 Oct 2014 21:53:23 -0700
Subject: [PATCH 049/146] Bug 1075137 patch 1 - Convert one use of
 IsProcessingRestyles that doesn't follow normal pattern to a debug-only
 member on the restyle manager.  r=birtles

---
 layout/base/RestyleManager.cpp | 18 +++++++++++++++++-
 layout/base/RestyleManager.h   |  4 ++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/layout/base/RestyleManager.cpp b/layout/base/RestyleManager.cpp
index 39ebdca0c33..aaae80124d3 100644
--- a/layout/base/RestyleManager.cpp
+++ b/layout/base/RestyleManager.cpp
@@ -74,6 +74,9 @@ RestyleManager::RestyleManager(nsPresContext* aPresContext)
                      ELEMENT_IS_POTENTIAL_RESTYLE_ROOT)
   , mPendingAnimationRestyles(ELEMENT_HAS_PENDING_ANIMATION_RESTYLE |
                               ELEMENT_IS_POTENTIAL_ANIMATION_RESTYLE_ROOT)
+#ifdef DEBUG
+  , mIsProcessingRestyles(false)
+#endif
 #ifdef RESTYLE_LOGGING
   , mLoggingDepth(0)
 #endif
@@ -1430,6 +1433,10 @@ RestyleManager::RebuildAllStyleData(nsChangeHint aExtraHint)
 
   nsAutoScriptBlocker scriptBlocker;
 
+  MOZ_ASSERT(!mIsProcessingRestyles, "Nesting calls to processing restyles");
+#ifdef DEBUG
+  mIsProcessingRestyles = true;
+#endif
   mPresContext->SetProcessingRestyles(true);
 
   // FIXME (bug 1047928): Many of the callers probably don't need
@@ -1442,6 +1449,9 @@ RestyleManager::RebuildAllStyleData(nsChangeHint aExtraHint)
                         nsRestyleHint(eRestyle_Subtree |
                                       eRestyle_ForceDescendants));
 
+#ifdef DEBUG
+  mIsProcessingRestyles = false;
+#endif
   mPresContext->SetProcessingRestyles(false);
 
   // Make sure that we process any pending animation restyles from the
@@ -1511,8 +1521,11 @@ RestyleManager::ProcessPendingRestyles()
   mPresContext->FrameConstructor()->CreateNeededFrames();
 
   // Process non-animation restyles...
-  NS_ABORT_IF_FALSE(!mPresContext->IsProcessingRestyles(),
+  NS_ABORT_IF_FALSE(!mIsProcessingRestyles,
                     "Nesting calls to ProcessPendingRestyles?");
+#ifdef DEBUG
+  mIsProcessingRestyles = true;
+#endif
   mPresContext->SetProcessingRestyles(true);
 
   // Before we process any restyles, we need to ensure that style
@@ -1545,6 +1558,9 @@ RestyleManager::ProcessPendingRestyles()
   mPresContext->SetProcessingAnimationStyleChange(false);
 
   mPresContext->SetProcessingRestyles(false);
+#ifdef DEBUG
+  mIsProcessingRestyles = false;
+#endif
   NS_POSTCONDITION(mPendingRestyles.Count() == oldPendingRestyleCount,
                    "We should not have posted new non-animation restyles while "
                    "processing animation restyles");
diff --git a/layout/base/RestyleManager.h b/layout/base/RestyleManager.h
index 9e2380c71b0..a5610023880 100644
--- a/layout/base/RestyleManager.h
+++ b/layout/base/RestyleManager.h
@@ -443,6 +443,10 @@ private:
   RestyleTracker mPendingRestyles;
   RestyleTracker mPendingAnimationRestyles;
 
+#ifdef DEBUG
+  bool mIsProcessingRestyles;
+#endif
+
 #ifdef RESTYLE_LOGGING
   int32_t mLoggingDepth;
 #endif

From ab6f8fb56c2bd2287b12b05746074a9a3af5017c Mon Sep 17 00:00:00 2001
From: "L. David Baron" 
Date: Thu, 2 Oct 2014 21:53:23 -0700
Subject: [PATCH 050/146] Bug 1075137 patch 2 - Add new booleans for whether to
 skip animation styles and whether to post animation restyles.  r=birtles

---
 layout/base/RestyleManager.cpp | 21 +++++++++++++++++++++
 layout/base/RestyleManager.h   | 23 +++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/layout/base/RestyleManager.cpp b/layout/base/RestyleManager.cpp
index aaae80124d3..a736aacccfb 100644
--- a/layout/base/RestyleManager.cpp
+++ b/layout/base/RestyleManager.cpp
@@ -64,6 +64,8 @@ RestyleManager::RestyleManager(nsPresContext* aPresContext)
   , mRebuildAllStyleData(false)
   , mObservingRefreshDriver(false)
   , mInStyleRefresh(false)
+  , mSkipAnimationRules(false)
+  , mPostAnimationRestyles(false)
   , mHoverGeneration(0)
   , mRebuildAllExtraHint(nsChangeHint(0))
   , mLastUpdateForThrottledAnimations(aPresContext->RefreshDriver()->
@@ -1439,6 +1441,13 @@ RestyleManager::RebuildAllStyleData(nsChangeHint aExtraHint)
 #endif
   mPresContext->SetProcessingRestyles(true);
 
+  // Until we get rid of these phases in bug 960465, we need to skip
+  // animation restyles during the non-animation phase, and post
+  // animation restyles so that we restyle those elements again in the
+  // animation phase.
+  mSkipAnimationRules = true;
+  mPostAnimationRestyles = true;
+
   // FIXME (bug 1047928): Many of the callers probably don't need
   // eRestyle_Subtree because they're changing things that affect data
   // computation rather than selector matching; we could have a restyle
@@ -1449,6 +1458,8 @@ RestyleManager::RebuildAllStyleData(nsChangeHint aExtraHint)
                         nsRestyleHint(eRestyle_Subtree |
                                       eRestyle_ForceDescendants));
 
+  mPostAnimationRestyles = false;
+  mSkipAnimationRules = false;
 #ifdef DEBUG
   mIsProcessingRestyles = false;
 #endif
@@ -1539,8 +1550,18 @@ RestyleManager::ProcessPendingRestyles()
     UpdateOnlyAnimationStyles();
   }
 
+  // Until we get rid of these phases in bug 960465, we need to skip
+  // animation restyles during the non-animation phase, and post
+  // animation restyles so that we restyle those elements again in the
+  // animation phase.
+  mSkipAnimationRules = true;
+  mPostAnimationRestyles = true;
+
   mPendingRestyles.ProcessRestyles();
 
+  mPostAnimationRestyles = false;
+  mSkipAnimationRules = false;
+
 #ifdef DEBUG
   uint32_t oldPendingRestyleCount = mPendingRestyles.Count();
 #endif
diff --git a/layout/base/RestyleManager.h b/layout/base/RestyleManager.h
index a5610023880..6acc770aa2a 100644
--- a/layout/base/RestyleManager.h
+++ b/layout/base/RestyleManager.h
@@ -92,6 +92,22 @@ public:
   // track whether off-main-thread animations are up-to-date.
   uint64_t GetAnimationGeneration() const { return mAnimationGeneration; }
 
+  // Whether rule matching should skip styles associated with animation
+  bool SkipAnimationRules() const {
+    MOZ_ASSERT(mSkipAnimationRules || !mPostAnimationRestyles,
+               "inconsistent state");
+    return mSkipAnimationRules;
+  }
+
+  // Whether rule matching should post animation restyles when it skips
+  // styles associated with animation.  Only true when
+  // SkipAnimationRules() is also true.
+  bool PostAnimationRestyles() const {
+    MOZ_ASSERT(mSkipAnimationRules || !mPostAnimationRestyles,
+               "inconsistent state");
+    return mPostAnimationRestyles;
+  }
+
   /**
    * Reparent the style contexts of this frame subtree.  The parent frame of
    * aFrame must be changed to the new parent before this function is called;
@@ -427,6 +443,13 @@ private:
   bool mObservingRefreshDriver : 1;
   // True if we're in the middle of a nsRefreshDriver refresh
   bool mInStyleRefresh : 1;
+  // Whether rule matching should skip styles associated with animation
+  bool mSkipAnimationRules : 1;
+  // Whether rule matching should post animation restyles when it skips
+  // styles associated with animation.  Only true when
+  // mSkipAnimationRules is also true.
+  bool mPostAnimationRestyles : 1;
+
   uint32_t mHoverGeneration;
   nsChangeHint mRebuildAllExtraHint;
 

From 288801f4fa2827dbe070ded41402ba11a35c8fa3 Mon Sep 17 00:00:00 2001
From: "L. David Baron" 
Date: Thu, 2 Oct 2014 21:53:23 -0700
Subject: [PATCH 051/146] Bug 1075137 patch 3 - Add PresContext() method to
 nsRuleWalker.  r=birtles

---
 layout/style/nsRuleWalker.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/layout/style/nsRuleWalker.h b/layout/style/nsRuleWalker.h
index a9480a91efe..f77ee89161e 100644
--- a/layout/style/nsRuleWalker.h
+++ b/layout/style/nsRuleWalker.h
@@ -23,6 +23,8 @@ public:
     mCurrent = aNode;
   }
 
+  nsPresContext* PresContext() const { return mRoot->PresContext(); }
+
 protected:
   void DoForward(nsIStyleRule* aRule) {
     mCurrent = mCurrent->Transition(aRule, mLevel, mImportance);

From 14a1bf93dc5389391ad431f69f7ba45483806a1e Mon Sep 17 00:00:00 2001
From: "L. David Baron" 
Date: Thu, 2 Oct 2014 21:53:24 -0700
Subject: [PATCH 052/146] Bug 1075137 patch 4 - Convert callers from
 IsProcessingRestyles/IsProcessingAnimationStyleChange to
 SkipAnimationRules/PostAnimationRestyles.  r=birtles

---
 content/svg/content/src/moz.build        |  1 +
 content/svg/content/src/nsSVGElement.cpp | 24 +++++++++--------
 layout/style/nsAnimationManager.cpp      |  6 ++---
 layout/style/nsHTMLCSSStyleSheet.cpp     | 11 +++++---
 layout/style/nsStyleSet.cpp              | 33 +++++++++++++++---------
 layout/style/nsTransitionManager.cpp     | 12 +++++----
 6 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/content/svg/content/src/moz.build b/content/svg/content/src/moz.build
index 7fdb2c4faeb..ef69053ae00 100644
--- a/content/svg/content/src/moz.build
+++ b/content/svg/content/src/moz.build
@@ -259,6 +259,7 @@ LOCAL_INCLUDES += [
     '/dom/smil',
     '/dom/xbl',
     '/dom/xml',
+    '/layout/base',
     '/layout/generic',
     '/layout/style',
     '/layout/svg',
diff --git a/content/svg/content/src/nsSVGElement.cpp b/content/svg/content/src/nsSVGElement.cpp
index bce1c4cbd5d..66343af629e 100644
--- a/content/svg/content/src/nsSVGElement.cpp
+++ b/content/svg/content/src/nsSVGElement.cpp
@@ -51,6 +51,7 @@
 #include "nsSMILAnimationController.h"
 #include "mozilla/dom/SVGElementBinding.h"
 #include "mozilla/unused.h"
+#include "RestyleManager.h"
 
 using namespace mozilla;
 using namespace mozilla::dom;
@@ -916,17 +917,18 @@ nsSVGElement::WalkAnimatedContentStyleRules(nsRuleWalker* aRuleWalker)
   // whether this is a "no-animation restyle". (This should match the check
   // in nsHTMLCSSStyleSheet::RulesMatching(), where we determine whether to
   // apply the SMILOverrideStyle.)
-  nsIDocument* doc = OwnerDoc();
-  nsIPresShell* shell = doc->GetShell();
-  nsPresContext* context = shell ? shell->GetPresContext() : nullptr;
-  if (context && context->IsProcessingRestyles() &&
-      !context->IsProcessingAnimationStyleChange()) {
-    // Any style changes right now could trigger CSS Transitions. We don't
-    // want that to happen from SMIL-animated value of mapped attrs, so
-    // ignore animated value for now, and request an animation restyle to
-    // get our animated value noticed.
-    shell->RestyleForAnimation(this,
-      eRestyle_SVGAttrAnimations | eRestyle_ChangeAnimationPhase);
+  nsPresContext* context = aRuleWalker->PresContext();
+  nsIPresShell* shell = context->PresShell();
+  RestyleManager* restyleManager = context->RestyleManager();
+  if (restyleManager->SkipAnimationRules()) {
+    if (restyleManager->PostAnimationRestyles()) {
+      // Any style changes right now could trigger CSS Transitions. We don't
+      // want that to happen from SMIL-animated value of mapped attrs, so
+      // ignore animated value for now, and request an animation restyle to
+      // get our animated value noticed.
+      shell->RestyleForAnimation(this,
+        eRestyle_SVGAttrAnimations | eRestyle_ChangeAnimationPhase);
+    }
   } else {
     // Ok, this is an animation restyle -- go ahead and update/walk the
     // animated content style rule.
diff --git a/layout/style/nsAnimationManager.cpp b/layout/style/nsAnimationManager.cpp
index 8b660a65782..d856107bd72 100644
--- a/layout/style/nsAnimationManager.cpp
+++ b/layout/style/nsAnimationManager.cpp
@@ -667,12 +667,12 @@ nsAnimationManager::GetAnimationRule(mozilla::dom::Element* aElement,
     return nullptr;
   }
 
-  if (mPresContext->IsProcessingRestyles() &&
-      !mPresContext->IsProcessingAnimationStyleChange()) {
+  RestyleManager* restyleManager = mPresContext->RestyleManager();
+  if (restyleManager->SkipAnimationRules()) {
     // During the non-animation part of processing restyles, we don't
     // add the animation rule.
 
-    if (collection->mStyleRule) {
+    if (collection->mStyleRule && restyleManager->PostAnimationRestyles()) {
       collection->PostRestyleForAnimation(mPresContext);
     }
 
diff --git a/layout/style/nsHTMLCSSStyleSheet.cpp b/layout/style/nsHTMLCSSStyleSheet.cpp
index bcb60f73e24..8a08d7ebbd2 100644
--- a/layout/style/nsHTMLCSSStyleSheet.cpp
+++ b/layout/style/nsHTMLCSSStyleSheet.cpp
@@ -17,6 +17,7 @@
 #include "mozilla/dom/Element.h"
 #include "nsAttrValue.h"
 #include "nsAttrValueInlines.h"
+#include "RestyleManager.h"
 
 using namespace mozilla;
 using namespace mozilla::dom;
@@ -72,13 +73,15 @@ nsHTMLCSSStyleSheet::ElementRulesMatching(nsPresContext* aPresContext,
 
   rule = aElement->GetSMILOverrideStyleRule();
   if (rule) {
-    if (aPresContext->IsProcessingRestyles() &&
-        !aPresContext->IsProcessingAnimationStyleChange()) {
+    RestyleManager* restyleManager = aPresContext->RestyleManager();
+    if (restyleManager->SkipAnimationRules()) {
       // Non-animation restyle -- don't process SMIL override style, because we
       // don't want SMIL animation to trigger new CSS transitions. Instead,
       // request an Animation restyle, so we still get noticed.
-      aPresContext->PresShell()->RestyleForAnimation(aElement,
-        eRestyle_StyleAttribute | eRestyle_ChangeAnimationPhase);
+      if (restyleManager->PostAnimationRestyles()) {
+        aPresContext->PresShell()->RestyleForAnimation(aElement,
+          eRestyle_StyleAttribute | eRestyle_ChangeAnimationPhase);
+      }
     } else {
       // Animation restyle (or non-restyle traversal of rules)
       // Now we can walk SMIL overrride style, without triggering transitions.
diff --git a/layout/style/nsStyleSet.cpp b/layout/style/nsStyleSet.cpp
index 05e34267430..2bb27c50bfa 100644
--- a/layout/style/nsStyleSet.cpp
+++ b/layout/style/nsStyleSet.cpp
@@ -37,6 +37,7 @@
 #include "nsCSSRules.h"
 #include "nsPrintfCString.h"
 #include "nsIFrame.h"
+#include "RestyleManager.h"
 
 using namespace mozilla;
 using namespace mozilla::dom;
@@ -1367,6 +1368,7 @@ nsStyleSet::RuleNodeWithReplacement(Element* aElement,
                                     uint32_t(aReplacements)).get());
 
   bool skipAnimationRules = false;
+  bool postAnimationRestyles = false;
 
   // If we're changing animation phase, we have to reconsider what rules
   // are in these four levels.
@@ -1376,9 +1378,9 @@ nsStyleSet::RuleNodeWithReplacement(Element* aElement,
                      eRestyle_SVGAttrAnimations |
                      eRestyle_StyleAttribute;
 
-    nsPresContext* presContext = PresContext();
-    skipAnimationRules = presContext->IsProcessingRestyles() &&
-                         !presContext->IsProcessingAnimationStyleChange();
+    RestyleManager* restyleManager = PresContext()->RestyleManager();
+    skipAnimationRules = restyleManager->SkipAnimationRules();
+    postAnimationRestyles = restyleManager->PostAnimationRestyles();
   }
 
   // FIXME (perf): This should probably not rebuild the whole path, but
@@ -1430,7 +1432,9 @@ nsStyleSet::RuleNodeWithReplacement(Element* aElement,
 
           if (collection) {
             if (skipAnimationRules) {
-              collection->PostRestyleForAnimation(presContext);
+              if (postAnimationRestyles) {
+                collection->PostRestyleForAnimation(presContext);
+              }
             } else {
               animationManager->UpdateStyleAndEvents(
                 collection, PresContext()->RefreshDriver()->MostRecentRefresh(),
@@ -1453,7 +1457,9 @@ nsStyleSet::RuleNodeWithReplacement(Element* aElement,
 
           if (collection) {
             if (skipAnimationRules) {
-              collection->PostRestyleForAnimation(presContext);
+              if (postAnimationRestyles) {
+                collection->PostRestyleForAnimation(presContext);
+              }
             } else {
               collection->EnsureStyleRuleFor(
                 presContext->RefreshDriver()->MostRecentRefresh(),
@@ -2080,7 +2086,8 @@ nsStyleSet::GCRuleTrees()
  * rules removed, and post a restyle if needed.
  */
 static inline nsRuleNode*
-SkipAnimationRules(nsRuleNode* aRuleNode, Element* aElementOrPseudoElement)
+SkipAnimationRules(nsRuleNode* aRuleNode, Element* aElementOrPseudoElement,
+                   bool aPostAnimationRestyles)
 {
   nsRuleNode* ruleNode = aRuleNode;
   // The transition rule must be at the top of the cascade.
@@ -2099,7 +2106,7 @@ SkipAnimationRules(nsRuleNode* aRuleNode, Element* aElementOrPseudoElement)
     ruleNode = ReplaceAnimationRule(ruleNode, animationRule, nullptr);
   }
 
-  if (ruleNode != aRuleNode) {
+  if (ruleNode != aRuleNode && aPostAnimationRestyles) {
     NS_ASSERTION(aElementOrPseudoElement,
                  "How can we have transition rules but no element?");
     // Need to do an animation restyle, just like
@@ -2135,14 +2142,15 @@ nsStyleSet::ReparentStyleContext(nsStyleContext* aStyleContext,
 
   // Skip transition rules as needed just like
   // nsTransitionManager::WalkTransitionRule would.
-  bool skipAnimationRules = PresContext()->IsProcessingRestyles() &&
-    !PresContext()->IsProcessingAnimationStyleChange();
+  RestyleManager* restyleManager = PresContext()->RestyleManager();
+  bool skipAnimationRules = restyleManager->SkipAnimationRules();
+  bool postAnimationRestyles = restyleManager->PostAnimationRestyles();
   if (skipAnimationRules) {
     // Make sure that we're not using transition rules or animation rules for
     // our new style context.  If we need them, an animation restyle will
     // provide.
-    ruleNode =
-      SkipAnimationRules(ruleNode, aElementOrPseudoElement);
+    ruleNode = SkipAnimationRules(ruleNode, aElementOrPseudoElement,
+                                  postAnimationRestyles);
   }
 
   nsRuleNode* visitedRuleNode = nullptr;
@@ -2157,7 +2165,8 @@ nsStyleSet::ReparentStyleContext(nsStyleContext* aStyleContext,
      if (skipAnimationRules) {
       // FIXME do something here for animations?
        visitedRuleNode =
-         SkipAnimationRules(visitedRuleNode, aElementOrPseudoElement);
+         SkipAnimationRules(visitedRuleNode, aElementOrPseudoElement,
+                            postAnimationRestyles);
      }
   }
 
diff --git a/layout/style/nsTransitionManager.cpp b/layout/style/nsTransitionManager.cpp
index 5b38f0319ad..e3f2a29c2f1 100644
--- a/layout/style/nsTransitionManager.cpp
+++ b/layout/style/nsTransitionManager.cpp
@@ -640,16 +640,18 @@ nsTransitionManager::WalkTransitionRule(
     return;
   }
 
-  if (aData->mPresContext->IsProcessingRestyles() &&
-      !aData->mPresContext->IsProcessingAnimationStyleChange()) {
+  RestyleManager* restyleManager = aData->mPresContext->RestyleManager();
+  if (restyleManager->SkipAnimationRules()) {
     // If we're processing a normal style change rather than one from
     // animation, don't add the transition rule.  This allows us to
     // compute the new style value rather than having the transition
     // override it, so that we can start transitioning differently.
 
-    // We need to immediately restyle with animation
-    // after doing this.
-    collection->PostRestyleForAnimation(mPresContext);
+    if (restyleManager->PostAnimationRestyles()) {
+      // We need to immediately restyle with animation
+      // after doing this.
+      collection->PostRestyleForAnimation(mPresContext);
+    }
     return;
   }
 

From cd74ed0fc8a0de89edcf08b3643cd783b1ac2d22 Mon Sep 17 00:00:00 2001
From: "L. David Baron" 
Date: Thu, 2 Oct 2014 21:53:24 -0700
Subject: [PATCH 053/146] Bug 1075137 patch 5 - Add new boolean to
 RestyleManager for whether we're currently processing animation restyles. 
 r=birtles

This is just moving one bit of data from the pres context without any
logic change.  But given the other refactoring, it seems to make more
sense here now.
---
 layout/base/RestyleManager.cpp | 5 +++++
 layout/base/RestyleManager.h   | 9 +++++++++
 2 files changed, 14 insertions(+)

diff --git a/layout/base/RestyleManager.cpp b/layout/base/RestyleManager.cpp
index a736aacccfb..940ff5c1fb8 100644
--- a/layout/base/RestyleManager.cpp
+++ b/layout/base/RestyleManager.cpp
@@ -66,6 +66,7 @@ RestyleManager::RestyleManager(nsPresContext* aPresContext)
   , mInStyleRefresh(false)
   , mSkipAnimationRules(false)
   , mPostAnimationRestyles(false)
+  , mIsProcessingAnimationStyleChange(false)
   , mHoverGeneration(0)
   , mRebuildAllExtraHint(nsChangeHint(0))
   , mLastUpdateForThrottledAnimations(aPresContext->RefreshDriver()->
@@ -1575,7 +1576,11 @@ RestyleManager::ProcessPendingRestyles()
   // the running transition so it can check for a new change on the same
   // property, and then posts an immediate animation style change).
   mPresContext->SetProcessingAnimationStyleChange(true);
+  MOZ_ASSERT(!mIsProcessingAnimationStyleChange, "nesting forbidden");
+  mIsProcessingAnimationStyleChange = true;
   mPendingAnimationRestyles.ProcessRestyles();
+  MOZ_ASSERT(mIsProcessingAnimationStyleChange, "nesting forbidden");
+  mIsProcessingAnimationStyleChange = false;
   mPresContext->SetProcessingAnimationStyleChange(false);
 
   mPresContext->SetProcessingRestyles(false);
diff --git a/layout/base/RestyleManager.h b/layout/base/RestyleManager.h
index 6acc770aa2a..960fa4fc04c 100644
--- a/layout/base/RestyleManager.h
+++ b/layout/base/RestyleManager.h
@@ -108,6 +108,12 @@ public:
     return mPostAnimationRestyles;
   }
 
+  // Whether we're currently in the animation phase of restyle
+  // processing (to be eliminated in bug 960465)
+  bool IsProcessingAnimationStyleChange() const {
+    return mIsProcessingAnimationStyleChange;
+  }
+
   /**
    * Reparent the style contexts of this frame subtree.  The parent frame of
    * aFrame must be changed to the new parent before this function is called;
@@ -449,6 +455,9 @@ private:
   // styles associated with animation.  Only true when
   // mSkipAnimationRules is also true.
   bool mPostAnimationRestyles : 1;
+  // Whether we're currently in the animation phase of restyle
+  // processing (to be eliminated in bug 960465)
+  bool mIsProcessingAnimationStyleChange : 1;
 
   uint32_t mHoverGeneration;
   nsChangeHint mRebuildAllExtraHint;

From 25df7c71fc1cdc4320acba7033d382b27a404f9a Mon Sep 17 00:00:00 2001
From: "L. David Baron" 
Date: Thu, 2 Oct 2014 21:53:24 -0700
Subject: [PATCH 054/146] Bug 1075137 patch 6 - Convert remaining
 IsProcessingAnimationStyleChange calls to call the method on the
 RestyleManager.  r=birtles

---
 layout/base/RestyleManager.h         | 5 +++--
 layout/base/RestyleTracker.cpp       | 2 +-
 layout/style/nsAnimationManager.cpp  | 3 ++-
 layout/style/nsTransitionManager.cpp | 4 +++-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/layout/base/RestyleManager.h b/layout/base/RestyleManager.h
index 960fa4fc04c..a28dc7cd368 100644
--- a/layout/base/RestyleManager.h
+++ b/layout/base/RestyleManager.h
@@ -320,7 +320,7 @@ public:
   {
     if (mPresContext) {
       PostRestyleEventCommon(aElement, aRestyleHint, aMinChangeHint,
-                             mPresContext->IsProcessingAnimationStyleChange());
+                             IsProcessingAnimationStyleChange());
     }
   }
 
@@ -397,7 +397,8 @@ public:
    */
   static bool ShouldLogRestyle(nsPresContext* aPresContext) {
     return aPresContext->RestyleLoggingEnabled() &&
-           (!aPresContext->IsProcessingAnimationStyleChange() ||
+           (!aPresContext->RestyleManager()->
+               IsProcessingAnimationStyleChange() ||
             AnimationRestyleLoggingEnabled());
   }
 
diff --git a/layout/base/RestyleTracker.cpp b/layout/base/RestyleTracker.cpp
index 4dd86967c03..dea8b9123bf 100644
--- a/layout/base/RestyleTracker.cpp
+++ b/layout/base/RestyleTracker.cpp
@@ -195,7 +195,7 @@ RestyleTracker::DoProcessRestyles()
 
   LOG_RESTYLE("Processing %d pending %srestyles with %d restyle roots for %s",
               mPendingRestyles.Count(),
-              mRestyleManager->PresContext()->IsProcessingAnimationStyleChange()
+              mRestyleManager->IsProcessingAnimationStyleChange()
                 ? (const char*) "animation " : (const char*) "",
               static_cast<int>(mRestyleRoots.Length()),
               GetDocumentURI(Document()).get());
diff --git a/layout/style/nsAnimationManager.cpp b/layout/style/nsAnimationManager.cpp
index d856107bd72..5d7bec195cc 100644
--- a/layout/style/nsAnimationManager.cpp
+++ b/layout/style/nsAnimationManager.cpp
@@ -226,7 +226,8 @@ nsIStyleRule*
 nsAnimationManager::CheckAnimationRule(nsStyleContext* aStyleContext,
                                        mozilla::dom::Element* aElement)
 {
-  if (!mPresContext->IsProcessingAnimationStyleChange()) {
+  // FIXME (bug 960465): This test should go away.
+  if (!mPresContext->RestyleManager()->IsProcessingAnimationStyleChange()) {
     if (!mPresContext->IsDynamic()) {
       // For print or print preview, ignore animations.
       return nullptr;
diff --git a/layout/style/nsTransitionManager.cpp b/layout/style/nsTransitionManager.cpp
index e3f2a29c2f1..9f01f9441b4 100644
--- a/layout/style/nsTransitionManager.cpp
+++ b/layout/style/nsTransitionManager.cpp
@@ -175,7 +175,9 @@ nsTransitionManager::StyleContextChanged(dom::Element *aElement,
   }
 
 
-  if (aNewStyleContext->PresContext()->IsProcessingAnimationStyleChange()) {
+  // FIXME (bug 960465): This test should go away.
+  if (aNewStyleContext->PresContext()->RestyleManager()->
+        IsProcessingAnimationStyleChange()) {
     return nullptr;
   }
 

From 99b5e8cd964074c038e43b8568c579155c6e0390 Mon Sep 17 00:00:00 2001
From: "L. David Baron" 
Date: Thu, 2 Oct 2014 21:53:24 -0700
Subject: [PATCH 055/146] Bug 1075137 patch 7 - Remove old IsProcessingRestyles
 and IsProcessingAnimationStyleChange booleans.  r=birtles

---
 layout/base/RestyleManager.cpp |  6 ------
 layout/base/nsPresContext.h    | 23 -----------------------
 2 files changed, 29 deletions(-)

diff --git a/layout/base/RestyleManager.cpp b/layout/base/RestyleManager.cpp
index 940ff5c1fb8..2e82f4c386a 100644
--- a/layout/base/RestyleManager.cpp
+++ b/layout/base/RestyleManager.cpp
@@ -1440,7 +1440,6 @@ RestyleManager::RebuildAllStyleData(nsChangeHint aExtraHint)
 #ifdef DEBUG
   mIsProcessingRestyles = true;
 #endif
-  mPresContext->SetProcessingRestyles(true);
 
   // Until we get rid of these phases in bug 960465, we need to skip
   // animation restyles during the non-animation phase, and post
@@ -1464,7 +1463,6 @@ RestyleManager::RebuildAllStyleData(nsChangeHint aExtraHint)
 #ifdef DEBUG
   mIsProcessingRestyles = false;
 #endif
-  mPresContext->SetProcessingRestyles(false);
 
   // Make sure that we process any pending animation restyles from the
   // above style change.  Note that we can *almost* implement the above
@@ -1538,7 +1536,6 @@ RestyleManager::ProcessPendingRestyles()
 #ifdef DEBUG
   mIsProcessingRestyles = true;
 #endif
-  mPresContext->SetProcessingRestyles(true);
 
   // Before we process any restyles, we need to ensure that style
   // resulting from any throttled animations (animations that we're
@@ -1575,15 +1572,12 @@ RestyleManager::ProcessPendingRestyles()
   // mid-transition (since processing the non-animation restyle ignores
   // the running transition so it can check for a new change on the same
   // property, and then posts an immediate animation style change).
-  mPresContext->SetProcessingAnimationStyleChange(true);
   MOZ_ASSERT(!mIsProcessingAnimationStyleChange, "nesting forbidden");
   mIsProcessingAnimationStyleChange = true;
   mPendingAnimationRestyles.ProcessRestyles();
   MOZ_ASSERT(mIsProcessingAnimationStyleChange, "nesting forbidden");
   mIsProcessingAnimationStyleChange = false;
-  mPresContext->SetProcessingAnimationStyleChange(false);
 
-  mPresContext->SetProcessingRestyles(false);
 #ifdef DEBUG
   mIsProcessingRestyles = false;
 #endif
diff --git a/layout/base/nsPresContext.h b/layout/base/nsPresContext.h
index c1273e220e2..20effeb7149 100644
--- a/layout/base/nsPresContext.h
+++ b/layout/base/nsPresContext.h
@@ -902,26 +902,6 @@ public:
     mAllInvalidated = false;
   }
 
-  bool IsProcessingRestyles() const {
-    return mProcessingRestyles;
-  }
-
-  void SetProcessingRestyles(bool aProcessing) {
-    NS_ASSERTION(aProcessing != bool(mProcessingRestyles),
-                 "should never nest");
-    mProcessingRestyles = aProcessing;
-  }
-
-  bool IsProcessingAnimationStyleChange() const {
-    return mProcessingAnimationStyleChange;
-  }
-
-  void SetProcessingAnimationStyleChange(bool aProcessing) {
-    NS_ASSERTION(aProcessing != bool(mProcessingAnimationStyleChange),
-                 "should never nest");
-    mProcessingAnimationStyleChange = aProcessing;
-  }
-
   /**
    * Returns whether there are any pending restyles or reflows.
    */
@@ -1359,9 +1339,6 @@ protected:
 
   unsigned              mIsVisual : 1;
 
-  unsigned              mProcessingRestyles : 1;
-  unsigned              mProcessingAnimationStyleChange : 1;
-
   unsigned              mFireAfterPaintEvents : 1;
 
   unsigned              mIsChrome : 1;

From 2d3efb0d763473d4883b77ec6ee4369aab1b2ded Mon Sep 17 00:00:00 2001
From: Phil Ringnalda 
Date: Thu, 2 Oct 2014 22:20:04 -0700
Subject: [PATCH 056/146] Back out 3 changesets (bug 1017613) for build bustage
 CLOSED TREE

Backed out changeset 77b599edbab4 (bug 1017613)
Backed out changeset f048600cf938 (bug 1017613)
Backed out changeset 083a2fb884ed (bug 1017613)
---
 dom/base/nsGlobalWindow.cpp                   |   9 --
 dom/base/nsGlobalWindow.h                     |   6 -
 dom/bindings/Bindings.conf                    |   8 --
 dom/fetch/Request.cpp                         | 113 ---------------
 dom/fetch/Request.h                           | 106 --------------
 dom/fetch/Response.cpp                        | 134 ------------------
 dom/fetch/Response.h                          | 107 --------------
 dom/fetch/moz.build                           |   4 -
 .../mochitest/general/test_interfaces.html    |   4 -
 dom/webidl/Fetch.webidl                       |  36 -----
 dom/webidl/Request.webidl                     |  38 -----
 dom/webidl/Response.webidl                    |  36 -----
 dom/webidl/Window.webidl                      |   1 -
 dom/webidl/WorkerGlobalScope.webidl           |   1 -
 dom/webidl/moz.build                          |   3 -
 dom/workers/WorkerScope.cpp                   |   8 --
 dom/workers/WorkerScope.h                     |   5 -
 dom/workers/moz.build                         |   1 -
 dom/workers/test/fetch/mochitest.ini          |   5 -
 dom/workers/test/fetch/moz.build              |   7 -
 dom/workers/test/fetch/test_interfaces.html   |  48 -------
 dom/workers/test/fetch/worker_interfaces.js   |  12 --
 22 files changed, 692 deletions(-)
 delete mode 100644 dom/fetch/Request.cpp
 delete mode 100644 dom/fetch/Request.h
 delete mode 100644 dom/fetch/Response.cpp
 delete mode 100644 dom/fetch/Response.h
 delete mode 100644 dom/webidl/Fetch.webidl
 delete mode 100644 dom/webidl/Request.webidl
 delete mode 100644 dom/webidl/Response.webidl
 delete mode 100644 dom/workers/test/fetch/mochitest.ini
 delete mode 100644 dom/workers/test/fetch/moz.build
 delete mode 100644 dom/workers/test/fetch/test_interfaces.html
 delete mode 100644 dom/workers/test/fetch/worker_interfaces.js

diff --git a/dom/base/nsGlobalWindow.cpp b/dom/base/nsGlobalWindow.cpp
index 3ed57bdf821..c676e5f1f30 100644
--- a/dom/base/nsGlobalWindow.cpp
+++ b/dom/base/nsGlobalWindow.cpp
@@ -187,7 +187,6 @@
 #include "mozilla/dom/MessagePort.h"
 #include "mozilla/dom/MessagePortBinding.h"
 #include "mozilla/dom/indexedDB/IDBFactory.h"
-#include "mozilla/dom/Promise.h"
 
 #include "mozilla/dom/StructuredCloneTags.h"
 
@@ -6371,14 +6370,6 @@ nsGlobalWindow::Confirm(const nsAString& aString, bool* aReturn)
   return rv.ErrorCode();
 }
 
-already_AddRefed<Promise>
-nsGlobalWindow::Fetch(const RequestOrScalarValueString& aInput,
-                      const RequestInit& aInit, ErrorResult& aRv)
-{
-  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
-  return nullptr;
-}
-
 void
 nsGlobalWindow::Prompt(const nsAString& aMessage, const nsAString& aInitial,
                        nsAString& aReturn, ErrorResult& aError)
diff --git a/dom/base/nsGlobalWindow.h b/dom/base/nsGlobalWindow.h
index a86310630a8..df3a119def2 100644
--- a/dom/base/nsGlobalWindow.h
+++ b/dom/base/nsGlobalWindow.h
@@ -38,10 +38,8 @@
 #include "nsSize.h"
 #include "mozFlushType.h"
 #include "prclist.h"
-#include "mozilla/dom/RequestBinding.h"
 #include "mozilla/dom/StorageEvent.h"
 #include "mozilla/dom/StorageEventBinding.h"
-#include "mozilla/dom/UnionTypes.h"
 #include "nsFrameMessageManager.h"
 #include "mozilla/LinkedList.h"
 #include "mozilla/TimeStamp.h"
@@ -109,7 +107,6 @@ class MediaQueryList;
 class MozSelfSupport;
 class Navigator;
 class OwningExternalOrWindowProxy;
-class Promise;
 class Selection;
 class SpeechSynthesis;
 class WakeLock;
@@ -853,9 +850,6 @@ public:
   void Alert(mozilla::ErrorResult& aError);
   void Alert(const nsAString& aMessage, mozilla::ErrorResult& aError);
   bool Confirm(const nsAString& aMessage, mozilla::ErrorResult& aError);
-  already_AddRefed<mozilla::dom::Promise> Fetch(const mozilla::dom::RequestOrScalarValueString& aInput,
-                                                const mozilla::dom::RequestInit& aInit,
-                                                mozilla::ErrorResult& aRv);
   void Prompt(const nsAString& aMessage, const nsAString& aInitial,
               nsAString& aReturn, mozilla::ErrorResult& aError);
   void Print(mozilla::ErrorResult& aError);
diff --git a/dom/bindings/Bindings.conf b/dom/bindings/Bindings.conf
index d4265810444..a749bd60ef8 100644
--- a/dom/bindings/Bindings.conf
+++ b/dom/bindings/Bindings.conf
@@ -883,14 +883,6 @@ DOMInterfaces = {
     'nativeType': 'nsDOMCSSRect',
 },
 
-'Request': {
-    'binaryNames': { 'headers': 'headers_' },
-},
-
-'Response': {
-    'binaryNames': { 'headers': 'headers_' },
-},
-
 'RGBColor': {
     'nativeType': 'nsDOMCSSRGBColor',
 },
diff --git a/dom/fetch/Request.cpp b/dom/fetch/Request.cpp
deleted file mode 100644
index cd62210e693..00000000000
--- a/dom/fetch/Request.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "Request.h"
-
-#include "nsDOMString.h"
-#include "nsISupportsImpl.h"
-#include "nsIURI.h"
-#include "nsPIDOMWindow.h"
-
-#include "mozilla/dom/Promise.h"
-
-using namespace mozilla::dom;
-
-NS_IMPL_CYCLE_COLLECTING_ADDREF(Request)
-NS_IMPL_CYCLE_COLLECTING_RELEASE(Request)
-NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(Request, mOwner, mHeaders)
-
-NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(Request)
-  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
-  NS_INTERFACE_MAP_ENTRY(nsISupports)
-NS_INTERFACE_MAP_END
-
-Request::Request(nsISupports* aOwner)
-  : mOwner(aOwner)
-  , mHeaders(new Headers(aOwner))
-{
-  SetIsDOMBinding();
-}
-
-Request::~Request()
-{
-}
-
-/*static*/ already_AddRefed<Request>
-Request::Constructor(const GlobalObject& global,
-                     const RequestOrScalarValueString& aInput,
-                     const RequestInit& aInit, ErrorResult& rv)
-{
-  nsRefPtr<Request> request = new Request(global.GetAsSupports());
-  return request.forget();
-}
-
-already_AddRefed<Request>
-Request::Clone() const
-{
-  nsRefPtr<Request> request = new Request(mOwner);
-  return request.forget();
-}
-
-already_AddRefed<Promise>
-Request::ArrayBuffer(ErrorResult& aRv)
-{
-  nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(GetParentObject());
-  MOZ_ASSERT(global);
-  nsRefPtr<Promise> promise = Promise::Create(global, aRv);
-  if (aRv.Failed()) {
-    return nullptr;
-  }
-
-  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE);
-  return promise.forget();
-}
-
-already_AddRefed<Promise>
-Request::Blob(ErrorResult& aRv)
-{
-  nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(GetParentObject());
-  MOZ_ASSERT(global);
-  nsRefPtr<Promise> promise = Promise::Create(global, aRv);
-  if (aRv.Failed()) {
-    return nullptr;
-  }
-
-  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE);
-  return promise.forget();
-}
-
-already_AddRefed
-Request::Json(ErrorResult& aRv)
-{
-  nsCOMPtr global = do_QueryInterface(GetParentObject());
-  MOZ_ASSERT(global);
-  nsRefPtr promise = Promise::Create(global, aRv);
-  if (aRv.Failed()) {
-    return nullptr;
-  }
-
-  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE);
-  return promise.forget();
-}
-
-already_AddRefed
-Request::Text(ErrorResult& aRv)
-{
-  nsCOMPtr global = do_QueryInterface(GetParentObject());
-  MOZ_ASSERT(global);
-  nsRefPtr promise = Promise::Create(global, aRv);
-  if (aRv.Failed()) {
-    return nullptr;
-  }
-
-  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE);
-  return promise.forget();
-}
-
-bool
-Request::BodyUsed()
-{
-  return false;
-}
diff --git a/dom/fetch/Request.h b/dom/fetch/Request.h
deleted file mode 100644
index 49969531d6f..00000000000
--- a/dom/fetch/Request.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef mozilla_dom_Request_h
-#define mozilla_dom_Request_h
-
-#include "nsISupportsImpl.h"
-#include "nsWrapperCache.h"
-
-#include "mozilla/dom/RequestBinding.h"
-#include "mozilla/dom/UnionTypes.h"
-
-
-class nsPIDOMWindow;
-
-namespace mozilla {
-namespace dom {
-
-class Promise;
-
-class Request MOZ_FINAL : public nsISupports
-                        , public nsWrapperCache
-{
-  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
-  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(Request)
-
-public:
-  Request(nsISupports* aOwner);
-
-  JSObject*
-  WrapObject(JSContext* aCx)
-  {
-    return RequestBinding::Wrap(aCx, this);
-  }
-
-  void
-  GetUrl(DOMString& aUrl) const
-  {
-    aUrl.AsAString() = EmptyString();
-  }
-
-  void
-  GetMethod(nsCString& aMethod) const
-  {
-    aMethod = EmptyCString();
-  }
-
-  RequestMode
-  Mode() const
-  {
-    return RequestMode::Same_origin;
-  }
-
-  RequestCredentials
-  Credentials() const
-  {
-    return RequestCredentials::Omit;
-  }
-
-  void
-  GetReferrer(DOMString& aReferrer) const
-  {
-    aReferrer.AsAString() = EmptyString();
-  }
-
-  Headers* Headers_() const { return mHeaders; }
-
-  static already_AddRefed
-  Constructor(const GlobalObject& aGlobal, const RequestOrScalarValueString& aInput,
-              const RequestInit& aInit, ErrorResult& rv);
-
-  nsISupports* GetParentObject() const
-  {
-    return mOwner;
-  }
-
-  already_AddRefed
-  Clone() const;
-
-  already_AddRefed
-  ArrayBuffer(ErrorResult& aRv);
-
-  already_AddRefed
-  Blob(ErrorResult& aRv);
-
-  already_AddRefed
-  Json(ErrorResult& aRv);
-
-  already_AddRefed
-  Text(ErrorResult& aRv);
-
-  bool
-  BodyUsed();
-private:
-  ~Request();
-
-  nsCOMPtr mOwner;
-  nsRefPtr mHeaders;
-};
-
-} // namespace dom
-} // namespace mozilla
-
-#endif // mozilla_dom_Request_h
diff --git a/dom/fetch/Response.cpp b/dom/fetch/Response.cpp
deleted file mode 100644
index a7b9352486d..00000000000
--- a/dom/fetch/Response.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "Response.h"
-#include "nsDOMString.h"
-#include "nsPIDOMWindow.h"
-#include "nsIURI.h"
-#include "nsISupportsImpl.h"
-
-#include "mozilla/ErrorResult.h"
-
-using namespace mozilla::dom;
-
-NS_IMPL_CYCLE_COLLECTING_ADDREF(Response)
-NS_IMPL_CYCLE_COLLECTING_RELEASE(Response)
-NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(Response, mOwner)
-
-NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(Response)
-  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
-  NS_INTERFACE_MAP_ENTRY(nsISupports)
-NS_INTERFACE_MAP_END
-
-Response::Response(nsISupports* aOwner)
-  : mOwner(aOwner)
-  , mHeaders(new Headers(aOwner))
-{
-  SetIsDOMBinding();
-}
-
-Response::~Response()
-{
-}
-
-/* static */ already_AddRefed
-Response::Error(const GlobalObject& aGlobal)
-{
-  ErrorResult result;
-  ResponseInit init;
-  init.mStatus = 0;
-  Optional body;
-  nsRefPtr r = Response::Constructor(aGlobal, body, init, result);
-  return r.forget();
-}
-
-/* static */ already_AddRefed
-Response::Redirect(const GlobalObject& aGlobal, const nsAString& aUrl,
-                   uint16_t aStatus)
-{
-  ErrorResult result;
-  ResponseInit init;
-  Optional body;
-  nsRefPtr r = Response::Constructor(aGlobal, body, init, result);
-  return r.forget();
-}
-
-/*static*/ already_AddRefed
-Response::Constructor(const GlobalObject& global,
-                      const Optional& aBody,
-                      const ResponseInit& aInit, ErrorResult& rv)
-{
-  nsRefPtr response = new Response(global.GetAsSupports());
-  return response.forget();
-}
-
-already_AddRefed
-Response::Clone()
-{
-  nsRefPtr response = new Response(mOwner);
-  return response.forget();
-}
-
-already_AddRefed
-Response::ArrayBuffer(ErrorResult& aRv)
-{
-  nsCOMPtr global = do_QueryInterface(GetParentObject());
-  MOZ_ASSERT(global);
-  nsRefPtr promise = Promise::Create(global, aRv);
-  if (aRv.Failed()) {
-    return nullptr;
-  }
-
-  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE);
-  return promise.forget();
-}
-
-already_AddRefed
-Response::Blob(ErrorResult& aRv)
-{
-  nsCOMPtr global = do_QueryInterface(GetParentObject());
-  MOZ_ASSERT(global);
-  nsRefPtr promise = Promise::Create(global, aRv);
-  if (aRv.Failed()) {
-    return nullptr;
-  }
-
-  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE);
-  return promise.forget();
-}
-
-already_AddRefed
-Response::Json(ErrorResult& aRv)
-{
-  nsCOMPtr global = do_QueryInterface(GetParentObject());
-  MOZ_ASSERT(global);
-  nsRefPtr promise = Promise::Create(global, aRv);
-  if (aRv.Failed()) {
-    return nullptr;
-  }
-
-  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE);
-  return promise.forget();
-}
-
-already_AddRefed
-Response::Text(ErrorResult& aRv)
-{
-  nsCOMPtr global = do_QueryInterface(GetParentObject());
-  MOZ_ASSERT(global);
-  nsRefPtr promise = Promise::Create(global, aRv);
-  if (aRv.Failed()) {
-    return nullptr;
-  }
-
-  promise->MaybeReject(NS_ERROR_NOT_AVAILABLE);
-  return promise.forget();
-}
-
-bool
-Response::BodyUsed()
-{
-  return false;
-}
diff --git a/dom/fetch/Response.h b/dom/fetch/Response.h
deleted file mode 100644
index 31e3b62b7f2..00000000000
--- a/dom/fetch/Response.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef mozilla_dom_Response_h
-#define mozilla_dom_Response_h
-
-#include "nsWrapperCache.h"
-#include "nsISupportsImpl.h"
-
-#include "mozilla/dom/ResponseBinding.h"
-#include "mozilla/dom/UnionTypes.h"
-
-class nsPIDOMWindow;
-
-namespace mozilla {
-namespace dom {
-
-class Headers;
-
-class Response MOZ_FINAL : public nsISupports
-                         , public nsWrapperCache
-{
-  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
-  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(Response)
-
-public:
-  Response(nsISupports* aOwner);
-
-  JSObject*
-  WrapObject(JSContext* aCx)
-  {
-    return ResponseBinding::Wrap(aCx, this);
-  }
-
-  ResponseType
-  Type() const
-  {
-    return ResponseType::Error;
-  }
-
-  void
-  GetUrl(DOMString& aUrl) const
-  {
-    aUrl.AsAString() = EmptyString();
-  }
-
-  uint16_t
-  Status() const
-  {
-    return 400;
-  }
-
-  void
-  GetStatusText(nsCString& aStatusText) const
-  {
-    aStatusText = EmptyCString();
-  }
-
-  Headers*
-  Headers_() const { return mHeaders; }
-
-  static already_AddRefed
-  Error(const GlobalObject& aGlobal);
-
-  static already_AddRefed
-  Redirect(const GlobalObject& aGlobal, const nsAString& aUrl, uint16_t aStatus);
-
-  static already_AddRefed
-  Constructor(const GlobalObject& aGlobal,
-              const Optional& aBody,
-              const ResponseInit& aInit, ErrorResult& rv);
-
-  nsISupports* GetParentObject() const
-  {
-    return mOwner;
-  }
-
-  already_AddRefed
-  Clone();
-
-  already_AddRefed
-  ArrayBuffer(ErrorResult& aRv);
-
-  already_AddRefed
-  Blob(ErrorResult& aRv);
-
-  already_AddRefed
-  Json(ErrorResult& aRv);
-
-  already_AddRefed
-  Text(ErrorResult& aRv);
-
-  bool
-  BodyUsed();
-private:
-  ~Response();
-
-  nsCOMPtr mOwner;
-  nsRefPtr mHeaders;
-};
-
-} // namespace dom
-} // namespace mozilla
-
-#endif // mozilla_dom_Response_h
diff --git a/dom/fetch/moz.build b/dom/fetch/moz.build
index 7853d318053..df9e145ef48 100644
--- a/dom/fetch/moz.build
+++ b/dom/fetch/moz.build
@@ -6,14 +6,10 @@
 
 EXPORTS.mozilla.dom += [
     'Headers.h',
-    'Request.h',
-    'Response.h',
 ]
 
 UNIFIED_SOURCES += [
     'Headers.cpp',
-    'Request.cpp',
-    'Response.cpp',
 ]
 
 LOCAL_INCLUDES += [
diff --git a/dom/tests/mochitest/general/test_interfaces.html b/dom/tests/mochitest/general/test_interfaces.html
index 914217568d1..d7146a3cabd 100644
--- a/dom/tests/mochitest/general/test_interfaces.html
+++ b/dom/tests/mochitest/general/test_interfaces.html
@@ -862,10 +862,6 @@ var interfaceNamesInGlobalScope =
     "RecordErrorEvent",
 // IMPORTANT: Do not change this list without review from a DOM peer!
     "Rect",
-// IMPORTANT: Do not change this list without review from a DOM peer!
-    {name: "Request", pref: "dom.fetch.enabled"},
-// IMPORTANT: Do not change this list without review from a DOM peer!
-    {name: "Response", pref: "dom.fetch.enabled"},
 // IMPORTANT: Do not change this list without review from a DOM peer!
     "RGBColor",
 // IMPORTANT: Do not change this list without review from a DOM peer!
diff --git a/dom/webidl/Fetch.webidl b/dom/webidl/Fetch.webidl
deleted file mode 100644
index a1c66f3579d..00000000000
--- a/dom/webidl/Fetch.webidl
+++ /dev/null
@@ -1,36 +0,0 @@
-/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this file,
- * You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * The origin of this IDL file is
- * http://fetch.spec.whatwg.org/
- */
-
-typedef object JSON;
-// FIXME(nsm): Bug 1071290: Blobs can't be passed as unions in workers.
-// FIXME(nsm): Bug 739173: FormData is not available in workers.
-// typedef (ArrayBuffer or ArrayBufferView or Blob or FormData or ScalarValueString or URLSearchParams) BodyInit;
-typedef (ArrayBuffer or ArrayBufferView or ScalarValueString or URLSearchParams) BodyInit;
-
-[NoInterfaceObject, Exposed=(Window,Worker)]
-interface Body {
-  readonly attribute boolean bodyUsed;
-  [Throws]
-  Promise arrayBuffer();
-  [Throws]
-  Promise blob();
-  // FIXME(nsm): Bug 739173 FormData is not supported in workers.
-  // Promise formData();
-  [Throws]
-  Promise json();
-  [Throws]
-  Promise text();
-};
-
-[NoInterfaceObject, Exposed=(Window,Worker)]
-interface GlobalFetch {
-  [Throws, Func="mozilla::dom::Headers::PrefEnabled"]
-  Promise fetch(RequestInfo input, optional RequestInit init);
-};
-
diff --git a/dom/webidl/Request.webidl b/dom/webidl/Request.webidl
deleted file mode 100644
index 47edd845ab3..00000000000
--- a/dom/webidl/Request.webidl
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this file,
- * You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * The origin of this IDL file is
- * https://fetch.spec.whatwg.org/#request-class
- */
-
-typedef (Request or ScalarValueString) RequestInfo;
-
-[Constructor(RequestInfo input, optional RequestInit init),
- Exposed=(Window,Worker),
- Func="mozilla::dom::Headers::PrefEnabled"]
-interface Request {
-  readonly attribute ByteString method;
-  readonly attribute ScalarValueString url;
-  readonly attribute Headers headers;
-
-  readonly attribute DOMString referrer;
-  readonly attribute RequestMode mode;
-  readonly attribute RequestCredentials credentials;
-
-  Request clone();
-};
-
-Request implements Body;
-
-dictionary RequestInit {
-  ByteString method;
-  HeadersInit headers;
-  BodyInit body;
-  RequestMode mode;
-  RequestCredentials credentials;
-};
-
-enum RequestMode { "same-origin", "no-cors", "cors" };
-enum RequestCredentials { "omit", "same-origin", "include" };
diff --git a/dom/webidl/Response.webidl b/dom/webidl/Response.webidl
deleted file mode 100644
index f14a1391ee2..00000000000
--- a/dom/webidl/Response.webidl
+++ /dev/null
@@ -1,36 +0,0 @@
-/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this file,
- * You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * The origin of this IDL file is
- * https://fetch.spec.whatwg.org/#response-class
- */
-
-[Constructor(optional BodyInit body, optional ResponseInit init),
- Exposed=(Window,Worker),
- Func="mozilla::dom::Headers::PrefEnabled"]
-interface Response {
-  static Response error();
-  static Response redirect(ScalarValueString url, optional unsigned short status = 302);
-
-  readonly attribute ResponseType type;
-
-  readonly attribute ScalarValueString url;
-  readonly attribute unsigned short status;
-  readonly attribute ByteString statusText;
-  readonly attribute Headers headers;
-
-  Response clone();
-};
-
-Response implements Body;
-
-dictionary ResponseInit {
-  unsigned short status = 200;
-  // WebIDL spec doesn't allow default values for ByteString.
-  ByteString statusText;
-  HeadersInit headers;
-};
-
-enum ResponseType { "basic", "cors", "default", "error", "opaque" };
diff --git a/dom/webidl/Window.webidl b/dom/webidl/Window.webidl
index 2c6e2cc7e8b..7bf30c1de77 100644
--- a/dom/webidl/Window.webidl
+++ b/dom/webidl/Window.webidl
@@ -465,4 +465,3 @@ interface ChromeWindow {
 };
 
 Window implements ChromeWindow;
-Window implements GlobalFetch;
diff --git a/dom/webidl/WorkerGlobalScope.webidl b/dom/webidl/WorkerGlobalScope.webidl
index bbd41aa5ef7..bc0b2135628 100644
--- a/dom/webidl/WorkerGlobalScope.webidl
+++ b/dom/webidl/WorkerGlobalScope.webidl
@@ -38,7 +38,6 @@ partial interface WorkerGlobalScope {
 
 WorkerGlobalScope implements WindowTimers;
 WorkerGlobalScope implements WindowBase64;
-WorkerGlobalScope implements GlobalFetch;
 
 // Not implemented yet: bug 1072107.
 // WorkerGlobalScope implements FontFaceSource;
diff --git a/dom/webidl/moz.build b/dom/webidl/moz.build
index 99c173e605f..e627fdb863f 100644
--- a/dom/webidl/moz.build
+++ b/dom/webidl/moz.build
@@ -124,7 +124,6 @@ WEBIDL_FILES = [
     'EventListener.webidl',
     'EventSource.webidl',
     'EventTarget.webidl',
-    'Fetch.webidl',
     'File.webidl',
     'FileList.webidl',
     'FileMode.webidl',
@@ -327,10 +326,8 @@ WEBIDL_FILES = [
     'RadioNodeList.webidl',
     'Range.webidl',
     'Rect.webidl',
-    'Request.webidl',
     'ResourceStats.webidl',
     'ResourceStatsManager.webidl',
-    'Response.webidl',
     'RGBColor.webidl',
     'RTCConfiguration.webidl',
     'RTCIceCandidate.webidl',
diff --git a/dom/workers/WorkerScope.cpp b/dom/workers/WorkerScope.cpp
index 3e63247261a..df34df4798e 100644
--- a/dom/workers/WorkerScope.cpp
+++ b/dom/workers/WorkerScope.cpp
@@ -303,14 +303,6 @@ WorkerGlobalScope::GetPerformance()
   return mPerformance;
 }
 
-already_AddRefed
-WorkerGlobalScope::Fetch(const RequestOrScalarValueString& aInput,
-                         const RequestInit& aInit, ErrorResult& aRv)
-{
-  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
-  return nullptr;
-}
-
 DedicatedWorkerGlobalScope::DedicatedWorkerGlobalScope(WorkerPrivate* aWorkerPrivate)
 : WorkerGlobalScope(aWorkerPrivate)
 {
diff --git a/dom/workers/WorkerScope.h b/dom/workers/WorkerScope.h
index b522bfe1a23..a683e0670cc 100644
--- a/dom/workers/WorkerScope.h
+++ b/dom/workers/WorkerScope.h
@@ -8,8 +8,6 @@
 
 #include "Workers.h"
 #include "mozilla/DOMEventTargetHelper.h"
-#include "mozilla/dom/RequestBinding.h"
-#include "mozilla/dom/UnionTypes.h"
 
 namespace mozilla {
 namespace dom {
@@ -122,9 +120,6 @@ public:
   Dump(const Optional& aString) const;
 
   Performance* GetPerformance();
-
-  already_AddRefed
-  Fetch(const RequestOrScalarValueString& aInput, const RequestInit& aInit, ErrorResult& aRv);
 };
 
 class DedicatedWorkerGlobalScope MOZ_FINAL : public WorkerGlobalScope
diff --git a/dom/workers/moz.build b/dom/workers/moz.build
index 02f4d08f4bb..9e4952a3ec6 100644
--- a/dom/workers/moz.build
+++ b/dom/workers/moz.build
@@ -86,7 +86,6 @@ TEST_DIRS += [
 ]
 
 MOCHITEST_MANIFESTS += [
-    'test/fetch/mochitest.ini',
     'test/mochitest.ini',
     'test/serviceworkers/mochitest.ini',
 ]
diff --git a/dom/workers/test/fetch/mochitest.ini b/dom/workers/test/fetch/mochitest.ini
deleted file mode 100644
index 0a7a7f84443..00000000000
--- a/dom/workers/test/fetch/mochitest.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[DEFAULT]
-support-files =
-  worker_interfaces.js
-
-[test_interfaces.html]
diff --git a/dom/workers/test/fetch/moz.build b/dom/workers/test/fetch/moz.build
deleted file mode 100644
index 8421b15157a..00000000000
--- a/dom/workers/test/fetch/moz.build
+++ /dev/null
@@ -1,7 +0,0 @@
-# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*-
-# vim: set filetype=python:
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-MOCHITEST_MANIFESTS += ['mochitest.ini']
diff --git a/dom/workers/test/fetch/test_interfaces.html b/dom/workers/test/fetch/test_interfaces.html
deleted file mode 100644
index dbbf21f6d18..00000000000
--- a/dom/workers/test/fetch/test_interfaces.html
+++ /dev/null
@@ -1,48 +0,0 @@
-
-
-
-
-  Bug 1017613 - Test fetch API interfaces
-  
-  
-
-
-

- -

-
-
-
-
-
diff --git a/dom/workers/test/fetch/worker_interfaces.js b/dom/workers/test/fetch/worker_interfaces.js
deleted file mode 100644
index e3c2700fe0e..00000000000
--- a/dom/workers/test/fetch/worker_interfaces.js
+++ /dev/null
@@ -1,12 +0,0 @@
-function ok(a, msg) {
-  dump("OK: " + !!a + "  =>  " + a + " " + msg + "\n");
-  postMessage({type: 'status', status: !!a, msg: a + ": " + msg });
-}
-
-onmessage = function() {
-  ok(typeof Headers === "function", "Headers should be defined");
-  ok(typeof Request === "function", "Request should be defined");
-  ok(typeof Response === "function", "Response should be defined");
-  ok(typeof fetch === "function", "fetch() should be defined");
-  postMessage({ type: 'finish' });
-}

From 0545660103cee37d33bc6692294fc4a5bdfb7112 Mon Sep 17 00:00:00 2001
From: Masayuki Nakano 
Date: Fri, 3 Oct 2014 15:33:47 +0900
Subject: [PATCH 057/146] Bug 975383 part.1 TextComposition should dispatch
 compositionupdate event automatically if text event changes composition
 string r=smaug

---
 dom/base/nsDOMWindowUtils.cpp  |  9 +++++++-
 dom/events/IMEStateManager.cpp |  5 ++++
 dom/events/TextComposition.cpp | 42 +++++++---------------------------
 dom/events/TextComposition.h   |  2 +-
 4 files changed, 22 insertions(+), 36 deletions(-)

diff --git a/dom/base/nsDOMWindowUtils.cpp b/dom/base/nsDOMWindowUtils.cpp
index ec4f0ba0545..3988ceb280e 100644
--- a/dom/base/nsDOMWindowUtils.cpp
+++ b/dom/base/nsDOMWindowUtils.cpp
@@ -2117,7 +2117,14 @@ nsDOMWindowUtils::SendCompositionEvent(const nsAString& aType,
   } else if (aType.EqualsLiteral("compositionend")) {
     msg = NS_COMPOSITION_END;
   } else if (aType.EqualsLiteral("compositionupdate")) {
-    msg = NS_COMPOSITION_UPDATE;
+    // Now we don't support manually dispatching composition update with this
+    // API.  compositionupdate is dispatched when text event modifies
+    // composition string automatically.  For backward compatibility, this
+    // shouldn't return error in this case.
+    NS_WARNING("Don't call nsIDOMWindowUtils.sendCompositionEvent() for "
+               "compositionupdate since it's ignored and the event is "
+               "fired automatically when it's necessary");
+    return NS_OK;
   } else {
     return NS_ERROR_FAILURE;
   }
diff --git a/dom/events/IMEStateManager.cpp b/dom/events/IMEStateManager.cpp
index 6f944ab9d5e..5b41b9b326f 100644
--- a/dom/events/IMEStateManager.cpp
+++ b/dom/events/IMEStateManager.cpp
@@ -908,6 +908,11 @@ IMEStateManager::DispatchCompositionEvent(nsINode* aEventTargetNode,
     return;
   }
 
+  // Temporarily, let's ignore compositionupdate event from widget.
+  if (aEvent->message == NS_COMPOSITION_UPDATE) {
+    return;
+  }
+
   EnsureTextCompositionArray();
 
   WidgetGUIEvent* GUIEvent = aEvent->AsGUIEvent();
diff --git a/dom/events/TextComposition.cpp b/dom/events/TextComposition.cpp
index 9f1911ee5ea..5651f521c7b 100644
--- a/dom/events/TextComposition.cpp
+++ b/dom/events/TextComposition.cpp
@@ -81,16 +81,9 @@ TextComposition::MaybeDispatchCompositionUpdate(const WidgetTextEvent* aEvent)
     aEvent->mFlags.mIsSynthesizedForTests;
 
   nsEventStatus status = nsEventStatus_eConsumeNoDefault;
-  if (aEvent->mFlags.mIsSynthesizedForTests &&
-      (mIsRequestingCommit || mIsRequestingCancel)) {
-    // At emulating commit/cancel request, compositionupdate should be
-    // dispatched via widget since it's more similar path to native event.
-    aEvent->widget->DispatchEvent(&compositionUpdate, status);
-  } else {
-    mLastData = compositionUpdate.data;
-    EventDispatcher::Dispatch(mNode, mPresContext,
-                              &compositionUpdate, nullptr, &status, nullptr);
-  }
+  mLastData = compositionUpdate.data;
+  EventDispatcher::Dispatch(mNode, mPresContext,
+                            &compositionUpdate, nullptr, &status, nullptr);
   return !Destroyed();
 }
 
@@ -152,7 +145,6 @@ TextComposition::DispatchEvent(WidgetGUIEvent* aEvent,
   if (!aIsSynthesized && (mIsRequestingCommit || mIsRequestingCancel)) {
     nsString* committingData = nullptr;
     switch (aEvent->message) {
-      case NS_COMPOSITION_UPDATE:
       case NS_COMPOSITION_END:
         committingData = &aEvent->AsCompositionEvent()->data;
         break;
@@ -171,26 +163,13 @@ TextComposition::DispatchEvent(WidgetGUIEvent* aEvent,
       } else if (mIsRequestingCancel && !committingData->IsEmpty()) {
         committingData->Truncate();
       }
-
-      if (aEvent->message == NS_COMPOSITION_UPDATE) {
-        // If committing string is not different from the last data,
-        // we don't need to dispatch this.
-        if (committingData->Equals(mLastData)) {
-          return;
-        }
-      } else if (aEvent->message == NS_TEXT_TEXT) {
-        // If committing string is different from the last data,
-        // we need to dispatch compositionupdate before dispatching text event.
-        if (!MaybeDispatchCompositionUpdate(aEvent->AsTextEvent())) {
-          NS_WARNING("Dispatching compositionupdate caused destroying");
-          return;
-        }
-      }
     }
   }
 
-  if (aEvent->message == NS_COMPOSITION_UPDATE) {
-    mLastData = aEvent->AsCompositionEvent()->data;
+  if (aEvent->message == NS_TEXT_TEXT) {
+    if (!MaybeDispatchCompositionUpdate(aEvent->AsTextEvent())) {
+      return;
+    }
   }
 
   EventDispatcher::Dispatch(mNode, mPresContext,
@@ -341,15 +320,11 @@ TextComposition::RequestToCommit(nsIWidget* aWidget, bool aDiscard)
 
   // Otherwise, synthesize the commit in content.
   nsAutoString data(aDiscard ? EmptyString() : lastData);
-  bool changingData = lastData != data;
-  if (changingData) {
-    DispatchCompositionEventRunnable(NS_COMPOSITION_UPDATE, data, true);
-  }
   // If the last composition string and new data are different, we need to
   // dispatch text event for removing IME selection.  However, if the commit
   // string is empty string and it's not changed from the last data, we don't
   // need to dispatch text event.
-  if (changingData || !data.IsEmpty()) {
+  if (lastData != data || !data.IsEmpty()) {
     DispatchCompositionEventRunnable(NS_TEXT_TEXT, data, true);
   }
   DispatchCompositionEventRunnable(NS_COMPOSITION_END, data, true);
@@ -468,7 +443,6 @@ TextComposition::CompositionEventDispatcher::Run()
                                                 mIsSynthesizedEvent);
       break;
     }
-    case NS_COMPOSITION_UPDATE:
     case NS_COMPOSITION_END: {
       WidgetCompositionEvent compEvent(true, mEventMessage, widget);
       compEvent.data = mData;
diff --git a/dom/events/TextComposition.h b/dom/events/TextComposition.h
index 911810de212..db0d847ffe6 100644
--- a/dom/events/TextComposition.h
+++ b/dom/events/TextComposition.h
@@ -314,7 +314,7 @@ private:
    *
    * @param aEventMessage       Must be one of composition event or text event.
    * @param aData               Used for data value if aEventMessage is
-   *                            NS_COMPOSITION_UPDATE or NS_COMPOSITION_END.
+   *                            NS_COMPOSITION_END.
    *                            Used for theText value if aEventMessage is
    *                            NS_TEXT_TEXT.
    * @param aIsSynthesizingCommit   true if this is called for synthesizing

From 50bfc725672a82c577d448bbc79958e9249ca83f Mon Sep 17 00:00:00 2001
From: Masayuki Nakano 
Date: Fri, 3 Oct 2014 15:33:48 +0900
Subject: [PATCH 058/146] Bug 975383 part.2 Remove compositionupdate
 dispatchers in nsWindow of Android r=nchen

---
 widget/android/nsWindow.cpp | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/widget/android/nsWindow.cpp b/widget/android/nsWindow.cpp
index d112b2df655..7be17db0152 100644
--- a/widget/android/nsWindow.cpp
+++ b/widget/android/nsWindow.cpp
@@ -1844,12 +1844,6 @@ nsWindow::OnIMEEvent(AndroidGeckoEvent *ae)
                 }
             }
 
-            {
-                WidgetCompositionEvent event(true, NS_COMPOSITION_UPDATE, this);
-                InitEvent(event, nullptr);
-                event.data = ae->Characters();
-                DispatchEvent(&event);
-            }
             {
                 WidgetTextEvent event(true, NS_TEXT_TEXT, this);
                 InitEvent(event, nullptr);
@@ -2010,15 +2004,6 @@ nsWindow::OnIMEEvent(AndroidGeckoEvent *ae)
                 event.theText = mIMEComposingText;
             }
 
-            {
-                WidgetCompositionEvent compositionUpdate(true,
-                                                         NS_COMPOSITION_UPDATE,
-                                                         this);
-                InitEvent(compositionUpdate, nullptr);
-                compositionUpdate.data = event.theText;
-                DispatchEvent(&compositionUpdate);
-            }
-
 #ifdef DEBUG_ANDROID_IME
             const NS_ConvertUTF16toUTF8 theText8(event.theText);
             const char* text = theText8.get();
@@ -2101,11 +2086,6 @@ nsWindow::NotifyIME(const IMENotification& aIMENotification)
             if (mIMEComposing) {
                 nsRefPtr kungFuDeathGrip(this);
 
-                WidgetCompositionEvent updateEvent(true, NS_COMPOSITION_UPDATE,
-                                                   this);
-                InitEvent(updateEvent, nullptr);
-                DispatchEvent(&updateEvent);
-
                 WidgetTextEvent textEvent(true, NS_TEXT_TEXT, this);
                 InitEvent(textEvent, nullptr);
                 DispatchEvent(&textEvent);

From 5c2ec48f6d4f1bf2d8723cfe9d4b24f5106bb31c Mon Sep 17 00:00:00 2001
From: Masayuki Nakano 
Date: Fri, 3 Oct 2014 15:33:48 +0900
Subject: [PATCH 059/146] Bug 975383 part.3 Remove compositionupdate
 dispatchers in TextInputHandler of Cocoa r=smichaud

---
 widget/cocoa/TextInputHandler.mm | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/widget/cocoa/TextInputHandler.mm b/widget/cocoa/TextInputHandler.mm
index 41b486c92be..bb7b295e2fa 100644
--- a/widget/cocoa/TextInputHandler.mm
+++ b/widget/cocoa/TextInputHandler.mm
@@ -2722,25 +2722,7 @@ IMEInputHandler::DispatchTextEvent(const nsString& aText,
   if (!aDoCommit) {
     textEvent.mRanges = CreateTextRangeArray(aAttrString, aSelectedRange);
   }
-
-  if (textEvent.theText != mLastDispatchedCompositionString) {
-    WidgetCompositionEvent compositionUpdate(true, NS_COMPOSITION_UPDATE,
-                                             mWidget);
-    compositionUpdate.time = textEvent.time;
-    compositionUpdate.data = textEvent.theText;
-    mLastDispatchedCompositionString = textEvent.theText;
-    DispatchEvent(compositionUpdate);
-    if (mIsInFocusProcessing || Destroyed()) {
-      PR_LOG(gLog, PR_LOG_ALWAYS,
-        ("%p IMEInputHandler::DispatchTextEvent, compositionupdate causes "
-         "aborting the composition, mIsInFocusProcessing=%s, Destryoed()=%s",
-         this, TrueOrFalse(mIsInFocusProcessing), TrueOrFalse(Destroyed())));
-      if (Destroyed()) {
-        return true;
-      }
-    }
-  }
-
+  mLastDispatchedCompositionString = textEvent.theText;
   return DispatchEvent(textEvent);
 }
 

From fd4d807e050f89faffc3269480259e89c6e75d5d Mon Sep 17 00:00:00 2001
From: Masayuki Nakano 
Date: Fri, 3 Oct 2014 15:33:48 +0900
Subject: [PATCH 060/146] Bug 975383 part.4 Remove compositionupdate
 dispatchers in nsGtkIMModule of GTK r=karlt

---
 widget/gtk/nsGtkIMModule.cpp | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/widget/gtk/nsGtkIMModule.cpp b/widget/gtk/nsGtkIMModule.cpp
index b810c714725..3334e9380f3 100644
--- a/widget/gtk/nsGtkIMModule.cpp
+++ b/widget/gtk/nsGtkIMModule.cpp
@@ -1092,21 +1092,6 @@ nsGtkIMModule::DispatchTextEvent(const nsAString &aCompositionString,
     nsEventStatus status;
     nsRefPtr lastFocusedWindow = mLastFocusedWindow;
 
-    if (aCompositionString != mDispatchedCompositionString) {
-      WidgetCompositionEvent compositionUpdate(true, NS_COMPOSITION_UPDATE,
-                                               mLastFocusedWindow);
-      InitEvent(compositionUpdate);
-      compositionUpdate.data = aCompositionString;
-      mDispatchedCompositionString = aCompositionString;
-      mLastFocusedWindow->DispatchEvent(&compositionUpdate, status);
-      if (lastFocusedWindow->IsDestroyed() ||
-          lastFocusedWindow != mLastFocusedWindow) {
-          PR_LOG(gGtkIMLog, PR_LOG_ALWAYS,
-              ("    NOTE, the focused widget was destroyed/changed by compositionupdate"));
-          return false;
-      }
-    }
-
     // Store the selected string which will be removed by following text event.
     if (mCompositionState == eCompositionState_CompositionStartDispatched) {
         // XXX We should assume, for now, any web applications don't change
@@ -1126,6 +1111,8 @@ nsGtkIMModule::DispatchTextEvent(const nsAString &aCompositionString,
 
     uint32_t targetOffset = mCompositionStart;
 
+    textEvent.theText = mDispatchedCompositionString = aCompositionString;
+
     if (!aIsCommit) {
         // NOTE: SetTextRangeList() assumes that mDispatchedCompositionString
         //       has been updated already.
@@ -1133,8 +1120,6 @@ nsGtkIMModule::DispatchTextEvent(const nsAString &aCompositionString,
         targetOffset += textEvent.mRanges->TargetClauseOffset();
     }
 
-    textEvent.theText = mDispatchedCompositionString.get();
-
     mCompositionState = aIsCommit ?
         eCompositionState_CommitTextEventDispatched :
         eCompositionState_TextEventDispatched;

From 71ce4ede13f760646f333f0d9ed8f0ad19181fcc Mon Sep 17 00:00:00 2001
From: Masayuki Nakano 
Date: Fri, 3 Oct 2014 15:33:49 +0900
Subject: [PATCH 061/146] Bug 975383 part.5 Remove compositionupdate
 dispatchers in nsIMM32Handler of Windows r=emk

---
 widget/windows/nsIMM32Handler.cpp | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/widget/windows/nsIMM32Handler.cpp b/widget/windows/nsIMM32Handler.cpp
index 2fd5ac35499..ca4b8001262 100644
--- a/widget/windows/nsIMM32Handler.cpp
+++ b/widget/windows/nsIMM32Handler.cpp
@@ -1590,21 +1590,6 @@ nsIMM32Handler::DispatchTextEvent(nsWindow* aWindow,
 
   nsIntPoint point(0, 0);
 
-  if (mCompositionString != mLastDispatchedCompositionString) {
-    WidgetCompositionEvent compositionUpdate(true, NS_COMPOSITION_UPDATE,
-                                             aWindow);
-    aWindow->InitEvent(compositionUpdate, &point);
-    compositionUpdate.data = mCompositionString;
-    mLastDispatchedCompositionString = mCompositionString;
-
-    aWindow->DispatchWindowEvent(&compositionUpdate);
-
-    if (!mIsComposing || aWindow->Destroyed()) {
-      return;
-    }
-    SetIMERelatedWindowsPos(aWindow, aIMEContext);
-  }
-
   WidgetTextEvent event(true, NS_TEXT_TEXT, aWindow);
 
   aWindow->InitEvent(event, &point);
@@ -1613,7 +1598,7 @@ nsIMM32Handler::DispatchTextEvent(nsWindow* aWindow,
     event.mRanges = CreateTextRangeArray();
   }
 
-  event.theText = mCompositionString.get();
+  event.theText = mLastDispatchedCompositionString = mCompositionString;
 
   aWindow->DispatchWindowEvent(&event);
 

From aceb703f2a458b70c72c08c876bb573e395caf3a Mon Sep 17 00:00:00 2001
From: Masayuki Nakano 
Date: Fri, 3 Oct 2014 15:33:49 +0900
Subject: [PATCH 062/146] Bug 975383 part.6 Remove compositionupdate
 dispatchers in nsTextStore of Windows r=emk

---
 widget/windows/nsTextStore.cpp | 42 +++-------------------------------
 widget/windows/nsTextStore.h   |  4 ----
 2 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/widget/windows/nsTextStore.cpp b/widget/windows/nsTextStore.cpp
index c3806b0316a..7edf0e34fec 100644
--- a/widget/windows/nsTextStore.cpp
+++ b/widget/windows/nsTextStore.cpp
@@ -1578,8 +1578,6 @@ nsTextStore::FlushPendingActions()
                 "mSelectionLength=%d }",
                 this, action.mSelectionStart, action.mSelectionLength));
 
-        MOZ_ASSERT(mComposition.mLastData.IsEmpty());
-
         if (action.mAdjustSelection) {
           // Select composition range so the new composition replaces the range
           WidgetSelectionEvent selectionSet(true, NS_SELECTION_SET, mWidget);
@@ -1649,29 +1647,12 @@ nsTextStore::FlushPendingActions()
         action.mData.ReplaceSubstring(NS_LITERAL_STRING("\r\n"),
                                       NS_LITERAL_STRING("\n"));
 
-        if (action.mData != mComposition.mLastData) {
-          PR_LOG(sTextStoreLog, PR_LOG_DEBUG,
-                 ("TSF: 0x%p   nsTextStore::FlushPendingActions(), "
-                  "dispatching compositionupdate event...", this));
-          WidgetCompositionEvent compositionUpdate(true, NS_COMPOSITION_UPDATE,
-                                                   mWidget);
-          mWidget->InitEvent(compositionUpdate);
-          compositionUpdate.data = action.mData;
-          mComposition.mLastData = compositionUpdate.data;
-          mWidget->DispatchWindowEvent(&compositionUpdate);
-          if (!mWidget || mWidget->Destroyed()) {
-            break;
-          }
-        }
-
-        MOZ_ASSERT(action.mData == mComposition.mLastData);
-
         PR_LOG(sTextStoreLog, PR_LOG_DEBUG,
                ("TSF: 0x%p   nsTextStore::FlushPendingActions(), "
                 "dispatching text event...", this));
         WidgetTextEvent textEvent(true, NS_TEXT_TEXT, mWidget);
         mWidget->InitEvent(textEvent);
-        textEvent.theText = mComposition.mLastData;
+        textEvent.theText = action.mData;
         if (action.mRanges->IsEmpty()) {
           TextRange wholeRange;
           wholeRange.mStartOffset = 0;
@@ -1692,29 +1673,13 @@ nsTextStore::FlushPendingActions()
 
         action.mData.ReplaceSubstring(NS_LITERAL_STRING("\r\n"),
                                       NS_LITERAL_STRING("\n"));
-        if (action.mData != mComposition.mLastData) {
-          PR_LOG(sTextStoreLog, PR_LOG_DEBUG,
-                 ("TSF: 0x%p   nsTextStore::FlushPendingActions(), "
-                  "dispatching compositionupdate event...", this));
-          WidgetCompositionEvent compositionUpdate(true, NS_COMPOSITION_UPDATE,
-                                                   mWidget);
-          mWidget->InitEvent(compositionUpdate);
-          compositionUpdate.data = action.mData;
-          mComposition.mLastData = compositionUpdate.data;
-          mWidget->DispatchWindowEvent(&compositionUpdate);
-          if (!mWidget || mWidget->Destroyed()) {
-            break;
-          }
-        }
-
-        MOZ_ASSERT(action.mData == mComposition.mLastData);
 
         PR_LOG(sTextStoreLog, PR_LOG_DEBUG,
                ("TSF: 0x%p   nsTextStore::FlushPendingActions(), "
                 "dispatching text event...", this));
         WidgetTextEvent textEvent(true, NS_TEXT_TEXT, mWidget);
         mWidget->InitEvent(textEvent);
-        textEvent.theText = mComposition.mLastData;
+        textEvent.theText = action.mData;
         mWidget->DispatchWindowEvent(&textEvent);
         if (!mWidget || mWidget->Destroyed()) {
           break;
@@ -1725,13 +1690,12 @@ nsTextStore::FlushPendingActions()
                 "dispatching compositionend event...", this));
         WidgetCompositionEvent compositionEnd(true, NS_COMPOSITION_END,
                                               mWidget);
-        compositionEnd.data = mComposition.mLastData;
+        compositionEnd.data = textEvent.theText;
         mWidget->InitEvent(compositionEnd);
         mWidget->DispatchWindowEvent(&compositionEnd);
         if (!mWidget || mWidget->Destroyed()) {
           break;
         }
-        mComposition.mLastData.Truncate();
         break;
       }
       case PendingAction::SELECTION_SET: {
diff --git a/widget/windows/nsTextStore.h b/widget/windows/nsTextStore.h
index 1538941b008..627ca026c72 100644
--- a/widget/windows/nsTextStore.h
+++ b/widget/windows/nsTextStore.h
@@ -327,10 +327,6 @@ protected:
     // inconsistencies/artifacts.
     nsString mString;
 
-    // The latest composition string which was dispatched by composition update
-    // event.
-    nsString mLastData;
-
     // The start of the current active composition, in ACP offsets
     LONG mStart;
 

From aa14aa64a179bef3fb8c3b10ff3cf07ffb550209 Mon Sep 17 00:00:00 2001
From: Masayuki Nakano 
Date: Fri, 3 Oct 2014 15:33:49 +0900
Subject: [PATCH 063/146] Bug 975383 part.7 Remove compositionupdate
 dispatchers in forms.js of B2G r=yxl

---
 dom/inputmethod/forms.js | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/dom/inputmethod/forms.js b/dom/inputmethod/forms.js
index 2377cf1d5d7..3de291d469b 100644
--- a/dom/inputmethod/forms.js
+++ b/dom/inputmethod/forms.js
@@ -1180,7 +1180,6 @@ function replaceSurroundingText(element, text, selectionStart, selectionEnd,
 
 let CompositionManager =  {
   _isStarted: false,
-  _text: '',
   _clauseAttrMap: {
     'raw-input':
       Ci.nsICompositionStringSynthesizer.ATTR_RAWINPUT,
@@ -1233,14 +1232,9 @@ let CompositionManager =  {
     if (!this._isStarted) {
       this._isStarted = true;
       domWindowUtils.sendCompositionEvent('compositionstart', '', '');
-      this._text = '';
     }
 
     // Update the composing text.
-    if (this._text !== text) {
-      this._text = text;
-      domWindowUtils.sendCompositionEvent('compositionupdate', text, '');
-    }
     let compositionString = domWindowUtils.createCompositionStringSynthesizer();
     compositionString.setString(text);
     for (var i = 0; i < clauseLens.length; i++) {
@@ -1257,9 +1251,6 @@ let CompositionManager =  {
       return;
     }
     // Update the composing text.
-    if (this._text !== text) {
-      domWindowUtils.sendCompositionEvent('compositionupdate', text, '');
-    }
     let compositionString = domWindowUtils.createCompositionStringSynthesizer();
     compositionString.setString(text);
     // Set the cursor position to |text.length| so that the text will be
@@ -1267,7 +1258,6 @@ let CompositionManager =  {
     compositionString.setCaret(text.length, 0);
     compositionString.dispatchEvent();
     domWindowUtils.sendCompositionEvent('compositionend', text, '');
-    this._text = '';
     this._isStarted = false;
   },
 
@@ -1277,7 +1267,6 @@ let CompositionManager =  {
       return;
     }
 
-    this._text = '';
     this._isStarted = false;
   }
 };

From b7e19da023260d2a287af876e0d624e6b8638a85 Mon Sep 17 00:00:00 2001
From: Masayuki Nakano 
Date: Fri, 3 Oct 2014 15:33:49 +0900
Subject: [PATCH 064/146] Bug 975383 part.8 Remove unnecessary handler of
 compositionupdate r=smaug

---
 dom/events/EventStateManager.cpp | 1 -
 dom/events/IMEStateManager.cpp   | 6 ++----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/dom/events/EventStateManager.cpp b/dom/events/EventStateManager.cpp
index d9d03bb0e1f..dc5e5b98d7b 100644
--- a/dom/events/EventStateManager.cpp
+++ b/dom/events/EventStateManager.cpp
@@ -820,7 +820,6 @@ EventStateManager::PreHandleEvent(nsPresContext* aPresContext,
       compositionEvent->data = selectedText.mReply.mString;
     }
     // through to compositionend handling
-  case NS_COMPOSITION_UPDATE:
   case NS_COMPOSITION_END:
     {
       WidgetCompositionEvent* compositionEvent = aEvent->AsCompositionEvent();
diff --git a/dom/events/IMEStateManager.cpp b/dom/events/IMEStateManager.cpp
index 5b41b9b326f..1f99df00bac 100644
--- a/dom/events/IMEStateManager.cpp
+++ b/dom/events/IMEStateManager.cpp
@@ -908,10 +908,8 @@ IMEStateManager::DispatchCompositionEvent(nsINode* aEventTargetNode,
     return;
   }
 
-  // Temporarily, let's ignore compositionupdate event from widget.
-  if (aEvent->message == NS_COMPOSITION_UPDATE) {
-    return;
-  }
+  MOZ_ASSERT(aEvent->message != NS_COMPOSITION_UPDATE,
+             "compositionupdate event shouldn't be dispatched manually");
 
   EnsureTextCompositionArray();
 

From a06e8de5c81c15e24235f8001483abe125f6b551 Mon Sep 17 00:00:00 2001
From: Masayuki Nakano 
Date: Fri, 3 Oct 2014 15:33:50 +0900
Subject: [PATCH 065/146] Bug 975383 part.9 Remove compositionupdate event
 dispatchers from all tests r=smaug

---
 editor/libeditor/tests/test_bug1026397.html   |  1 -
 editor/libeditor/tests/test_bug697842.html    |  2 -
 editor/libeditor/tests/test_bug795785.html    |  2 -
 ...t_contenteditable_text_input_handling.html |  1 -
 layout/base/tests/bug613807-1.html            |  3 -
 .../Harness_sanity/test_sanityEventUtils.html |  2 +-
 .../mochitest/tests/SimpleTest/EventUtils.js  |  3 +-
 .../file_autocomplete_with_composition.js     | 47 +++------
 .../content/tests/chrome/findbar_window.xul   |  1 -
 widget/tests/test_assign_event_data.html      |  7 --
 widget/tests/test_imestate.html               |  3 -
 .../test_input_events_on_deactive_window.xul  |  1 -
 .../window_composition_text_querycontent.xul  | 99 -------------------
 13 files changed, 15 insertions(+), 157 deletions(-)

diff --git a/editor/libeditor/tests/test_bug1026397.html b/editor/libeditor/tests/test_bug1026397.html
index 5ac95d3d891..35261128844 100644
--- a/editor/libeditor/tests/test_bug1026397.html
+++ b/editor/libeditor/tests/test_bug1026397.html
@@ -47,7 +47,6 @@ function runTests()
     }
 
     synthesizeComposition({ type: "compositionstart" });
-    synthesizeComposition({ type: "compositionupdate", data: aInsertString });
     synthesizeText(
       { "composition":
         { "string": aInsertString,
diff --git a/editor/libeditor/tests/test_bug697842.html b/editor/libeditor/tests/test_bug697842.html
index 9a6700b3969..ac1ee8f59e6 100644
--- a/editor/libeditor/tests/test_bug697842.html
+++ b/editor/libeditor/tests/test_bug697842.html
@@ -53,7 +53,6 @@ function runTests()
 
     // input first character
     composingString = "\u306B";
-    synthesizeComposition({ type: "compositionupdate", data: composingString });
     synthesizeText(
       { "composition":
         { "string": composingString,
@@ -67,7 +66,6 @@ function runTests()
 
     // input second character
     composingString = "\u306B\u3085";
-    synthesizeComposition({ type: "compositionupdate", data: composingString });
     synthesizeText(
       { "composition":
         { "string": composingString,
diff --git a/editor/libeditor/tests/test_bug795785.html b/editor/libeditor/tests/test_bug795785.html
index aedffa76278..1b311a8b09e 100644
--- a/editor/libeditor/tests/test_bug795785.html
+++ b/editor/libeditor/tests/test_bug795785.html
@@ -122,7 +122,6 @@ function doCompositionTest(aElement, aElementDescription, aCallback)
               "\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u3092\u3088\u308a" +
               "\u77ed\u6642\u9593\u3067\u7c21\u5358\u306b\u4f5c\u6210\u3067" +
               "\u304d\u307e\u3059\u3002";
-    synthesizeComposition({ type: "compositionupdate", data: str });
     synthesizeText({
         composition: {
           string: str,
@@ -135,7 +134,6 @@ function doCompositionTest(aElement, aElementDescription, aCallback)
     hitEventLoop(function () {
       isnot(aElement.scrollTop, 0,
             aElementDescription + " was not scrolled by composition");
-      synthesizeComposition({ type: "compositionupdate", data: "" });
       synthesizeText({
         composition: { string: "", clauses: [ { length: 0, attr: 0 } ] },
         caret: { start: 0, length: 0 }
diff --git a/editor/libeditor/tests/test_contenteditable_text_input_handling.html b/editor/libeditor/tests/test_contenteditable_text_input_handling.html
index 79f9553cc9b..31c9054bbe9 100644
--- a/editor/libeditor/tests/test_contenteditable_text_input_handling.html
+++ b/editor/libeditor/tests/test_contenteditable_text_input_handling.html
@@ -222,7 +222,6 @@ function runTests()
     // start composition
     synthesizeComposition({ type: "compositionstart" });
     // input first character
-    synthesizeComposition({ type: "compositionupdate", data: "\u3089" });
     synthesizeText(
       { "composition":
         { "string": "\u3089",
diff --git a/layout/base/tests/bug613807-1.html b/layout/base/tests/bug613807-1.html
index d56634238d5..08f516f890b 100644
--- a/layout/base/tests/bug613807-1.html
+++ b/layout/base/tests/bug613807-1.html
@@ -50,7 +50,6 @@
     synthesizeComposition({ type: "compositionstart" });
 
     // input raw characters
-    synthesizeComposition({ type: "compositionupdate", data: "\u306D" });
     synthesizeText(
       { composition:
         { string: "\u306D",
@@ -60,7 +59,6 @@
         },
         caret: { start: 1, length: 0 }
       });
-    synthesizeComposition({ type: "compositionupdate", data: "\u306D\u3053" });
     synthesizeText(
       { composition:
         { string: "\u306D\u3053",
@@ -72,7 +70,6 @@
       });
 
     // convert
-    synthesizeComposition({ type: "compositionupdate", data: "\u732B" });
     synthesizeText(
       { composition:
         { string: "\u732B",
diff --git a/testing/mochitest/tests/Harness_sanity/test_sanityEventUtils.html b/testing/mochitest/tests/Harness_sanity/test_sanityEventUtils.html
index 69261a3f6bb..76e32d9d774 100644
--- a/testing/mochitest/tests/Harness_sanity/test_sanityEventUtils.html
+++ b/testing/mochitest/tests/Harness_sanity/test_sanityEventUtils.html
@@ -124,7 +124,7 @@ function starttest() {
       check = false;
       window.addEventListener("compositionupdate", function() { check = true; }, false);
       synthesizeComposition({ type: "compositionupdate", data: "a" });
-      is(check, true, 'synthesizeComposition() should dispatch compositionupdate');
+      is(check, false, 'synthesizeComposition() should not dispatch compositionupdate without error');
 
       check = false;
       window.addEventListener("text", function() { check = true; }, false);
diff --git a/testing/mochitest/tests/SimpleTest/EventUtils.js b/testing/mochitest/tests/SimpleTest/EventUtils.js
index de8148a38d4..6412cd6d10a 100644
--- a/testing/mochitest/tests/SimpleTest/EventUtils.js
+++ b/testing/mochitest/tests/SimpleTest/EventUtils.js
@@ -870,8 +870,7 @@ const COMPOSITION_ATTR_SELECTEDCONVERTEDTEXT = 0x05;
  *
  * @param aEvent               The composition event information.  This must
  *                             have |type| member.  The value must be
- *                             "compositionstart", "compositionend" or
- *                             "compositionupdate".
+ *                             "compositionstart" or "compositionend".
  *                             And also this may have |data| and |locale| which
  *                             would be used for the value of each property of
  *                             the composition event.  Note that the data would
diff --git a/toolkit/content/tests/chrome/file_autocomplete_with_composition.js b/toolkit/content/tests/chrome/file_autocomplete_with_composition.js
index ea3deab3279..d8e67ebe719 100644
--- a/toolkit/content/tests/chrome/file_autocomplete_with_composition.js
+++ b/toolkit/content/tests/chrome/file_autocomplete_with_composition.js
@@ -88,7 +88,6 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
       execute: function (aWindow) {
         synthesizeKey("m", { type: "keydown", shiftKey: true }, aWindow);
         synthesizeComposition({ type: "compositionstart" }, aWindow);
-        synthesizeComposition({ type: "compositionupdate", data: "M" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "M",
@@ -101,10 +100,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "M", searchString: ""
     },
-    { description: "compositionupdate shouldn't open the popup",
+    { description: "modifying composition string shouldn't open the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "Mo" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "Mo",
@@ -141,7 +139,6 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
       execute: function (aWindow) {
         synthesizeKey("z", { type: "keydown" }, aWindow);
         synthesizeComposition({ type: "compositionstart" }, aWindow);
-        synthesizeComposition({ type: "compositionupdate", data: "z" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "z",
@@ -154,10 +151,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "Moz", searchString: "Mo"
     },
-    { description: "compositionupdate shouldn't reopen the popup",
+    { description: "modifying composition string shouldn't reopen the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "zi" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "zi",
@@ -193,7 +189,6 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
       execute: function (aWindow) {
         synthesizeKey("l", { type: "keydown" }, aWindow);
         synthesizeComposition({ type: "compositionstart" }, aWindow);
-        synthesizeComposition({ type: "compositionupdate", data: "l" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "l",
@@ -206,10 +201,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "Mozil", searchString: "Mozi"
     },
-    { description: "compositionupdate shouldn't reopen the popup",
+    { description: "modifying composition string shouldn't reopen the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "ll" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "ll",
@@ -222,10 +216,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "Mozill", searchString: "Mozi"
     },
-    { description: "empty compositionupdate shouldn't reopen the popup",
+    { description: "modifying composition string to empty string shouldn't reopen the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "",
@@ -264,7 +257,6 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
         synthesizeKey("VK_LEFT", { shiftKey: true }, aWindow);
         synthesizeKey("z", { type: "keydown" }, aWindow);
         synthesizeComposition({ type: "compositionstart" }, aWindow);
-        synthesizeComposition({ type: "compositionupdate", data: "z" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "z",
@@ -277,10 +269,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "Moz", searchString: "Mozi"
     },
-    { description: "compositionupdate shouldn't reopen the popup",
+    { description: "modifying composition string shouldn't reopen the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "zi" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "zi",
@@ -293,10 +284,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "Mozi", searchString: "Mozi"
     },
-    { description: "empty compositionupdate shouldn't reopen the popup",
+    { description: "modifying composition string to empty string shouldn't reopen the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "",
@@ -340,7 +330,6 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
       execute: function (aWindow) {
         synthesizeKey("m", { type: "keydown", shiftKey: true }, aWindow);
         synthesizeComposition({ type: "compositionstart" }, aWindow);
-        synthesizeComposition({ type: "compositionupdate", data: "M" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "M",
@@ -353,10 +342,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "M", searchString: ""
     },
-    { description: "compositionupdate shouldn't open the popup",
+    { description: "modifying composition string shouldn't open the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "Mo" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "Mo",
@@ -369,10 +357,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "Mo", searchString: ""
     },
-    { description: "empty compositionupdate shouldn't open the popup",
+    { description: "modifying composition string to empty string shouldn't open the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "",
@@ -416,7 +403,6 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
       execute: function (aWindow) {
         synthesizeKey("m", { type: "keydown", shiftKey: true }, aWindow);
         synthesizeComposition({ type: "compositionstart" }, aWindow);
-        synthesizeComposition({ type: "compositionupdate", data: "M" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "M",
@@ -429,10 +415,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "M", searchString: ""
     },
-    { description: "compositionupdate shouldn't open the popup",
+    { description: "modifying composition string shouldn't open the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "Mo" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "Mo",
@@ -445,10 +430,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "Mo", searchString: ""
     },
-    { description: "empty compositionupdate shouldn't open the popup",
+    { description: "modifying composition string to empty string shouldn't open the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "",
@@ -495,7 +479,6 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
       execute: function (aWindow) {
         synthesizeKey("z", { type: "keydown", shiftKey: true }, aWindow);
         synthesizeComposition({ type: "compositionstart" }, aWindow);
-        synthesizeComposition({ type: "compositionupdate", data: "z" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "z",
@@ -508,10 +491,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "Moz", searchString: "Mo"
     },
-    { description: "compositionupdate shouldn't open the popup",
+    { description: "modifying composition string shouldn't open the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "zi" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "zi",
@@ -524,10 +506,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "Mozi", searchString: "Mo"
     },
-    { description: "empty compositionupdate shouldn't open the popup",
+    { description: "modifying composition string to empty string shouldn't open the popup",
       completeDefaultIndex: false,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "",
@@ -571,7 +552,6 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
       execute: function (aWindow) {
         synthesizeKey("m", { type: "keydown", shiftKey: true }, aWindow);
         synthesizeComposition({ type: "compositionstart" }, aWindow);
-        synthesizeComposition({ type: "compositionupdate", data: "M" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "M",
@@ -584,10 +564,9 @@ nsDoTestsForAutoCompleteWithComposition.prototype = {
           }, aWindow);
       }, popup: false, value: "M", searchString: ""
     },
-    { description: "compositionupdate shouldn't open the popup (completeDefaultIndex is true)",
+    { description: "modifying composition string shouldn't open the popup (completeDefaultIndex is true)",
       completeDefaultIndex: true,
       execute: function (aWindow) {
-        synthesizeComposition({ type: "compositionupdate", data: "Mo" }, aWindow);
         synthesizeText(
           { "composition":
             { "string": "Mo",
diff --git a/toolkit/content/tests/chrome/findbar_window.xul b/toolkit/content/tests/chrome/findbar_window.xul
index c28db00b226..6ef189cc00a 100644
--- a/toolkit/content/tests/chrome/findbar_window.xul
+++ b/toolkit/content/tests/chrome/findbar_window.xul
@@ -235,7 +235,6 @@
       var searchStr = "text";
 
       synthesizeComposition({ type: "compositionstart" });
-      synthesizeComposition({ type: "compositionupdate", data: searchStr });
       synthesizeText(
         { "composition":
           { "string": searchStr,
diff --git a/widget/tests/test_assign_event_data.html b/widget/tests/test_assign_event_data.html
index 303797f75a4..d723be5ffa9 100644
--- a/widget/tests/test_assign_event_data.html
+++ b/widget/tests/test_assign_event_data.html
@@ -176,7 +176,6 @@ const kTests = [
       document.getElementById(this.targetID).focus();
       synthesizeKey("a", { type: "keydown" });
       synthesizeComposition({ type: "compositionstart" });
-      synthesizeComposition({ type: "compositionupdate", data: "\u306D" });
       synthesizeText({ "composition":
         { "string": "\u306D",
           "clauses":
@@ -187,7 +186,6 @@ const kTests = [
         "caret": { "start": 1, "length": 0 }
       });
       synthesizeKey("a", { type: "keyup" });
-      synthesizeComposition({ type: "compositionupdate", data: "\u306D" });
       synthesizeText({ "composition":
         { "string": "\u306D",
           "clauses":
@@ -210,7 +208,6 @@ const kTests = [
       document.getElementById(this.targetID).value = "";
       document.getElementById(this.targetID).focus();
       synthesizeComposition({ type: "compositionstart" });
-      synthesizeComposition({ type: "compositionupdate", data: "\u306D" });
       synthesizeText({ "composition":
         { "string": "\u306D",
           "clauses":
@@ -221,7 +218,6 @@ const kTests = [
         "caret": { "start": 1, "length": 0 }
       });
       synthesizeKey("VK_RETURN", { type: "keydown" });
-      synthesizeComposition({ type: "compositionupdate", data: "\u306D" });
       synthesizeText({ "composition":
         { "string": "\u306D",
           "clauses":
@@ -292,7 +288,6 @@ const kTests = [
       document.getElementById(this.targetID).value = "";
       document.getElementById(this.targetID).focus();
       synthesizeComposition({ type: "compositionstart" });
-      synthesizeComposition({ type: "compositionupdate", data: "\u306D" });
       synthesizeText({ "composition":
         { "string": "\u306D",
           "clauses":
@@ -315,7 +310,6 @@ const kTests = [
       document.getElementById(this.targetID).value = "";
       document.getElementById(this.targetID).focus();
       synthesizeComposition({ type: "compositionstart" });
-      synthesizeComposition({ type: "compositionupdate", data: "\u30E9\u30FC\u30E1\u30F3" });
       synthesizeText({ "composition":
         { "string": "\u30E9\u30FC\u30E1\u30F3",
           "clauses":
@@ -351,7 +345,6 @@ const kTests = [
       document.getElementById(this.targetID).value = "";
       document.getElementById(this.targetID).focus();
       synthesizeComposition({ type: "compositionstart" });
-      synthesizeComposition({ type: "compositionupdate", data: "\u30E9\u30FC\u30E1\u30F3" });
       synthesizeText({ "composition":
         { "string": "\u30E9\u30FC\u30E1\u30F3",
           "clauses":
diff --git a/widget/tests/test_imestate.html b/widget/tests/test_imestate.html
index e0a9ff74f45..cdb63e92bbf 100644
--- a/widget/tests/test_imestate.html
+++ b/widget/tests/test_imestate.html
@@ -1236,8 +1236,6 @@ function runEditorFlagChangeTests()
   synthesizeComposition({ type: "compositionstart" });
 
   // input characters
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3078\u3093\u3057\u3093" });
   synthesizeText(
     { "composition":
       { "string": "\u3078\u3093\u3057\u3093",
@@ -1268,7 +1266,6 @@ function runEditorFlagChangeTests()
      description + "#3 IME isn't enabled on HTML editor");
 
   // cancel the composition
-  synthesizeComposition({ type: "compositionupdate", data: "" });
   synthesizeText(
     { "composition":
       { "string": "",
diff --git a/widget/tests/test_input_events_on_deactive_window.xul b/widget/tests/test_input_events_on_deactive_window.xul
index c314240a2a2..d7e3f4c7812 100644
--- a/widget/tests/test_input_events_on_deactive_window.xul
+++ b/widget/tests/test_input_events_on_deactive_window.xul
@@ -159,7 +159,6 @@ function startTests()
     checkCompositionEvents(true, false, false, false, "compositionstart");
     clear();
     // input first character
-    synthesizeComposition({ type: "compositionupdate", data: "\u3089" });
     synthesizeText(
       { "composition":
         { "string": "\u3089",
diff --git a/widget/tests/window_composition_text_querycontent.xul b/widget/tests/window_composition_text_querycontent.xul
index a30383c735b..3dbbaa7ba56 100644
--- a/widget/tests/window_composition_text_querycontent.xul
+++ b/widget/tests/window_composition_text_querycontent.xul
@@ -194,7 +194,6 @@ function runUndoRedoTest()
   synthesizeComposition({ type: "compositionstart" });
 
   // input raw characters
-  synthesizeComposition({ type: "compositionupdate", data: "\u306D" });
   synthesizeText(
     { "composition":
       { "string": "\u306D",
@@ -206,7 +205,6 @@ function runUndoRedoTest()
       "caret": { "start": 1, "length": 0 }
     });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306D\u3053" });
   synthesizeText(
     { "composition":
       { "string": "\u306D\u3053",
@@ -219,7 +217,6 @@ function runUndoRedoTest()
     });
 
   // convert
-  synthesizeComposition({ type: "compositionupdate", data: "\u732B" });
   synthesizeText(
     { "composition":
       { "string": "\u732B",
@@ -251,7 +248,6 @@ function runUndoRedoTest()
   synthesizeComposition({ type: "compositionstart" });
 
   // input raw characters
-  synthesizeComposition({ type: "compositionupdate", data: "\u307E" });
   synthesizeText(
     { "composition":
       { "string": "\u307E",
@@ -264,7 +260,6 @@ function runUndoRedoTest()
     });
 
   // cancel the composition
-  synthesizeComposition({ type: "compositionupdate", data: "" });
   synthesizeText(
     { "composition":
       { "string": "",
@@ -283,7 +278,6 @@ function runUndoRedoTest()
   synthesizeComposition({ type: "compositionstart" });
 
   // input raw characters
-  synthesizeComposition({ type: "compositionupdate", data: "\u3080" });
   synthesizeText(
     { "composition":
       { "string": "\u3080",
@@ -295,7 +289,6 @@ function runUndoRedoTest()
       "caret": { "start": 1, "length": 0 }
     });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u3080\u3059" });
   synthesizeText(
     { "composition":
       { "string": "\u3080\u3059",
@@ -307,8 +300,6 @@ function runUndoRedoTest()
       "caret": { "start": 2, "length": 0 }
     });
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3080\u3059\u3081" });
   synthesizeText(
     { "composition":
       { "string": "\u3080\u3059\u3081",
@@ -321,7 +312,6 @@ function runUndoRedoTest()
     });
 
   // convert
-  synthesizeComposition({ type: "compositionupdate", data: "\u5A18" });
   synthesizeText(
     { "composition":
       { "string": "\u5A18",
@@ -386,7 +376,6 @@ function runUndoRedoTest()
   synthesizeComposition({ type: "compositionstart" });
 
   // input raw characters
-  synthesizeComposition({ type: "compositionupdate", data: "\u3088" });
   synthesizeText(
     { "composition":
       { "string": "\u3088",
@@ -398,7 +387,6 @@ function runUndoRedoTest()
       "caret": { "start": 1, "length": 0 }
     });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u3088\u3046" });
   synthesizeText(
     { "composition":
       { "string": "\u3088\u3046",
@@ -410,8 +398,6 @@ function runUndoRedoTest()
       "caret": { "start": 2, "length": 0 }
     });
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3088\u3046\u304b" });
   synthesizeText(
     { "composition":
       { "string": "\u3088\u3046\u304b",
@@ -423,8 +409,6 @@ function runUndoRedoTest()
       "caret": { "start": 3, "length": 0 }
     });
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3088\u3046\u304b\u3044" });
   synthesizeText(
     { "composition":
       { "string": "\u3088\u3046\u304b\u3044",
@@ -437,7 +421,6 @@ function runUndoRedoTest()
     });
 
   // convert
-  synthesizeComposition({ type: "compositionupdate", data: "\u5996\u602a" });
   synthesizeText(
     { "composition":
       { "string": "\u5996\u602a",
@@ -648,7 +631,6 @@ function runCompositionTest()
   synthesizeComposition({ type: "compositionstart" });
 
   // input first character
-  synthesizeComposition({ type: "compositionupdate", data: "\u3089" });
   synthesizeText(
     { "composition":
       { "string": "\u3089",
@@ -673,7 +655,6 @@ function runCompositionTest()
   caretRects[1] = caretRect;
 
   // input second character
-  synthesizeComposition({ type: "compositionupdate", data: "\u3089\u30FC" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC",
@@ -707,8 +688,6 @@ function runCompositionTest()
      "runCompositionTest: caret width is wrong (#1-2)");
 
   // input third character
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081",
@@ -809,8 +788,6 @@ function runCompositionTest()
   is(caretRect.height, caretRects[1].height,
      "runCompositionTest: caret rects are different (#1-3-2, height)");
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093",
@@ -829,8 +806,6 @@ function runCompositionTest()
 
 
   // backspace
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081",
@@ -848,8 +823,6 @@ function runCompositionTest()
   }
 
   // re-input
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093",
@@ -866,8 +839,6 @@ function runCompositionTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093\u3055" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093\u3055",
@@ -884,8 +855,6 @@ function runCompositionTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093\u3055\u3044" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093\u3055\u3044",
@@ -902,8 +871,6 @@ function runCompositionTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093\u3055\u3044\u3053" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093\u3055\u3044\u3053",
@@ -920,8 +887,6 @@ function runCompositionTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093\u3055\u3044\u3053\u3046" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093\u3055\u3044\u3053\u3046",
@@ -940,8 +905,6 @@ function runCompositionTest()
   }
 
   // convert
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u30E9\u30FC\u30E1\u30F3\u6700\u9AD8" });
   synthesizeText(
     { "composition":
       { "string": "\u30E9\u30FC\u30E1\u30F3\u6700\u9AD8",
@@ -984,8 +947,6 @@ function runCompositionTest()
   }
 
   // reset clauses
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u30E9\u30FC\u30E1\u30F3\u3055\u884C\u3053\u3046" });
   synthesizeText(
     { "composition":
       { "string": "\u30E9\u30FC\u30E1\u30F3\u3055\u884C\u3053\u3046",
@@ -1054,7 +1015,6 @@ function runCompositionTest()
   synthesizeComposition({ type: "compositionstart" });
 
   // input characters
-  synthesizeComposition({ type: "compositionupdate", data: "\u3057" });
   synthesizeText(
     { "composition":
       { "string": "\u3057",
@@ -1072,7 +1032,6 @@ function runCompositionTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u3058" });
   synthesizeText(
     { "composition":
       { "string": "\u3058",
@@ -1090,7 +1049,6 @@ function runCompositionTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u3058\u3087" });
   synthesizeText(
     { "composition":
       { "string": "\u3058\u3087",
@@ -1108,8 +1066,6 @@ function runCompositionTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3058\u3087\u3046" });
   synthesizeText(
     { "composition":
       { "string": "\u3058\u3087\u3046",
@@ -1158,7 +1114,6 @@ function runCompositionTest()
   // start composition with selection
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u304A" });
   synthesizeText(
     { "composition":
       { "string": "\u304A",
@@ -1177,7 +1132,6 @@ function runCompositionTest()
   }
 
   // remove the composition string
-  synthesizeComposition({ type: "compositionupdate", data: "" });
   synthesizeText(
     { "composition":
       { "string": "",
@@ -1196,7 +1150,6 @@ function runCompositionTest()
   }
 
   // re-input the composition string
-  synthesizeComposition({ type: "compositionupdate", data: "\u3046" });
   synthesizeText(
     { "composition":
       { "string": "\u3046",
@@ -1215,7 +1168,6 @@ function runCompositionTest()
   }
 
   // cancel the composition
-  synthesizeComposition({ type: "compositionupdate", data: "" });
   synthesizeText(
     { "composition":
       { "string": "",
@@ -1274,7 +1226,6 @@ function runCompositionTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u6700" });
   synthesizeText(
     { "composition":
       { "string": "\u6700",
@@ -1367,7 +1318,6 @@ function runCompositionTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u9AD8" });
   synthesizeText(
     { "composition":
       { "string": "\u9AD8",
@@ -1404,7 +1354,6 @@ function runCompositionTest()
   // twice at canceling composition.
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u6700" });
   synthesizeText(
     { "composition":
       { "string": "\u6700",
@@ -1422,7 +1371,6 @@ function runCompositionTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate", data: "" });
   synthesizeText(
     { "composition":
       { "string": "",
@@ -1596,7 +1544,6 @@ function runCompositionEventTest()
   initResults();
 
   synthesizeComposition({ type: "compositionstart" });
-  synthesizeComposition({ type: "compositionupdate", data: "\u3089" });
   synthesizeText(
     { "composition":
       { "string": "\u3089",
@@ -1648,7 +1595,6 @@ function runCompositionEventTest()
   is(inputEventData["input"], "\u3089",
      kDescription + "value of input element wasn't modified (input) #1");
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u3089\u30FC" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC",
@@ -1745,7 +1691,6 @@ function runCompositionEventTest()
 
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u3089" });
   synthesizeText(
     { "composition":
       { "string": "\u3089",
@@ -1826,7 +1771,6 @@ function runCompositionEventTest()
 
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306D" });
   synthesizeText(
     { "composition":
       { "string": "\u306D",
@@ -1909,7 +1853,6 @@ function runCompositionEventTest()
 
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" });
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2295,7 +2238,6 @@ function runForceCommitTest()
   events = [];
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" });
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2349,7 +2291,6 @@ function runForceCommitTest()
 
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" });
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2395,7 +2336,6 @@ function runForceCommitTest()
 
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" });
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2438,7 +2378,6 @@ function runForceCommitTest()
 
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" });
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2485,7 +2424,6 @@ function runForceCommitTest()
 
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" });
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2532,7 +2470,6 @@ function runForceCommitTest()
 
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" });
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2580,7 +2517,6 @@ function runForceCommitTest()
 
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" });
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2627,7 +2563,6 @@ function runForceCommitTest()
 
   synthesizeComposition({ type: "compositionstart" }, iframe2.contentWindow);
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" }, iframe2.contentWindow);
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2672,7 +2607,6 @@ function runForceCommitTest()
 
   synthesizeComposition({ type: "compositionstart" }, iframe2.contentWindow);
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" }, iframe2.contentWindow);
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2722,7 +2656,6 @@ function runForceCommitTest()
 
   synthesizeComposition({ type: "compositionstart" }, iframe2.contentWindow);
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" }, iframe2.contentWindow);
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2826,7 +2759,6 @@ function runIsComposingTest()
   synthesizeComposition({ type: "compositionstart" });
   expectedIsComposing = true;
   description = "events after dispatching compositionstart";
-  synthesizeComposition({ type: "compositionupdate", data: "\u3042" });
   synthesizeText(
     { "composition":
       { "string": "\u3042",
@@ -2888,7 +2820,6 @@ function runRemoveContentTest(aCallback)
 
   synthesizeComposition({ type: "compositionstart" });
 
-  synthesizeComposition({ type: "compositionupdate", data: "\u306E" });
   synthesizeText(
     { "composition":
       { "string": "\u306E",
@@ -2988,8 +2919,6 @@ function runTestOnAnotherContext(aPanelOrFrame, aFocusedEditor, aTestName)
   synthesizeComposition({ type: "compositionstart" });
 
   // input characters
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3078\u3093\u3057\u3093" });
   synthesizeText(
     { "composition":
       { "string": "\u3078\u3093\u3057\u3093",
@@ -3007,7 +2936,6 @@ function runTestOnAnotherContext(aPanelOrFrame, aFocusedEditor, aTestName)
   }
 
   // convert them #1
-  synthesizeComposition({ type: "compositionupdate", data: "\u8FD4\u4FE1" });
   synthesizeText(
     { "composition":
       { "string": "\u8FD4\u4FE1",
@@ -3026,7 +2954,6 @@ function runTestOnAnotherContext(aPanelOrFrame, aFocusedEditor, aTestName)
   }
 
   // convert them #2
-  synthesizeComposition({ type: "compositionupdate", data: "\u5909\u8EAB" });
   synthesizeText(
     { "composition":
       { "string": "\u5909\u8EAB",
@@ -3145,7 +3072,6 @@ function runMaxLengthTest()
   synthesizeComposition({ type: "compositionstart" });
 
   // input first character
-  synthesizeComposition({ type: "compositionupdate", data: "\u3089" });
   synthesizeText(
     { "composition":
       { "string": "\u3089",
@@ -3163,7 +3089,6 @@ function runMaxLengthTest()
   }
 
   // input second character
-  synthesizeComposition({ type: "compositionupdate", data: "\u3089\u30FC" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC",
@@ -3181,8 +3106,6 @@ function runMaxLengthTest()
   }
 
   // input third character
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081",
@@ -3200,8 +3123,6 @@ function runMaxLengthTest()
   }
 
   // input fourth character
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093",
@@ -3220,8 +3141,6 @@ function runMaxLengthTest()
 
 
   // backspace
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081",
@@ -3239,8 +3158,6 @@ function runMaxLengthTest()
   }
 
   // re-input
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093",
@@ -3257,8 +3174,6 @@ function runMaxLengthTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093\u3055" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093\u3055",
@@ -3275,8 +3190,6 @@ function runMaxLengthTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093\u3055\u3044" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093\u3055\u3044",
@@ -3293,8 +3206,6 @@ function runMaxLengthTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093\u3055\u3044\u3053" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093\u3055\u3044\u3053",
@@ -3312,8 +3223,6 @@ function runMaxLengthTest()
     return;
   }
 
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u3089\u30FC\u3081\u3093\u3055\u3044\u3053\u3046" });
   synthesizeText(
     { "composition":
       { "string": "\u3089\u30FC\u3081\u3093\u3055\u3044\u3053\u3046",
@@ -3332,8 +3241,6 @@ function runMaxLengthTest()
   }
 
   // convert
-  synthesizeComposition({ type: "compositionupdate",
-                          data: "\u30E9\u30FC\u30E1\u30F3\u6700\u9AD8" });
   synthesizeText(
     { "composition":
       { "string": "\u30E9\u30FC\u30E1\u30F3\u6700\u9AD8",
@@ -3377,7 +3284,6 @@ function runMaxLengthTest()
   synthesizeComposition({ type: "compositionstart" });
 
   // input characters
-  synthesizeComposition({ type: "compositionupdate", data: "\u3057" });
   synthesizeText(
     { "composition":
       { "string": "\u3057",
@@ -3395,7 +3301,6 @@ function runMaxLengthTest()
   }
 
   // commit the composition string
-  synthesizeComposition({ type: "compositionupdate", data: "\u3058" });
   synthesizeText(
     { "composition":
       { "string": "\u3058",
@@ -3453,7 +3358,6 @@ function runMaxLengthTest()
   synthesizeComposition({ type: "compositionstart" });
 
   // input characters
-  synthesizeComposition({ type: "compositionupdate", data: "\u9B54" });
   synthesizeText(
     { "composition":
       { "string": "\u9B54",
@@ -3471,7 +3375,6 @@ function runMaxLengthTest()
   }
 
   // commit the composition string
-  synthesizeComposition({ type: "compositionupdate", data: "\u9B54" });
   synthesizeText(
     { "composition":
       { "string": "\u9B54",
@@ -3496,7 +3399,6 @@ function runMaxLengthTest()
   synthesizeComposition({ type: "compositionstart" });
 
   // input characters
-  synthesizeComposition({ type: "compositionupdate", data: "\u9B54\u6CD5" });
   synthesizeText(
     { "composition":
       { "string": "\u9B54\u6CD5",
@@ -3514,7 +3416,6 @@ function runMaxLengthTest()
   }
 
   // commit the composition string
-  synthesizeComposition({ type: "compositionupdate", data: "\u9B54\u6CD5" });
   synthesizeText(
     { "composition":
       { "string": "\u9B54\u6CD5",

From 0bacef12a1fcbc8a5dc3224fbe5128aa5b55579a Mon Sep 17 00:00:00 2001
From: Christoph Kerschbaumer 
Date: Thu, 2 Oct 2014 23:34:30 -0700
Subject: [PATCH 066/146] Bug 1041180: Remove deprecated nsIChannelPolicy
 (r=sstamm,jduell,jst)

---
 content/base/src/EventSource.cpp              | 15 +-----
 content/base/src/ImportManager.cpp            | 11 -----
 content/base/src/moz.build                    |  1 -
 content/base/src/nsCSPContext.cpp             |  3 --
 content/base/src/nsCSPService.cpp             |  5 --
 content/base/src/nsChannelPolicy.cpp          | 46 ------------------
 content/base/src/nsChannelPolicy.h            | 37 --------------
 content/base/src/nsContentUtils.cpp           | 17 -------
 content/base/src/nsCrossSiteListenerProxy.cpp |  2 -
 content/base/src/nsDocument.cpp               |  1 -
 content/base/src/nsObjectLoadingContent.cpp   | 12 -----
 content/base/src/nsScriptLoader.cpp           | 15 ------
 content/base/src/nsSyncLoadService.cpp        |  1 -
 content/base/src/nsXMLHttpRequest.cpp         | 15 ------
 content/html/content/src/HTMLMediaElement.cpp | 15 ------
 content/html/content/src/HTMLTrackElement.cpp | 16 -------
 content/html/document/src/nsHTMLDocument.cpp  |  1 -
 content/media/MediaResource.cpp               |  2 -
 content/xul/document/src/XULDocument.cpp      |  1 -
 docshell/base/nsDocShell.cpp                  | 22 ---------
 dom/base/Navigator.cpp                        | 18 +------
 dom/plugins/base/nsPluginHost.cpp             |  1 -
 .../base/nsPluginStreamListenerPeer.cpp       |  1 -
 dom/workers/ScriptLoader.cpp                  | 21 --------
 dom/xbl/nsXBLService.cpp                      |  1 -
 dom/xml/XMLDocument.cpp                       |  1 -
 dom/xslt/base/txURIUtils.cpp                  |  1 -
 embedding/browser/nsContextMenuInfo.cpp       | 20 ++------
 .../webbrowserpersist/nsWebBrowserPersist.cpp |  1 -
 .../pref/autoconfig/src/nsAutoConfig.cpp      |  1 -
 image/public/imgILoader.idl                   |  4 +-
 image/src/imgLoader.cpp                       | 48 +++++++++----------
 image/src/imgLoader.h                         |  4 --
 image/test/unit/async_load_tests.js           |  6 +--
 image/test/unit/test_private_channel.js       |  2 +-
 js/xpconnect/loader/mozJSSubScriptLoader.cpp  |  1 -
 layout/build/nsLayoutModule.cpp               |  5 --
 layout/generic/nsImageFrame.cpp               |  1 -
 layout/style/FontFaceSet.cpp                  | 26 +---------
 layout/style/Loader.cpp                       | 12 -----
 modules/libjar/nsJARChannel.cpp               |  1 -
 netwerk/base/public/moz.build                 |  3 --
 netwerk/base/public/nsChannelProperties.h     | 35 --------------
 netwerk/base/public/nsIChannelPolicy.idl      | 29 -----------
 netwerk/base/public/nsNetStrings.h            | 24 ----------
 netwerk/base/public/nsNetUtil.h               | 20 --------
 netwerk/base/src/moz.build                    |  1 -
 netwerk/base/src/nsIncrementalDownload.cpp    |  1 -
 netwerk/base/src/nsNetStrings.cpp             | 14 ------
 netwerk/build/nsNetModule.cpp                 |  9 +---
 netwerk/protocol/ftp/FTPChannelParent.cpp     |  1 -
 netwerk/protocol/http/HttpChannelParent.cpp   |  1 -
 .../protocol/wyciwyg/WyciwygChannelParent.cpp |  1 -
 netwerk/test/TestPageLoad.cpp                 |  2 -
 netwerk/test/TestProtocols.cpp                |  2 -
 .../downloads/nsDownloadManager.cpp           |  1 -
 .../nsUrlClassifierStreamUpdater.cpp          |  1 -
 .../exthandler/nsExternalHelperAppService.cpp |  1 -
 uriloader/prefetch/nsOfflineCacheUpdate.cpp   |  2 -
 uriloader/prefetch/nsPrefetchService.cpp      |  1 -
 widget/cocoa/OSXNotificationCenter.mm         |  2 +-
 widget/cocoa/nsMenuItemIconX.mm               |  6 +--
 widget/windows/nsDataObj.cpp                  |  1 -
 .../directory/nsDirectoryViewer.cpp           |  1 -
 64 files changed, 37 insertions(+), 536 deletions(-)
 delete mode 100644 content/base/src/nsChannelPolicy.cpp
 delete mode 100644 content/base/src/nsChannelPolicy.h
 delete mode 100644 netwerk/base/public/nsChannelProperties.h
 delete mode 100644 netwerk/base/public/nsIChannelPolicy.idl
 delete mode 100644 netwerk/base/public/nsNetStrings.h
 delete mode 100644 netwerk/base/src/nsNetStrings.cpp

diff --git a/content/base/src/EventSource.cpp b/content/base/src/EventSource.cpp
index a9138c5982c..54212e88976 100644
--- a/content/base/src/EventSource.cpp
+++ b/content/base/src/EventSource.cpp
@@ -28,7 +28,6 @@
 #include "nsIAsyncVerifyRedirectCallback.h"
 #include "nsIScriptError.h"
 #include "mozilla/dom/EncodingUtils.h"
-#include "nsIChannelPolicy.h"
 #include "nsIContentSecurityPolicy.h"
 #include "nsContentUtils.h"
 #include "mozilla/Preferences.h"
@@ -738,17 +737,7 @@ EventSource::InitChannelAndRequestEventSource()
   nsLoadFlags loadFlags;
   loadFlags = nsIRequest::LOAD_BACKGROUND | nsIRequest::LOAD_BYPASS_CACHE;
 
-  // get Content Security Policy from principal to pass into channel
-  nsCOMPtr channelPolicy;
-  nsCOMPtr csp;
-  nsresult rv = mPrincipal->GetCsp(getter_AddRefs(csp));
-  NS_ENSURE_SUCCESS(rv, rv);
-  if (csp) {
-    channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-    channelPolicy->SetContentSecurityPolicy(csp);
-    channelPolicy->SetLoadType(nsIContentPolicy::TYPE_DATAREQUEST);
-  }
-
+  nsresult rv;
   nsIScriptContext* sc = GetContextForEventHandlers(&rv);
   nsCOMPtr doc =
     nsContentUtils::GetDocumentFromScriptContext(sc);
@@ -761,7 +750,6 @@ EventSource::InitChannelAndRequestEventSource()
                        doc,
                        nsILoadInfo::SEC_FORCE_INHERIT_PRINCIPAL,
                        nsIContentPolicy::TYPE_DATAREQUEST,
-                       channelPolicy,    // aChannelPolicy
                        mLoadGroup,       // loadGroup
                        nullptr,          // aCallbacks
                        loadFlags);       // aLoadFlags
@@ -772,7 +760,6 @@ EventSource::InitChannelAndRequestEventSource()
                        mPrincipal,
                        nsILoadInfo::SEC_FORCE_INHERIT_PRINCIPAL,
                        nsIContentPolicy::TYPE_DATAREQUEST,
-                       channelPolicy,    // aChannelPolicy
                        mLoadGroup,       // loadGroup
                        nullptr,          // aCallbacks
                        loadFlags);       // aLoadFlags
diff --git a/content/base/src/ImportManager.cpp b/content/base/src/ImportManager.cpp
index 7af448d4b67..64c04f1e677 100644
--- a/content/base/src/ImportManager.cpp
+++ b/content/base/src/ImportManager.cpp
@@ -12,7 +12,6 @@
 #include "nsContentUtils.h"
 #include "nsCrossSiteListenerProxy.h"
 #include "nsIChannel.h"
-#include "nsIChannelPolicy.h"
 #include "nsIContentPolicy.h"
 #include "nsIContentSecurityPolicy.h"
 #include "nsIDocument.h"
@@ -481,23 +480,13 @@ ImportLoader::Open()
   NS_ENSURE_SUCCESS_VOID(rv);
 
   nsCOMPtr loadGroup = mImportParent->GetDocumentLoadGroup();
-  nsCOMPtr channelPolicy;
-  nsCOMPtr csp;
-  rv = principal->GetCsp(getter_AddRefs(csp));
-  NS_ENSURE_SUCCESS_VOID(rv);
 
-  if (csp) {
-    channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-    channelPolicy->SetContentSecurityPolicy(csp);
-    channelPolicy->SetLoadType(nsIContentPolicy::TYPE_SUBDOCUMENT);
-  }
   nsCOMPtr channel;
   rv = NS_NewChannel(getter_AddRefs(channel),
                      mURI,
                      mImportParent,
                      nsILoadInfo::SEC_NORMAL,
                      nsIContentPolicy::TYPE_SUBDOCUMENT,
-                     channelPolicy,
                      loadGroup,
                      nullptr,  // aCallbacks
                      nsIRequest::LOAD_BACKGROUND);
diff --git a/content/base/src/moz.build b/content/base/src/moz.build
index a660ba44cbc..f59c117101a 100644
--- a/content/base/src/moz.build
+++ b/content/base/src/moz.build
@@ -109,7 +109,6 @@ UNIFIED_SOURCES += [
     'nsAttrValue.cpp',
     'nsAttrValueOrString.cpp',
     'nsCCUncollectableMarker.cpp',
-    'nsChannelPolicy.cpp',
     'nsContentAreaDragDrop.cpp',
     'nsContentIterator.cpp',
     'nsContentList.cpp',
diff --git a/content/base/src/nsCSPContext.cpp b/content/base/src/nsCSPContext.cpp
index ccbb2fdd846..657fe992012 100644
--- a/content/base/src/nsCSPContext.cpp
+++ b/content/base/src/nsCSPContext.cpp
@@ -11,7 +11,6 @@
 #include "nsCSPService.h"
 #include "nsError.h"
 #include "nsIAsyncVerifyRedirectCallback.h"
-#include "nsIChannelPolicy.h"
 #include "nsIClassInfoImpl.h"
 #include "nsIDocShell.h"
 #include "nsIDocShellTreeItem.h"
@@ -25,12 +24,10 @@
 #include "nsIObjectOutputStream.h"
 #include "nsIObserver.h"
 #include "nsIObserverService.h"
-#include "nsIPropertyBag2.h"
 #include "nsIStringStream.h"
 #include "nsIUploadChannel.h"
 #include "nsIScriptError.h"
 #include "nsIWebNavigation.h"
-#include "nsIWritablePropertyBag2.h"
 #include "nsNetUtil.h"
 #include "nsNullPrincipal.h"
 #include "nsIContentPolicy.h"
diff --git a/content/base/src/nsCSPService.cpp b/content/base/src/nsCSPService.cpp
index d872851c23d..64e1a8dcd7e 100644
--- a/content/base/src/nsCSPService.cpp
+++ b/content/base/src/nsCSPService.cpp
@@ -12,12 +12,7 @@
 #include "nsIContent.h"
 #include "nsCSPService.h"
 #include "nsIContentSecurityPolicy.h"
-#include "nsIChannelPolicy.h"
-#include "nsIChannelEventSink.h"
-#include "nsIPropertyBag2.h"
-#include "nsIWritablePropertyBag2.h"
 #include "nsError.h"
-#include "nsChannelProperties.h"
 #include "nsIAsyncVerifyRedirectCallback.h"
 #include "nsAsyncRedirectVerifyHelper.h"
 #include "mozilla/Preferences.h"
diff --git a/content/base/src/nsChannelPolicy.cpp b/content/base/src/nsChannelPolicy.cpp
deleted file mode 100644
index 7d647695bbe..00000000000
--- a/content/base/src/nsChannelPolicy.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "nsChannelPolicy.h"
-
-nsChannelPolicy::nsChannelPolicy()
-  : mLoadType(0)
-{
-}
-
-nsChannelPolicy::~nsChannelPolicy()
-{
-}
-
-NS_IMPL_ISUPPORTS(nsChannelPolicy, nsIChannelPolicy)
-
-NS_IMETHODIMP
-nsChannelPolicy::GetLoadType(uint32_t *aLoadType)
-{
-    *aLoadType = mLoadType;
-    return NS_OK;
-}
-
-NS_IMETHODIMP
-nsChannelPolicy::SetLoadType(uint32_t aLoadType)
-{
-    mLoadType = aLoadType;
-    return NS_OK;
-}
-
-NS_IMETHODIMP
-nsChannelPolicy::GetContentSecurityPolicy(nsISupports **aCSP)
-{
-    *aCSP = mCSP;
-    NS_IF_ADDREF(*aCSP);
-    return NS_OK;
-}
-
-NS_IMETHODIMP
-nsChannelPolicy::SetContentSecurityPolicy(nsISupports *aCSP)
-{
-    mCSP = aCSP;
-    return NS_OK;
-}
diff --git a/content/base/src/nsChannelPolicy.h b/content/base/src/nsChannelPolicy.h
deleted file mode 100644
index f5a0ba29f0e..00000000000
--- a/content/base/src/nsChannelPolicy.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef nsChannelPolicy_h___
-#define nsChannelPolicy_h___
-
-#include "nsCOMPtr.h"
-#include "nsIChannelPolicy.h"
-
-#define NSCHANNELPOLICY_CONTRACTID "@mozilla.org/nschannelpolicy;1"
-#define NSCHANNELPOLICY_CID \
-{ 0xd396b3cd, 0xf164, 0x4ce8, \
-  { 0x93, 0xa7, 0xe3, 0x85, 0xe1, 0x46, 0x56, 0x3c } }
-
-class nsChannelPolicy : public nsIChannelPolicy
-{
-public:
-    NS_DECL_ISUPPORTS
-    NS_DECL_NSICHANNELPOLICY
-
-    nsChannelPolicy();
-
-protected:
-    virtual ~nsChannelPolicy();
-
-    /* Represents the type of content being loaded in the channel per
-     * nsIContentPolicy, e.g. TYPE_IMAGE, TYPE_SCRIPT
-     */
-    unsigned long mLoadType;
-
-    /* pointer to a Content Security Policy object if available */
-    nsCOMPtr mCSP;
-};
-
-#endif /* nsChannelPolicy_h___ */
diff --git a/content/base/src/nsContentUtils.cpp b/content/base/src/nsContentUtils.cpp
index 015f26156e2..8e0efd068d4 100644
--- a/content/base/src/nsContentUtils.cpp
+++ b/content/base/src/nsContentUtils.cpp
@@ -60,7 +60,6 @@
 #include "nsAttrValueInlines.h"
 #include "nsBindingManager.h"
 #include "nsCCUncollectableMarker.h"
-#include "nsChannelPolicy.h"
 #include "nsCharSeparatedTokenizer.h"
 #include "nsCOMPtr.h"
 #include "nsContentCreatorFunctions.h"
@@ -89,7 +88,6 @@
 #include "nsIAsyncVerifyRedirectCallback.h"
 #include "nsICategoryManager.h"
 #include "nsIChannelEventSink.h"
-#include "nsIChannelPolicy.h"
 #include "nsIChromeRegistry.h"
 #include "nsIConsoleService.h"
 #include "nsIContent.h"
@@ -3006,20 +3004,6 @@ nsContentUtils::LoadImage(nsIURI* aURI, nsIDocument* aLoadingDocument,
   NS_ASSERTION(loadGroup || IsFontTableURI(documentURI),
                "Could not get loadgroup; onload may fire too early");
 
-  // check for a Content Security Policy to pass down to the channel that
-  // will get created to load the image
-  nsCOMPtr channelPolicy;
-  nsCOMPtr csp;
-  if (aLoadingPrincipal) {
-    nsresult rv = aLoadingPrincipal->GetCsp(getter_AddRefs(csp));
-    NS_ENSURE_SUCCESS(rv, rv);
-    if (csp) {
-      channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-      channelPolicy->SetContentSecurityPolicy(csp);
-      channelPolicy->SetLoadType(nsIContentPolicy::TYPE_IMAGE);
-    }
-  }
-    
   // Make the URI immutable so people won't change it under us
   NS_TryToSetImmutable(aURI);
 
@@ -3034,7 +3018,6 @@ nsContentUtils::LoadImage(nsIURI* aURI, nsIDocument* aLoadingDocument,
                               aLoadingDocument,     /* uniquification key */
                               aLoadFlags,           /* load flags */
                               nullptr,               /* cache key */
-                              channelPolicy,        /* CSP info */
                               initiatorType,        /* the load initiator */
                               aRequest);
 }
diff --git a/content/base/src/nsCrossSiteListenerProxy.cpp b/content/base/src/nsCrossSiteListenerProxy.cpp
index 6a2ea5122a4..582b6336c82 100644
--- a/content/base/src/nsCrossSiteListenerProxy.cpp
+++ b/content/base/src/nsCrossSiteListenerProxy.cpp
@@ -1122,7 +1122,6 @@ NS_StartCORSPreflight(nsIChannel* aRequestChannel,
     rv = NS_NewChannelInternal(getter_AddRefs(preflightChannel),
                                uri,
                                loadInfo,
-                               nullptr,   // aChannelPolicy
                                loadGroup,
                                nullptr,   // aCallbacks
                                loadFlags);
@@ -1134,7 +1133,6 @@ NS_StartCORSPreflight(nsIChannel* aRequestChannel,
                                nsContentUtils::GetSystemPrincipal(),
                                nsILoadInfo::SEC_NORMAL,
                                nsIContentPolicy::TYPE_OTHER,
-                               nullptr,   // aChannelPolicy
                                loadGroup,
                                nullptr,   // aCallbacks
                                loadFlags);
diff --git a/content/base/src/nsDocument.cpp b/content/base/src/nsDocument.cpp
index 6cb9484b13f..999ac719a3f 100644
--- a/content/base/src/nsDocument.cpp
+++ b/content/base/src/nsDocument.cpp
@@ -1333,7 +1333,6 @@ nsExternalResourceMap::PendingLoad::StartLoad(nsIURI* aURI,
                      aRequestingNode,
                      nsILoadInfo::SEC_NORMAL,
                      nsIContentPolicy::TYPE_OTHER,
-                     nullptr, // aChannelPolicy
                      loadGroup,
                      req); // aCallbacks
 
diff --git a/content/base/src/nsObjectLoadingContent.cpp b/content/base/src/nsObjectLoadingContent.cpp
index 5ca840c3a98..e38c15733b0 100644
--- a/content/base/src/nsObjectLoadingContent.cpp
+++ b/content/base/src/nsObjectLoadingContent.cpp
@@ -65,8 +65,6 @@
 #include "nsObjectLoadingContent.h"
 #include "mozAutoDocUpdate.h"
 #include "nsIContentSecurityPolicy.h"
-#include "nsIChannelPolicy.h"
-#include "nsChannelPolicy.h"
 #include "GeckoProfiler.h"
 #include "nsPluginFrame.h"
 #include "nsDOMClassInfo.h"
@@ -2492,15 +2490,6 @@ nsObjectLoadingContent::OpenChannel()
 
   nsCOMPtr group = doc->GetDocumentLoadGroup();
   nsCOMPtr chan;
-  nsCOMPtr channelPolicy;
-  nsCOMPtr csp;
-  rv = doc->NodePrincipal()->GetCsp(getter_AddRefs(csp));
-  NS_ENSURE_SUCCESS(rv, rv);
-  if (csp) {
-    channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-    channelPolicy->SetContentSecurityPolicy(csp);
-    channelPolicy->SetLoadType(nsIContentPolicy::TYPE_OBJECT);
-  }
   nsRefPtr shim =
     new ObjectInterfaceRequestorShim(this);
 
@@ -2522,7 +2511,6 @@ nsObjectLoadingContent::OpenChannel()
                      thisContent,
                      securityFlags,
                      nsIContentPolicy::TYPE_OBJECT,
-                     channelPolicy,
                      group, // aLoadGroup
                      shim,  // aCallbacks
                      nsIChannel::LOAD_CALL_CONTENT_SNIFFERS |
diff --git a/content/base/src/nsScriptLoader.cpp b/content/base/src/nsScriptLoader.cpp
index f1184291529..4ba7bb67f6b 100644
--- a/content/base/src/nsScriptLoader.cpp
+++ b/content/base/src/nsScriptLoader.cpp
@@ -41,8 +41,6 @@
 #include "nsDocShellCID.h"
 #include "nsIContentSecurityPolicy.h"
 #include "prlog.h"
-#include "nsIChannelPolicy.h"
-#include "nsChannelPolicy.h"
 #include "nsCRT.h"
 #include "nsContentCreatorFunctions.h"
 #include "nsCrossSiteListenerProxy.h"
@@ -306,25 +304,12 @@ nsScriptLoader::StartLoad(nsScriptLoadRequest *aRequest, const nsAString &aType,
     return NS_OK;
   }
 
-  // check for a Content Security Policy to pass down to the channel
-  // that will be created to load the script
-  nsCOMPtr channelPolicy;
-  nsCOMPtr csp;
-  rv = mDocument->NodePrincipal()->GetCsp(getter_AddRefs(csp));
-  NS_ENSURE_SUCCESS(rv, rv);
-  if (csp) {
-    channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-    channelPolicy->SetContentSecurityPolicy(csp);
-    channelPolicy->SetLoadType(nsIContentPolicy::TYPE_SCRIPT);
-  }
-
   nsCOMPtr channel;
   rv = NS_NewChannel(getter_AddRefs(channel),
                      aRequest->mURI,
                      mDocument,
                      nsILoadInfo::SEC_NORMAL,
                      nsIContentPolicy::TYPE_SCRIPT,
-                     channelPolicy,
                      loadGroup,
                      prompter,
                      nsIRequest::LOAD_NORMAL |
diff --git a/content/base/src/nsSyncLoadService.cpp b/content/base/src/nsSyncLoadService.cpp
index 3a675e1a8d6..6805aca44df 100644
--- a/content/base/src/nsSyncLoadService.cpp
+++ b/content/base/src/nsSyncLoadService.cpp
@@ -315,7 +315,6 @@ nsSyncLoadService::LoadDocument(nsIURI *aURI, nsIPrincipal *aLoaderPrincipal,
                                 aLoaderPrincipal,
                                 nsILoadInfo::SEC_NORMAL,
                                 nsIContentPolicy::TYPE_OTHER,
-                                nullptr,   // aChannelPolicy
                                 aLoadGroup);
     NS_ENSURE_SUCCESS(rv, rv);
 
diff --git a/content/base/src/nsXMLHttpRequest.cpp b/content/base/src/nsXMLHttpRequest.cpp
index 80d05325431..c2cd05246cf 100644
--- a/content/base/src/nsXMLHttpRequest.cpp
+++ b/content/base/src/nsXMLHttpRequest.cpp
@@ -51,8 +51,6 @@
 #include "nsIPromptFactory.h"
 #include "nsIWindowWatcher.h"
 #include "nsIConsoleService.h"
-#include "nsIChannelPolicy.h"
-#include "nsChannelPolicy.h"
 #include "nsIContentSecurityPolicy.h"
 #include "nsAsyncRedirectVerifyHelper.h"
 #include "nsStringBuffer.h"
@@ -1724,17 +1722,6 @@ nsXMLHttpRequest::Open(const nsACString& inMethod, const nsACString& url,
   // will be automatically aborted if the user leaves the page.
   nsCOMPtr loadGroup = GetLoadGroup();
 
-  // get Content Security Policy from principal to pass into channel
-  nsCOMPtr channelPolicy;
-  nsCOMPtr csp;
-  rv = mPrincipal->GetCsp(getter_AddRefs(csp));
-  NS_ENSURE_SUCCESS(rv, rv);
-  if (csp) {
-    channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-    channelPolicy->SetContentSecurityPolicy(csp);
-    channelPolicy->SetLoadType(nsIContentPolicy::TYPE_XMLHTTPREQUEST);
-  }
-
   nsSecurityFlags secFlags = nsILoadInfo::SEC_NORMAL;
   if (IsSystemXHR()) {
     // Don't give this document the system principal.  We need to keep track of
@@ -1754,7 +1741,6 @@ nsXMLHttpRequest::Open(const nsACString& inMethod, const nsACString& url,
                        doc,
                        secFlags,
                        nsIContentPolicy::TYPE_XMLHTTPREQUEST,
-                       channelPolicy,
                        loadGroup,
                        nullptr,   // aCallbacks
                        nsIRequest::LOAD_BACKGROUND);
@@ -1765,7 +1751,6 @@ nsXMLHttpRequest::Open(const nsACString& inMethod, const nsACString& url,
                        mPrincipal,
                        secFlags,
                        nsIContentPolicy::TYPE_XMLHTTPREQUEST,
-                       channelPolicy,
                        loadGroup,
                        nullptr,   // aCallbacks
                        nsIRequest::LOAD_BACKGROUND);
diff --git a/content/html/content/src/HTMLMediaElement.cpp b/content/html/content/src/HTMLMediaElement.cpp
index 616b4bd04be..6a485c361ac 100755
--- a/content/html/content/src/HTMLMediaElement.cpp
+++ b/content/html/content/src/HTMLMediaElement.cpp
@@ -99,8 +99,6 @@ static PRLogModuleInfo* gMediaElementEventsLog;
 #endif
 
 #include "nsIContentSecurityPolicy.h"
-#include "nsIChannelPolicy.h"
-#include "nsChannelPolicy.h"
 
 #include "mozilla/Preferences.h"
 
@@ -1186,25 +1184,12 @@ nsresult HTMLMediaElement::LoadResource()
   }
 
   nsCOMPtr loadGroup = GetDocumentLoadGroup();
-
-  // check for a Content Security Policy to pass down to the channel
-  // created to load the media content
-  nsCOMPtr channelPolicy;
-  nsCOMPtr csp;
-  rv = NodePrincipal()->GetCsp(getter_AddRefs(csp));
-  NS_ENSURE_SUCCESS(rv,rv);
-  if (csp) {
-    channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-    channelPolicy->SetContentSecurityPolicy(csp);
-    channelPolicy->SetLoadType(nsIContentPolicy::TYPE_MEDIA);
-  }
   nsCOMPtr channel;
   rv = NS_NewChannel(getter_AddRefs(channel),
                      mLoadingSrc,
                      static_cast(this),
                      nsILoadInfo::SEC_NORMAL,
                      nsIContentPolicy::TYPE_MEDIA,
-                     channelPolicy,
                      loadGroup,
                      nullptr,   // aCallbacks
                      nsICachingChannel::LOAD_BYPASS_LOCAL_CACHE_IF_BUSY |
diff --git a/content/html/content/src/HTMLTrackElement.cpp b/content/html/content/src/HTMLTrackElement.cpp
index 5c21c4cc6f5..db01a4d5608 100644
--- a/content/html/content/src/HTMLTrackElement.cpp
+++ b/content/html/content/src/HTMLTrackElement.cpp
@@ -21,7 +21,6 @@
 #include "nsIAsyncVerifyRedirectCallback.h"
 #include "nsICachingChannel.h"
 #include "nsIChannelEventSink.h"
-#include "nsIChannelPolicy.h"
 #include "nsIContentPolicy.h"
 #include "nsIContentSecurityPolicy.h"
 #include "nsIDocument.h"
@@ -237,20 +236,6 @@ HTMLTrackElement::LoadResource()
     CreateTextTrack();
   }
 
-  // Check for a Content Security Policy to pass down to the channel
-  // created to load the media content.
-  nsCOMPtr channelPolicy;
-  nsCOMPtr csp;
-  rv = NodePrincipal()->GetCsp(getter_AddRefs(csp));
-  NS_ENSURE_TRUE_VOID(NS_SUCCEEDED(rv));
-  if (csp) {
-    channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-    if (!channelPolicy) {
-      return;
-    }
-    channelPolicy->SetContentSecurityPolicy(csp);
-    channelPolicy->SetLoadType(nsIContentPolicy::TYPE_MEDIA);
-  }
   nsCOMPtr channel;
   nsCOMPtr loadGroup = OwnerDoc()->GetDocumentLoadGroup();
   rv = NS_NewChannel(getter_AddRefs(channel),
@@ -258,7 +243,6 @@ HTMLTrackElement::LoadResource()
                      static_cast(this),
                      nsILoadInfo::SEC_NORMAL,
                      nsIContentPolicy::TYPE_MEDIA,
-                     channelPolicy,
                      loadGroup);
 
   NS_ENSURE_TRUE_VOID(NS_SUCCEEDED(rv));
diff --git a/content/html/document/src/nsHTMLDocument.cpp b/content/html/document/src/nsHTMLDocument.cpp
index 0678c81353d..e1de1ccb279 100644
--- a/content/html/document/src/nsHTMLDocument.cpp
+++ b/content/html/document/src/nsHTMLDocument.cpp
@@ -1516,7 +1516,6 @@ nsHTMLDocument::Open(JSContext* cx,
                      callerDoc,
                      nsILoadInfo::SEC_FORCE_INHERIT_PRINCIPAL,
                      nsIContentPolicy::TYPE_OTHER,
-                     nullptr,   // aChannelPolicy
                      group);
 
   if (rv.Failed()) {
diff --git a/content/media/MediaResource.cpp b/content/media/MediaResource.cpp
index 0864c91dbe4..012b73f9669 100644
--- a/content/media/MediaResource.cpp
+++ b/content/media/MediaResource.cpp
@@ -939,7 +939,6 @@ ChannelMediaResource::RecreateChannel()
                               element,
                               nsILoadInfo::SEC_NORMAL,
                               nsIContentPolicy::TYPE_MEDIA,
-                              nullptr,   // aChannelPolicy
                               loadGroup,
                               nullptr,  // aCallbacks
                               loadFlags);
@@ -1459,7 +1458,6 @@ already_AddRefed FileMediaResource::CloneData(MediaDecoder* aDeco
                   element,
                   nsILoadInfo::SEC_NORMAL,
                   nsIContentPolicy::TYPE_MEDIA,
-                  nullptr,   // aChannelPolicy
                   loadGroup);
 
   if (NS_FAILED(rv))
diff --git a/content/xul/document/src/XULDocument.cpp b/content/xul/document/src/XULDocument.cpp
index de31b115072..b4f27ec5e37 100644
--- a/content/xul/document/src/XULDocument.cpp
+++ b/content/xul/document/src/XULDocument.cpp
@@ -2701,7 +2701,6 @@ XULDocument::LoadOverlayInternal(nsIURI* aURI, bool aIsDynamic,
                            NodePrincipal(),
                            nsILoadInfo::SEC_FORCE_INHERIT_PRINCIPAL,
                            nsIContentPolicy::TYPE_OTHER,
-                           nullptr,    // aChannelPolicy
                            group);
 
         if (NS_SUCCEEDED(rv)) {
diff --git a/docshell/base/nsDocShell.cpp b/docshell/base/nsDocShell.cpp
index f9647455116..89135644bee 100644
--- a/docshell/base/nsDocShell.cpp
+++ b/docshell/base/nsDocShell.cpp
@@ -173,7 +173,6 @@
 #endif
 
 #include "nsContentUtils.h"
-#include "nsIChannelPolicy.h"
 #include "nsIContentSecurityPolicy.h"
 #include "nsILoadInfo.h"
 #include "nsSandboxFlags.h"
@@ -10132,27 +10131,7 @@ nsDocShell::DoURILoad(nsIURI * aURI,
         loadFlags |= nsIChannel::LOAD_BACKGROUND;
     }
 
-    // check for Content Security Policy to pass along with the
-    // new channel we are creating
-    nsCOMPtr channelPolicy;
     if (IsFrame()) {
-        // check the parent docshell for a CSP
-        nsCOMPtr csp;
-        nsCOMPtr parentItem;
-        GetSameTypeParent(getter_AddRefs(parentItem));
-        if (parentItem) {
-          nsCOMPtr doc = parentItem->GetDocument();
-          if (doc) {
-            rv = doc->NodePrincipal()->GetCsp(getter_AddRefs(csp));
-            NS_ENSURE_SUCCESS(rv, rv);
-            if (csp) {
-              channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-              channelPolicy->SetContentSecurityPolicy(csp);
-              channelPolicy->SetLoadType(nsIContentPolicy::TYPE_SUBDOCUMENT);
-            }
-          }
-        }
-
         // Only allow view-source scheme in top-level docshells. view-source is
         // the only scheme to which this applies at the moment due to potential
         // timing attacks to read data from cross-origin iframes. If this widens
@@ -10221,7 +10200,6 @@ nsDocShell::DoURILoad(nsIURI * aURI,
                                    requestingPrincipal,
                                    securityFlags,
                                    aContentPolicyType,
-                                   channelPolicy,
                                    nullptr,   // loadGroup
                                    static_cast(this),
                                    loadFlags);
diff --git a/dom/base/Navigator.cpp b/dom/base/Navigator.cpp
index 17568810041..9f169c7195f 100644
--- a/dom/base/Navigator.cpp
+++ b/dom/base/Navigator.cpp
@@ -66,7 +66,6 @@
 #include "mozIApplication.h"
 #include "WidgetUtils.h"
 #include "mozIThirdPartyUtil.h"
-#include "nsChannelPolicy.h"
 
 #ifdef MOZ_MEDIA_NAVIGATOR
 #include "MediaManager.h"
@@ -1049,26 +1048,11 @@ Navigator::SendBeacon(const nsAString& aUrl,
   }
 
   nsCOMPtr channel;
-  nsCOMPtr channelPolicy;
-  nsCOMPtr csp;
-  rv = principal->GetCsp(getter_AddRefs(csp));
-  if (NS_FAILED(rv)) {
-    aRv.Throw(NS_ERROR_FAILURE);
-    return false;
-  }
-
-  if (csp) {
-    channelPolicy = do_CreateInstance(NSCHANNELPOLICY_CONTRACTID);
-    channelPolicy->SetContentSecurityPolicy(csp);
-    channelPolicy->SetLoadType(nsIContentPolicy::TYPE_BEACON);
-  }
-
   rv = NS_NewChannel(getter_AddRefs(channel),
                      uri,
                      doc,
                      nsILoadInfo::SEC_NORMAL,
-                     nsIContentPolicy::TYPE_BEACON,
-                     channelPolicy);
+                     nsIContentPolicy::TYPE_BEACON);
 
   if (NS_FAILED(rv)) {
     aRv.Throw(rv);
diff --git a/dom/plugins/base/nsPluginHost.cpp b/dom/plugins/base/nsPluginHost.cpp
index d8fc5947742..ec70c22afa0 100644
--- a/dom/plugins/base/nsPluginHost.cpp
+++ b/dom/plugins/base/nsPluginHost.cpp
@@ -2857,7 +2857,6 @@ nsresult nsPluginHost::NewPluginURLStream(const nsString& aURL,
                              principal,
                              nsILoadInfo::SEC_FORCE_INHERIT_PRINCIPAL,
                              nsIContentPolicy::TYPE_OBJECT_SUBREQUEST,
-                             nullptr,  // aChannelPolicy
                              nullptr,  // aLoadGroup 
                              listenerPeer);
 
diff --git a/dom/plugins/base/nsPluginStreamListenerPeer.cpp b/dom/plugins/base/nsPluginStreamListenerPeer.cpp
index 54f2afb92a0..ab00a1d9721 100644
--- a/dom/plugins/base/nsPluginStreamListenerPeer.cpp
+++ b/dom/plugins/base/nsPluginStreamListenerPeer.cpp
@@ -662,7 +662,6 @@ nsPluginStreamListenerPeer::RequestRead(NPByteRange* rangeList)
                              principal,
                              nsILoadInfo::SEC_NORMAL,
                              nsIContentPolicy::TYPE_OTHER,
-                             nullptr,   // aChannelPolicy
                              loadGroup,
                              callbacks);
 
diff --git a/dom/workers/ScriptLoader.cpp b/dom/workers/ScriptLoader.cpp
index c4f6aa37c08..1f4b4b6841b 100644
--- a/dom/workers/ScriptLoader.cpp
+++ b/dom/workers/ScriptLoader.cpp
@@ -6,7 +6,6 @@
 #include "ScriptLoader.h"
 
 #include "nsIChannel.h"
-#include "nsIChannelPolicy.h"
 #include "nsIContentPolicy.h"
 #include "nsIContentSecurityPolicy.h"
 #include "nsIHttpChannel.h"
@@ -17,7 +16,6 @@
 #include "nsIURI.h"
 
 #include "jsapi.h"
-#include "nsChannelPolicy.h"
 #include "nsError.h"
 #include "nsContentPolicyUtils.h"
 #include "nsContentUtils.h"
@@ -104,23 +102,6 @@ ChannelFromScriptURL(nsIPrincipal* principal,
     NS_ENSURE_SUCCESS(rv, NS_ERROR_DOM_SECURITY_ERR);
   }
 
-  // Get Content Security Policy from parent document to pass into channel.
-  nsCOMPtr csp;
-  rv = principal->GetCsp(getter_AddRefs(csp));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  nsCOMPtr channelPolicy;
-  if (csp) {
-    channelPolicy = do_CreateInstance(NSCHANNELPOLICY_CONTRACTID, &rv);
-    NS_ENSURE_SUCCESS(rv, rv);
-
-    rv = channelPolicy->SetContentSecurityPolicy(csp);
-    NS_ENSURE_SUCCESS(rv, rv);
-
-    rv = channelPolicy->SetLoadType(nsIContentPolicy::TYPE_SCRIPT);
-    NS_ENSURE_SUCCESS(rv, rv);
-  }
-
   uint32_t flags = nsIRequest::LOAD_NORMAL | nsIChannel::LOAD_CLASSIFY_URI;
 
   nsCOMPtr channel;
@@ -131,7 +112,6 @@ ChannelFromScriptURL(nsIPrincipal* principal,
                        parentDoc,
                        nsILoadInfo::SEC_NORMAL,
                        nsIContentPolicy::TYPE_SCRIPT,
-                       channelPolicy,
                        loadGroup,
                        nullptr, // aCallbacks
                        flags,
@@ -148,7 +128,6 @@ ChannelFromScriptURL(nsIPrincipal* principal,
                        nullPrincipal,
                        nsILoadInfo::SEC_NORMAL,
                        nsIContentPolicy::TYPE_SCRIPT,
-                       channelPolicy,
                        loadGroup,
                        nullptr, // aCallbacks
                        flags,
diff --git a/dom/xbl/nsXBLService.cpp b/dom/xbl/nsXBLService.cpp
index 22d2b07d370..eac4d6c9fa0 100644
--- a/dom/xbl/nsXBLService.cpp
+++ b/dom/xbl/nsXBLService.cpp
@@ -1076,7 +1076,6 @@ nsXBLService::FetchBindingDocument(nsIContent* aBoundElement, nsIDocument* aBoun
                              requestingPrincipal,
                              nsILoadInfo::SEC_NORMAL,
                              nsIContentPolicy::TYPE_OTHER,
-                             nullptr,   // aChannelPolicy
                              loadGroup);
 
   NS_ENSURE_SUCCESS(rv, rv);
diff --git a/dom/xml/XMLDocument.cpp b/dom/xml/XMLDocument.cpp
index ebd093a5f54..2d1bc1d2097 100644
--- a/dom/xml/XMLDocument.cpp
+++ b/dom/xml/XMLDocument.cpp
@@ -449,7 +449,6 @@ XMLDocument::Load(const nsAString& aUrl, ErrorResult& aRv)
                                   static_cast(this),
                      nsILoadInfo::SEC_NORMAL,
                      nsIContentPolicy::TYPE_XMLHTTPREQUEST,
-                     nullptr,   // aChannelPolicy
                      loadGroup,
                      req,
                      nsIRequest::LOAD_BACKGROUND);
diff --git a/dom/xslt/base/txURIUtils.cpp b/dom/xslt/base/txURIUtils.cpp
index ebcc484d21d..b2a78db2e47 100644
--- a/dom/xslt/base/txURIUtils.cpp
+++ b/dom/xslt/base/txURIUtils.cpp
@@ -64,7 +64,6 @@ URIUtils::ResetWithSource(nsIDocument *aNewDoc, nsIDOMNode *aSourceNode)
                                     sourceDoc,
                                     nsILoadInfo::SEC_FORCE_INHERIT_PRINCIPAL,
                                     nsIContentPolicy::TYPE_OTHER,
-                                    nullptr,   // aChannelPolicy
                                     loadGroup);
 
         if (NS_FAILED(rv)) {
diff --git a/embedding/browser/nsContextMenuInfo.cpp b/embedding/browser/nsContextMenuInfo.cpp
index 6a15401444b..0b4733071c0 100644
--- a/embedding/browser/nsContextMenuInfo.cpp
+++ b/embedding/browser/nsContextMenuInfo.cpp
@@ -23,7 +23,6 @@
 #include "nsUnicharUtils.h"
 #include "nsIDocument.h"
 #include "nsIPrincipal.h"
-#include "nsIChannelPolicy.h"
 #include "nsIContentSecurityPolicy.h"
 #include "nsIContentPolicy.h"
 #include "nsAutoPtr.h"
@@ -268,22 +267,9 @@ nsContextMenuInfo::GetBackgroundImageRequestInternal(nsIDOMNode *aDOMNode, imgRe
   nsCOMPtr primitiveValue;
   nsAutoString bgStringValue;
 
-  // get Content Security Policy to pass to LoadImage
   nsCOMPtr doc(do_QueryInterface(document));
-  nsCOMPtr principal;
-  nsCOMPtr channelPolicy;
-  nsCOMPtr csp;
-  if (doc) {
-    principal = doc->NodePrincipal();
-    nsresult rv = principal->GetCsp(getter_AddRefs(csp));
-    NS_ENSURE_SUCCESS(rv, rv);
-    if (csp) {
-      channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-      channelPolicy->SetContentSecurityPolicy(csp);
-      channelPolicy->SetLoadType(nsIContentPolicy::TYPE_IMAGE);
-    }
-  }
-  
+  nsCOMPtr principal = doc ? doc->NodePrincipal() : nullptr;
+
   while (true) {
     nsCOMPtr domElement(do_QueryInterface(domNode));
     // bail for the parent node of the root element or null argument
@@ -310,7 +296,7 @@ nsContextMenuInfo::GetBackgroundImageRequestInternal(nsIDOMNode *aDOMNode, imgRe
 
           return il->LoadImage(bgUri, nullptr, nullptr, principal, nullptr,
                                nullptr, nullptr, nsIRequest::LOAD_NORMAL,
-                               nullptr, channelPolicy, EmptyString(), aRequest);
+                               nullptr, EmptyString(), aRequest);
         }
       }
 
diff --git a/embedding/components/webbrowserpersist/nsWebBrowserPersist.cpp b/embedding/components/webbrowserpersist/nsWebBrowserPersist.cpp
index cbfef03aa9f..dad12ded7f6 100644
--- a/embedding/components/webbrowserpersist/nsWebBrowserPersist.cpp
+++ b/embedding/components/webbrowserpersist/nsWebBrowserPersist.cpp
@@ -1205,7 +1205,6 @@ nsresult nsWebBrowserPersist::SaveURIInternal(
                        nsContentUtils::GetSystemPrincipal(),
                        nsILoadInfo::SEC_NORMAL,
                        nsIContentPolicy::TYPE_OTHER,
-                       nullptr,  // aChannelPolicy
                        nullptr,  // aLoadGroup
                        static_cast(this),
                        loadFlags);
diff --git a/extensions/pref/autoconfig/src/nsAutoConfig.cpp b/extensions/pref/autoconfig/src/nsAutoConfig.cpp
index 565bdb20ca2..8b81bbef5e3 100644
--- a/extensions/pref/autoconfig/src/nsAutoConfig.cpp
+++ b/extensions/pref/autoconfig/src/nsAutoConfig.cpp
@@ -285,7 +285,6 @@ nsresult nsAutoConfig::downloadAutoConfig()
                        nsContentUtils::GetSystemPrincipal(),
                        nsILoadInfo::SEC_NORMAL,
                        nsIContentPolicy::TYPE_OTHER,
-                       nullptr,  // aChannelPolicy
                        nullptr,  // loadGroup
                        nullptr,  // aCallbacks
                        nsIRequest::INHIBIT_PERSISTENT_CACHING |
diff --git a/image/public/imgILoader.idl b/image/public/imgILoader.idl
index cf751632aae..b47c34b092d 100644
--- a/image/public/imgILoader.idl
+++ b/image/public/imgILoader.idl
@@ -16,7 +16,6 @@ interface nsIStreamListener;
 interface nsIURI;
 
 interface nsISimpleEnumerator;
-interface nsIChannelPolicy;
 
 #include "nsIRequest.idl" // for nsLoadFlags
 
@@ -62,8 +61,7 @@ interface imgILoader : nsISupports
                              in imgINotificationObserver aObserver,
                              in nsISupports aCX,
                              in nsLoadFlags aLoadFlags,
-                             in nsISupports cacheKey,
-                             in nsIChannelPolicy channelPolicy);
+                             in nsISupports cacheKey);
 
   /**
    * Start the load and decode of an image.
diff --git a/image/src/imgLoader.cpp b/image/src/imgLoader.cpp
index 0ac8d497a9d..a96e00796d4 100644
--- a/image/src/imgLoader.cpp
+++ b/image/src/imgLoader.cpp
@@ -635,7 +635,6 @@ static nsresult NewImageChannel(nsIChannel **aResult,
                                 nsILoadGroup *aLoadGroup,
                                 const nsCString& aAcceptHeader,
                                 nsLoadFlags aLoadFlags,
-                                nsIChannelPolicy *aPolicy,
                                 nsIPrincipal *aLoadingPrincipal,
                                 nsISupports *aRequestingContext)
 {
@@ -691,7 +690,6 @@ static nsresult NewImageChannel(nsIChannel **aResult,
                              requestingPrincipal,
                              securityFlags,
                              nsIContentPolicy::TYPE_IMAGE,
-                             aPolicy,
                              nullptr,   // loadGroup
                              callbacks,
                              aLoadFlags);
@@ -1445,7 +1443,6 @@ bool imgLoader::ValidateRequestWithNewChannel(imgRequest *request,
                                                 nsISupports *aCX,
                                                 nsLoadFlags aLoadFlags,
                                                 imgRequestProxy **aProxyRequest,
-                                                nsIChannelPolicy *aPolicy,
                                                 nsIPrincipal* aLoadingPrincipal,
                                                 int32_t aCORSMode)
 {
@@ -1493,7 +1490,6 @@ bool imgLoader::ValidateRequestWithNewChannel(imgRequest *request,
                          aLoadGroup,
                          mAcceptHeader,
                          aLoadFlags,
-                         aPolicy,
                          aLoadingPrincipal,
                          aCX);
     if (NS_FAILED(rv)) {
@@ -1573,7 +1569,6 @@ bool imgLoader::ValidateEntry(imgCacheEntry *aEntry,
                                 nsLoadFlags aLoadFlags,
                                 bool aCanMakeNewChannel,
                                 imgRequestProxy **aProxyRequest,
-                                nsIChannelPolicy *aPolicy,
                                 nsIPrincipal* aLoadingPrincipal,
                                 int32_t aCORSMode)
 {
@@ -1678,7 +1673,7 @@ bool imgLoader::ValidateEntry(imgCacheEntry *aEntry,
 
     return ValidateRequestWithNewChannel(request, aURI, aInitialDocumentURI,
                                          aReferrerURI, aLoadGroup, aObserver,
-                                         aCX, aLoadFlags, aProxyRequest, aPolicy,
+                                         aCX, aLoadFlags, aProxyRequest,
                                          aLoadingPrincipal, aCORSMode);
   }
 
@@ -1853,7 +1848,6 @@ NS_IMETHODIMP imgLoader::LoadImageXPCOM(nsIURI *aURI,
                                    nsISupports *aCX,
                                    nsLoadFlags aLoadFlags,
                                    nsISupports *aCacheKey,
-                                   nsIChannelPolicy *aPolicy,
                                    imgIRequest **_retval)
 {
     imgRequestProxy *proxy;
@@ -1866,29 +1860,32 @@ NS_IMETHODIMP imgLoader::LoadImageXPCOM(nsIURI *aURI,
                                 aCX,
                                 aLoadFlags,
                                 aCacheKey,
-                                aPolicy,
                                 EmptyString(),
                                 &proxy);
     *_retval = proxy;
     return result;
 }
 
-
-
-/* imgIRequest loadImage(in nsIURI aURI, in nsIURI aInitialDocumentURL, in nsIURI aReferrerURI, in nsIPrincipal aLoadingPrincipal, in nsILoadGroup aLoadGroup, in imgINotificationObserver aObserver, in nsISupports aCX, in nsLoadFlags aLoadFlags, in nsISupports cacheKey, in nsIChannelPolicy channelPolicy); */
-
+// imgIRequest loadImage(in nsIURI aURI,
+//                       in nsIURI aInitialDocumentURL,
+//                       in nsIURI aReferrerURI,
+//                       in nsIPrincipal aLoadingPrincipal,
+//                       in nsILoadGroup aLoadGroup,
+//                       in imgINotificationObserver aObserver,
+//                       in nsISupports aCX,
+//                       in nsLoadFlags aLoadFlags,
+//                       in nsISupports cacheKey);
 nsresult imgLoader::LoadImage(nsIURI *aURI,
-			      nsIURI *aInitialDocumentURI,
-			      nsIURI *aReferrerURI,
-			      nsIPrincipal* aLoadingPrincipal,
-			      nsILoadGroup *aLoadGroup,
-			      imgINotificationObserver *aObserver,
-			      nsISupports *aCX,
-			      nsLoadFlags aLoadFlags,
-			      nsISupports *aCacheKey,
-			      nsIChannelPolicy *aPolicy,
-			      const nsAString& initiatorType,
-			      imgRequestProxy **_retval)
+                              nsIURI *aInitialDocumentURI,
+                              nsIURI *aReferrerURI,
+                              nsIPrincipal* aLoadingPrincipal,
+                              nsILoadGroup *aLoadGroup,
+                              imgINotificationObserver *aObserver,
+                              nsISupports *aCX,
+                              nsLoadFlags aLoadFlags,
+                              nsISupports *aCacheKey,
+                              const nsAString& initiatorType,
+                              imgRequestProxy **_retval)
 {
 	VerifyCacheSizes();
 
@@ -1966,7 +1963,7 @@ nsresult imgLoader::LoadImage(nsIURI *aURI,
   if (cache.Get(spec, getter_AddRefs(entry)) && entry) {
     if (ValidateEntry(entry, aURI, aInitialDocumentURI, aReferrerURI,
                       aLoadGroup, aObserver, aCX, requestFlags, true,
-                      _retval, aPolicy, aLoadingPrincipal, corsmode)) {
+                      _retval, aLoadingPrincipal, corsmode)) {
       request = entry->GetRequest();
 
       // If this entry has no proxies, its request has no reference to the entry.
@@ -2008,7 +2005,6 @@ nsresult imgLoader::LoadImage(nsIURI *aURI,
                          aLoadGroup,
                          mAcceptHeader,
                          requestFlags,
-                         aPolicy,
                          aLoadingPrincipal,
                          aCX);
     if (NS_FAILED(rv))
@@ -2192,7 +2188,7 @@ nsresult imgLoader::LoadImageWithChannel(nsIChannel *channel, imgINotificationOb
       // XXX -- should this be changed? it's pretty much verbatim from the old
       // code, but seems nonsensical.
       if (ValidateEntry(entry, uri, nullptr, nullptr, nullptr, aObserver, aCX,
-                        requestFlags, false, nullptr, nullptr, nullptr,
+                        requestFlags, false, nullptr, nullptr,
                         imgIRequest::CORS_NONE)) {
         request = entry->GetRequest();
       } else {
diff --git a/image/src/imgLoader.h b/image/src/imgLoader.h
index 334a3a16b26..62045765108 100644
--- a/image/src/imgLoader.h
+++ b/image/src/imgLoader.h
@@ -29,7 +29,6 @@ class imgINotificationObserver;
 class nsILoadGroup;
 class imgCacheExpirationTracker;
 class imgMemoryReporter;
-class nsIChannelPolicy;
 
 namespace mozilla {
 namespace image {
@@ -258,7 +257,6 @@ public:
                      nsISupports *aCX,
                      nsLoadFlags aLoadFlags,
                      nsISupports *aCacheKey,
-                     nsIChannelPolicy *aPolicy,
                      const nsAString& initiatorType,
                      imgRequestProxy **_retval);
   nsresult LoadImageWithChannel(nsIChannel *channel,
@@ -340,7 +338,6 @@ private: // methods
                        imgINotificationObserver *aObserver, nsISupports *aCX,
                        nsLoadFlags aLoadFlags, bool aCanMakeNewChannel,
                        imgRequestProxy **aProxyRequest,
-                       nsIChannelPolicy *aPolicy,
                        nsIPrincipal* aLoadingPrincipal,
                        int32_t aCORSMode);
 
@@ -351,7 +348,6 @@ private: // methods
                                        imgINotificationObserver *aObserver,
                                        nsISupports *aCX, nsLoadFlags aLoadFlags,
                                        imgRequestProxy **aProxyRequest,
-                                       nsIChannelPolicy *aPolicy,
                                        nsIPrincipal* aLoadingPrincipal,
                                        int32_t aCORSMode);
 
diff --git a/image/test/unit/async_load_tests.js b/image/test/unit/async_load_tests.js
index 970ae812ee8..c4cdcd0730c 100644
--- a/image/test/unit/async_load_tests.js
+++ b/image/test/unit/async_load_tests.js
@@ -96,7 +96,7 @@ function checkSecondLoad()
   var listener = new ImageListener(checkClone, secondLoadDone);
   var outer = Cc["@mozilla.org/image/tools;1"].getService(Ci.imgITools)
                 .createScriptedObserver(listener);
-  requests.push(gCurrentLoader.loadImageXPCOM(uri, null, null, null, null, outer, null, 0, null, null));
+  requests.push(gCurrentLoader.loadImageXPCOM(uri, null, null, null, null, outer, null, 0, null));
   listener.synchronous = false;
 }
 
@@ -194,7 +194,7 @@ function startImageCallback(otherCb)
     var listener2 = new ImageListener(null, function(foo, bar) { do_test_finished(); });
     var outer = Cc["@mozilla.org/image/tools;1"].getService(Ci.imgITools)
                   .createScriptedObserver(listener2);
-    requests.push(gCurrentLoader.loadImageXPCOM(uri, null, null, null, null, outer, null, 0, null, null));
+    requests.push(gCurrentLoader.loadImageXPCOM(uri, null, null, null, null, outer, null, 0, null));
     listener2.synchronous = false;
 
     // Now that we've started another load, chain to the callback.
@@ -221,7 +221,7 @@ function run_test()
   var listener = new ImageListener(startImageCallback(checkClone), firstLoadDone);
   var outer = Cc["@mozilla.org/image/tools;1"].getService(Ci.imgITools)
                 .createScriptedObserver(listener);
-  var req = gCurrentLoader.loadImageXPCOM(uri, null, null, null, null, outer, null, 0, null, null);
+  var req = gCurrentLoader.loadImageXPCOM(uri, null, null, null, null, outer, null, 0, null);
   requests.push(req);
 
   // Ensure that we don't cause any mayhem when we lock an image.
diff --git a/image/test/unit/test_private_channel.js b/image/test/unit/test_private_channel.js
index 36aa8ee784c..318dece2afa 100644
--- a/image/test/unit/test_private_channel.js
+++ b/image/test/unit/test_private_channel.js
@@ -77,7 +77,7 @@ function loadImage(isPrivate, callback) {
   var loadGroup = Cc["@mozilla.org/network/load-group;1"].createInstance(Ci.nsILoadGroup);
   loadGroup.notificationCallbacks = new NotificationCallbacks(isPrivate);
   var loader = isPrivate ? gPrivateLoader : gPublicLoader;
-  requests.push(loader.loadImageXPCOM(uri, null, null, null, loadGroup, outer, null, 0, null, null));
+  requests.push(loader.loadImageXPCOM(uri, null, null, null, loadGroup, outer, null, 0, null));
   listener.synchronous = false;
 }
 
diff --git a/js/xpconnect/loader/mozJSSubScriptLoader.cpp b/js/xpconnect/loader/mozJSSubScriptLoader.cpp
index ce6c072c1a7..9cbec3d8e05 100644
--- a/js/xpconnect/loader/mozJSSubScriptLoader.cpp
+++ b/js/xpconnect/loader/mozJSSubScriptLoader.cpp
@@ -113,7 +113,6 @@ mozJSSubScriptLoader::ReadScript(nsIURI *uri, JSContext *cx, JSObject *targetObj
                        nsContentUtils::GetSystemPrincipal(),
                        nsILoadInfo::SEC_NORMAL,
                        nsIContentPolicy::TYPE_OTHER,
-                       nullptr,  // aChannelPolicy
                        nullptr,  // aLoadGroup
                        nullptr,  // aCallbacks
                        nsIRequest::LOAD_NORMAL,
diff --git a/layout/build/nsLayoutModule.cpp b/layout/build/nsLayoutModule.cpp
index e0ad0f35c1e..24d4fdb82bf 100644
--- a/layout/build/nsLayoutModule.cpp
+++ b/layout/build/nsLayoutModule.cpp
@@ -61,7 +61,6 @@
 #include "mozilla/dom/DOMParser.h"
 #include "nsDOMSerializer.h"
 #include "nsXMLHttpRequest.h"
-#include "nsChannelPolicy.h"
 
 // view stuff
 #include "nsContentCreatorFunctions.h"
@@ -283,7 +282,6 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(DOMParser)
 NS_GENERIC_FACTORY_CONSTRUCTOR(Exception)
 NS_GENERIC_FACTORY_CONSTRUCTOR(DOMSessionStorageManager)
 NS_GENERIC_FACTORY_CONSTRUCTOR(DOMLocalStorageManager)
-NS_GENERIC_FACTORY_CONSTRUCTOR(nsChannelPolicy)
 NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(DOMRequestService,
                                          DOMRequestService::FactoryCreate)
 NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(QuotaManager,
@@ -753,7 +751,6 @@ NS_DEFINE_NAMED_CID(NS_EVENTLISTENERSERVICE_CID);
 NS_DEFINE_NAMED_CID(NS_GLOBALMESSAGEMANAGER_CID);
 NS_DEFINE_NAMED_CID(NS_PARENTPROCESSMESSAGEMANAGER_CID);
 NS_DEFINE_NAMED_CID(NS_CHILDPROCESSMESSAGEMANAGER_CID);
-NS_DEFINE_NAMED_CID(NSCHANNELPOLICY_CID);
 NS_DEFINE_NAMED_CID(NS_SCRIPTSECURITYMANAGER_CID);
 NS_DEFINE_NAMED_CID(NS_PRINCIPAL_CID);
 NS_DEFINE_NAMED_CID(NS_SYSTEMPRINCIPAL_CID);
@@ -1048,7 +1045,6 @@ static const mozilla::Module::CIDEntry kLayoutCIDs[] = {
   { &kNS_GLOBALMESSAGEMANAGER_CID, false, nullptr, CreateGlobalMessageManager },
   { &kNS_PARENTPROCESSMESSAGEMANAGER_CID, false, nullptr, CreateParentMessageManager },
   { &kNS_CHILDPROCESSMESSAGEMANAGER_CID, false, nullptr, CreateChildMessageManager },
-  { &kNSCHANNELPOLICY_CID, false, nullptr, nsChannelPolicyConstructor },
   { &kNS_SCRIPTSECURITYMANAGER_CID, false, nullptr, Construct_nsIScriptSecurityManager },
   { &kNS_PRINCIPAL_CID, false, nullptr, nsPrincipalConstructor },
   { &kNS_SYSTEMPRINCIPAL_CID, false, nullptr, nsSystemPrincipalConstructor },
@@ -1206,7 +1202,6 @@ static const mozilla::Module::ContractIDEntry kLayoutContracts[] = {
   { NS_GLOBALMESSAGEMANAGER_CONTRACTID, &kNS_GLOBALMESSAGEMANAGER_CID },
   { NS_PARENTPROCESSMESSAGEMANAGER_CONTRACTID, &kNS_PARENTPROCESSMESSAGEMANAGER_CID },
   { NS_CHILDPROCESSMESSAGEMANAGER_CONTRACTID, &kNS_CHILDPROCESSMESSAGEMANAGER_CID },
-  { NSCHANNELPOLICY_CONTRACTID, &kNSCHANNELPOLICY_CID },
   { NS_SCRIPTSECURITYMANAGER_CONTRACTID, &kNS_SCRIPTSECURITYMANAGER_CID },
   { NS_GLOBAL_CHANNELEVENTSINK_CONTRACTID, &kNS_SCRIPTSECURITYMANAGER_CID },
   { NS_PRINCIPAL_CONTRACTID, &kNS_PRINCIPAL_CID },
diff --git a/layout/generic/nsImageFrame.cpp b/layout/generic/nsImageFrame.cpp
index c742c78225b..6b67ee2d55f 100644
--- a/layout/generic/nsImageFrame.cpp
+++ b/layout/generic/nsImageFrame.cpp
@@ -1931,7 +1931,6 @@ nsImageFrame::LoadIcon(const nsAString& aSpec,
                        nullptr,      /* Not associated with any particular document */
                        loadFlags,
                        nullptr,
-                       nullptr,      /* channel policy not needed */
                        EmptyString(),
                        aRequest);
 }
diff --git a/layout/style/FontFaceSet.cpp b/layout/style/FontFaceSet.cpp
index 59ed4f9fc09..571aaf7f154 100644
--- a/layout/style/FontFaceSet.cpp
+++ b/layout/style/FontFaceSet.cpp
@@ -19,7 +19,6 @@
 #include "mozilla/AsyncEventDispatcher.h"
 #include "nsCrossSiteListenerProxy.h"
 #include "nsFontFaceLoader.h"
-#include "nsIChannelPolicy.h"
 #include "nsIConsoleService.h"
 #include "nsIContentPolicy.h"
 #include "nsIContentSecurityPolicy.h"
@@ -397,16 +396,6 @@ FontFaceSet::StartLoad(gfxUserFontEntry* aUserFontEntry,
   nsCOMPtr<nsILoadGroup> loadGroup(ps->GetDocument()->GetDocumentLoadGroup());
 
   nsCOMPtr<nsIChannel> channel;
-  // get Content Security Policy from principal to pass into channel
-  nsCOMPtr<nsIChannelPolicy> channelPolicy;
-  nsCOMPtr<nsIContentSecurityPolicy> csp;
-  rv = aUserFontEntry->GetPrincipal()->GetCsp(getter_AddRefs(csp));
-  NS_ENSURE_SUCCESS(rv, rv);
-  if (csp) {
-    channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-    channelPolicy->SetContentSecurityPolicy(csp);
-    channelPolicy->SetLoadType(nsIContentPolicy::TYPE_FONT);
-  }
   // Note we are calling NS_NewChannelInternal() with both a node and a
   // principal.  This is because the document where the font is being loaded
   // might have a different origin from the principal of the stylesheet
@@ -417,7 +406,6 @@ FontFaceSet::StartLoad(gfxUserFontEntry* aUserFontEntry,
                              aUserFontEntry->GetPrincipal(),
                              nsILoadInfo::SEC_NORMAL,
                              nsIContentPolicy::TYPE_FONT,
-                             channelPolicy,
                              loadGroup);
 
   NS_ENSURE_SUCCESS(rv, rv);
@@ -1154,17 +1142,6 @@ FontFaceSet::SyncLoadFontData(gfxUserFontEntry* aFontToLoad,
   nsresult rv;
 
   nsCOMPtr<nsIChannel> channel;
-  // get Content Security Policy from principal to pass into channel
-  nsCOMPtr<nsIChannelPolicy> channelPolicy;
-  nsCOMPtr<nsIContentSecurityPolicy> csp;
-  rv = aFontToLoad->GetPrincipal()->GetCsp(getter_AddRefs(csp));
-  NS_ENSURE_SUCCESS(rv, rv);
-  if (csp) {
-    channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-    channelPolicy->SetContentSecurityPolicy(csp);
-    channelPolicy->SetLoadType(nsIContentPolicy::TYPE_FONT);
-  }
-
   nsIPresShell* ps = mPresContext->PresShell();
   if (!ps) {
     return NS_ERROR_FAILURE;
@@ -1178,8 +1155,7 @@ FontFaceSet::SyncLoadFontData(gfxUserFontEntry* aFontToLoad,
                              ps->GetDocument(),
                              aFontToLoad->GetPrincipal(),
                              nsILoadInfo::SEC_NORMAL,
-                             nsIContentPolicy::TYPE_FONT,
-                             channelPolicy);
+                             nsIContentPolicy::TYPE_FONT);
 
   NS_ENSURE_SUCCESS(rv, rv);
 
diff --git a/layout/style/Loader.cpp b/layout/style/Loader.cpp
index 4e82b4e83b0..e0f480c2f6e 100644
--- a/layout/style/Loader.cpp
+++ b/layout/style/Loader.cpp
@@ -60,7 +60,6 @@
 #include "nsIDOMStyleSheet.h"
 #include "nsError.h"
 
-#include "nsIChannelPolicy.h"
 #include "nsIContentSecurityPolicy.h"
 
 #include "mozilla/dom/EncodingUtils.h"
@@ -1552,20 +1551,10 @@ Loader::LoadSheet(SheetLoadData* aLoadData, StyleSheetState aSheetState)
   mSyncCallback = true;
 #endif
   nsCOMPtr<nsILoadGroup> loadGroup;
-  // Content Security Policy information to pass into channel
-  nsCOMPtr<nsIChannelPolicy> channelPolicy;
   if (mDocument) {
     loadGroup = mDocument->GetDocumentLoadGroup();
     NS_ASSERTION(loadGroup,
                  "No loadgroup for stylesheet; onload will fire early");
-    nsCOMPtr<nsIContentSecurityPolicy> csp;
-    rv = mDocument->NodePrincipal()->GetCsp(getter_AddRefs(csp));
-    NS_ENSURE_SUCCESS(rv, rv);
-    if (csp) {
-      channelPolicy = do_CreateInstance("@mozilla.org/nschannelpolicy;1");
-      channelPolicy->SetContentSecurityPolicy(csp);
-      channelPolicy->SetLoadType(nsIContentPolicy::TYPE_STYLESHEET);
-    }
   }
 
   nsLoadFlags securityFlags = nsILoadInfo::SEC_NORMAL;
@@ -1584,7 +1573,6 @@ Loader::LoadSheet(SheetLoadData* aLoadData, StyleSheetState aSheetState)
                              requestingPrincipal,
                              securityFlags,
                              nsIContentPolicy::TYPE_STYLESHEET,
-                             channelPolicy,
                              loadGroup,
                              nullptr,   // aCallbacks
                              nsIChannel::LOAD_NORMAL |
diff --git a/modules/libjar/nsJARChannel.cpp b/modules/libjar/nsJARChannel.cpp
index 99230e00679..c712544f2d5 100644
--- a/modules/libjar/nsJARChannel.cpp
+++ b/modules/libjar/nsJARChannel.cpp
@@ -13,7 +13,6 @@
 #include "nsIPrefService.h"
 #include "nsIPrefBranch.h"
 #include "nsIViewSourceChannel.h"
-#include "nsChannelProperties.h"
 #include "nsContentUtils.h"
 #include "nsProxyRelease.h"
 
diff --git a/netwerk/base/public/moz.build b/netwerk/base/public/moz.build
index 9bde2151303..ecba3e7344b 100644
--- a/netwerk/base/public/moz.build
+++ b/netwerk/base/public/moz.build
@@ -29,7 +29,6 @@ XPIDL_SOURCES += [
     'nsICancelable.idl',
     'nsIChannel.idl',
     'nsIChannelEventSink.idl',
-    'nsIChannelPolicy.idl',
     'nsIChildChannel.idl',
     'nsIContentSniffer.idl',
     'nsICryptoFIPSInfo.idl',
@@ -137,8 +136,6 @@ EXPORTS += [
     'netCore.h',
     'nsASocketHandler.h',
     'nsAsyncRedirectVerifyHelper.h',
-    'nsChannelProperties.h',
-    'nsNetStrings.h',
     'nsNetUtil.h',
     'nsReadLine.h',
     'nsStreamListenerWrapper.h',
diff --git a/netwerk/base/public/nsChannelProperties.h b/netwerk/base/public/nsChannelProperties.h
deleted file mode 100644
index 0323a6f39d1..00000000000
--- a/netwerk/base/public/nsChannelProperties.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef nsChannelProperties_h__
-#define nsChannelProperties_h__
-
-#include "nsStringGlue.h"
-#ifdef IMPL_LIBXUL
-#include "nsNetStrings.h"
-#endif
-
-/**
- * @file
- * This file contains constants for properties channels can expose.
- * They can be accessed by using QueryInterface to access the nsIPropertyBag
- * or nsIPropertyBag2 interface on a channel and reading the value.
- */
-
-
-/**
- * Exists to allow content policy mechanism to function properly during channel
- * redirects.  Contains security contextual information about the load.
- * Type: nsIChannelPolicy
- */
-#define NS_CHANNEL_PROP_CHANNEL_POLICY_STR "channel-policy"
-
-#ifdef IMPL_LIBXUL
-#define NS_CHANNEL_PROP_CHANNEL_POLICY gNetStrings->kChannelPolicy
-#else
-#define NS_CHANNEL_PROP_CHANNEL_POLICY \
-  NS_LITERAL_STRING(NS_CHANNEL_PROP_CHANNEL_POLICY_STR)
-#endif
-
-#endif
diff --git a/netwerk/base/public/nsIChannelPolicy.idl b/netwerk/base/public/nsIChannelPolicy.idl
deleted file mode 100644
index 5894db08fcc..00000000000
--- a/netwerk/base/public/nsIChannelPolicy.idl
+++ /dev/null
@@ -1,29 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "nsISupports.idl"
-
-/**
- * A container for policy information to be used during channel creation.
- *
- * This interface exists to allow the content policy mechanism to function
- * properly during channel redirects.  Channels can be created with this
- * interface placed in the property bag and upon redirect, the interface can
- * be transferred from the old channel to the new channel.
- */
-[scriptable, uuid(18045e96-1afe-4162-837a-04691267158c)]
-interface nsIChannelPolicy : nsISupports
-{
-  /**
-   * Indicates what type of content is being loaded, e.g.
-   * nsIContentPolicy::TYPE_IMAGE
-   */
-  attribute unsigned long loadType;
-
-  /**
-   * A nsIContentSecurityPolicy object to determine if the load should
-   * be allowed.
-   */
-  attribute nsISupports contentSecurityPolicy;
-};
diff --git a/netwerk/base/public/nsNetStrings.h b/netwerk/base/public/nsNetStrings.h
deleted file mode 100644
index 653c34dc3f7..00000000000
--- a/netwerk/base/public/nsNetStrings.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef nsNetStrings_h__
-#define nsNetStrings_h__
-
-#include "nsLiteralString.h"
-
-/**
- * Class on which wide strings are available, to avoid constructing strings
- * wherever these strings are used.
- */
-class nsNetStrings {
-public:
-  nsNetStrings();
-
-  const nsLiteralString kChannelPolicy;
-};
-
-extern nsNetStrings* gNetStrings;
-
-
-#endif
diff --git a/netwerk/base/public/nsNetUtil.h b/netwerk/base/public/nsNetUtil.h
index a27d01f1961..60cf2e1ee7d 100644
--- a/netwerk/base/public/nsNetUtil.h
+++ b/netwerk/base/public/nsNetUtil.h
@@ -32,7 +32,6 @@
 #include "nsIIOService.h"
 #include "nsIServiceManager.h"
 #include "nsIChannel.h"
-#include "nsChannelProperties.h"
 #include "nsIInputStreamChannel.h"
 #include "nsITransport.h"
 #include "nsIStreamTransportService.h"
@@ -69,7 +68,6 @@
 #include "nsIWritablePropertyBag2.h"
 #include "nsIIDNService.h"
 #include "nsIChannelEventSink.h"
-#include "nsIChannelPolicy.h"
 #include "nsISocketProviderService.h"
 #include "nsISocketProvider.h"
 #include "nsIRedirectChannelRegistrar.h"
@@ -202,7 +200,6 @@ inline nsresult
 NS_NewChannelInternal(nsIChannel**           outChannel,
                       nsIURI*                aUri,
                       nsILoadInfo*           aLoadInfo,
-                      nsIChannelPolicy*      aChannelPolicy = nullptr,
                       nsILoadGroup*          aLoadGroup = nullptr,
                       nsIInterfaceRequestor* aCallbacks = nullptr,
                       nsLoadFlags            aLoadFlags = nsIRequest::LOAD_NORMAL,
@@ -236,14 +233,6 @@ NS_NewChannelInternal(nsIChannel**           outChannel,
     rv = channel->SetLoadFlags(aLoadFlags | (normalLoadFlags & nsIChannel::LOAD_REPLACE));
     NS_ENSURE_SUCCESS(rv, rv);
   }
-
-  if (aChannelPolicy) {
-    nsCOMPtr<nsIWritablePropertyBag2> props = do_QueryInterface(channel);
-    if (props) {
-      props->SetPropertyAsInterface(NS_CHANNEL_PROP_CHANNEL_POLICY, aChannelPolicy);
-    }
-  }
-
   channel->SetLoadInfo(aLoadInfo);
 
   // If we're sandboxed, make sure to clear any owner the channel
@@ -263,7 +252,6 @@ NS_NewChannelInternal(nsIChannel**           outChannel,
                       nsIPrincipal*          aRequestingPrincipal,
                       nsSecurityFlags        aSecurityFlags,
                       nsContentPolicyType    aContentPolicyType,
-                      nsIChannelPolicy*      aChannelPolicy = nullptr,
                       nsILoadGroup*          aLoadGroup = nullptr,
                       nsIInterfaceRequestor* aCallbacks = nullptr,
                       nsLoadFlags            aLoadFlags = nsIRequest::LOAD_NORMAL,
@@ -282,7 +270,6 @@ NS_NewChannelInternal(nsIChannel**           outChannel,
   return NS_NewChannelInternal(outChannel,
                                aUri,
                                loadInfo,
-                               aChannelPolicy,
                                aLoadGroup,
                                aCallbacks,
                                aLoadFlags,
@@ -295,7 +282,6 @@ NS_NewChannel(nsIChannel**           outChannel,
               nsINode*               aRequestingNode,
               nsSecurityFlags        aSecurityFlags,
               nsContentPolicyType    aContentPolicyType,
-              nsIChannelPolicy*      aChannelPolicy = nullptr,
               nsILoadGroup*          aLoadGroup = nullptr,
               nsIInterfaceRequestor* aCallbacks = nullptr,
               nsLoadFlags            aLoadFlags = nsIRequest::LOAD_NORMAL,
@@ -308,7 +294,6 @@ NS_NewChannel(nsIChannel**           outChannel,
                                aRequestingNode->NodePrincipal(),
                                aSecurityFlags,
                                aContentPolicyType,
-                               aChannelPolicy,
                                aLoadGroup,
                                aCallbacks,
                                aLoadFlags,
@@ -321,7 +306,6 @@ NS_NewChannel(nsIChannel**           outChannel,
               nsIPrincipal*          aRequestingPrincipal,
               nsSecurityFlags        aSecurityFlags,
               nsContentPolicyType    aContentPolicyType,
-              nsIChannelPolicy*      aChannelPolicy = nullptr,
               nsILoadGroup*          aLoadGroup = nullptr,
               nsIInterfaceRequestor* aCallbacks = nullptr,
               nsLoadFlags            aLoadFlags = nsIRequest::LOAD_NORMAL,
@@ -333,7 +317,6 @@ NS_NewChannel(nsIChannel**           outChannel,
                                aRequestingPrincipal,
                                aSecurityFlags,
                                aContentPolicyType,
-                               aChannelPolicy,
                                aLoadGroup,
                                aCallbacks,
                                aLoadFlags,
@@ -366,7 +349,6 @@ NS_OpenURIInternal(nsIInputStream**       outStream,
                                       aRequestingPrincipal,
                                       aSecurityFlags,
                                       aContentPolicyType,
-                                      nullptr,   // aChannelPolicy,
                                       aLoadGroup,
                                       aCallbacks,
                                       aLoadFlags,
@@ -423,7 +405,6 @@ NS_OpenURIInternal(nsIStreamListener*     aListener,
   nsresult rv = NS_NewChannelInternal(getter_AddRefs(channel),
                                       aUri,
                                       aLoadInfo,
-                                      nullptr,    // aChannelPolicy
                                       aLoadGroup,
                                       aCallbacks,
                                       aLoadFlags,
@@ -834,7 +815,6 @@ NS_NewStreamLoaderInternal(nsIStreamLoader**        outStream,
                                        aRequestingPrincipal,
                                        aSecurityFlags,
                                        aContentPolicyType,
-                                       nullptr,  // aChannelPolicy
                                        aLoadGroup,
                                        aCallbacks,
                                        aLoadFlags);
diff --git a/netwerk/base/src/moz.build b/netwerk/base/src/moz.build
index 0fb2fc3a97c..9539524bbaf 100644
--- a/netwerk/base/src/moz.build
+++ b/netwerk/base/src/moz.build
@@ -47,7 +47,6 @@ UNIFIED_SOURCES += [
     'nsMediaFragmentURIParser.cpp',
     'nsMIMEInputStream.cpp',
     'nsNetAddr.cpp',
-    'nsNetStrings.cpp',
     'nsNetUtil.cpp',
     'nsPACMan.cpp',
     'nsPreloadedStream.cpp',
diff --git a/netwerk/base/src/nsIncrementalDownload.cpp b/netwerk/base/src/nsIncrementalDownload.cpp
index 5859f50b1d0..d13a81213ab 100644
--- a/netwerk/base/src/nsIncrementalDownload.cpp
+++ b/netwerk/base/src/nsIncrementalDownload.cpp
@@ -267,7 +267,6 @@ nsIncrementalDownload::ProcessTimeout()
                               nsContentUtils::GetSystemPrincipal(),
                               nsILoadInfo::SEC_NORMAL,
                               nsIContentPolicy::TYPE_OTHER,
-                              nullptr,   // aChannelPolicy
                               nullptr,   // loadGroup
                               this,      // aCallbacks
                               mLoadFlags);
diff --git a/netwerk/base/src/nsNetStrings.cpp b/netwerk/base/src/nsNetStrings.cpp
deleted file mode 100644
index cd3ebfac303..00000000000
--- a/netwerk/base/src/nsNetStrings.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "nsNetStrings.h"
-#include "nsChannelProperties.h"
-
-nsNetStrings* gNetStrings;
-
-nsNetStrings::nsNetStrings()
-  : NS_LITERAL_STRING_INIT(kChannelPolicy, NS_CHANNEL_PROP_CHANNEL_POLICY_STR)
-{}
-
-
diff --git a/netwerk/build/nsNetModule.cpp b/netwerk/build/nsNetModule.cpp
index 51e20fb53d4..070762956e7 100644
--- a/netwerk/build/nsNetModule.cpp
+++ b/netwerk/build/nsNetModule.cpp
@@ -31,7 +31,6 @@
 #include "nsApplicationCache.h"
 #include "nsApplicationCacheService.h"
 #include "nsMimeTypes.h"
-#include "nsNetStrings.h"
 #include "nsDNSPrefetch.h"
 #include "nsAboutProtocolHandler.h"
 #include "nsXULAppAPI.h"
@@ -630,11 +629,9 @@ CreateNewBinaryDetectorFactory(nsISupports *aOuter, REFNSIID aIID, void **aResul
 // Net module startup hook
 static nsresult nsNetStartup()
 {
-    gNetStrings = new nsNetStrings();
-    return gNetStrings ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
+    return NS_OK;
 }
 
-
 // Net module shutdown hook
 static void nsNetShutdown()
 {
@@ -647,10 +644,6 @@ static void nsNetShutdown()
     net_ShutdownURLHelperOSX();
 #endif
     
-    // Release necko strings
-    delete gNetStrings;
-    gNetStrings = nullptr;
-    
     // Release DNS service reference.
     nsDNSPrefetch::Shutdown();
 
diff --git a/netwerk/protocol/ftp/FTPChannelParent.cpp b/netwerk/protocol/ftp/FTPChannelParent.cpp
index 3d94e6c1688..1b140c2dcde 100644
--- a/netwerk/protocol/ftp/FTPChannelParent.cpp
+++ b/netwerk/protocol/ftp/FTPChannelParent.cpp
@@ -132,7 +132,6 @@ FTPChannelParent::DoAsyncOpen(const URIParams& aURI,
                      requestingPrincipal,
                      aSecurityFlags,
                      aContentPolicyType,
-                     nullptr, // aChannelPolicy
                      nullptr, // aLoadGroup
                      nullptr, // aCallbacks
                      nsIRequest::LOAD_NORMAL,
diff --git a/netwerk/protocol/http/HttpChannelParent.cpp b/netwerk/protocol/http/HttpChannelParent.cpp
index 6b3bee11619..3a13ba0442b 100644
--- a/netwerk/protocol/http/HttpChannelParent.cpp
+++ b/netwerk/protocol/http/HttpChannelParent.cpp
@@ -219,7 +219,6 @@ HttpChannelParent::DoAsyncOpen(  const URIParams&           aURI,
                      requestingPrincipal,
                      aSecurityFlags,
                      aContentPolicyType,
-                     nullptr,   // aChannelPolicy
                      nullptr,   // loadGroup
                      nullptr,   // aCallbacks
                      loadFlags,
diff --git a/netwerk/protocol/wyciwyg/WyciwygChannelParent.cpp b/netwerk/protocol/wyciwyg/WyciwygChannelParent.cpp
index 41194d23370..474a0e5955a 100644
--- a/netwerk/protocol/wyciwyg/WyciwygChannelParent.cpp
+++ b/netwerk/protocol/wyciwyg/WyciwygChannelParent.cpp
@@ -92,7 +92,6 @@ WyciwygChannelParent::RecvInit(const URIParams&          aURI,
                      requestingPrincipal,
                      aSecurityFlags,
                      aContentPolicyType,
-                     nullptr,   // aChannelPolicy
                      nullptr,   // loadGroup
                      nullptr,   // aCallbacks
                      nsIRequest::LOAD_NORMAL,
diff --git a/netwerk/test/TestPageLoad.cpp b/netwerk/test/TestPageLoad.cpp
index 6402b0e6cb6..fe5002a2019 100644
--- a/netwerk/test/TestPageLoad.cpp
+++ b/netwerk/test/TestPageLoad.cpp
@@ -312,7 +312,6 @@ nsresult auxLoad(char *uriBuf)
                        systemPrincipal,
                        nsILoadInfo::SEC_NORMAL,
                        nsIContentPolicy::TYPE_OTHER,
-                       nullptr,   // aChannelPolicy
                        nullptr,   // loadGroup
                        callbacks);
 
@@ -371,7 +370,6 @@ int main(int argc, char **argv)
                            systemPrincipal,
                            nsILoadInfo::SEC_NORMAL,
                            nsIContentPolicy::TYPE_OTHER,
-                           nullptr,   // aChannelPolicy
                            nullptr,   // loadGroup
                            callbacks);
 
diff --git a/netwerk/test/TestProtocols.cpp b/netwerk/test/TestProtocols.cpp
index 3e147adecd5..ec41b840e8f 100644
--- a/netwerk/test/TestProtocols.cpp
+++ b/netwerk/test/TestProtocols.cpp
@@ -49,7 +49,6 @@
 #include "nsIPropertyBag2.h"
 #include "nsIWritablePropertyBag2.h"
 #include "nsITimedChannel.h"
-#include "nsChannelProperties.h"
 #include "mozilla/Attributes.h"
 #include "mozilla/unused.h"
 #include "nsIScriptSecurityManager.h"
@@ -644,7 +643,6 @@ nsresult StartLoadingURL(const char* aUrlString)
                            systemPrincipal,
                            nsILoadInfo::SEC_NORMAL,
                            nsIContentPolicy::TYPE_OTHER,
-                           nullptr,  // aChannelPolicy
                            nullptr,  // loadGroup
                            callbacks,
                            nsIRequest::LOAD_NORMAL,
diff --git a/toolkit/components/downloads/nsDownloadManager.cpp b/toolkit/components/downloads/nsDownloadManager.cpp
index b17ab187aef..4c67e4969c9 100644
--- a/toolkit/components/downloads/nsDownloadManager.cpp
+++ b/toolkit/components/downloads/nsDownloadManager.cpp
@@ -3534,7 +3534,6 @@ nsDownload::Resume()
                      nsContentUtils::GetSystemPrincipal(),
                      nsILoadInfo::SEC_NORMAL,
                      nsIContentPolicy::TYPE_OTHER,
-                     nullptr,  // aChannelPolicy
                      nullptr,  // aLoadGroup
                      ir);
 
diff --git a/toolkit/components/url-classifier/nsUrlClassifierStreamUpdater.cpp b/toolkit/components/url-classifier/nsUrlClassifierStreamUpdater.cpp
index 77aaa0bbf56..731070ed997 100644
--- a/toolkit/components/url-classifier/nsUrlClassifierStreamUpdater.cpp
+++ b/toolkit/components/url-classifier/nsUrlClassifierStreamUpdater.cpp
@@ -99,7 +99,6 @@ nsUrlClassifierStreamUpdater::FetchUpdate(nsIURI *aUpdateUrl,
                      nsContentUtils::GetSystemPrincipal(),
                      nsILoadInfo::SEC_NORMAL,
                      nsIContentPolicy::TYPE_OTHER,
-                     nullptr,  // aChannelPolicy
                      nullptr,  // aLoadGroup
                      this,     // aInterfaceRequestor
                      loadFlags);
diff --git a/uriloader/exthandler/nsExternalHelperAppService.cpp b/uriloader/exthandler/nsExternalHelperAppService.cpp
index 85475533bc9..9d3a133fc54 100644
--- a/uriloader/exthandler/nsExternalHelperAppService.cpp
+++ b/uriloader/exthandler/nsExternalHelperAppService.cpp
@@ -57,7 +57,6 @@
 #include "nsNetUtil.h"
 #include "nsIIOService.h"
 #include "nsNetCID.h"
-#include "nsChannelProperties.h"
 
 #include "nsMimeTypes.h"
 // used for header disposition information.
diff --git a/uriloader/prefetch/nsOfflineCacheUpdate.cpp b/uriloader/prefetch/nsOfflineCacheUpdate.cpp
index 30529630920..769f188e659 100644
--- a/uriloader/prefetch/nsOfflineCacheUpdate.cpp
+++ b/uriloader/prefetch/nsOfflineCacheUpdate.cpp
@@ -185,7 +185,6 @@ nsManifestCheck::Begin()
                        nsContentUtils::GetSystemPrincipal(),
                        nsILoadInfo::SEC_NORMAL,
                        nsIContentPolicy::TYPE_OTHER,
-                       nullptr,   // aChannelPolicy
                        nullptr,   // loadGroup
                        nullptr,   // aCallbacks
                        nsIRequest::LOAD_BYPASS_CACHE);
@@ -383,7 +382,6 @@ nsOfflineCacheUpdateItem::OpenChannel(nsOfflineCacheUpdate *aUpdate)
                        nsContentUtils::GetSystemPrincipal(),
                        nsILoadInfo::SEC_NORMAL,
                        nsIContentPolicy::TYPE_OTHER,
-                       nullptr,  // aChannelPolicy
                        nullptr,  // aLoadGroup
                        this,     // aCallbacks
                        flags);
diff --git a/uriloader/prefetch/nsPrefetchService.cpp b/uriloader/prefetch/nsPrefetchService.cpp
index ec5a601dd00..71d04d8dbcf 100644
--- a/uriloader/prefetch/nsPrefetchService.cpp
+++ b/uriloader/prefetch/nsPrefetchService.cpp
@@ -192,7 +192,6 @@ nsPrefetchNode::OpenChannel()
                                 nsContentUtils::GetSystemPrincipal(),
                                 nsILoadInfo::SEC_NORMAL,
                                 nsIContentPolicy::TYPE_OTHER,
-                                nullptr,  // aChannelPolicy
                                 loadGroup, // aLoadGroup
                                 this,      // aCallbacks
                                 nsIRequest::LOAD_BACKGROUND |
diff --git a/widget/cocoa/OSXNotificationCenter.mm b/widget/cocoa/OSXNotificationCenter.mm
index 43e44db0d6f..943658e96f4 100644
--- a/widget/cocoa/OSXNotificationCenter.mm
+++ b/widget/cocoa/OSXNotificationCenter.mm
@@ -244,7 +244,7 @@ OSXNotificationCenter::ShowAlertNotification(const nsAString & aImageUrl, const
       if (imageUri) {
         nsresult rv = il->LoadImage(imageUri, nullptr, nullptr, aPrincipal, nullptr,
                                     this, nullptr, nsIRequest::LOAD_NORMAL, nullptr,
-                                    nullptr, EmptyString(),
+                                    EmptyString(),
                                     getter_AddRefs(osxni->mIconRequest));
         if (NS_SUCCEEDED(rv)) {
           // Set a timer for six seconds. If we don't have an icon by the time this
diff --git a/widget/cocoa/nsMenuItemIconX.mm b/widget/cocoa/nsMenuItemIconX.mm
index 9ad01949f06..1150d9fc798 100644
--- a/widget/cocoa/nsMenuItemIconX.mm
+++ b/widget/cocoa/nsMenuItemIconX.mm
@@ -305,11 +305,9 @@ nsMenuItemIconX::LoadIcon(nsIURI* aIconURI)
       [mNativeMenuItem setImage:sPlaceholderIconImage];
   }
 
-  // Passing in null for channelPolicy here since nsMenuItemIconX::LoadIcon is
-  // not exposed to web content
   nsresult rv = loader->LoadImage(aIconURI, nullptr, nullptr, nullptr, loadGroup, this,
-                                   nullptr, nsIRequest::LOAD_NORMAL, nullptr,
-                                   nullptr, EmptyString(), getter_AddRefs(mIconRequest));
+                                  nullptr, nsIRequest::LOAD_NORMAL, nullptr,
+                                  EmptyString(), getter_AddRefs(mIconRequest));
   if (NS_FAILED(rv)) return rv;
 
   // We need to request the icon be decoded (bug 573583, bug 705516).
diff --git a/widget/windows/nsDataObj.cpp b/widget/windows/nsDataObj.cpp
index adb04267ac6..67196ca6e1e 100644
--- a/widget/windows/nsDataObj.cpp
+++ b/widget/windows/nsDataObj.cpp
@@ -74,7 +74,6 @@ nsresult nsDataObj::CStream::Init(nsIURI *pSourceURI,
                      aRequestingNode,
                      nsILoadInfo::SEC_NORMAL,
                      nsIContentPolicy::TYPE_OTHER,
-                     nullptr,   // aChannelPolicy
                      nullptr,   // loadGroup
                      nullptr,   // aCallbacks
                      nsIRequest::LOAD_FROM_CACHE);
diff --git a/xpfe/components/directory/nsDirectoryViewer.cpp b/xpfe/components/directory/nsDirectoryViewer.cpp
index 8cf610b8ba9..b54fde367d8 100644
--- a/xpfe/components/directory/nsDirectoryViewer.cpp
+++ b/xpfe/components/directory/nsDirectoryViewer.cpp
@@ -1305,7 +1305,6 @@ nsDirectoryViewerFactory::CreateInstance(const char *aCommand,
                        nsContentUtils::GetSystemPrincipal(),
                        nsILoadInfo::SEC_NORMAL,
                        nsIContentPolicy::TYPE_OTHER,
-                       nullptr, // aChannelPolicy
                        aLoadGroup);
     if (NS_FAILED(rv)) return rv;
     

From c1b93529ad747e4bcd2b40a347d6c118b2f3e618 Mon Sep 17 00:00:00 2001
From: Christoph Kerschbaumer 
Date: Thu, 2 Oct 2014 23:34:53 -0700
Subject: [PATCH 067/146] Bug 1041180: Remove deprecated nsIChannelPolicy and
 use LoadInfo instead (r=sstamm)

---
 content/base/src/nsCSPService.cpp | 103 ++++++++++++------------------
 1 file changed, 40 insertions(+), 63 deletions(-)

diff --git a/content/base/src/nsCSPService.cpp b/content/base/src/nsCSPService.cpp
index 64e1a8dcd7e..0683e78db0d 100644
--- a/content/base/src/nsCSPService.cpp
+++ b/content/base/src/nsCSPService.cpp
@@ -18,6 +18,7 @@
 #include "mozilla/Preferences.h"
 #include "nsIScriptError.h"
 #include "nsContentUtils.h"
+#include "nsContentPolicyUtils.h"
 #include "nsPrincipal.h"
 
 using namespace mozilla;
@@ -231,53 +232,56 @@ CSPService::AsyncOnChannelRedirect(nsIChannel *oldChannel,
 {
   nsAsyncRedirectAutoCallback autoCallback(callback);
 
-  // get the Content Security Policy and load type from the property bag
-  nsCOMPtr policyContainer;
-  nsCOMPtr props(do_QueryInterface(oldChannel));
-  if (!props)
+  nsCOMPtr loadInfo;
+  nsresult rv = oldChannel->GetLoadInfo(getter_AddRefs(loadInfo));
+
+  // if no loadInfo on the channel, nothing for us to do
+  if (!loadInfo) {
     return NS_OK;
+  }
 
-  props->GetPropertyAsInterface(NS_CHANNEL_PROP_CHANNEL_POLICY,
-                                NS_GET_IID(nsISupports),
-                                getter_AddRefs(policyContainer));
+  // The loadInfo does not necessarily contain a Node, hence we try to query
+  // the CSP in the following order:
+  //   a) Get the Node, the Principal of that Node, and the CSP of that Principal
+  //   b) Get the Principal and the CSP of that Principal
 
-  // see if we have a valid nsIChannelPolicy containing CSP and load type
-  nsCOMPtr channelPolicy(do_QueryInterface(policyContainer));
-  if (!channelPolicy)
-    return NS_OK;
-
-  nsCOMPtr supports;
+  nsCOMPtr loadingNode = loadInfo->LoadingNode();
+  nsCOMPtr principal = loadingNode ?
+                                     loadingNode->NodePrincipal() :
+                                     loadInfo->LoadingPrincipal();
+  NS_ASSERTION(principal, "Can not evaluate CSP without a principal");
   nsCOMPtr csp;
-  channelPolicy->GetContentSecurityPolicy(getter_AddRefs(supports));
-  csp = do_QueryInterface(supports);
-  uint32_t loadType;
-  channelPolicy->GetLoadType(&loadType);
+  rv = principal->GetCsp(getter_AddRefs(csp));
+  NS_ENSURE_SUCCESS(rv, rv);
 
-  // if no CSP in the channelPolicy, nothing for us to add to the channel
-  if (!csp)
+  // if there is no CSP, nothing for us to do
+  if (!csp) {
     return NS_OK;
+  }
 
   /* Since redirecting channels don't call into nsIContentPolicy, we call our
-   * Content Policy implementation directly when redirects occur. When channels
-   * are created using NS_NewChannel(), callers can optionally pass in a
-   * nsIChannelPolicy containing a CSP object and load type, which is placed in
-   * the new channel's property bag. This container is propagated forward when
-   * channels redirect.
+   * Content Policy implementation directly when redirects occur using the
+   * information set in the LoadInfo when channels are created.
+   *
+   * We check whether the CSP permits this host for this type of load; if not,
+   * we cancel the load now.
    */
 
-  // Does the CSP permit this host for this type of load?
-  // If not, cancel the load now.
   nsCOMPtr newUri;
-  newChannel->GetURI(getter_AddRefs(newUri));
+  rv = newChannel->GetURI(getter_AddRefs(newUri));
+  NS_ENSURE_SUCCESS(rv, rv);
   nsCOMPtr originalUri;
-  oldChannel->GetOriginalURI(getter_AddRefs(originalUri));
+  rv = oldChannel->GetOriginalURI(getter_AddRefs(originalUri));
+  NS_ENSURE_SUCCESS(rv, rv);
+  nsContentPolicyType policyType = loadInfo->GetContentPolicyType();
+
   int16_t aDecision = nsIContentPolicy::ACCEPT;
-  csp->ShouldLoad(loadType,        // load type per nsIContentPolicy (uint32_t)
-                  newUri,          // nsIURI
-                  nullptr,          // nsIURI
-                  nullptr,          // nsISupports
-                  EmptyCString(),  // ACString - MIME guess
-                  originalUri,     // nsISupports - extra
+  csp->ShouldLoad(policyType,     // load type per nsIContentPolicy (uint32_t)
+                  newUri,         // nsIURI
+                  nullptr,        // nsIURI
+                  nullptr,        // nsISupports
+                  EmptyCString(), // ACString - MIME guess
+                  originalUri,    // nsISupports - extra
                   &aDecision);
 
 #ifdef PR_LOGGING
@@ -297,36 +301,9 @@ CSPService::AsyncOnChannelRedirect(nsIChannel *oldChannel,
 #endif
 
   // if ShouldLoad doesn't accept the load, cancel the request
-  if (aDecision != 1) {
+  if (!NS_CP_ACCEPTED(aDecision)) {
     autoCallback.DontCallback();
     return NS_BINDING_FAILED;
   }
-
-  // the redirect is permitted, so propagate the Content Security Policy
-  // and load type to the redirecting channel
-  nsresult rv;
-  nsCOMPtr props2 = do_QueryInterface(newChannel);
-  if (props2) {
-    rv = props2->SetPropertyAsInterface(NS_CHANNEL_PROP_CHANNEL_POLICY,
-                                        channelPolicy);
-    if (NS_SUCCEEDED(rv)) {
-      return NS_OK;
-    }
-  }
-
-  // The redirecting channel isn't a writable property bag, we won't be able
-  // to enforce the load policy if it redirects again, so we stop it now.
-  nsAutoCString newUriSpec;
-  rv = newUri->GetSpec(newUriSpec);
-  NS_ConvertUTF8toUTF16 unicodeSpec(newUriSpec);
-  const char16_t *formatParams[] = { unicodeSpec.get() };
-  if (NS_SUCCEEDED(rv)) {
-    nsContentUtils::ReportToConsole(nsIScriptError::warningFlag,
-                                    NS_LITERAL_CSTRING("Redirect Error"), nullptr,
-                                    nsContentUtils::eDOM_PROPERTIES,
-                                    "InvalidRedirectChannelWarning",
-                                    formatParams, 1);
-  }
-
-  return NS_BINDING_FAILED;
+  return NS_OK;
 }

From 7b359f2871c75d102ace369e1a0dd65f802178b0 Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:49 +0200
Subject: [PATCH 068/146] Bug 1065185 - Fix permissions tests. r=bz

---
 dom/permission/tests/file_framework.js | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/dom/permission/tests/file_framework.js b/dom/permission/tests/file_framework.js
index e05d479946a..9030be02c88 100644
--- a/dom/permission/tests/file_framework.js
+++ b/dom/permission/tests/file_framework.js
@@ -168,11 +168,9 @@ function expandPermissions(aPerms) {
   var perms = [];
   aPerms.forEach(function(el) {
     var access = permTable[el].access ? "readwrite" : null;
-    var expanded = SpecialPowers.unwrap(expand(el, access));
-    // COW arrays don't behave array-like enough, to allow
-    // using expanded.slice(0) here.
+    var expanded = expand(el, access);
     for (let i = 0; i < expanded.length; i++) {
-      perms.push(expanded[i]);
+      perms.push(SpecialPowers.unwrap(expanded[i]));
     }
   });
 

From 2aa842b0f2ad1e58208518609b115cc55b283850 Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:49 +0200
Subject: [PATCH 069/146] Bug 1065185 - Fix crash IPC tests. r=bz

---
 dom/ipc/tests/test_CrashService_crash.html | 6 +++---
 dom/plugins/test/mochitest/utils.js        | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/dom/ipc/tests/test_CrashService_crash.html b/dom/ipc/tests/test_CrashService_crash.html
index ce94119bc54..eb9afcf0496 100644
--- a/dom/ipc/tests/test_CrashService_crash.html
+++ b/dom/ipc/tests/test_CrashService_crash.html
@@ -55,10 +55,10 @@ SpecialPowers.pushPrefEnv({'set':[
     // Finally, poll for the new crash record.
     function tryGetCrash() {
       info("Waiting for getCrashes");
-      crashMan.getCrashes().then(function (crashes) {
+      crashMan.getCrashes().then(SpecialPowers.wrapCallback(function (crashes) {
         if (crashes.length) {
           is(crashes.length, 1, "There should be only one record");
-          var crash = SpecialPowers.wrap(crashes[0]);
+          var crash = crashes[0];
           ok(crash.isOfType(crashMan.PROCESS_TYPE_CONTENT,
                             crashMan.CRASH_TYPE_CRASH),
              "Record should be a content crash");
@@ -75,7 +75,7 @@ SpecialPowers.pushPrefEnv({'set':[
         else {
           setTimeout(tryGetCrash, 1000);
         }
-      }, function (err) {
+      }), function (err) {
         ok(false, "Error getting crashes: " + err);
         SimpleTest.finish();
       });
diff --git a/dom/plugins/test/mochitest/utils.js b/dom/plugins/test/mochitest/utils.js
index 2d7299cb69a..a20ec57d022 100644
--- a/dom/plugins/test/mochitest/utils.js
+++ b/dom/plugins/test/mochitest/utils.js
@@ -69,7 +69,7 @@ function crashAndGetCrashServiceRecord(crashMethodName, callback) {
     // the new record.
     function tryGetCrash() {
       info("Waiting for getCrashes");
-      crashMan.getCrashes().then(function (crashes) {
+      crashMan.getCrashes().then(SpecialPowers.wrapCallback(function (crashes) {
         if (crashes.length) {
           is(crashes.length, 1, "There should be only one record");
           var crash = SpecialPowers.wrap(crashes[0]);
@@ -86,7 +86,7 @@ function crashAndGetCrashServiceRecord(crashMethodName, callback) {
         else {
           setTimeout(tryGetCrash, 1000);
         }
-      }, function (err) {
+      }), function (err) {
         ok(false, "Error getting crashes: " + err);
         SimpleTest.finish();
       });

From 9a234aeb717afc3fed371c30d0e5bb506fff6ddd Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:50 +0200
Subject: [PATCH 070/146] Bug 1065185 - Fix extension manager test. r=bz

---
 .../extensions/test/mochitest/test_bug687194.html      | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/toolkit/mozapps/extensions/test/mochitest/test_bug687194.html b/toolkit/mozapps/extensions/test/mochitest/test_bug687194.html
index 010b8b6f3c8..8f99ea73a2a 100644
--- a/toolkit/mozapps/extensions/test/mochitest/test_bug687194.html
+++ b/toolkit/mozapps/extensions/test/mochitest/test_bug687194.html
@@ -60,11 +60,11 @@
         "http://mochi.test:8888/tests/toolkit/mozapps/extensions/test/mochitest/file_bug687194.xpi"
       AddonManager.getInstallForURL(INSTALL_URI, (install) => {
         install = SpecialPowers.wrap(install);
-        install.addListener({
+        install.addListener(SpecialPowers.wrapCallbackObject({
           onInstallEnded: function(install, addon) {
             SimpleTest.executeSoon(() => test.next(addon));
           }
-        });
+        }));
         install.install();
       }, "application/x-xpinstall");
 
@@ -75,14 +75,14 @@
       lastResult = yield;
       is(lastResult.threw, false, "able to resolve after the installation");
 
-      let listener = {
+      let listener = SpecialPowers.wrapCallbackObject({
         onUninstalled: function(removedAddon) {
-          if (SpecialPowers.wrap(removedAddon).id === addon.id) {
+          if (removedAddon.id === addon.id) {
             AddonManager.removeAddonListener(listener);
             SimpleTest.executeSoon(() => test.next());
           }
         }
-      };
+      });
       AddonManager.addAddonListener(listener);
       addon.uninstall();
 

From 32d45fdbb54397f8acd0e71e0cc0d3f9c12944c4 Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:50 +0200
Subject: [PATCH 071/146] Bug 1065185 - Fix devtools actors. r=bz

See browser/devtools/canvasdebugger/test/browser_canvas-actor-test-02.js.
And note that we need to re-waive the result, otherwise we get failures in
browser/devtools/canvasdebugger/test/browser_canvas-actor-test-10.js.

See browser/devtools/shadereditor/test/browser_se_editors-contents.js for the
WebGL stuff.
---
 toolkit/devtools/server/actors/call-watcher.js | 14 +++++++++-----
 toolkit/devtools/server/actors/webgl.js        |  4 +++-
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/toolkit/devtools/server/actors/call-watcher.js b/toolkit/devtools/server/actors/call-watcher.js
index b8483858af4..990858480d7 100644
--- a/toolkit/devtools/server/actors/call-watcher.js
+++ b/toolkit/devtools/server/actors/call-watcher.js
@@ -412,11 +412,13 @@ let CallWatcherActor = exports.CallWatcherActor = protocol.ActorClass({
      * Instruments a function on the specified target object.
      */
     function overrideFunction(global, target, name, descriptor, callback) {
-      let originalFunc = target[name];
+      // Invoking .apply on an unxrayed content function doesn't work, because
+      // the arguments array is inaccessible to it. Get Xrays back.
+      let originalFunc = Cu.unwaiveXrays(target[name]);
 
       Object.defineProperty(target, name, {
         value: function(...args) {
-          let result = originalFunc.apply(this, args);
+          let result = Cu.waiveXrays(originalFunc.apply(this, args));
 
           if (self._recording) {
             let stack = getStack(name);
@@ -435,13 +437,15 @@ let CallWatcherActor = exports.CallWatcherActor = protocol.ActorClass({
      * Instruments a getter or setter on the specified target object.
      */
     function overrideAccessor(global, target, name, descriptor, callback) {
-      let originalGetter = target.__lookupGetter__(name);
-      let originalSetter = target.__lookupSetter__(name);
+      // Invoking .apply on an unxrayed content function doesn't work, because
+      // the arguments array is inaccessible to it. Get Xrays back.
+      let originalGetter = Cu.unwaiveXrays(target.__lookupGetter__(name));
+      let originalSetter = Cu.unwaiveXrays(target.__lookupSetter__(name));
 
       Object.defineProperty(target, name, {
         get: function(...args) {
           if (!originalGetter) return undefined;
-          let result = originalGetter.apply(this, args);
+          let result = Cu.waiveXrays(originalGetter.apply(this, args));
 
           if (self._recording) {
             let stack = getStack(name);
diff --git a/toolkit/devtools/server/actors/webgl.js b/toolkit/devtools/server/actors/webgl.js
index afeeb0d32d0..21e9165b08d 100644
--- a/toolkit/devtools/server/actors/webgl.js
+++ b/toolkit/devtools/server/actors/webgl.js
@@ -492,7 +492,9 @@ let WebGLInstrumenter = {
         if (glBreak) return undefined;
       }
 
-      let glResult = originalFunc.apply(this, glArgs);
+      // Invoking .apply on an unxrayed content function doesn't work, because
+      // the arguments array is inaccessible to it. Get Xrays back.
+      let glResult = Cu.waiveXrays(Cu.unwaiveXrays(originalFunc).apply(this, glArgs));
 
       if (timing >= 0 && !observer.suppressHandlers) {
         let glBreak = observer[afterFuncName](glArgs, glResult, cache, proxy);

From 8dde4796dcbfe6929e5cf31c9dbe60beef0506fc Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:50 +0200
Subject: [PATCH 072/146] Bug 1065185 - Explicitly expose indexed properties in
 COW array test. r=bz

---
 js/xpconnect/tests/chrome/test_bug760109.xul | 6 +++++-
 js/xpconnect/tests/unit/test_bug853709.js    | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/js/xpconnect/tests/chrome/test_bug760109.xul b/js/xpconnect/tests/chrome/test_bug760109.xul
index d8045d46452..b71f094056c 100644
--- a/js/xpconnect/tests/chrome/test_bug760109.xul
+++ b/js/xpconnect/tests/chrome/test_bug760109.xul
@@ -80,7 +80,11 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=760109
   const Cu = Components.utils;
   var sb = new Cu.Sandbox('http://www.example.org');
   sb.chromeArray = ['a', 'b', 'z'];
-  sb.chromeArray.__exposedProps__ = {};
+  sb.chromeArray.__exposedProps__ = { length: 'rw' };
+  for (var i = 0; i < 10; ++i) {
+    sb.chromeArray.__exposedProps__[i] = 'rw';
+  }
+
   sb.chromeObject = new SomeConstructor();
   sb.ok = ok;
   sb.is = is;
diff --git a/js/xpconnect/tests/unit/test_bug853709.js b/js/xpconnect/tests/unit/test_bug853709.js
index 502c656425e..6e80759ae54 100644
--- a/js/xpconnect/tests/unit/test_bug853709.js
+++ b/js/xpconnect/tests/unit/test_bug853709.js
@@ -7,6 +7,7 @@ function setupChromeSandbox() {
                                           get: function() { return _b; },
                                           set: function(val) { _b = val; } });
   this.chromeArr = [4, 2, 1];
+  this.chromeArr["__exposedProps__"] = { "1": "rw" };
 }
 
 function checkDefineThrows(sb, obj, prop, desc) {

From 06a2906a383e83fdced076c4991ed1be246818a3 Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:50 +0200
Subject: [PATCH 073/146] Bug 1065185 - Stop using Promise.jsm over
 SpecialPowers in marionette tests. r=bz

---
 dom/bluetooth/tests/marionette/head.js     | 11 +++++++++--
 dom/bluetooth2/tests/marionette/head.js    | 11 +++++++++--
 dom/cellbroadcast/tests/marionette/head.js | 10 +++++++++-
 dom/mobilemessage/tests/marionette/head.js | 10 +++++++++-
 dom/telephony/test/marionette/head.js      | 11 ++++++++++-
 dom/tethering/tests/marionette/head.js     | 11 +++++++++--
 dom/voicemail/test/marionette/head.js      | 10 +++++++++-
 dom/wifi/test/marionette/head.js           | 10 +++++++++-
 8 files changed, 73 insertions(+), 11 deletions(-)

diff --git a/dom/bluetooth/tests/marionette/head.js b/dom/bluetooth/tests/marionette/head.js
index 893ee78c060..65fd7828a73 100644
--- a/dom/bluetooth/tests/marionette/head.js
+++ b/dom/bluetooth/tests/marionette/head.js
@@ -43,8 +43,15 @@ const BT_PAIRING_REQ = "bluetooth-pairing-request";
 const BT_PAIRING_PASSKEY = 123456;
 const BT_PAIRING_PINCODE = "ABCDEFG";
 
-let Promise =
-  SpecialPowers.Cu.import("resource://gre/modules/Promise.jsm").Promise;
+// Emulate Promise.jsm semantics.
+Promise.defer = function() { return new Deferred(); }
+function Deferred()  {
+  this.promise = new Promise(function(resolve, reject) {
+    this.resolve = resolve;
+    this.reject = reject;
+  }.bind(this));
+  Object.freeze(this);
+}
 
 let bluetoothManager;
 
diff --git a/dom/bluetooth2/tests/marionette/head.js b/dom/bluetooth2/tests/marionette/head.js
index 07f456e6e8e..ed5c5abe169 100644
--- a/dom/bluetooth2/tests/marionette/head.js
+++ b/dom/bluetooth2/tests/marionette/head.js
@@ -36,8 +36,15 @@ const BDADDR_LOCAL = "ff:ff:ff:00:00:00";
 // A user friendly name for remote BT device.
 const REMOTE_DEVICE_NAME = "Remote_BT_Device";
 
-let Promise =
-  SpecialPowers.Cu.import("resource://gre/modules/Promise.jsm").Promise;
+// Emulate Promise.jsm semantics.
+Promise.defer = function() { return new Deferred(); }
+function Deferred()  {
+  this.promise = new Promise(function(resolve, reject) {
+    this.resolve = resolve;
+    this.reject = reject;
+  }.bind(this));
+  Object.freeze(this);
+}
 
 let bluetoothManager;
 
diff --git a/dom/cellbroadcast/tests/marionette/head.js b/dom/cellbroadcast/tests/marionette/head.js
index 95d039cdbbd..ed8de044600 100644
--- a/dom/cellbroadcast/tests/marionette/head.js
+++ b/dom/cellbroadcast/tests/marionette/head.js
@@ -3,7 +3,15 @@
 
 const {Cc: Cc, Ci: Ci, Cr: Cr, Cu: Cu} = SpecialPowers;
 
-let Promise = Cu.import("resource://gre/modules/Promise.jsm").Promise;
+// Emulate Promise.jsm semantics.
+Promise.defer = function() { return new Deferred(); }
+function Deferred()  {
+  this.promise = new Promise(function(resolve, reject) {
+    this.resolve = resolve;
+    this.reject = reject;
+  }.bind(this));
+  Object.freeze(this);
+}
 
 const PDU_DCS_CODING_GROUP_BITS          = 0xF0;
 const PDU_DCS_MSG_CODING_7BITS_ALPHABET  = 0x00;
diff --git a/dom/mobilemessage/tests/marionette/head.js b/dom/mobilemessage/tests/marionette/head.js
index c7872041556..f4eb83882d4 100644
--- a/dom/mobilemessage/tests/marionette/head.js
+++ b/dom/mobilemessage/tests/marionette/head.js
@@ -3,7 +3,15 @@
 
 const {Cc: Cc, Ci: Ci, Cr: Cr, Cu: Cu} = SpecialPowers;
 
-let Promise = Cu.import("resource://gre/modules/Promise.jsm").Promise;
+// Emulate Promise.jsm semantics.
+Promise.defer = function() { return new Deferred(); }
+function Deferred()  {
+  this.promise = new Promise(function(resolve, reject) {
+    this.resolve = resolve;
+    this.reject = reject;
+  }.bind(this));
+  Object.freeze(this);
+}
 
 /**
  * Push a list of preference settings. Never reject.
diff --git a/dom/telephony/test/marionette/head.js b/dom/telephony/test/marionette/head.js
index 41481d7f1c0..ea7ea2933ef 100644
--- a/dom/telephony/test/marionette/head.js
+++ b/dom/telephony/test/marionette/head.js
@@ -1,7 +1,16 @@
 /* Any copyright is dedicated to the Public Domain.
  * http://creativecommons.org/publicdomain/zero/1.0/ */
 
-let Promise = SpecialPowers.Cu.import("resource://gre/modules/Promise.jsm").Promise;
+// Emulate Promise.jsm semantics.
+Promise.defer = function() { return new Deferred(); }
+function Deferred()  {
+  this.promise = new Promise(function(resolve, reject) {
+    this.resolve = resolve;
+    this.reject = reject;
+  }.bind(this));
+  Object.freeze(this);
+}
+
 let telephony;
 let conference;
 
diff --git a/dom/tethering/tests/marionette/head.js b/dom/tethering/tests/marionette/head.js
index 4e2c5680840..bd2731b64ec 100644
--- a/dom/tethering/tests/marionette/head.js
+++ b/dom/tethering/tests/marionette/head.js
@@ -25,8 +25,15 @@ const TETHERING_SETTING_KEY = "1234567890";
 
 const SETTINGS_RIL_DATA_ENABLED = 'ril.data.enabled';
 
-let Promise =
-  SpecialPowers.Cu.import("resource://gre/modules/Promise.jsm").Promise;
+// Emulate Promise.jsm semantics.
+Promise.defer = function() { return new Deferred(); }
+function Deferred()  {
+  this.promise = new Promise(function(resolve, reject) {
+    this.resolve = resolve;
+    this.reject = reject;
+  }.bind(this));
+  Object.freeze(this);
+}
 
 let gTestSuite = (function() {
   let suite = {};
diff --git a/dom/voicemail/test/marionette/head.js b/dom/voicemail/test/marionette/head.js
index f3f8b1b17c7..b3d6041af26 100644
--- a/dom/voicemail/test/marionette/head.js
+++ b/dom/voicemail/test/marionette/head.js
@@ -8,7 +8,15 @@ const {Cc: Cc, Ci: Ci, Cr: Cr, Cu: Cu} = SpecialPowers;
 let RIL = {};
 Cu.import("resource://gre/modules/ril_consts.js", RIL);
 
-let Promise = Cu.import("resource://gre/modules/Promise.jsm").Promise;
+// Emulate Promise.jsm semantics.
+Promise.defer = function() { return new Deferred(); }
+function Deferred()  {
+  this.promise = new Promise(function(resolve, reject) {
+    this.resolve = resolve;
+    this.reject = reject;
+  }.bind(this));
+  Object.freeze(this);
+}
 
 const MWI_PDU_PREFIX = "0000";
 const MWI_PDU_UDH_PREFIX = "0040";
diff --git a/dom/wifi/test/marionette/head.js b/dom/wifi/test/marionette/head.js
index c6981fd7181..dd28995bae0 100644
--- a/dom/wifi/test/marionette/head.js
+++ b/dom/wifi/test/marionette/head.js
@@ -1,7 +1,15 @@
 /* Any copyright is dedicated to the Public Domain.
  * http://creativecommons.org/publicdomain/zero/1.0/ */
 
-let Promise = SpecialPowers.Cu.import('resource://gre/modules/Promise.jsm').Promise;
+// Emulate Promise.jsm semantics.
+Promise.defer = function() { return new Deferred(); }
+function Deferred()  {
+  this.promise = new Promise(function(resolve, reject) {
+    this.resolve = resolve;
+    this.reject = reject;
+  }.bind(this));
+  Object.freeze(this);
+}
 
 const STOCK_HOSTAPD_NAME = 'goldfish-hostapd';
 const HOSTAPD_CONFIG_PATH = '/data/misc/wifi/remote-hostapd/';

From 3a844df4cbb9b547e8c18beeb72d804c43d6669a Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:51 +0200
Subject: [PATCH 074/146] Bug 1065185 - Create a content Array rather than a
 chrome array when reading Icc contacts. r=hsinyi

Right now, this array gets implicit __exposedProps__ for all of its indexed
members (and .length). Instead, we should be creating the Array directly in
the target Window.
---
 dom/system/gonk/RILContentHelper.js | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/dom/system/gonk/RILContentHelper.js b/dom/system/gonk/RILContentHelper.js
index 0861ccbc7f5..e8524ba7765 100644
--- a/dom/system/gonk/RILContentHelper.js
+++ b/dom/system/gonk/RILContentHelper.js
@@ -799,7 +799,8 @@ RILContentHelper.prototype = {
     let window = this._windowsMap[message.requestId];
     delete this._windowsMap[message.requestId];
     let contacts = message.contacts;
-    let result = contacts.map(function(c) {
+    let result = new window.Array();
+    contacts.forEach(function(c) {
       let prop = {name: [c.alphaId], tel: [{value: c.number}]};
 
       if (c.email) {
@@ -814,7 +815,7 @@ RILContentHelper.prototype = {
 
       let contact = new window.mozContact(prop);
       contact.id = c.contactId;
-      return contact;
+      result.push(contact);
     });
 
     this.fireRequestSuccess(message.requestId, result);

From edc0482e21c7891613c0a3ad6a3eeb409fccaa73 Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:51 +0200
Subject: [PATCH 075/146] Bug 1065185 - Create a content Array rather than a
 chrome Array in _convertWifiNetworks. r=chucklee

---
 dom/wifi/DOMWifiManager.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dom/wifi/DOMWifiManager.js b/dom/wifi/DOMWifiManager.js
index e1a8f9b7cda..300b06d1948 100644
--- a/dom/wifi/DOMWifiManager.js
+++ b/dom/wifi/DOMWifiManager.js
@@ -160,7 +160,7 @@ DOMWifiManager.prototype = {
   },
 
   _convertWifiNetworks: function(aNetworks) {
-    let networks = [];
+    let networks = new this._window.Array();
     for (let i in aNetworks) {
       networks.push(this._convertWifiNetwork(aNetworks[i]));
     }

From 004ae70a974669923c17c10b7702b0acfbbc4351 Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:51 +0200
Subject: [PATCH 076/146] Bug 1065185 - Do a better job of accessing privileged
 constants from voicemail marionette tests. r=bz

---
 dom/voicemail/test/marionette/head.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dom/voicemail/test/marionette/head.js b/dom/voicemail/test/marionette/head.js
index b3d6041af26..e9ba49a9f1e 100644
--- a/dom/voicemail/test/marionette/head.js
+++ b/dom/voicemail/test/marionette/head.js
@@ -5,8 +5,8 @@
 
 const {Cc: Cc, Ci: Ci, Cr: Cr, Cu: Cu} = SpecialPowers;
 
-let RIL = {};
-Cu.import("resource://gre/modules/ril_consts.js", RIL);
+let RIL = SpecialPowers.wrap(SpecialPowers.createBlankObject());
+SpecialPowers.Cu.import("resource://gre/modules/ril_consts.js", RIL);
 
 // Emulate Promise.jsm semantics.
 Promise.defer = function() { return new Deferred(); }

From f198ed50f159f970bf0b2b145ed466cbf89b75aa Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:51 +0200
Subject: [PATCH 077/146] Bug 1065185 - Create content arrays rather than
 chrome arrays in DOM Apps. r=me

---
 dom/apps/Webapps.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dom/apps/Webapps.js b/dom/apps/Webapps.js
index 675317e1954..700eca9c0c5 100644
--- a/dom/apps/Webapps.js
+++ b/dom/apps/Webapps.js
@@ -612,7 +612,7 @@ WebappsApplication.prototype = {
       case "Webapps:Connect:Return:OK":
         this.removeMessageListeners(["Webapps:Connect:Return:OK",
                                      "Webapps:Connect:Return:KO"]);
-        let messagePorts = [];
+        let messagePorts = new this._window.Array();
         msg.messagePortIDs.forEach((aPortID) => {
           let port = new this._window.MozInterAppMessagePort(aPortID);
           messagePorts.push(port);
@@ -626,7 +626,7 @@ WebappsApplication.prototype = {
         break;
       case "Webapps:GetConnections:Return:OK":
         this.removeMessageListeners(aMessage.name);
-        let connections = [];
+        let connections = new this._window.Array();
         msg.connections.forEach((aConnection) => {
           let connection =
             new this._window.MozInterAppConnection(aConnection.keyword,

From b42ebc5b7b13d0e806b0c5b09e5f0a8ca8aaa8df Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:51 +0200
Subject: [PATCH 078/146] Bug 1065185 - Expand XrayWrapper console messages for
 COWs. r=bz

---
 js/xpconnect/src/xpcprivate.h         | 13 ++++++--
 js/xpconnect/wrappers/AccessCheck.cpp | 15 +++++++++
 js/xpconnect/wrappers/AccessCheck.h   |  6 +---
 js/xpconnect/wrappers/XrayWrapper.cpp | 47 +++++++++++++++++----------
 4 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/js/xpconnect/src/xpcprivate.h b/js/xpconnect/src/xpcprivate.h
index 50d7712eba7..3764b2e0900 100644
--- a/js/xpconnect/src/xpcprivate.h
+++ b/js/xpconnect/src/xpcprivate.h
@@ -3621,6 +3621,13 @@ GetRTIdByIndex(JSContext *cx, unsigned index);
 
 namespace xpc {
 
+enum WrapperDenialType {
+    WrapperDenialForXray = 0,
+    WrapperDenialForCOW,
+    WrapperDenialTypeCount
+};
+bool ReportWrapperDenial(JSContext *cx, JS::HandleId id, WrapperDenialType type, const char *reason);
+
 class CompartmentPrivate
 {
 public:
@@ -3635,11 +3642,11 @@ public:
         , skipWriteToGlobalPrototype(false)
         , universalXPConnectEnabled(false)
         , forcePermissiveCOWs(false)
-        , warnedAboutXrays(false)
         , scriptability(c)
         , scope(nullptr)
     {
         MOZ_COUNT_CTOR(xpc::CompartmentPrivate);
+        mozilla::PodArrayZero(wrapperDenialWarnings);
     }
 
     ~CompartmentPrivate();
@@ -3688,8 +3695,8 @@ public:
     bool forcePermissiveCOWs;
 
     // Whether we've emitted a warning about a property that was filtered out
-    // by XrayWrappers. See XrayWrapper.cpp.
-    bool warnedAboutXrays;
+    // by a security wrapper. See XrayWrapper.cpp.
+    bool wrapperDenialWarnings[WrapperDenialTypeCount];
 
     // The scriptability of this compartment.
     Scriptability scriptability;
diff --git a/js/xpconnect/wrappers/AccessCheck.cpp b/js/xpconnect/wrappers/AccessCheck.cpp
index b238dd217f6..310ad5141b6 100644
--- a/js/xpconnect/wrappers/AccessCheck.cpp
+++ b/js/xpconnect/wrappers/AccessCheck.cpp
@@ -341,4 +341,19 @@ ExposedPropertiesOnly::check(JSContext *cx, HandleObject wrapper, HandleId id, W
     return true;
 }
 
+bool
+ExposedPropertiesOnly::deny(js::Wrapper::Action act, HandleId id)
+{
+    // Fail silently for GET, ENUMERATE, and GET_PROPERTY_DESCRIPTOR.
+    if (act == js::Wrapper::GET || act == js::Wrapper::ENUMERATE ||
+        act == js::Wrapper::GET_PROPERTY_DESCRIPTOR)
+    {
+        AutoJSContext cx;
+        return ReportWrapperDenial(cx, id, WrapperDenialForCOW,
+                                   "Access to privileged JS object not permitted");
+    }
+
+    return false;
+}
+
 }
diff --git a/js/xpconnect/wrappers/AccessCheck.h b/js/xpconnect/wrappers/AccessCheck.h
index 28dc9669aaa..1882c2dbd57 100644
--- a/js/xpconnect/wrappers/AccessCheck.h
+++ b/js/xpconnect/wrappers/AccessCheck.h
@@ -78,11 +78,7 @@ struct CrossOriginAccessiblePropertiesOnly : public Policy {
 struct ExposedPropertiesOnly : public Policy {
     static bool check(JSContext *cx, JS::HandleObject wrapper, JS::HandleId id, js::Wrapper::Action act);
 
-    static bool deny(js::Wrapper::Action act, JS::HandleId id) {
-        // Fail silently for GET ENUMERATE, and GET_PROPERTY_DESCRIPTOR.
-        return act == js::Wrapper::GET || act == js::Wrapper::ENUMERATE ||
-               act == js::Wrapper::GET_PROPERTY_DESCRIPTOR;
-    }
+    static bool deny(js::Wrapper::Action act, JS::HandleId id);
     static bool allowNativeCall(JSContext *cx, JS::IsAcceptableThis test, JS::NativeImpl impl) {
         return false;
     }
diff --git a/js/xpconnect/wrappers/XrayWrapper.cpp b/js/xpconnect/wrappers/XrayWrapper.cpp
index 5eb8097b708..c0d81410e83 100644
--- a/js/xpconnect/wrappers/XrayWrapper.cpp
+++ b/js/xpconnect/wrappers/XrayWrapper.cpp
@@ -69,8 +69,6 @@ IsTypedArrayKey(JSProtoKey key)
     return key >= JSProto_Int8Array && key <= JSProto_Uint8ClampedArray;
 }
 
-bool SilentFailure(JSContext *cx, JS::HandleId id, const char *reason);
-
 // Whitelist for the standard ES classes we can Xray to.
 static bool
 IsJSXraySupported(JSProtoKey key)
@@ -169,15 +167,15 @@ OpaqueXrayTraits::resolveOwnProperty(JSContext *cx, const Wrapper &jsWrapper, Ha
     if (!ok || desc.object())
         return ok;
 
-    return SilentFailure(cx, id, "object is not safely Xrayable");
+    return ReportWrapperDenial(cx, id, WrapperDenialForXray, "object is not safely Xrayable");
 }
 
 bool
-SilentFailure(JSContext *cx, HandleId id, const char *reason)
+ReportWrapperDenial(JSContext *cx, HandleId id, WrapperDenialType type, const char *reason)
 {
     CompartmentPrivate *priv = CompartmentPrivate::Get(CurrentGlobalOrNull(cx));
-    bool alreadyWarnedOnce = priv->warnedAboutXrays;
-    priv->warnedAboutXrays = true;
+    bool alreadyWarnedOnce = priv->wrapperDenialWarnings[type];
+    priv->wrapperDenialWarnings[type] = true;
 
     // The browser console warning is only emitted for the first violation,
     // whereas the (debug-only) NS_WARNING is emitted for each violation.
@@ -220,14 +218,27 @@ SilentFailure(JSContext *cx, HandleId id, const char *reason)
     if (win)
       windowId = win->WindowID();
 
-    nsPrintfCString errorMessage("XrayWrapper denied access to property %s (reason: %s). "
-                                 "See https://developer.mozilla.org/en-US/docs/Xray_vision "
-                                 "for more information. Note that only the first denied "
-                                 "property access from a given global object will be reported.",
-                                 NS_LossyConvertUTF16toASCII(propertyName).get(),
-                                 reason);
+
+    Maybe<nsPrintfCString> errorMessage;
+    if (type == WrapperDenialForXray) {
+        errorMessage.emplace("XrayWrapper denied access to property %s (reason: %s). "
+                             "See https://developer.mozilla.org/en-US/docs/Xray_vision "
+                             "for more information. Note that only the first denied "
+                             "property access from a given global object will be reported.",
+                             NS_LossyConvertUTF16toASCII(propertyName).get(),
+                             reason);
+    } else {
+        MOZ_ASSERT(type == WrapperDenialForCOW);
+        errorMessage.emplace("Security wrapper denied access to property %s on privileged "
+                             "Javascript object. Support for exposing privileged objects "
+                             "to untrusted content via __exposedProps__ is being gradually "
+                             "removed - use WebIDL bindings or Components.utils.cloneInto "
+                             "instead. Note that only the first denied property access from a "
+                             "given global object will be reported.",
+                             NS_LossyConvertUTF16toASCII(propertyName).get());
+    }
     nsString filenameStr(NS_ConvertASCIItoUTF16(filename.get()));
-    nsresult rv = errorObject->InitWithWindowID(NS_ConvertASCIItoUTF16(errorMessage),
+    nsresult rv = errorObject->InitWithWindowID(NS_ConvertASCIItoUTF16(errorMessage.ref()),
                                                 filenameStr,
                                                 EmptyString(),
                                                 line, 0,
@@ -265,7 +276,7 @@ bool JSXrayTraits::getOwnPropertyFromTargetIfSafe(JSContext *cx,
     // Disallow accessor properties.
     if (desc.hasGetterOrSetter()) {
         JSAutoCompartment ac(cx, wrapper);
-        return SilentFailure(cx, id, "property has accessor");
+        return ReportWrapperDenial(cx, id, WrapperDenialForXray, "property has accessor");
     }
 
     // Apply extra scrutiny to objects.
@@ -276,20 +287,20 @@ bool JSXrayTraits::getOwnPropertyFromTargetIfSafe(JSContext *cx,
         // Disallow non-subsumed objects.
         if (!AccessCheck::subsumes(target, propObj)) {
             JSAutoCompartment ac(cx, wrapper);
-            return SilentFailure(cx, id, "value not same-origin with target");
+            return ReportWrapperDenial(cx, id, WrapperDenialForXray, "value not same-origin with target");
         }
 
         // Disallow non-Xrayable objects.
         XrayType xrayType = GetXrayType(propObj);
         if (xrayType == NotXray || xrayType == XrayForOpaqueObject) {
             JSAutoCompartment ac(cx, wrapper);
-            return SilentFailure(cx, id, "value not Xrayable");
+            return ReportWrapperDenial(cx, id, WrapperDenialForXray, "value not Xrayable");
         }
 
         // Disallow callables.
         if (JS::IsCallable(propObj)) {
             JSAutoCompartment ac(cx, wrapper);
-            return SilentFailure(cx, id, "value is callable");
+            return ReportWrapperDenial(cx, id, WrapperDenialForXray, "value is callable");
         }
     }
 
@@ -304,7 +315,7 @@ bool JSXrayTraits::getOwnPropertyFromTargetIfSafe(JSContext *cx,
         return false;
     }
     if (foundOnProto)
-        return SilentFailure(cx, id, "value shadows a property on the standard prototype");
+        return ReportWrapperDenial(cx, id, WrapperDenialForXray, "value shadows a property on the standard prototype");
 
     // We made it! Assign over the descriptor, and don't forget to wrap.
     outDesc.assign(desc.get());

From e8e79ce406d73d2b8e1e8c9dd5acec2128184e95 Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:52 +0200
Subject: [PATCH 079/146] Bug 1065185 - Turn off indexed/.length access on COW
 arrays. r=bz

---
 js/xpconnect/wrappers/AccessCheck.cpp | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/js/xpconnect/wrappers/AccessCheck.cpp b/js/xpconnect/wrappers/AccessCheck.cpp
index 310ad5141b6..2bb679c0510 100644
--- a/js/xpconnect/wrappers/AccessCheck.cpp
+++ b/js/xpconnect/wrappers/AccessCheck.cpp
@@ -248,16 +248,21 @@ ExposedPropertiesOnly::check(JSContext *cx, HandleObject wrapper, HandleId id, W
     if (!JS_HasPropertyById(cx, wrappedObject, exposedPropsId, &found))
         return false;
 
-    // Always permit access to "length" and indexed properties of arrays.
-    if ((JS_IsArrayObject(cx, wrappedObject) ||
-         JS_IsTypedArrayObject(wrappedObject)) &&
-        ((JSID_IS_INT(id) && JSID_TO_INT(id) >= 0) ||
-         (JSID_IS_STRING(id) && JS_FlatStringEqualsAscii(JSID_TO_FLAT_STRING(id), "length")))) {
-        return true; // Allow
-    }
-
     // If no __exposedProps__ existed, deny access.
     if (!found) {
+        // Previously we automatically granted access to indexed properties and
+        // .length for Array COWs. We're not doing that anymore, so make sure to
+        // let people know what's going on.
+        bool isArray = JS_IsArrayObject(cx, wrappedObject) || JS_IsTypedArrayObject(wrappedObject);
+        bool isIndexedAccessOnArray = isArray && JSID_IS_INT(id) && JSID_TO_INT(id) >= 0;
+        bool isLengthAccessOnArray = isArray && JSID_IS_STRING(id) &&
+                                     JS_FlatStringEqualsAscii(JSID_TO_FLAT_STRING(id), "length");
+        if (isIndexedAccessOnArray || isLengthAccessOnArray) {
+            JSAutoCompartment ac2(cx, wrapper);
+            ReportWrapperDenial(cx, id, WrapperDenialForCOW,
+                                "Access to elements and length of privileged Array not permitted");
+        }
+
         return false;
     }
 

From 3db03976705121a25e7e87a74b876cf2f13929e8 Mon Sep 17 00:00:00 2001
From: Bobby Holley 
Date: Fri, 3 Oct 2014 10:05:52 +0200
Subject: [PATCH 080/146] Bug 1065185 - Console Message Tests. r=bz

---
 js/xpconnect/tests/chrome/chrome.ini          |  1 +
 .../tests/chrome/test_bug1065185.html         | 78 +++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 js/xpconnect/tests/chrome/test_bug1065185.html

diff --git a/js/xpconnect/tests/chrome/chrome.ini b/js/xpconnect/tests/chrome/chrome.ini
index b0673916c0f..f75cc560ec1 100644
--- a/js/xpconnect/tests/chrome/chrome.ini
+++ b/js/xpconnect/tests/chrome/chrome.ini
@@ -67,6 +67,7 @@ skip-if = buildapp == 'mulet'
 [test_bug1041626.xul]
 [test_bug1042436.xul]
 [test_bug1050049.html]
+[test_bug1065185.html]
 [test_bug1074863.html]
 [test_xrayToJS.xul]
 skip-if = buildapp == 'mulet'
diff --git a/js/xpconnect/tests/chrome/test_bug1065185.html b/js/xpconnect/tests/chrome/test_bug1065185.html
new file mode 100644
index 00000000000..0ff125f3e36
--- /dev/null
+++ b/js/xpconnect/tests/chrome/test_bug1065185.html
@@ -0,0 +1,78 @@
+
+
+
+
+  
+  Test for Bug 1065185
+  
+  
+  
+  
+
+
+Mozilla Bug 1065185
+

+ +
+
+