536cd135cc
Former-commit-id: 5624ac747d633e885131e8349322922b6a59baaa
1017 lines
41 KiB
C#
1017 lines
41 KiB
C#
//---------------------------------------------------------------------
|
|
// <copyright file="AggregatePushdown.cs" company="Microsoft">
|
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// </copyright>
|
|
//
|
|
// @owner Microsoft
|
|
// @backupOwner Microsoft
|
|
//---------------------------------------------------------------------
|
|
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Data.Common;
|
|
//using System.Diagnostics; // Please use PlanCompiler.Assert instead of Debug.Assert in this class...
|
|
|
|
// It is fine to use Debug.Assert in cases where you assert an obvious thing that is supposed
|
|
// to prevent from simple mistakes during development (e.g. method argument validation
|
|
// in cases where it was you who created the variables or the variables had already been validated or
|
|
// in "else" clauses where due to code changes (e.g. adding a new value to an enum type) the default
|
|
// "else" block is chosen why the new condition should be treated separately). This kind of asserts are
|
|
// (can be) helpful when developing new code to avoid simple mistakes but have no or little value in
|
|
// the shipped product.
|
|
// PlanCompiler.Assert *MUST* be used to verify conditions in the trees. These would be assumptions
|
|
// about how the tree was built etc. - in these cases we probably want to throw an exception (this is
|
|
// what PlanCompiler.Assert does when the condition is not met) if either the assumption is not correct
|
|
// or the tree was built/rewritten not the way we thought it was.
|
|
// Use your judgment - if you rather remove an assert than ship it use Debug.Assert otherwise use
|
|
// PlanCompiler.Assert.
|
|
|
|
using System.Globalization;
|
|
using System.Text;
|
|
|
|
using System.Data.Metadata.Edm;
|
|
using System.Data.Query.InternalTrees;
|
|
|
|
namespace System.Data.Query.PlanCompiler
|
|
{
|
|
internal delegate bool TryGetValue(Node key, out Node value);
|
|
|
|
#region Helper Classes
|
|
/// <summary>
|
|
/// Helper class to track the aggregate nodes that are candidates to be
|
|
/// pushed into the definingGroupByNode.
|
|
/// </summary>
|
|
internal class GroupAggregateVarInfo
|
|
{
|
|
#region Private Fields
|
|
private readonly Node _definingGroupByNode;
|
|
private HashSet<KeyValuePair<Node, Node>> _candidateAggregateNodes;
|
|
private readonly Var _groupAggregateVar;
|
|
#endregion
|
|
|
|
#region Constructor
|
|
/// <summary>
|
|
/// Public constructor
|
|
/// </summary>
|
|
/// <param name="defingingGroupNode">The GroupIntoOp node</param>
|
|
/// <param name="groupAggregateVar"></param>
|
|
internal GroupAggregateVarInfo(Node defingingGroupNode, Var groupAggregateVar)
|
|
{
|
|
_definingGroupByNode = defingingGroupNode;
|
|
_groupAggregateVar = groupAggregateVar;
|
|
}
|
|
#endregion
|
|
|
|
#region 'Public' Properties
|
|
/// <summary>
|
|
/// Each key value pair represents a candidate aggregate.
|
|
/// The key is the function aggregate subtree and the value is a 'template' of translation of the
|
|
/// function aggregate's argument over the var representing the group aggregate.
|
|
/// A valid candidate has an argument that does not have any external references
|
|
/// except for the group aggregate corresponding to the DefiningGroupNode.
|
|
/// </summary>
|
|
internal HashSet<KeyValuePair<Node, Node>> CandidateAggregateNodes
|
|
{
|
|
get
|
|
{
|
|
if (_candidateAggregateNodes == null)
|
|
{
|
|
_candidateAggregateNodes = new HashSet<KeyValuePair<Node, Node>>();
|
|
}
|
|
return _candidateAggregateNodes;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Are there are agregates that are candidates to be pushed into the DefiningGroupNode
|
|
/// </summary>
|
|
internal bool HasCandidateAggregateNodes
|
|
{
|
|
get
|
|
{
|
|
return (_candidateAggregateNodes != null && _candidateAggregateNodes.Count != 0);
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// The GroupIntoOp node that this GroupAggregateVarInfo represents
|
|
/// </summary>
|
|
internal Node DefiningGroupNode
|
|
{
|
|
get { return _definingGroupByNode; }
|
|
}
|
|
|
|
internal Var GroupAggregateVar
|
|
{
|
|
get { return _groupAggregateVar; }
|
|
}
|
|
#endregion
|
|
}
|
|
|
|
/// <summary>
|
|
/// Helper class to track usage of GroupAggregateVarInfo
|
|
/// It represents the usage of a single GroupAggregateVar.
|
|
/// The usage is defined by the computation, it should be a subree whose only
|
|
/// external reference is the group var represented by the GroupAggregateVarInfo.
|
|
/// </summary>
|
|
internal class GroupAggregateVarRefInfo
|
|
{
|
|
#region Private fields
|
|
private readonly Node _computation;
|
|
private readonly GroupAggregateVarInfo _groupAggregateVarInfo;
|
|
private readonly bool _isUnnested;
|
|
#endregion
|
|
|
|
#region Constructor
|
|
/// <summary>
|
|
/// Public constructor
|
|
/// </summary>
|
|
/// <param name="groupAggregateVarInfo"></param>
|
|
/// <param name="computation"></param>
|
|
internal GroupAggregateVarRefInfo(GroupAggregateVarInfo groupAggregateVarInfo, Node computation, bool isUnnested)
|
|
{
|
|
this._groupAggregateVarInfo = groupAggregateVarInfo;
|
|
this._computation = computation;
|
|
this._isUnnested = isUnnested;
|
|
}
|
|
|
|
#endregion
|
|
|
|
#region 'Public' Properties
|
|
/// <summary>
|
|
/// Subtree whose only external reference is
|
|
/// the group var represented by the GroupAggregateVarInfo
|
|
/// </summary>
|
|
internal Node Computation
|
|
{
|
|
get { return _computation; }
|
|
}
|
|
|
|
/// <summary>
|
|
/// The GroupAggregateVarInfo (possibly) referenced by the computation
|
|
/// </summary>
|
|
internal GroupAggregateVarInfo GroupAggregateVarInfo
|
|
{
|
|
get { return _groupAggregateVarInfo; }
|
|
}
|
|
|
|
/// <summary>
|
|
/// Is the computation over unnested group aggregate var
|
|
/// </summary>
|
|
internal bool IsUnnested
|
|
{
|
|
get { return _isUnnested; }
|
|
}
|
|
#endregion
|
|
}
|
|
|
|
/// <summary>
|
|
/// Manages refereces to groupAggregate variables.
|
|
/// </summary>
|
|
internal class GroupAggregateVarInfoManager
|
|
{
|
|
#region Private state
|
|
private readonly Dictionary<Var, GroupAggregateVarRefInfo> _groupAggregateVarRelatedVarToInfo = new Dictionary<Var, GroupAggregateVarRefInfo>();
|
|
private Dictionary<Var, Dictionary<EdmMember, GroupAggregateVarRefInfo>> _groupAggregateVarRelatedVarPropertyToInfo;
|
|
private HashSet<GroupAggregateVarInfo> _groupAggregateVarInfos = new HashSet<GroupAggregateVarInfo>();
|
|
#endregion
|
|
|
|
#region Public Surface
|
|
/// <summary>
|
|
/// Get all the groupAggregateVarInfos
|
|
/// </summary>
|
|
internal IEnumerable<GroupAggregateVarInfo> GroupAggregateVarInfos
|
|
{
|
|
get
|
|
{
|
|
return _groupAggregateVarInfos;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Add an entry that var is a computation represented by the computationTemplate
|
|
/// over the var represented by the given groupAggregateVarInfo
|
|
/// </summary>
|
|
/// <param name="var"></param>
|
|
/// <param name="groupAggregateVarInfo"></param>
|
|
/// <param name="computationTemplate"></param>
|
|
/// <param name="isUnnested"></param>
|
|
internal void Add(Var var, GroupAggregateVarInfo groupAggregateVarInfo, Node computationTemplate, bool isUnnested)
|
|
{
|
|
this._groupAggregateVarRelatedVarToInfo.Add(var, new GroupAggregateVarRefInfo(groupAggregateVarInfo, computationTemplate, isUnnested));
|
|
_groupAggregateVarInfos.Add(groupAggregateVarInfo);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Add an entry that the given property of the given var is a computation represented
|
|
/// by the computationTemplate over the var represented by the given groupAggregateVarInfo
|
|
/// </summary>
|
|
/// <param name="var"></param>
|
|
/// <param name="groupAggregateVarInfo"></param>
|
|
/// <param name="computationTemplate"></param>
|
|
/// <param name="isUnnested"></param>
|
|
/// <param name="property"></param>
|
|
internal void Add(Var var, GroupAggregateVarInfo groupAggregateVarInfo, Node computationTemplate, bool isUnnested, EdmMember property)
|
|
{
|
|
if (property == null)
|
|
{
|
|
Add(var, groupAggregateVarInfo, computationTemplate, isUnnested);
|
|
return;
|
|
}
|
|
if (this._groupAggregateVarRelatedVarPropertyToInfo == null)
|
|
{
|
|
this._groupAggregateVarRelatedVarPropertyToInfo = new Dictionary<Var, Dictionary<System.Data.Metadata.Edm.EdmMember, GroupAggregateVarRefInfo>>();
|
|
}
|
|
Dictionary<EdmMember, GroupAggregateVarRefInfo> varPropertyDictionary;
|
|
if (!_groupAggregateVarRelatedVarPropertyToInfo.TryGetValue(var, out varPropertyDictionary))
|
|
{
|
|
varPropertyDictionary = new Dictionary<System.Data.Metadata.Edm.EdmMember, GroupAggregateVarRefInfo>();
|
|
_groupAggregateVarRelatedVarPropertyToInfo.Add(var, varPropertyDictionary);
|
|
}
|
|
varPropertyDictionary.Add(property, new GroupAggregateVarRefInfo(groupAggregateVarInfo, computationTemplate, isUnnested));
|
|
|
|
// Note: The following line is not necessary with the current usage pattern, this method is
|
|
// never called with a new groupAggregateVarInfo thus it is a no-op.
|
|
_groupAggregateVarInfos.Add(groupAggregateVarInfo);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Gets the groupAggregateVarRefInfo representing the definition of the given var over
|
|
/// a group aggregate var if any.
|
|
/// </summary>
|
|
/// <param name="var"></param>
|
|
/// <param name="groupAggregateVarRefInfo"></param>
|
|
/// <returns></returns>
|
|
internal bool TryGetReferencedGroupAggregateVarInfo(Var var, out GroupAggregateVarRefInfo groupAggregateVarRefInfo)
|
|
{
|
|
return this._groupAggregateVarRelatedVarToInfo.TryGetValue(var, out groupAggregateVarRefInfo);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Gets the groupAggregateVarRefInfo representing the definition of the given property of the given
|
|
/// var over a group aggregate var if any.
|
|
/// </summary>
|
|
/// <param name="var"></param>
|
|
/// <param name="property"></param>
|
|
/// <param name="groupAggregateVarRefInfo"></param>
|
|
/// <returns></returns>
|
|
internal bool TryGetReferencedGroupAggregateVarInfo(Var var, EdmMember property, out GroupAggregateVarRefInfo groupAggregateVarRefInfo)
|
|
{
|
|
if (property == null)
|
|
{
|
|
return TryGetReferencedGroupAggregateVarInfo(var, out groupAggregateVarRefInfo);
|
|
}
|
|
|
|
Dictionary<EdmMember, GroupAggregateVarRefInfo> varPropertyDictionary;
|
|
if (_groupAggregateVarRelatedVarPropertyToInfo == null || !_groupAggregateVarRelatedVarPropertyToInfo.TryGetValue(var, out varPropertyDictionary))
|
|
{
|
|
groupAggregateVarRefInfo = null;
|
|
return false;
|
|
}
|
|
return varPropertyDictionary.TryGetValue(property, out groupAggregateVarRefInfo);
|
|
}
|
|
#endregion
|
|
}
|
|
#endregion
|
|
|
|
/// <summary>
|
|
/// Utility class that tries to produce an equivalent tree to the input tree over
|
|
/// a single group aggregate variable and no other external references
|
|
/// </summary>
|
|
internal class GroupAggregateVarComputationTranslator : BasicOpVisitorOfNode
|
|
{
|
|
#region Private State
|
|
private GroupAggregateVarInfo _targetGroupAggregateVarInfo;
|
|
private bool _isUnnested;
|
|
private readonly Command _command;
|
|
private readonly GroupAggregateVarInfoManager _groupAggregateVarInfoManager;
|
|
#endregion
|
|
|
|
#region Constructor
|
|
/// <summary>
|
|
/// Private constructor
|
|
/// </summary>
|
|
/// <param name="command"></param>
|
|
/// <param name="groupAggregateVarInfoManager"></param>
|
|
private GroupAggregateVarComputationTranslator(
|
|
Command command,
|
|
GroupAggregateVarInfoManager groupAggregateVarInfoManager)
|
|
{
|
|
this._command = command;
|
|
this._groupAggregateVarInfoManager = groupAggregateVarInfoManager;
|
|
}
|
|
#endregion
|
|
|
|
#region 'Public' Surface
|
|
/// <summary>
|
|
/// Try to produce an equivalent tree to the input subtree, over a single group aggregate variable.
|
|
/// Such translation can only be produced if all external references of the input subtree are to a
|
|
/// single group aggregate var, or to vars that are can be translated over that single group
|
|
/// aggregate var
|
|
/// </summary>
|
|
/// <param name="subtree">The input subtree</param>
|
|
/// <param name="isVarDefinition"></param>
|
|
/// <param name="command"></param>
|
|
/// <param name="groupAggregateVarInfoManager"></param>
|
|
/// <param name="groupAggregateVarInfo">The groupAggregateVarInfo over which the input subtree can be translated </param>
|
|
/// <param name="templateNode">A tree that is equvalent to the input tree, but over the group aggregate variable
|
|
/// represented by the groupAggregetVarInfo</param>
|
|
/// <param name="isUnnested"></param>
|
|
/// <returns>True, if the translation can be done, false otherwise</returns>
|
|
public static bool TryTranslateOverGroupAggregateVar(
|
|
Node subtree,
|
|
bool isVarDefinition,
|
|
Command command,
|
|
GroupAggregateVarInfoManager groupAggregateVarInfoManager,
|
|
out GroupAggregateVarInfo groupAggregateVarInfo,
|
|
out Node templateNode,
|
|
out bool isUnnested)
|
|
{
|
|
GroupAggregateVarComputationTranslator handler = new GroupAggregateVarComputationTranslator(command, groupAggregateVarInfoManager);
|
|
|
|
Node inputNode = subtree;
|
|
SoftCastOp softCastOp = null;
|
|
bool isCollect;
|
|
if (inputNode.Op.OpType == OpType.SoftCast)
|
|
{
|
|
softCastOp = (SoftCastOp)inputNode.Op;
|
|
inputNode = inputNode.Child0;
|
|
}
|
|
|
|
if (inputNode.Op.OpType == OpType.Collect)
|
|
{
|
|
templateNode = handler.VisitCollect(inputNode);
|
|
isCollect = true;
|
|
}
|
|
else
|
|
{
|
|
templateNode = handler.VisitNode(inputNode);
|
|
isCollect = false;
|
|
}
|
|
|
|
groupAggregateVarInfo = handler._targetGroupAggregateVarInfo;
|
|
isUnnested = handler._isUnnested;
|
|
|
|
if (handler._targetGroupAggregateVarInfo == null || templateNode == null)
|
|
{
|
|
return false;
|
|
}
|
|
if (softCastOp != null)
|
|
{
|
|
SoftCastOp newSoftCastOp;
|
|
//
|
|
// The type needs to be fixed only if the unnesting happened during this translation.
|
|
// That can be recognized by these two cases:
|
|
// 1) if the input node was a collect, or
|
|
// 2) if the input did not represent a var definition, but a function aggregate argument and
|
|
// the template is VarRef of a group aggregate var.
|
|
//
|
|
if (isCollect || !isVarDefinition && AggregatePushdownUtil.IsVarRefOverGivenVar(templateNode, handler._targetGroupAggregateVarInfo.GroupAggregateVar))
|
|
{
|
|
newSoftCastOp = command.CreateSoftCastOp(TypeHelpers.GetEdmType<CollectionType>(softCastOp.Type).TypeUsage);
|
|
}
|
|
else
|
|
{
|
|
newSoftCastOp = softCastOp;
|
|
}
|
|
templateNode = command.CreateNode(newSoftCastOp, templateNode);
|
|
}
|
|
return true;
|
|
}
|
|
#endregion
|
|
|
|
#region Visitor Methods
|
|
/// <summary>
|
|
/// See <cref="TryTranslateOverGroupAggregateVar"/>
|
|
/// </summary>
|
|
/// <param name="op"></param>
|
|
/// <param name="n"></param>
|
|
/// <returns></returns>
|
|
public override Node Visit(VarRefOp op, Node n)
|
|
{
|
|
return TranslateOverGroupAggregateVar(op.Var, null);
|
|
}
|
|
|
|
/// <summary>
|
|
/// If the child is VarRef check if the subtree PropertyOp(VarRef) is reference to a
|
|
/// group aggregate var.
|
|
/// Otherwise do default processing
|
|
/// </summary>
|
|
/// <param name="op"></param>
|
|
/// <param name="n"></param>
|
|
/// <returns></returns>
|
|
public override Node Visit(PropertyOp op, Node n)
|
|
{
|
|
if (n.Child0.Op.OpType != OpType.VarRef)
|
|
{
|
|
return base.Visit(op, n);
|
|
}
|
|
VarRefOp varRefOp = (VarRefOp)n.Child0.Op;
|
|
return TranslateOverGroupAggregateVar(varRefOp.Var, op.PropertyInfo);
|
|
}
|
|
|
|
/// <summary>
|
|
/// If the Subtree rooted at the collect is of the following structure:
|
|
///
|
|
/// PhysicalProject(outputVar)
|
|
/// |
|
|
/// Project(s)
|
|
/// |
|
|
/// Unnest
|
|
///
|
|
/// where the unnest is over the group aggregate var and the output var
|
|
/// is either a reference to the group aggregate var or to a constant, it returns the
|
|
/// translation of the ouput var.
|
|
/// </summary>
|
|
/// <param name="n"></param>
|
|
/// <returns></returns>
|
|
private Node VisitCollect(Node n)
|
|
{
|
|
//Make sure the only children are projects over unnest
|
|
Node currentNode = n.Child0;
|
|
Dictionary<Var, Node> constantDefinitions = new Dictionary<Var, Node>();
|
|
while (currentNode.Child0.Op.OpType == OpType.Project)
|
|
{
|
|
currentNode = currentNode.Child0;
|
|
//Visit the VarDefListOp child
|
|
if (VisitDefault(currentNode.Child1) == null)
|
|
{
|
|
return null;
|
|
}
|
|
foreach (Node definitionNode in currentNode.Child1.Children)
|
|
{
|
|
if (IsConstant(definitionNode.Child0))
|
|
{
|
|
constantDefinitions.Add(((VarDefOp)definitionNode.Op).Var, definitionNode.Child0);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (currentNode.Child0.Op.OpType != OpType.Unnest)
|
|
{
|
|
return null;
|
|
}
|
|
|
|
// Handle the unnest
|
|
UnnestOp unnestOp = (UnnestOp)currentNode.Child0.Op;
|
|
GroupAggregateVarRefInfo groupAggregateVarRefInfo;
|
|
if (_groupAggregateVarInfoManager.TryGetReferencedGroupAggregateVarInfo(unnestOp.Var, out groupAggregateVarRefInfo))
|
|
{
|
|
if (_targetGroupAggregateVarInfo == null)
|
|
{
|
|
_targetGroupAggregateVarInfo = groupAggregateVarRefInfo.GroupAggregateVarInfo;
|
|
}
|
|
else if (_targetGroupAggregateVarInfo != groupAggregateVarRefInfo.GroupAggregateVarInfo)
|
|
{
|
|
return null;
|
|
}
|
|
if (!_isUnnested)
|
|
{
|
|
return null;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
return null;
|
|
}
|
|
|
|
PhysicalProjectOp physicalProjectOp = (PhysicalProjectOp)n.Child0.Op;
|
|
PlanCompiler.Assert(physicalProjectOp.Outputs.Count == 1, "PhysicalProject should only have one output at this stage");
|
|
Var outputVar = physicalProjectOp.Outputs[0];
|
|
|
|
Node computationTemplate = TranslateOverGroupAggregateVar(outputVar, null);
|
|
if (computationTemplate != null)
|
|
{
|
|
_isUnnested = true;
|
|
return computationTemplate;
|
|
}
|
|
|
|
Node constantDefinitionNode;
|
|
if (constantDefinitions.TryGetValue(outputVar, out constantDefinitionNode))
|
|
{
|
|
_isUnnested = true;
|
|
return constantDefinitionNode;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Determines whether the given Node is a constant subtree
|
|
/// It only recognizes any of the constant base ops
|
|
/// and possibly casts over these nodes.
|
|
/// </summary>
|
|
/// <param name="node"></param>
|
|
/// <returns></returns>
|
|
private static bool IsConstant(Node node)
|
|
{
|
|
Node currentNode = node;
|
|
while (currentNode.Op.OpType == OpType.Cast)
|
|
{
|
|
currentNode = currentNode.Child0;
|
|
}
|
|
return PlanCompilerUtil.IsConstantBaseOp(currentNode.Op.OpType);
|
|
}
|
|
|
|
/// <summary>
|
|
/// (1) If the given var or the given property of the given var are defined over a group aggregate var,
|
|
/// (2) and if that group aggregate var matches the var represented by represented by _targetGroupAggregateVarInfo
|
|
/// if any
|
|
///
|
|
/// it returns the corresponding translation over the group aggregate var. Also, if _targetGroupAggregateVarInfo
|
|
/// is not set, it sets it to the group aggregate var representing the referenced var.
|
|
/// </summary>
|
|
/// <param name="var"></param>
|
|
/// <param name="property"></param>
|
|
/// <returns></returns>
|
|
private Node TranslateOverGroupAggregateVar(Var var, EdmMember property)
|
|
{
|
|
GroupAggregateVarRefInfo groupAggregateVarRefInfo;
|
|
EdmMember localProperty;
|
|
if (_groupAggregateVarInfoManager.TryGetReferencedGroupAggregateVarInfo(var, out groupAggregateVarRefInfo))
|
|
{
|
|
localProperty = property;
|
|
}
|
|
else if (_groupAggregateVarInfoManager.TryGetReferencedGroupAggregateVarInfo(var, property, out groupAggregateVarRefInfo))
|
|
{
|
|
localProperty = null;
|
|
}
|
|
else
|
|
{
|
|
return null;
|
|
}
|
|
|
|
if (_targetGroupAggregateVarInfo == null)
|
|
{
|
|
_targetGroupAggregateVarInfo = groupAggregateVarRefInfo.GroupAggregateVarInfo;
|
|
_isUnnested = groupAggregateVarRefInfo.IsUnnested;
|
|
}
|
|
else if (_targetGroupAggregateVarInfo != groupAggregateVarRefInfo.GroupAggregateVarInfo || _isUnnested != groupAggregateVarRefInfo.IsUnnested)
|
|
{
|
|
return null;
|
|
}
|
|
|
|
Node computationTemplate = groupAggregateVarRefInfo.Computation;
|
|
if (localProperty != null)
|
|
{
|
|
computationTemplate = this._command.CreateNode(this._command.CreatePropertyOp(localProperty), computationTemplate);
|
|
}
|
|
return computationTemplate;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Default processing for nodes.
|
|
/// Visits the children and if any child has changed it creates a new node
|
|
/// for the parent.
|
|
/// If the reference of the child node did not change, the child node did not change either,
|
|
/// this is because a node can only be reused "as is" when building a template.
|
|
/// </summary>
|
|
/// <param name="n"></param>
|
|
/// <returns></returns>
|
|
protected override Node VisitDefault(Node n)
|
|
{
|
|
List<Node> newChildren = new List<Node>(n.Children.Count);
|
|
bool anyChildChanged = false;
|
|
for (int i = 0; i < n.Children.Count; i++)
|
|
{
|
|
Node processedChild = VisitNode(n.Children[i]);
|
|
if (processedChild == null)
|
|
{
|
|
return null;
|
|
}
|
|
if (!anyChildChanged && !Object.ReferenceEquals(n.Children[i], processedChild))
|
|
{
|
|
anyChildChanged = true;
|
|
}
|
|
newChildren.Add(processedChild);
|
|
}
|
|
|
|
if (!anyChildChanged)
|
|
{
|
|
return n;
|
|
}
|
|
else
|
|
{
|
|
return _command.CreateNode(n.Op, newChildren);
|
|
}
|
|
}
|
|
|
|
#region Unsupported node types
|
|
|
|
protected override Node VisitRelOpDefault(RelOp op, Node n)
|
|
{
|
|
return null;
|
|
}
|
|
|
|
public override Node Visit(AggregateOp op, Node n)
|
|
{
|
|
return null;
|
|
}
|
|
|
|
public override Node Visit(CollectOp op, Node n)
|
|
{
|
|
return null;
|
|
}
|
|
|
|
public override Node Visit(ElementOp op, Node n)
|
|
{
|
|
return null;
|
|
}
|
|
|
|
#endregion
|
|
|
|
#endregion
|
|
}
|
|
|
|
/// <summary>
|
|
/// A visitor that collects all group aggregates and the corresponding function aggregates
|
|
/// that are defined over them, referred to as 'candidate aggregates'. The candidate aggregates are aggregates
|
|
/// that have an argument that has the corresponding group aggregate as the only external reference
|
|
/// </summary>
|
|
internal class GroupAggregateRefComputingVisitor : BasicOpVisitor
|
|
{
|
|
#region private state
|
|
private readonly Command _command;
|
|
private readonly GroupAggregateVarInfoManager _groupAggregateVarInfoManager = new GroupAggregateVarInfoManager();
|
|
private readonly Dictionary<Node, Node> _childToParent = new Dictionary<Node, Node>();
|
|
#endregion
|
|
|
|
#region 'Public'
|
|
/// <summary>
|
|
/// Produces a list of all GroupAggregateVarInfos, each of which represents a single group aggregate
|
|
/// and it candidate function aggregates. It also produces a delegate that given a child node returns the parent node
|
|
/// </summary>
|
|
/// <param name="itree"></param>
|
|
/// <param name="tryGetParent"></param>
|
|
/// <returns></returns>
|
|
internal static IEnumerable<GroupAggregateVarInfo> Process(Command itree, out TryGetValue tryGetParent)
|
|
{
|
|
GroupAggregateRefComputingVisitor groupRefComputingVisitor = new GroupAggregateRefComputingVisitor(itree);
|
|
groupRefComputingVisitor.VisitNode(itree.Root);
|
|
tryGetParent = groupRefComputingVisitor._childToParent.TryGetValue;
|
|
|
|
return groupRefComputingVisitor._groupAggregateVarInfoManager.GroupAggregateVarInfos;
|
|
}
|
|
#endregion
|
|
|
|
#region Private Constructor
|
|
/// <summary>
|
|
/// Private constructor
|
|
/// </summary>
|
|
/// <param name="itree"></param>
|
|
private GroupAggregateRefComputingVisitor(Command itree)
|
|
{
|
|
this._command = itree;
|
|
}
|
|
#endregion
|
|
|
|
#region Visitor Methods
|
|
|
|
#region AncillaryOps
|
|
/// <summary>
|
|
/// Determines whether the var or a property of the var (if the var is defined as a NewRecord)
|
|
/// is defined exclusively over a single group aggregate. If so, it registers it as such with the
|
|
/// group aggregate var info manager.
|
|
/// </summary>
|
|
/// <param name="op"></param>
|
|
/// <param name="n"></param>
|
|
public override void Visit(VarDefOp op, Node n)
|
|
{
|
|
VisitDefault(n);
|
|
|
|
Node definingNode = n.Child0;
|
|
Op definingNodeOp = definingNode.Op;
|
|
|
|
GroupAggregateVarInfo referencedVarInfo;
|
|
Node templateNode;
|
|
bool isUnnested;
|
|
if (GroupAggregateVarComputationTranslator.TryTranslateOverGroupAggregateVar(definingNode, true, this._command, this._groupAggregateVarInfoManager, out referencedVarInfo, out templateNode, out isUnnested))
|
|
{
|
|
_groupAggregateVarInfoManager.Add(op.Var, referencedVarInfo, templateNode, isUnnested);
|
|
}
|
|
else if (definingNodeOp.OpType == OpType.NewRecord)
|
|
{
|
|
NewRecordOp newRecordOp = (NewRecordOp)definingNodeOp;
|
|
for (int i = 0; i < definingNode.Children.Count; i++)
|
|
{
|
|
Node argumentNode = definingNode.Children[i];
|
|
if (GroupAggregateVarComputationTranslator.TryTranslateOverGroupAggregateVar(argumentNode, true, this._command, this._groupAggregateVarInfoManager, out referencedVarInfo, out templateNode, out isUnnested))
|
|
{
|
|
_groupAggregateVarInfoManager.Add(op.Var, referencedVarInfo, templateNode, isUnnested, newRecordOp.Properties[i]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#endregion
|
|
|
|
#region RelOp Visitors
|
|
/// <summary>
|
|
/// Registers the group aggregate var with the group aggregate var info manager
|
|
/// </summary>
|
|
/// <param name="op"></param>
|
|
/// <param name="n"></param>
|
|
public override void Visit(GroupByIntoOp op, Node n)
|
|
{
|
|
VisitGroupByOp(op, n);
|
|
foreach (Node child in n.Child3.Children)
|
|
{
|
|
Var groupAggregateVar = ((VarDefOp)child.Op).Var;
|
|
GroupAggregateVarRefInfo groupAggregateVarRefInfo;
|
|
// If the group by is over a group, it may be already tracked as referencing a group var
|
|
// An optimization would be to separately track this groupAggregateVar too, for the cases when the aggregate can
|
|
// not be pushed to the group by node over which this one is defined but can be propagated to this group by node.
|
|
if (!_groupAggregateVarInfoManager.TryGetReferencedGroupAggregateVarInfo(groupAggregateVar, out groupAggregateVarRefInfo))
|
|
{
|
|
_groupAggregateVarInfoManager.Add(groupAggregateVar, new GroupAggregateVarInfo(n, groupAggregateVar), this._command.CreateNode(this._command.CreateVarRefOp(groupAggregateVar)), false);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// If the unnestOp's var is defined as a reference of a group aggregate var,
|
|
/// then the columns it produces should be registered too, but as 'unnested' references
|
|
/// </summary>
|
|
/// <param name="op">the unnestOp</param>
|
|
/// <param name="n">current subtree</param>
|
|
/// <returns>modified subtree</returns>
|
|
public override void Visit(UnnestOp op, Node n)
|
|
{
|
|
VisitDefault(n);
|
|
GroupAggregateVarRefInfo groupAggregateVarRefInfo;
|
|
if (_groupAggregateVarInfoManager.TryGetReferencedGroupAggregateVarInfo(op.Var, out groupAggregateVarRefInfo))
|
|
{
|
|
PlanCompiler.Assert(op.Table.Columns.Count == 1, "Expected one column before NTE");
|
|
_groupAggregateVarInfoManager.Add(op.Table.Columns[0], groupAggregateVarRefInfo.GroupAggregateVarInfo, groupAggregateVarRefInfo.Computation, true);
|
|
}
|
|
}
|
|
|
|
#endregion
|
|
|
|
#region ScalarOps Visitors
|
|
/// <summary>
|
|
/// If the op is a collection aggregate function it checks whether its arguement can be translated over
|
|
/// a single group aggregate var. If so, it is tracked as a candidate to be pushed into that
|
|
/// group by into node.
|
|
/// </summary>
|
|
/// <param name="op"></param>
|
|
/// <param name="n"></param>
|
|
public override void Visit(FunctionOp op, Node n)
|
|
{
|
|
VisitDefault(n);
|
|
if (!PlanCompilerUtil.IsCollectionAggregateFunction(op, n))
|
|
{
|
|
return;
|
|
}
|
|
PlanCompiler.Assert(n.Children.Count == 1, "Aggregate Function must have one argument");
|
|
|
|
Node argumentNode = n.Child0;
|
|
|
|
GroupAggregateVarInfo referencedGroupAggregateVarInfo;
|
|
Node templateNode;
|
|
bool isUnnested;
|
|
if (GroupAggregateVarComputationTranslator.TryTranslateOverGroupAggregateVar(n.Child0, false, _command, _groupAggregateVarInfoManager, out referencedGroupAggregateVarInfo, out templateNode, out isUnnested)
|
|
&& (isUnnested || AggregatePushdownUtil.IsVarRefOverGivenVar(templateNode, referencedGroupAggregateVarInfo.GroupAggregateVar)))
|
|
{
|
|
referencedGroupAggregateVarInfo.CandidateAggregateNodes.Add(new KeyValuePair<Node, Node>(n, templateNode));
|
|
}
|
|
}
|
|
|
|
#endregion
|
|
|
|
/// <summary>
|
|
/// Default visitor for nodes.
|
|
/// It tracks the child-parent relationship.
|
|
/// </summary>
|
|
/// <param name="n"></param>
|
|
protected override void VisitDefault(Node n)
|
|
{
|
|
VisitChildren(n);
|
|
foreach (Node child in n.Children)
|
|
{
|
|
//No need to track terminal nodes, plus some of these may be reused.
|
|
if (child.Op.Arity != 0)
|
|
{
|
|
_childToParent.Add(child, n);
|
|
}
|
|
}
|
|
}
|
|
#endregion
|
|
|
|
}
|
|
|
|
/// <summary>
|
|
/// Utility class to gather helper methods used by more than one class in the Aggregate Pushdown feature.
|
|
/// </summary>
|
|
internal static class AggregatePushdownUtil
|
|
{
|
|
/// <summary>
|
|
/// Determines whether the given node is a VarRef over the given var
|
|
/// </summary>
|
|
/// <param name="node"></param>
|
|
/// <param name="var"></param>
|
|
/// <returns></returns>
|
|
internal static bool IsVarRefOverGivenVar(Node node, Var var)
|
|
{
|
|
if (node.Op.OpType != OpType.VarRef)
|
|
{
|
|
return false;
|
|
}
|
|
return ((VarRefOp)node.Op).Var == var;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// The Aggregate Pushdown feature tries to identify function aggregates defined over a
|
|
/// group aggregate and push their definitions in the group by into node corresponding to
|
|
/// the group aggregate.
|
|
/// </summary>
|
|
internal class AggregatePushdown
|
|
{
|
|
#region Private fields
|
|
private readonly Command m_command;
|
|
private TryGetValue m_tryGetParent;
|
|
#endregion
|
|
|
|
#region Private Constructor
|
|
private AggregatePushdown(Command command)
|
|
{
|
|
this.m_command = command;
|
|
}
|
|
#endregion
|
|
|
|
#region 'Public' Surface
|
|
/// <summary>
|
|
/// Apply Aggregate Pushdown over the tree in the given plan complier state.
|
|
/// </summary>
|
|
/// <param name="planCompilerState"></param>
|
|
internal static void Process(PlanCompiler planCompilerState)
|
|
{
|
|
AggregatePushdown aggregatePushdown = new AggregatePushdown(planCompilerState.Command);
|
|
aggregatePushdown.Process();
|
|
}
|
|
#endregion
|
|
|
|
#region Private Methods
|
|
|
|
/// <summary>
|
|
/// The main driver
|
|
/// </summary>
|
|
private void Process()
|
|
{
|
|
IEnumerable<GroupAggregateVarInfo> groupAggregateVarInfos = GroupAggregateRefComputingVisitor.Process(m_command, out m_tryGetParent);
|
|
foreach (GroupAggregateVarInfo groupAggregateVarInfo in groupAggregateVarInfos)
|
|
{
|
|
if (groupAggregateVarInfo.HasCandidateAggregateNodes)
|
|
{
|
|
foreach (KeyValuePair<Node, Node> candidate in groupAggregateVarInfo.CandidateAggregateNodes)
|
|
{
|
|
TryProcessCandidate(candidate, groupAggregateVarInfo);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Try to push the given function aggregate candidate to the corresponding group into node.
|
|
/// The candidate can be pushed if all ancestors of the group into node up to the least common
|
|
/// ancestor between the group into node and the function aggregate have one of the following node op types:
|
|
/// Project
|
|
/// Filter
|
|
/// ConstraintSortOp
|
|
/// </summary>
|
|
/// <param name="command"></param>
|
|
/// <param name="candidate"></param>
|
|
/// <param name="groupAggregateVarInfo"></param>
|
|
/// <param name="m_childToParent"></param>
|
|
private void TryProcessCandidate(
|
|
KeyValuePair<Node, Node> candidate,
|
|
GroupAggregateVarInfo groupAggregateVarInfo)
|
|
{
|
|
IList<Node> functionAncestors;
|
|
IList<Node> groupByAncestors;
|
|
Node definingGroupNode = groupAggregateVarInfo.DefiningGroupNode;
|
|
FindPathsToLeastCommonAncestor(candidate.Key, definingGroupNode, out functionAncestors, out groupByAncestors);
|
|
|
|
//Check whether all ancestors of the GroupByInto node are of type that we support propagating through
|
|
if (!AreAllNodesSupportedForPropagation(groupByAncestors))
|
|
{
|
|
return;
|
|
}
|
|
|
|
//Add the function to the group by node
|
|
GroupByIntoOp definingGroupOp = (GroupByIntoOp)definingGroupNode.Op;
|
|
PlanCompiler.Assert(definingGroupOp.Inputs.Count == 1, "There should be one input var to GroupByInto at this stage");
|
|
Var inputVar = definingGroupOp.Inputs.First;
|
|
FunctionOp functionOp = (FunctionOp)candidate.Key.Op;
|
|
|
|
//
|
|
// Remap the template from referencing the groupAggregate var to reference the input to
|
|
// the group by into
|
|
//
|
|
Node argumentNode = OpCopier.Copy(m_command, candidate.Value);
|
|
Dictionary<Var, Var> dictionary = new Dictionary<Var, Var>(1);
|
|
dictionary.Add(groupAggregateVarInfo.GroupAggregateVar, inputVar);
|
|
VarRemapper remapper = new VarRemapper(m_command, dictionary);
|
|
remapper.RemapSubtree(argumentNode);
|
|
|
|
Node newFunctionDefiningNode = m_command.CreateNode(
|
|
m_command.CreateAggregateOp(functionOp.Function, false),
|
|
argumentNode);
|
|
|
|
Var newFunctionVar;
|
|
Node varDefNode = m_command.CreateVarDefNode(newFunctionDefiningNode, out newFunctionVar);
|
|
|
|
// Add the new aggregate to the list of aggregates
|
|
definingGroupNode.Child2.Children.Add(varDefNode);
|
|
GroupByIntoOp groupByOp = (GroupByIntoOp)definingGroupNode.Op;
|
|
groupByOp.Outputs.Set(newFunctionVar);
|
|
|
|
//Propagate the new var throught the ancestors of the GroupByInto
|
|
for (int i = 0; i < groupByAncestors.Count; i++)
|
|
{
|
|
Node groupByAncestor = groupByAncestors[i];
|
|
if (groupByAncestor.Op.OpType == OpType.Project)
|
|
{
|
|
ProjectOp ancestorProjectOp = (ProjectOp)groupByAncestor.Op;
|
|
ancestorProjectOp.Outputs.Set(newFunctionVar);
|
|
}
|
|
}
|
|
|
|
//Update the functionNode
|
|
candidate.Key.Op = m_command.CreateVarRefOp(newFunctionVar);
|
|
candidate.Key.Children.Clear();
|
|
}
|
|
|
|
/// <summary>
|
|
/// Check whether all nodes in the given list of nodes are of types
|
|
/// that we know how to propagate an aggregate through
|
|
/// </summary>
|
|
/// <param name="nodes"></param>
|
|
/// <returns></returns>
|
|
private static bool AreAllNodesSupportedForPropagation(IList<Node> nodes)
|
|
{
|
|
foreach (Node node in nodes)
|
|
{
|
|
if (node.Op.OpType != OpType.Project
|
|
&& node.Op.OpType != OpType.Filter
|
|
&& node.Op.OpType != OpType.ConstrainedSort
|
|
)
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Finds the paths from each of node1 and node2 to their least common ancestor
|
|
/// </summary>
|
|
/// <param name="node1"></param>
|
|
/// <param name="node2"></param>
|
|
/// <param name="ancestors1"></param>
|
|
/// <param name="ancestors2"></param>
|
|
private void FindPathsToLeastCommonAncestor(Node node1, Node node2, out IList<Node> ancestors1, out IList<Node> ancestors2)
|
|
{
|
|
ancestors1 = FindAncestors(node1);
|
|
ancestors2 = FindAncestors(node2);
|
|
|
|
int currentIndex1 = ancestors1.Count - 1;
|
|
int currentIndex2 = ancestors2.Count - 1;
|
|
while (ancestors1[currentIndex1] == ancestors2[currentIndex2])
|
|
{
|
|
currentIndex1--;
|
|
currentIndex2--;
|
|
}
|
|
|
|
for (int i = ancestors1.Count - 1; i > currentIndex1; i--)
|
|
{
|
|
ancestors1.RemoveAt(i);
|
|
}
|
|
for (int i = ancestors2.Count - 1; i > currentIndex2; i--)
|
|
{
|
|
ancestors2.RemoveAt(i);
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Finds all ancestors of the given node.
|
|
/// </summary>
|
|
/// <param name="node"></param>
|
|
/// <returns>An ordered list of the all the ancestors of the given node starting from the immediate parent
|
|
/// to the root of the tree</returns>
|
|
private IList<Node> FindAncestors(Node node)
|
|
{
|
|
List<Node> ancestors = new List<Node>();
|
|
Node currentNode = node;
|
|
Node ancestor;
|
|
while (m_tryGetParent(currentNode, out ancestor))
|
|
{
|
|
ancestors.Add(ancestor);
|
|
currentNode = ancestor;
|
|
}
|
|
return ancestors;
|
|
}
|
|
#endregion
|
|
}
|
|
}
|