2016-01-07 08:17:16 -05:00
// Copyright 1998-2016 Epic Games, Inc. All Rights Reserved.
2014-03-14 14:13:41 -04:00
/*=============================================================================
GPUProfiler . h : Hierarchical GPU Profiler Implementation .
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = */
# include "RHI.h"
# include "GPUProfiler.h"
# if !UE_BUILD_SHIPPING
# include "STaskGraph.h"
# include "ModuleManager.h"
# include "TaskGraphInterfaces.h"
# endif
2015-01-14 00:36:58 -05:00
# define LOCTEXT_NAMESPACE "GpuProfiler"
2014-09-17 12:36:31 -04:00
/**
 * String console variable for the ProfileGPU entry pattern (default value is the "*" literal below).
 * DumpEventTree() in this file looks the variable up by name and matches the pattern against the
 * names of the node-histogram buckets; the help text below describes the wildcard syntax.
 */
static TAutoConsoleVariable < FString > GProfileGPUPatternCVar (
TEXT ( " r.ProfileGPU.Pattern " ) ,
TEXT ( " * " ) ,
TEXT ( " Allows to filter the entries when using ProfileGPU, the pattern match is case sensitive. \n " )
TEXT ( " '*' can be used in the end to get all entries starting with the string. \n " )
TEXT ( " '*' without any leading characters disables the pattern matching and uses a time threshold instead (default). \n " )
TEXT ( " '?' allows to ignore one character. \n " )
TEXT ( " e.g. AmbientOcclusionSetup, AmbientOcclusion*, Ambient???lusion*, * " ) ,
ECVF_Default ) ;
2014-09-24 20:17:36 -04:00
/**
 * String console variable for the ProfileGPU tree filter (default value is the "*" literal below).
 * DumpEventTree() in this file looks the variable up by name, uses it as the wildcard for the
 * tree dump, and only prints the node histogram when the value equals the "*" literal.
 */
static TAutoConsoleVariable < FString > GProfileGPURootCVar (
TEXT ( " r.ProfileGPU.Root " ) ,
TEXT ( " * " ) ,
TEXT ( " Allows to filter the tree when using ProfileGPU, the pattern match is case sensitive. " ) ,
ECVF_Default ) ;
2015-09-21 20:07:00 -04:00
/**
 * Integer console variable (default 0). DumpEventTree() reads it with GetValueOnRenderThread();
 * any non-zero value makes the tree dump also process nodes that have neither draws nor children.
 */
static TAutoConsoleVariable < int32 > GProfileGPUShowEvents (
TEXT ( " r.ProfileGPU.ShowLeafEvents " ) ,
0 ,
TEXT ( " Allows profileGPU to display event-only leaf nodes with no draws associated. " ) ,
ECVF_Default ) ;
/**
 * Integer console variable (default 0) controlling display of resource transition events,
 * per its help text. NOTE(review): nothing in this part of the file reads it - presumably it is
 * consumed elsewhere; confirm.
 */
static TAutoConsoleVariable < int32 > GProfileGPUTransitions (
TEXT ( " r.ProfileGPU.ShowTransitions " ) ,
0 ,
TEXT ( " Allows profileGPU to display resource transition events. " ) ,
ECVF_Default ) ;
2014-03-14 14:13:41 -04:00
/** Ordering predicate for event node stats: entries with a larger TimingResult come first. */
struct FNodeStatsCompare
{
    /** Returns true when Lhs has a strictly larger TimingResult than Rhs (descending duration order). */
    FORCEINLINE bool operator()(const FGPUProfilerEventNodeStats& Lhs, const FGPUProfilerEventNodeStats& Rhs) const
    {
        return Lhs.TimingResult > Rhs.TimingResult;
    }
};
/**
 * Depth-first walk over the event tree that fills in each visited node's TimingResult
 * (GetTiming() scaled by 1000) and accumulates per-name totals into EventHistogram.
 * A node with no draws and no children is skipped: its TimingResult is left untouched
 * and it contributes nothing to the histogram.
 *
 * @param Node            Node to process; dereferenced unconditionally
 * @param Depth           Depth of Node in the tree; only forwarded (+1) to the recursive calls
 * @param EventHistogram  Map from event name to accumulated stats, updated in place
 */
static void GatherStatsEventNode(FGPUProfilerEventNode* Node, int32 Depth, TMap<FString, FGPUProfilerEventNodeStats>& EventHistogram)
{
    const bool bHasDrawsOrChildren = Node->NumDraws > 0 || Node->Children.Num() > 0;
    if (!bHasDrawsOrChildren)
    {
        return;
    }

    Node->TimingResult = Node->GetTiming() * 1000.0f;

    if (FGPUProfilerEventNodeStats* ExistingBucket = EventHistogram.Find(Node->Name))
    {
        // Another event with the same name was already seen: fold this node into its bucket
        ExistingBucket->NumDraws += Node->NumDraws;
        ExistingBucket->NumPrimitives += Node->NumPrimitives;
        ExistingBucket->NumVertices += Node->NumVertices;
        ExistingBucket->TimingResult += Node->TimingResult;
        ExistingBucket->NumEvents++;
    }
    else
    {
        // First event with this name: start a new bucket seeded from this node
        FGPUProfilerEventNodeStats FreshBucket;
        FreshBucket.NumDraws = Node->NumDraws;
        FreshBucket.NumPrimitives = Node->NumPrimitives;
        FreshBucket.NumVertices = Node->NumVertices;
        FreshBucket.TimingResult = Node->TimingResult;
        FreshBucket.NumEvents = 1;
        EventHistogram.Add(Node->Name, FreshBucket);
    }

    // Descend into the children
    const int32 ChildCount = Node->Children.Num();
    for (int32 Index = 0; Index < ChildCount; ++Index)
    {
        GatherStatsEventNode(Node->Children[Index], Depth + 1, EventHistogram);
    }
}
/** Recursively dumps stats for each node with a depth first traversal. */
2015-09-21 20:07:00 -04:00
static void DumpStatsEventNode ( FGPUProfilerEventNode * Node , float RootResult , int32 Depth , const FWildcardString & WildcardFilter , bool bParentMatchedFilter , int32 & NumNodes , int32 & NumDraws , bool bDumpEventLeafNodes )
2014-03-14 14:13:41 -04:00
{
NumNodes + + ;
2015-09-21 20:07:00 -04:00
if ( Node - > NumDraws > 0 | | Node - > Children . Num ( ) > 0 | | bDumpEventLeafNodes )
2014-03-14 14:13:41 -04:00
{
NumDraws + = Node - > NumDraws ;
// Percent that this node was of the total frame time
const float Percent = Node - > TimingResult * 100.0f / ( RootResult * 1000.0f ) ;
const int32 EffectiveDepth = FMath : : Max ( Depth - 1 , 0 ) ;
2014-09-24 20:17:36 -04:00
const bool bMatchesFilter = bParentMatchedFilter | | WildcardFilter . IsMatch ( Node - > Name ) ;
if ( bMatchesFilter )
{
2015-09-01 16:52:40 -04:00
FString Extra ;
if ( Node - > TimingResult > = 0.1f & & Node - > NumVertices * Node - > NumDraws > 100 )
{
Extra = FString : : Printf ( TEXT ( " %.0f prims/ms %.0f verts/ms " ) ,
Node - > NumPrimitives / Node - > TimingResult ,
Node - > NumVertices / Node - > TimingResult ) ;
}
2014-09-24 20:17:36 -04:00
// Print information about this node, padded to its depth in the tree
2015-09-01 16:52:40 -04:00
UE_LOG ( LogRHI , Warning , TEXT ( " %s%4.1f%%%5.2fms %s %u draws %u prims %u verts%s " ) ,
2014-09-24 20:17:36 -04:00
* FString ( TEXT ( " " ) ) . LeftPad ( EffectiveDepth * 3 ) ,
Percent ,
Node - > TimingResult ,
* Node - > Name ,
Node - > NumDraws ,
Node - > NumPrimitives ,
2015-09-01 16:52:40 -04:00
Node - > NumVertices ,
* Extra
2014-09-24 20:17:36 -04:00
) ;
}
2014-03-14 14:13:41 -04:00
float TotalChildTime = 0 ;
uint32 TotalChildDraws = 0 ;
for ( int32 ChildIndex = 0 ; ChildIndex < Node - > Children . Num ( ) ; ChildIndex + + )
{
FGPUProfilerEventNode * ChildNode = Node - > Children [ ChildIndex ] ;
int32 NumChildDraws = 0 ;
2015-09-21 20:07:00 -04:00
// Traverse children
DumpStatsEventNode ( Node - > Children [ ChildIndex ] , RootResult , Depth + 1 , WildcardFilter , bMatchesFilter , NumNodes , NumChildDraws , bDumpEventLeafNodes ) ;
2014-03-14 14:13:41 -04:00
NumDraws + = NumChildDraws ;
TotalChildTime + = ChildNode - > TimingResult ;
TotalChildDraws + = NumChildDraws ;
}
const float UnaccountedTime = FMath : : Max ( Node - > TimingResult - TotalChildTime , 0.0f ) ;
const float UnaccountedPercent = UnaccountedTime * 100.0f / ( RootResult * 1000.0f ) ;
2015-02-19 16:26:32 -05:00
// Add an 'Other Children' node if necessary to show time spent in the current node that is not in any of its children
2014-09-24 20:17:36 -04:00
if ( bMatchesFilter & & Node - > Children . Num ( ) > 0 & & TotalChildDraws > 0 & & ( UnaccountedPercent > 2.0f | | UnaccountedTime > .2f ) )
2014-03-14 14:13:41 -04:00
{
Copying //UE4/Orion-Staging (Orion Main @ CL-2792706 to //UE4/Main
==========================
MAJOR FEATURES + CHANGES
==========================
Change 2792706 on 2015/12/07 by Terence.Burns
Rebuild lightmaps automation changes
- Sync and Build binary files for execution
- Much improved error handling
- Email notification support added.
#Note - This should massively simplify the batch script we use to rebuild lightmaps.
#rb none
#Tests Run the RebuildLightmaps commandlet many times to ensure it runs and errors correctly.
Change 2791950 on 2015/12/05 by Matt.Kuhlenschmidt
Added settings to toggle on and off display of Ping and FPS values.
Server FPS will be disabled before ship
#rb none #test pc/ps4 golden path, pie
Change 2791827 on 2015/12/05 by Marcus.Wassmer
Fix texture memory leak. Fixes automation using too much memory.
#rb Brad.Angelcyk
#codereview bob.ferreira
#test automation runs, editor.
Change 2791313 on 2015/12/04 by Martin.Mittring
fixed PS4 compiling
#rb:Michael.Noland
#test:not
Change 2791014 on 2015/12/04 by Martin.Mittring
nicer cvar help for r.PS4ContinuousSubmits
#rb:Olaf.Piesche
#code_review:Marcus.Wassmer
#test:PC
Change 2791011 on 2015/12/04 by Martin.Mittring
fixed compile error when disabling ENABLE_TEXTURE_TRACKING
#rb:Olaf.Piesche
#test:run Paragon on PC
Change 2790848 on 2015/12/04 by Martin.Mittring
missing changes
nicer cvar help, optimized unneccessary referencecounting, removed redundant code
#rb:Olaf.Piesche
#test:PC Paragon
Change 2790840 on 2015/12/04 by Martin.Mittring
nicer cvar help, optimized unneccessary referencecounting, removed redundant code
#rb:Olaf.Piesche
#test:PC Paragon
Change 2791585 on 2015/12/04 by Michael.Noland
Rendering: Added a more actionable error message to a check() failure for a FStaticLightingMesh that has already been processed when building lighting in a map that contains HLOD
#rb None
#tests Built lighting in a map that was crashing at this check() and verified that the message indicated the problematic mesh
Change 2791244 on 2015/12/04 by Ryan.Brucks
Submitting all my Paragon Content before the new Agora Branch.
Change 2791240 on 2015/12/04 by Marcus.Wassmer
Bump to .061 patch and new pub tools to pass cert
#rb non
#test compile ps4
Change 2791132 on 2015/12/04 by ryan.brucks
RenderToTextureMacros: fixed issue with polygon index being +1 on accident
Change 2790747 on 2015/12/04 by Terence.Burns
Rebuild Lightmaps Automation Script - Adding the -unattended switch for build machines.
#rb None
#tests Run through the Rebuild Lightmaps UAT script process.
Change 2790589 on 2015/12/04 by Bart.Bressler
- Invite PS4 friend option for add party member button now works properly, also fixes crash. Fixes OR-10359.
#rb sam.zamani
#tests invited ps4 player using Invite PS4 Friend option, confirmed that player joined mcp party and ps4 session
Change 2790418 on 2015/12/04 by James.Golding
Roll back HLOD lightmap UV change, Oz reporting issues when building lighting, need more investigation
#rb none
#tests none
Change 2790333 on 2015/12/04 by James.Golding
Add fallback to FMeshUtilities::PropagatePaintedColorsToRawMesh when mesh has been reduced in engine and WedgeMap is missing
#rb martin.wilson
#codereview jurre.debaare
#tests Built HLOD meshes in the editor
Change 2790292 on 2015/12/04 by Olaf.Piesche
Free the new particle array at the beginning of the tick for each instance; that way, even if we're not rendering the array will be cleared and we don't keep injecting new particles that never get killed until rendering resumes
#rb marcus.wassmer
#tests Editor, PIE
Change 2790003 on 2015/12/04 by James.Golding
Fix possible crash in ALODActor::RemoveSubActor
#rb keith.judge
#codereview jurre.debaare
#tests Generated HLOD proxy in editor
Change 2789998 on 2015/12/04 by James.Golding
2015-12-08 09:25:02 -05:00
UE_LOG ( LogRHI , Warning , TEXT ( " %s%4.1f%%%5.2fms Other Children " ) ,
2014-03-14 14:13:41 -04:00
* FString ( TEXT ( " " ) ) . LeftPad ( ( EffectiveDepth + 1 ) * 3 ) ,
UnaccountedPercent ,
UnaccountedTime ) ;
}
}
}
# if !UE_BUILD_SHIPPING
/**
 * Builds the visualizer event for one GPU profiler node and, recursively, for all of its children.
 * Children are laid out back to back: the first child starts at the parent's start time and each
 * following child starts where the previous one ended (previous start + previous TimingResult).
 *
 * @param InNode         Profiler node to convert
 * @param InParentEvent  Event stored as the parent of the newly created event
 * @param InStartTimeMs  Start time of this node; divided by InTotalTimeMs for the event
 * @param InTotalTimeMs  Total time used to normalize the start time and the duration
 * @return The newly created event with its child events attached
 */
static TSharedPtr<FVisualizerEvent> CreateVisualizerDataRecursively(const TRefCountPtr<class FGPUProfilerEventNode>& InNode, TSharedPtr<FVisualizerEvent> InParentEvent, const double InStartTimeMs, const double InTotalTimeMs)
{
    const double NormalizedStart = InStartTimeMs / InTotalTimeMs;
    const double NormalizedDuration = InNode->TimingResult / InTotalTimeMs;

    TSharedPtr<FVisualizerEvent> Event(new FVisualizerEvent(NormalizedStart, NormalizedDuration, InNode->TimingResult, 0, InNode->Name));
    Event->ParentEvent = InParentEvent;

    // Running start time for the next child
    double NextChildStartMs = InStartTimeMs;

    const int32 ChildCount = InNode->Children.Num();
    for (int32 Index = 0; Index < ChildCount; ++Index)
    {
        const TRefCountPtr<FGPUProfilerEventNode>& Child = InNode->Children[Index];

        Event->Children.Add(CreateVisualizerDataRecursively(Child, Event, NextChildStartMs, InTotalTimeMs));
        NextChildStartMs += Child->TimingResult;
    }

    return Event;
}
/**
 * Converts GPU profile data to Visualizer data.
 *
 * The total time is the sum of TimingResult over every element of InProfileData, but only the
 * first element is converted, so InProfileData must contain at least one element.
 *
 * @param InProfileData  GPU profile data (root event nodes)
 * @return Visualizer event built from InProfileData[0] and its subtree
 */
static TSharedPtr<FVisualizerEvent> CreateVisualizerData(const TArray<TRefCountPtr<class FGPUProfilerEventNode> >& InProfileData)
{
    // Sum up the time of all roots first; it is the normalization factor for every event
    double TotalTimeMs = 0.0;
    const int32 RootCount = InProfileData.Num();
    for (int32 RootIndex = 0; RootIndex < RootCount; RootIndex++)
    {
        TotalTimeMs += InProfileData[RootIndex]->TimingResult;
    }

    // Assumption: InProfileData contains only one (root) element. Otherwise an extra FVisualizerEvent root event is required.
    TSharedPtr<FVisualizerEvent> NoParent;

    // Recursively create visualizer event data, starting at time 0 with no parent event.
    return CreateVisualizerDataRecursively(InProfileData[0], NoParent, 0.0, TotalTimeMs);
}
# endif
void FGPUProfilerEventNodeFrame : : DumpEventTree ( )
{
if ( EventTree . Num ( ) > 0 )
{
float RootResult = GetRootTimingResults ( ) ;
UE_LOG ( LogRHI , Warning , TEXT ( " Perf marker hierarchy, total GPU time %.2fms " ) , RootResult * 1000.0f ) ;
2015-01-14 00:36:58 -05:00
// Display a warning if this is a GPU profile and the GPU was profiled with v-sync enabled
FText VsyncEnabledWarningText = FText : : GetEmpty ( ) ;
static IConsoleVariable * CVSyncVar = IConsoleManager : : Get ( ) . FindConsoleVariable ( TEXT ( " r.VSync " ) ) ;
2015-04-01 16:25:53 -04:00
if ( CVSyncVar - > GetInt ( ) ! = 0 & & ! PlatformDisablesVSync ( ) )
2015-01-14 00:36:58 -05:00
{
VsyncEnabledWarningText = LOCTEXT ( " GpuProfileVsyncEnabledWarning " ,
" WARNING: This GPU profile was captured with v-sync enabled. V-sync wait time may show up in any bucket, and as a result the data in this profile may be skewed. Please profile with v-sync disabled to obtain the most accurate data. " ) ;
UE_LOG ( LogRHI , Warning , TEXT ( " %s " ) , * ( VsyncEnabledWarningText . ToString ( ) ) ) ;
}
2014-03-14 14:13:41 -04:00
LogDisjointQuery ( ) ;
TMap < FString , FGPUProfilerEventNodeStats > EventHistogram ;
for ( int32 BaseNodeIndex = 0 ; BaseNodeIndex < EventTree . Num ( ) ; BaseNodeIndex + + )
{
GatherStatsEventNode ( EventTree [ BaseNodeIndex ] , 0 , EventHistogram ) ;
}
2014-09-24 20:17:36 -04:00
static IConsoleVariable * CVar2 = IConsoleManager : : Get ( ) . FindConsoleVariable ( TEXT ( " r.ProfileGPU.Root " ) ) ;
FString RootWildcardString = CVar2 - > GetString ( ) ;
FWildcardString RootWildcard ( RootWildcardString ) ;
2014-03-14 14:13:41 -04:00
int32 NumNodes = 0 ;
int32 NumDraws = 0 ;
2015-09-21 20:07:00 -04:00
bool bDumpEventLeafNodes = GProfileGPUShowEvents . GetValueOnRenderThread ( ) ! = 0 ;
2014-03-14 14:13:41 -04:00
for ( int32 BaseNodeIndex = 0 ; BaseNodeIndex < EventTree . Num ( ) ; BaseNodeIndex + + )
{
2015-09-21 20:07:00 -04:00
DumpStatsEventNode ( EventTree [ BaseNodeIndex ] , RootResult , 0 , RootWildcard , false , NumNodes , NumDraws , bDumpEventLeafNodes ) ;
2014-03-14 14:13:41 -04:00
}
//@todo - calculate overhead instead of hardcoding
// This .012ms of overhead is based on what Nsight shows as the minimum draw call duration on a 580 GTX,
// Which is apparently how long it takes to issue two timing events.
UE_LOG ( LogRHI , Warning , TEXT ( " Total Nodes %u Draws %u approx overhead %.2fms " ) , NumNodes , NumDraws , .012f * NumNodes ) ;
UE_LOG ( LogRHI , Warning , TEXT ( " " ) ) ;
UE_LOG ( LogRHI , Warning , TEXT ( " " ) ) ;
2014-09-24 20:17:36 -04:00
if ( RootWildcardString = = TEXT ( " * " ) )
2014-09-17 12:36:31 -04:00
{
2014-09-24 20:17:36 -04:00
// Sort descending based on node duration
EventHistogram . ValueSort ( FNodeStatsCompare ( ) ) ;
2014-09-17 12:36:31 -04:00
2014-09-24 20:17:36 -04:00
// Log stats about the node histogram
UE_LOG ( LogRHI , Warning , TEXT ( " Node histogram %u buckets " ) , EventHistogram . Num ( ) ) ;
2014-09-17 12:36:31 -04:00
2014-09-24 20:17:36 -04:00
static IConsoleVariable * CVar = IConsoleManager : : Get ( ) . FindConsoleVariable ( TEXT ( " r.ProfileGPU.Pattern " ) ) ;
2014-09-17 12:36:31 -04:00
2014-09-24 20:17:36 -04:00
// bad: reading on render thread but we don't support ECVF_RenderThreadSafe on strings yet
// It's very unlikely to cause a problem as the cvar is only changes by the user.
FString WildcardString = CVar - > GetString ( ) ;
2014-09-17 12:36:31 -04:00
2015-06-10 15:32:35 -04:00
FGPUProfilerEventNodeStats Sum ;
2014-09-24 20:17:36 -04:00
const float ThresholdInMS = 5.0f ;
2014-09-17 12:36:31 -04:00
2014-09-24 20:17:36 -04:00
if ( WildcardString = = FString ( TEXT ( " * " ) ) )
2014-09-17 12:36:31 -04:00
{
2014-09-24 20:17:36 -04:00
// disable Wildcard functionality
WildcardString . Empty ( ) ;
2014-09-17 12:36:31 -04:00
}
2014-09-24 20:17:36 -04:00
if ( WildcardString . IsEmpty ( ) )
2014-03-14 14:13:41 -04:00
{
2014-09-24 20:17:36 -04:00
UE_LOG ( LogRHI , Warning , TEXT ( " r.ProfileGPU.Pattern = '*' (using threshold of %g ms) " ) , ThresholdInMS ) ;
2014-03-14 14:13:41 -04:00
}
else
{
2014-09-24 20:17:36 -04:00
UE_LOG ( LogRHI , Warning , TEXT ( " r.ProfileGPU.Pattern = '%s' (not using time threshold) " ) , * WildcardString ) ;
2014-03-14 14:13:41 -04:00
}
2014-09-24 20:17:36 -04:00
FWildcardString Wildcard ( WildcardString ) ;
int32 NumNotShown = 0 ;
for ( TMap < FString , FGPUProfilerEventNodeStats > : : TIterator It ( EventHistogram ) ; It ; + + It )
{
const FGPUProfilerEventNodeStats & NodeStats = It . Value ( ) ;
bool bDump = NodeStats . TimingResult > RootResult * ThresholdInMS ;
if ( ! Wildcard . IsEmpty ( ) )
{
// if a Wildcard string was specified, we want to always dump all entries
bDump = Wildcard . IsMatch ( * It . Key ( ) ) ;
}
if ( bDump )
{
UE_LOG ( LogRHI , Warning , TEXT ( " %.2fms %s Events %u Draws %u " ) , NodeStats . TimingResult , * It . Key ( ) , NodeStats . NumEvents , NodeStats . NumDraws ) ;
2015-06-10 15:32:35 -04:00
Sum + = NodeStats ;
2014-09-24 20:17:36 -04:00
}
else
{
NumNotShown + + ;
}
}
2015-06-10 15:32:35 -04:00
UE_LOG ( LogRHI , Warning , TEXT ( " Total %.2fms Events %u Draws %u, %u buckets not shown " ) ,
Sum . TimingResult , Sum . NumEvents , Sum . NumDraws , NumNotShown ) ;
2014-09-24 20:17:36 -04:00
}
2014-03-14 14:13:41 -04:00
# if !UE_BUILD_SHIPPING
// Create and display profile visualizer data
if ( RHIConfig : : ShouldShowProfilerAfterProfilingGPU ( ) )
{
// execute on main thread
{
struct FDisplayProfilerVisualizer
{
2015-01-14 00:36:58 -05:00
void Thread ( TSharedPtr < FVisualizerEvent > InVisualizerData , const FText InVsyncEnabledWarningText )
2014-03-14 14:13:41 -04:00
{
static FName TaskGraphModule ( TEXT ( " TaskGraph " ) ) ;
if ( FModuleManager : : Get ( ) . IsModuleLoaded ( TaskGraphModule ) )
{
2014-06-18 19:23:34 -04:00
IProfileVisualizerModule & ProfileVisualizer = FModuleManager : : GetModuleChecked < IProfileVisualizerModule > ( TaskGraphModule ) ;
2015-01-14 00:36:58 -05:00
// Display a warning if this is a GPU profile and the GPU was profiled with v-sync enabled (otherwise InVsyncEnabledWarningText is empty)
ProfileVisualizer . DisplayProfileVisualizer ( InVisualizerData , TEXT ( " GPU " ) , InVsyncEnabledWarningText , FLinearColor : : Red ) ;
2014-03-14 14:13:41 -04:00
}
}
} DisplayProfilerVisualizer ;
TSharedPtr < FVisualizerEvent > VisualizerData = CreateVisualizerData ( EventTree ) ;
2014-09-02 05:19:25 -04:00
DECLARE_CYCLE_STAT ( TEXT ( " FSimpleDelegateGraphTask.DisplayProfilerVisualizer " ) ,
STAT_FSimpleDelegateGraphTask_DisplayProfilerVisualizer ,
STATGROUP_TaskGraphTasks ) ;
FSimpleDelegateGraphTask : : CreateAndDispatchWhenReady (
2015-01-14 00:36:58 -05:00
FSimpleDelegateGraphTask : : FDelegate : : CreateRaw ( & DisplayProfilerVisualizer , & FDisplayProfilerVisualizer : : Thread , VisualizerData , VsyncEnabledWarningText ) ,
2014-09-02 05:19:25 -04:00
GET_STATID ( STAT_FSimpleDelegateGraphTask_DisplayProfilerVisualizer ) , nullptr , ENamedThreads : : GameThread
2014-03-14 14:13:41 -04:00
) ;
}
}
# endif
}
}
Copying //UE4/Dev-Rendering to Dev-Main (//UE4/Dev-Main)
#lockdown ben.marsh
==========================
MAJOR FEATURES + CHANGES
==========================
Change 2774277 on 2015/11/19 by Gil.Gribb
UE4 - Did minor optimizations to the PS4 RHI and drawlists.
Change 2791226 on 2015/12/04 by Uriel.Doyon
Added source code for Embree 2.7.0
Removed duplicate files from the /doc folder.
Change 2800193 on 2015/12/11 by Marcus.Wassmer
SSAO AsyncCompute support.
#rb Martin.Mittring
Change 2801631 on 2015/12/14 by Olaf.Piesche
Making auto deactivate true by default, moving checks to HasCompleted, eliminating some unnecessary logic
#rb martin.mittring
Change 2803240 on 2015/12/15 by Gil.Gribb
UE4 - Added command to collect stats on spammy stats.
Change 2803476 on 2015/12/15 by Rolando.Caloca
DR - Allow toggling compute skin dispatch at runtime
- r.SkinCacheShaders Now enable the shaders and feature
- r.SkinCaching enables toggling at runtime
- r.SkinCache.BufferSize Sets the size in bytes of buffer for outputting
- Now uses 3 UAV buffers instead of one (avoid RenderDoc crashes)
#codereview Marcus.Wassmer, Martin.Mittring
Change 2803940 on 2015/12/15 by Marcus.Wassmer
Add r.PS4.AsyncComputeBudgetMode to switch between CUMasking and WaveLimit modes. So far it looks like WaveLimits behave better in UE4.
Also rearrange AsyncSSAO to run immediately after HZB to overlap with occlusion queries. In my testing this takes SSAO cost from .5ms -> .2ms. However it had to be hacked to run without normals. Hopefully Martin can get some real AsyncSSAO in.
#rb Martin.Mittring
#codereview Martin.Mittring
Change 2803999 on 2015/12/15 by Uriel.Doyon
Refactored the shader complexity material override logic to allow other viewmodes shader overrides.
TexelFactorAccuracy ViewMode : shows the accuracy of the static mesh texel factors, used for streaming.
WantedMipsAccuracy ViewMode : shows the accuracy of the static mesh wanted mips accuracy, used for streaming.
Added an option to stream textures based on the AABB distance instead of using the sphere approximation.
Added an option to keep only the wanted mips.
Moved optimization related viewmodes into a submenu to avoid polluting the interface.
#jira UE-24502
#jira UE-24503
#jira UERNDR-89
Change 2804150 on 2015/12/15 by Olaf.Piesche
make separate translucency screen percentage a bit more robust; add numsamples to the render target creation functions in preparation for MSAA support for higher quality with low res separate translucency
#rb martin.mittring
Change 2804367 on 2015/12/15 by Daniel.Wright
Capsule shadow primitives are tracked separately on registration - saves 2.6ms of RT time doing the view frustum culling in a medium sized map
Change 2805293 on 2015/12/16 by Olaf.Piesche
logging if potentially immortal emitters are spawned from gameplay; this should catch if we spawn burst only emitters with indefinite life spans (muzzle flashes, hit impacts, etc.)
#rb martin.mittring
Change 2805586 on 2015/12/16 by Zabir.Hoque
Adding support for decals to fade and destroy themselves automatically.
#CodeReview: Martin.Mittring, Daniel.Wright, Olaf.Piesche
Change 2807663 on 2015/12/17 by Rolando.Caloca
DR - Remove expensive logging
#codereview Marcus.Wassmer
Change 2807903 on 2015/12/17 by Zabir.Hoque
Refactored DecalComponent's lifetime management such that it can be set and reset from Blueprints.
#CodeReview Daniel.Wright, Martin.Mittring, Olaf.Piesche
Change 2809261 on 2015/12/18 by Martin.Mittring
Added VisualizeShadingModels to track down issues like this:
FORT-16913 Textures on Hero Mesh is not shown
#rb:David.Hill
#code_review:Bob.Tellez
Change 2810136 on 2015/12/21 by Rolando.Caloca
DR - Added back draw event colors
PR #1602
#jira UE-21526
#codereview Mark.Satterthwaite, Keith.Judge, Marcus.Wassmer, Josh.Adams
Change 2810680 on 2015/12/21 by Martin.Mittring
moved SSAO ComputeShader running without per pixel normal (for AsyncCompute) into DevRendering
#test:editor
Change 2811205 on 2015/12/22 by Brian.Karis
Pulled clear coat out of the reflection compute shader. Added permutation for skylight.
Clear coat base layer now done in base pass. It only picks up the closest capture. This will cause popping when the object moves. Still needs a cross fade.
Change 2811275 on 2015/12/22 by David.Hill
UE-24675
#rb martin.mittring
Corrected buffer-size related problem with fringe.
Change 2811397 on 2015/12/22 by Brian.Karis
2016-01-08 11:12:28 -05:00
void FGPUProfiler : : PushEvent ( const TCHAR * Name , FColor Color )
2014-03-14 14:13:41 -04:00
{
if ( bTrackingEvents )
{
2014-10-02 11:20:07 -04:00
check ( IsInRenderingThread ( ) | | IsInRHIThread ( ) ) ;
2014-03-14 14:13:41 -04:00
if ( CurrentEventNode )
{
// Add to the current node's children
CurrentEventNode - > Children . Add ( CreateEventNode ( Name , CurrentEventNode ) ) ;
CurrentEventNode = CurrentEventNode - > Children . Last ( ) ;
}
else
{
// Add a new root node to the tree
CurrentEventNodeFrame - > EventTree . Add ( CreateEventNode ( Name , NULL ) ) ;
CurrentEventNode = CurrentEventNodeFrame - > EventTree . Last ( ) ;
}
check ( CurrentEventNode ) ;
// Start timing the current node
CurrentEventNode - > StartTiming ( ) ;
}
}
void FGPUProfiler : : PopEvent ( )
{
if ( bTrackingEvents )
{
2014-10-02 11:20:07 -04:00
check ( CurrentEventNode & & ( IsInRenderingThread ( ) | | IsInRHIThread ( ) ) ) ;
2014-03-14 14:13:41 -04:00
// Stop timing the current node and move one level up the tree
CurrentEventNode - > StopTiming ( ) ;
CurrentEventNode = CurrentEventNode - > Parent ;
}
}
/** Whether GPU timing measurements are supported by the driver. Defined here with an initial value of false. */
bool FGPUTiming : : GIsSupported = false ;
/** Frequency for the timing values, in number of ticks per seconds, or 0 if the feature isn't supported. Defined here with an initial value of 0. */
uint64 FGPUTiming : : GTimingFrequency = 0 ;
/** Whether the static variables have been initialized. Defined here with an initial value of false. */
bool FGPUTiming : : GAreGlobalsInitialized = false ;
2015-01-14 00:36:58 -05:00
2015-10-31 10:55:13 -04:00
float FWindowedGPUTimer : : GetElapsedAverage ( FRHICommandListImmediate & RHICmdList , float & OutAvgTimeInSeconds )
{
if ( QueriesFinished < StartQueries . Num ( ) )
{
return 0.0f ;
}
float TotalTime = 0 ;
// Grab the queries in our window (specified number of frames old) and calculate total time as an average of the winow size
//
for ( int32 i = StartQueries . Num ( ) - WindowSize ; i < StartQueries . Num ( ) ; i + + )
{
uint64 StartTime , EndTime ;
bool StartSucceeded = RHICmdList . GetRenderQueryResult ( StartQueries [ i ] , StartTime , false ) ;
bool EndSucceeded = RHICmdList . GetRenderQueryResult ( EndQueries [ i ] , EndTime , false ) ;
// figure out what the failure rate of the queries is; they fail because the GPU hasn't finished them when we
// try to get the data, so if the failure rate is too high, number of frames behind needs to be increased
QueriesFailed + = StartSucceeded & & EndSucceeded ? - 1 : 1 ;
QueriesFailed = FMath : : Max < int32 > ( 0 , QueriesFailed ) ;
TotalTime + = ( EndTime - StartTime ) / 1000000.0f ;
}
float FailRate = static_cast < float > ( QueriesFailed ) / WindowSize ;
OutAvgTimeInSeconds = TotalTime / WindowSize ;
return FailRate ;
}
2015-01-14 00:36:58 -05:00
# undef LOCTEXT_NAMESPACE