// Source file: UnrealEngineUWP/Engine/Source/Runtime/OpenGLDrv/Private/OpenGLDrv.cpp
// (C++, 509 lines, 17 KiB)

// Copyright 1998-2014 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
OpenGLDrv.cpp: Unreal OpenGL RHI library implementation.
=============================================================================*/
#include "OpenGLDrvPrivate.h"
IMPLEMENT_MODULE(FOpenGLDynamicRHIModule, OpenGLDrv);
#include "ShaderParameterUtils.h"
#include "RHIStaticStates.h"
#include "OneColorShader.h"
#if !UE_BUILD_SHIPPING
#include "STaskGraph.h"
#endif
/** OpenGL Logging. */
DEFINE_LOG_CATEGORY(LogOpenGL);
/**
 * Opens a named event on the RHI.
 *
 * When ENABLE_OPENGL_DEBUG_GROUPS is non-zero, a GL group marker carrying the
 * ANSI-converted name is pushed first. The event is then always forwarded to
 * the GPU profiler.
 *
 * @param Name Label of the event being opened.
 */
void FOpenGLDynamicRHI::PushEvent(const TCHAR* Name)
{
#if ENABLE_OPENGL_DEBUG_GROUPS
    // @todo-mobile: Fix string conversion ASAP!
    FOpenGL::PushGroupMarker(TCHAR_TO_ANSI(Name));
#endif

    GPUProfilingData.PushEvent(Name);
}
/**
 * Opens a named profiler event by delegating to the FGPUProfiler base implementation.
 *
 * @param Name Label of the event being opened.
 */
void FOpenGLGPUProfiler::PushEvent(const TCHAR* Name)
{
    FGPUProfiler::PushEvent(Name);
}
/**
 * Closes the most recently opened RHI event.
 *
 * When ENABLE_OPENGL_DEBUG_GROUPS is non-zero, the matching GL group marker is
 * popped first. The pop is then always forwarded to the GPU profiler.
 */
void FOpenGLDynamicRHI::PopEvent()
{
#if ENABLE_OPENGL_DEBUG_GROUPS
    FOpenGL::PopGroupMarker();
#endif

    GPUProfilingData.PopEvent();
}
/** Closes the current profiler event by delegating to the FGPUProfiler base implementation. */
void FOpenGLGPUProfiler::PopEvent()
{
    FGPUProfiler::PopEvent();
}
/**
 * Prepares the GPU profiler for a new frame.
 *
 * Latches the game-thread profiling triggers, optionally starts event tracking
 * for this frame, starts the frame-level GPU timers (single-GPU only) and, when
 * draw events are being emitted, opens the top-level "FRAME" event.
 *
 * @param InRHI RHI used to issue the long GPU task and to construct the per-frame event node tree.
 */
void FOpenGLGPUProfiler::BeginFrame(FOpenGLDynamicRHI* InRHI)
{
    CurrentEventNode = NULL;
    check(!bTrackingEvents);
    // The previous frame's EndFrame is expected to have released this already.
    check(!CurrentEventNodeFrame);

    // Take private copies of the game-thread trigger flags.
    bLatchedGProfilingGPU = GTriggerGPUProfile;
    bLatchedGProfilingGPUHitches = GTriggerGPUHitchProfile;
    if (bLatchedGProfilingGPUHitches)
    {
        // An ordinary GPU profile is NOT permitted while hitch profiling is active.
        bLatchedGProfilingGPU = false;
    }

    if (bLatchedGProfilingGPU)
    {
        // Queue a chunk of GPU work up front so the frame is GPU bound;
        // idle time cannot be isolated from GPU timestamps.
        InRHI->IssueLongGPUTask();
    }

    // Remember the draw-event setting when a GPU profile runs this frame or a hitch profile is just starting.
    const bool bHitchProfileStarting = bLatchedGProfilingGPUHitches && !bPreviousLatchedGProfilingGPUHitches;
    if (bLatchedGProfilingGPU || bHitchProfileStarting)
    {
        bOriginalGEmitDrawEvents = GEmitDrawEvents;
    }

    if (!bLatchedGProfilingGPU && !bLatchedGProfilingGPUHitches)
    {
        if (bPreviousLatchedGProfilingGPUHitches)
        {
            // The hitch profiler is switching off: drop the history and restore draw events.
            GPUHitchEventNodeFrames.Empty();
            GEmitDrawEvents = bOriginalGEmitDrawEvents;
        }
    }
    else if (bLatchedGProfilingGPUHitches && GPUHitchDebounce)
    {
        // A hitch was reported recently; sit this frame out, since collecting
        // the hitch report may itself have hitched the GPU.
        GPUHitchDebounce--;
    }
    else
    {
        // Force draw events on, overriding any attempt to disable them on the game side.
        GEmitDrawEvents = true;
        bTrackingEvents = true;
        CurrentEventNodeFrame = new FOpenGLEventNodeFrame(InRHI);
        CurrentEventNodeFrame->StartFrame();
    }

    bPreviousLatchedGProfilingGPUHitches = bLatchedGProfilingGPUHitches;

    // Frame timing is skipped under SLI, where it would not be accurate anyway.
    if (GNumActiveGPUsForRendering == 1)
    {
        if (FrameTiming.IsSupported())
        {
            FrameTiming.StartTiming();
        }

        if (FOpenGLDisjointTimeStampQuery::IsSupported())
        {
            // Advance to the next slot of the query ring before tracking this frame.
            CurrentGPUFrameQueryIndex = (CurrentGPUFrameQueryIndex + 1) % MAX_GPUFRAMEQUERIES;
            DisjointGPUFrameTimeQuery[CurrentGPUFrameQueryIndex].StartTracking();
        }
    }

    if (GEmitDrawEvents)
    {
        PushEvent(TEXT("FRAME"));
    }
}
void FOpenGLGPUProfiler::EndFrame()
{
if (GEmitDrawEvents)
{
PopEvent();
}
// Skip timing events when using SLI, they will not be accurate anyway
if (GNumActiveGPUsForRendering == 1)
{
if (FrameTiming.IsSupported())
{
FrameTiming.EndTiming();
}
if (FOpenGLDisjointTimeStampQuery::IsSupported())
{
DisjointGPUFrameTimeQuery[CurrentGPUFrameQueryIndex].EndTracking();
}
}
// Skip timing events when using SLI, as they will block the GPU and we want maximum throughput
// Stat unit GPU time is not accurate anyway with SLI
if (FrameTiming.IsSupported() && GNumActiveGPUsForRendering == 1)
{
uint64 GPUTiming = FrameTiming.GetTiming();
uint64 GPUFreq = FrameTiming.GetTimingFrequency();
2014-05-06 06:26:25 -04:00
GGPUFrameTime = FMath::TruncToInt( double(GPUTiming) / double(GPUFreq) / FPlatformTime::GetSecondsPerCycle() );
}
else if (FOpenGLDisjointTimeStampQuery::IsSupported() && GNumActiveGPUsForRendering == 1)
{
static uint32 GLastGPUFrameTime = 0;
uint64 GPUTiming = 0;
uint64 GPUFreq = FOpenGLDisjointTimeStampQuery::GetTimingFrequency();
int OldestQueryIndex = (CurrentGPUFrameQueryIndex + 1) % MAX_GPUFRAMEQUERIES;
if ( DisjointGPUFrameTimeQuery[OldestQueryIndex].IsResultValid() && DisjointGPUFrameTimeQuery[OldestQueryIndex].GetResult(&GPUTiming) )
{
2014-05-06 06:26:25 -04:00
GGPUFrameTime = FMath::TruncToInt( double(GPUTiming) / double(GPUFreq) / FPlatformTime::GetSecondsPerCycle() );
GLastGPUFrameTime = GGPUFrameTime;
}
else
{
// Keep the timing of the last frame if the query was disjoint (e.g. GPU frequency changed and the result is undefined)
GGPUFrameTime = GLastGPUFrameTime;
}
}
else
{
GGPUFrameTime = 0;
}
// if we have a frame open, close it now.
if (CurrentEventNodeFrame)
{
CurrentEventNodeFrame->EndFrame();
}
check(!bTrackingEvents || bLatchedGProfilingGPU || bLatchedGProfilingGPUHitches);
check(!bTrackingEvents || CurrentEventNodeFrame);
if (bLatchedGProfilingGPU)
{
if (bTrackingEvents)
{
GEmitDrawEvents = bOriginalGEmitDrawEvents;
UE_LOG(LogRHI, Warning, TEXT(""));
UE_LOG(LogRHI, Warning, TEXT(""));
CurrentEventNodeFrame->DumpEventTree();
// OPENGL_PERFORMANCE_DATA_INVALID is a compile time constant
bool DebugEnabled = false;
#ifdef GL_ARB_debug_output
DebugEnabled = GL_TRUE == glIsEnabled( GL_DEBUG_OUTPUT );
#endif
if(OPENGL_PERFORMANCE_DATA_INVALID || DebugEnabled )
{
UE_LOG(LogRHI, Warning, TEXT(""));
UE_LOG(LogRHI, Warning, TEXT(""));
UE_LOG(LogRHI, Warning, TEXT("*********************************************************************************************"));
UE_LOG(LogRHI, Warning, TEXT("OpenGL perfomance data is potentially invalid because of the following build/runtime options:"));
#define LOG_GL_DEBUG_FLAG(a) UE_LOG(LogRHI, Warning, TEXT(" built with %s = %d"), TEXT(#a), a);
LOG_GL_DEBUG_FLAG(ENABLE_OPENGL_FRAMEDUMP);
LOG_GL_DEBUG_FLAG(ENABLE_VERIFY_GL);
LOG_GL_DEBUG_FLAG(ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION);
LOG_GL_DEBUG_FLAG(ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP);
LOG_GL_DEBUG_FLAG(DEBUG_GL_SHADERS);
LOG_GL_DEBUG_FLAG(ENABLE_OPENGL_DEBUG_GROUPS);
LOG_GL_DEBUG_FLAG(OPENGL_PERFORMANCE_DATA_INVALID);
#undef LOG_GL_DEBUG_FLAG
UE_LOG(LogRHI, Warning, TEXT("*********************************************************************************************"));
UE_LOG(LogRHI, Warning, TEXT(""));
UE_LOG(LogRHI, Warning, TEXT(""));
}
GTriggerGPUProfile = false;
bLatchedGProfilingGPU = false;
if (RHIConfig::ShouldSaveScreenshotAfterProfilingGPU()
&& GEngine->GameViewport)
{
GEngine->GameViewport->Exec( NULL, TEXT("SCREENSHOT"), *GLog);
}
}
}
else if (bLatchedGProfilingGPUHitches)
{
//@todo this really detects any hitch, even one on the game thread.
// it would be nice to restrict the test to stalls on D3D, but for now...
// this needs to be out here because bTrackingEvents is false during the hitch debounce
static double LastTime = -1.0;
double Now = FPlatformTime::Seconds();
if (bTrackingEvents)
{
/** How long, in seconds a frame much be to be considered a hitch **/
static const float HitchThreshold = .1f; //100ms
float ThisTime = Now - LastTime;
bool bHitched = (ThisTime > HitchThreshold) && LastTime > 0.0 && CurrentEventNodeFrame;
if (bHitched)
{
UE_LOG(LogRHI, Warning, TEXT("*******************************************************************************"));
UE_LOG(LogRHI, Warning, TEXT("********** Hitch detected on CPU, frametime = %6.1fms"),ThisTime * 1000.0f);
UE_LOG(LogRHI, Warning, TEXT("*******************************************************************************"));
for (int32 Frame = 0; Frame < GPUHitchEventNodeFrames.Num(); Frame++)
{
UE_LOG(LogRHI, Warning, TEXT(""));
UE_LOG(LogRHI, Warning, TEXT(""));
UE_LOG(LogRHI, Warning, TEXT("********** GPU Frame: Current - %d"),GPUHitchEventNodeFrames.Num() - Frame);
GPUHitchEventNodeFrames[Frame].DumpEventTree();
}
UE_LOG(LogRHI, Warning, TEXT(""));
UE_LOG(LogRHI, Warning, TEXT(""));
UE_LOG(LogRHI, Warning, TEXT("********** GPU Frame: Current"));
CurrentEventNodeFrame->DumpEventTree();
UE_LOG(LogRHI, Warning, TEXT("*******************************************************************************"));
UE_LOG(LogRHI, Warning, TEXT("********** End Hitch GPU Profile"));
UE_LOG(LogRHI, Warning, TEXT("*******************************************************************************"));
if (GEngine->GameViewport)
{
GEngine->GameViewport->Exec( NULL, TEXT("SCREENSHOT"), *GLog);
}
GPUHitchDebounce = 5; // don't trigger this again for a while
GPUHitchEventNodeFrames.Empty(); // clear history
}
else if (CurrentEventNodeFrame) // this will be null for discarded frames while recovering from a recent hitch
{
/** How many old frames to buffer for hitch reports **/
static const int32 HitchHistorySize = 4;
if (GPUHitchEventNodeFrames.Num() >= HitchHistorySize)
{
GPUHitchEventNodeFrames.RemoveAt(0);
}
GPUHitchEventNodeFrames.AddRawItem((FOpenGLEventNodeFrame*)CurrentEventNodeFrame);
CurrentEventNodeFrame = NULL; // prevent deletion of this below; ke kept it in the history
}
}
LastTime = Now;
}
bTrackingEvents = false;
delete CurrentEventNodeFrame;
CurrentEventNodeFrame = NULL;
}
void FOpenGLGPUProfiler::Cleanup()
{
for (int32 Index = 0; Index < MAX_GPUFRAMEQUERIES; ++Index)
{
DisjointGPUFrameTimeQuery[Index].ReleaseResource();
}
FrameTiming.ReleaseResource();
}
/**
 * Begins perf tracking for this frame: clears the event tree, then starts the
 * disjoint query followed by the root event timer.
 */
void FOpenGLEventNodeFrame::StartFrame()
{
    EventTree.Reset();

    DisjointQuery.StartTracking();
    RootEventTiming.StartTiming();
}
/**
 * Ends perf tracking for this frame without blocking: stops the root event
 * timer, then the disjoint query (the reverse of the start order).
 */
void FOpenGLEventNodeFrame::EndFrame()
{
    RootEventTiming.EndTiming();
    DisjointQuery.EndTracking();
}
float FOpenGLEventNodeFrame::GetRootTimingResults()
{
double RootResult = 0.0f;
if (RootEventTiming.IsSupported())
{
const uint64 GPUTiming = RootEventTiming.GetTiming(true);
const uint64 GPUFreq = RootEventTiming.GetTimingFrequency();
RootResult = double(GPUTiming) / double(GPUFreq);
}
return (float)RootResult;
}
void FOpenGLEventNodeFrame::LogDisjointQuery()
{
2014-04-30 23:51:07 -04:00
if (DisjointQuery.IsSupported())
{
UE_LOG(LogRHI, Warning, TEXT("%s"),
DisjointQuery.IsResultValid() ?
TEXT("Profiled range was continuous.") :
TEXT("Profiled range was disjoint! GPU switched to doing something else while profiling.")
);
}
else
{
TEXT("Profiled range \"disjoinness\" could not be determined due to lack of disjoing timer query functionality on this platform.");
}
}
float FOpenGLEventNode::GetTiming()
{
float Result = 0;
if (Timing.IsSupported())
{
// Get the timing result and block the CPU until it is ready
const uint64 GPUTiming = Timing.GetTiming(true);
const uint64 GPUFreq = Timing.GetTimingFrequency();
Result = double(GPUTiming) / double(GPUFreq);
}
return Result;
}
/**
 * Global pixel shader bound by FOpenGLDynamicRHI::IssueLongGPUTask.
 * It declares no parameters of its own, so serialization is delegated to FGlobalShader.
 */
class FOpenGLRHILongGPUTaskPS : public FGlobalShader
{
    DECLARE_SHADER_TYPE(FOpenGLRHILongGPUTaskPS,Global);

public:
    /** Default constructor. */
    FOpenGLRHILongGPUTaskPS() {}

    /** Constructs the shader from a compiled shader initializer. */
    FOpenGLRHILongGPUTaskPS(const ShaderMetaType::CompiledShaderInitializerType& Initializer)
        : FGlobalShader(Initializer)
    {
    }

    /** Cache this shader only on platforms that support the SM3 feature level. */
    static bool ShouldCache(EShaderPlatform Platform)
    {
        return IsFeatureLevelSupported(Platform, ERHIFeatureLevel::SM3);
    }

    // FShader interface.
    /** @return Whatever FGlobalShader::Serialize reports for outdated parameters. */
    virtual bool Serialize(FArchive& Ar)
    {
        return FGlobalShader::Serialize(Ar);
    }
};
// Registers FOpenGLRHILongGPUTaskPS as a pixel shader with entry point "MainLongGPUTask" in the "OneColorShader" source.
IMPLEMENT_SHADER_TYPE(,FOpenGLRHILongGPUTaskPS,TEXT("OneColorShader"),TEXT("MainLongGPUTask"),SF_Pixel);
// Bound shader state passed to SetGlobalBoundShaderState by FOpenGLDynamicRHI::IssueLongGPUTask.
static FGlobalBoundShaderState LongGPUTaskBoundShaderState;
void FOpenGLDynamicRHI::IssueLongGPUTask()
{
int32 LargestViewportIndex = INDEX_NONE;
int32 LargestViewportPixels = 0;
for (int32 ViewportIndex = 0; ViewportIndex < Viewports.Num(); ViewportIndex++)
{
FOpenGLViewport* Viewport = Viewports[ViewportIndex];
if (Viewport->GetSizeXY().X * Viewport->GetSizeXY().Y > LargestViewportPixels)
{
LargestViewportPixels = Viewport->GetSizeXY().X * Viewport->GetSizeXY().Y;
LargestViewportIndex = ViewportIndex;
}
}
if (LargestViewportIndex >= 0)
{
FOpenGLViewport* Viewport = Viewports[LargestViewportIndex];
RHISetRenderTarget(Viewport->GetBackBuffer(), FTextureRHIRef());
RHISetBlendState(TStaticBlendState<CW_RGBA, BO_Add, BF_One, BF_One>::GetRHI(), FLinearColor::Black);
RHISetDepthStencilState(TStaticDepthStencilState<false,CF_Always>::GetRHI(), 0);
RHISetRasterizerState(TStaticRasterizerState<FM_Solid,CM_None>::GetRHI());
TShaderMapRef<TOneColorVS<true> > VertexShader(GetGlobalShaderMap());
TShaderMapRef<FOpenGLRHILongGPUTaskPS> PixelShader(GetGlobalShaderMap());
SetGlobalBoundShaderState(LongGPUTaskBoundShaderState, GOpenGLVector4VertexDeclaration.VertexDeclarationRHI, *VertexShader, *PixelShader, 0);
// Draw a fullscreen quad
FVector4 Vertices[4];
Vertices[0].Set( -1.0f, 1.0f, 0, 1.0f );
Vertices[1].Set( 1.0f, 1.0f, 0, 1.0f );
Vertices[2].Set( -1.0f, -1.0f, 0, 1.0f );
Vertices[3].Set( 1.0f, -1.0f, 0, 1.0f );
RHIDrawPrimitiveUP(PT_TriangleStrip, 2, Vertices, sizeof(Vertices[0]) );
}
}
void FOpenGLDynamicRHI::InitializeStateResources()
{
SharedContextState.InitializeResources(FOpenGL::GetMaxCombinedTextureImageUnits(), OGL_MAX_COMPUTE_STAGE_UAV_UNITS);
RenderingContextState.InitializeResources(FOpenGL::GetMaxCombinedTextureImageUnits(), OGL_MAX_COMPUTE_STAGE_UAV_UNITS);
PendingState.InitializeResources(FOpenGL::GetMaxCombinedTextureImageUnits(), OGL_MAX_COMPUTE_STAGE_UAV_UNITS);
}
// Storage for FOpenGLBase's static capability limits.
// All limits start at -1 (presumably meaning "not queried yet" - confirm against the accessors).
// Texture image unit limits; all but the geometry limit are filled in by ProcessQueryGLInt,
// and all are clamped in ProcessExtensions.
GLint FOpenGLBase::MaxTextureImageUnits = -1;
GLint FOpenGLBase::MaxCombinedTextureImageUnits = -1;
GLint FOpenGLBase::MaxVertexTextureImageUnits = -1;
GLint FOpenGLBase::MaxGeometryTextureImageUnits = -1;
GLint FOpenGLBase::MaxHullTextureImageUnits = -1;
GLint FOpenGLBase::MaxDomainTextureImageUnits = -1;
// Uniform component limits; not assigned anywhere in the visible part of this file.
GLint FOpenGLBase::MaxVertexUniformComponents = -1;
GLint FOpenGLBase::MaxPixelUniformComponents = -1;
GLint FOpenGLBase::MaxGeometryUniformComponents = -1;
GLint FOpenGLBase::MaxHullUniformComponents = -1;
GLint FOpenGLBase::MaxDomainUniformComponents = -1;
// Feature flags, off by default. ProcessExtensions sets all of them from the extension string
// except bSupportsVolumeTextureRendering, which is not assigned in the visible part of this file.
bool FOpenGLBase::bSupportsASTC = false;
bool FOpenGLBase::bSupportsCopyImage = false;
bool FOpenGLBase::bSupportsSeamlessCubemap = false;
bool FOpenGLBase::bSupportsVolumeTextureRendering = false;
bool FOpenGLBase::bSupportsTextureFilterAnisotropic = false;
/**
 * Fills the static texture image unit limits (pixel, vertex, combined, hull, domain) via GET_GL_INT.
 * NOTE(review): GET_GL_INT presumably reads the GL integer named by its first argument into its third,
 * with the second as a default - confirm against the macro definition.
 * NOTE(review): MaxGeometryTextureImageUnits is not queried here, yet ProcessExtensions clamps it;
 * verify it is populated elsewhere (e.g. by a derived class).
 */
void FOpenGLBase::ProcessQueryGLInt()
{
GET_GL_INT(GL_MAX_TEXTURE_IMAGE_UNITS, 0, MaxTextureImageUnits);
GET_GL_INT(GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, 0, MaxVertexTextureImageUnits);
GET_GL_INT(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, 0, MaxCombinedTextureImageUnits);
// Tessellation control / evaluation stages map to the hull / domain limits.
GET_GL_INT(GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, 0, MaxHullTextureImageUnits);
GET_GL_INT(GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, 0, MaxDomainTextureImageUnits);
}
void FOpenGLBase::ProcessExtensions( const FString& ExtensionsString )
{
ProcessQueryGLInt();
// For now, just allocate additional units if available and advertise no tessellation units for HW that can't handle more
if ( MaxCombinedTextureImageUnits < 48 )
{
// To work around AMD driver limitation of 32 GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS,
// Going to hard code this for now (16 units in PS, 8 units in VS, 8 units in GS).
// This is going to be a problem for tessellation.
MaxTextureImageUnits = MaxTextureImageUnits > 16 ? 16 : MaxTextureImageUnits;
MaxVertexTextureImageUnits = MaxVertexTextureImageUnits > 8 ? 8 : MaxVertexTextureImageUnits;
MaxGeometryTextureImageUnits = MaxGeometryTextureImageUnits > 8 ? 8 : MaxGeometryTextureImageUnits;
MaxHullTextureImageUnits = 0;
MaxDomainTextureImageUnits = 0;
MaxCombinedTextureImageUnits = MaxCombinedTextureImageUnits > 32 ? 32 : MaxCombinedTextureImageUnits;
}
else
{
// clamp things to the levels that the other path is going, but allow additional units for tessellation
MaxTextureImageUnits = MaxTextureImageUnits > 16 ? 16 : MaxTextureImageUnits;
MaxVertexTextureImageUnits = MaxVertexTextureImageUnits > 8 ? 8 : MaxVertexTextureImageUnits;
MaxGeometryTextureImageUnits = MaxGeometryTextureImageUnits > 8 ? 8 : MaxGeometryTextureImageUnits;
MaxHullTextureImageUnits = MaxHullTextureImageUnits > 8 ? 8 : MaxHullTextureImageUnits;
MaxDomainTextureImageUnits = MaxDomainTextureImageUnits > 8 ? 8 : MaxDomainTextureImageUnits;
MaxCombinedTextureImageUnits = MaxCombinedTextureImageUnits > 48 ? 48 : MaxCombinedTextureImageUnits;
}
// Check for support for advanced texture compression (desktop and mobile)
bSupportsASTC = ExtensionsString.Contains(TEXT("GL_KHR_texture_compression_astc_ldr"));
// check for copy image support
bSupportsCopyImage = ExtensionsString.Contains(TEXT("GL_ARB_copy_image"));
bSupportsSeamlessCubemap = ExtensionsString.Contains(TEXT("GL_ARB_seamless_cube_map"));
bSupportsTextureFilterAnisotropic = ExtensionsString.Contains(TEXT("GL_EXT_texture_filter_anisotropic"));
}
void InitDefaultGLContextState(void)
{
// Intel HD4000 under <= 10.8.4 requires GL_DITHER disabled or dithering will occur on any channel < 8bits.
// No other driver does this but we don't need GL_DITHER on anyway.
glDisable(GL_DITHER);
2014-04-23 17:24:22 -04:00
// Render targets with TexCreate_SRGB should do sRGB conversion like in D3D11
glEnable(GL_FRAMEBUFFER_SRGB);
}