Files
UnrealEngineUWP/Engine/Source/Runtime/OpenGLDrv/Private/OpenGLQuery.cpp
Marcus Wassmer cbfcbbb93b Merging //UE4/Dev-Main@4662404 to Dev-Rendering (//UE4/Dev-Rendering)
#rb none
Should be just copyright updates

[CL 4680440 by Marcus Wassmer in Dev-Rendering branch]
2019-01-03 19:16:26 -05:00

1138 lines
31 KiB
C++

// Copyright 1998-2019 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
OpenGLQuery.cpp: OpenGL query RHI implementation.
=============================================================================*/
#include "CoreMinimal.h"
#include "Stats/Stats.h"
#include "OpenGLDrv.h"
#include "OpenGLDrvPrivate.h"
static int32 GOpenGLPollRenderQueryResult = 1;
static FAutoConsoleVariableRef CVarOpenGLPollRenderQueryResult(
TEXT("r.OpenGL.PollRenderQueryResult"),
GOpenGLPollRenderQueryResult,
TEXT("Whether to poll render query for result until it's ready, otherwise do a blocking call to get result.")
TEXT("0: Block, 1: Poll (default)"),
ECVF_Default
);
struct FQueryItem
{
FRenderQueryRHIParamRef Query;
int32 BeginSequence;
FQueryItem(FRenderQueryRHIParamRef InQueryRHI)
: Query(InQueryRHI)
{
FOpenGLRenderQuery* InQuery = FOpenGLDynamicRHI::ResourceCast(InQueryRHI);
BeginSequence = InQuery->TotalBegins.GetValue();
}
};
struct FGLQueryBatch
{
TArray<FQueryItem> BatchContents;
uint32 FrameNumberRenderThread;
bool bHasFlushedSinceLastWait;
FGLQueryBatch()
: FrameNumberRenderThread(0)
, bHasFlushedSinceLastWait(false)
{
}
};
struct FGLQueryBatcher
{
FGLQueryBatch* NewBatch;
TArray<FGLQueryBatch*> Batches;
uint32 NextFrameNumberRenderThread;
FGLQueryBatcher()
: NewBatch(nullptr)
, NextFrameNumberRenderThread(1)
{
}
void Add(FRenderQueryRHIParamRef Query)
{
if (NewBatch && NewBatch->FrameNumberRenderThread)
{
NewBatch->BatchContents.Add(FQueryItem(Query));
}
}
void Waited()
{
for (int32 Index = 0; Index < Batches.Num(); Index++)
{
FGLQueryBatch* Batch = Batches[Index];
Batch->bHasFlushedSinceLastWait = false;
}
}
void Flush(FOpenGLDynamicRHI& RHI, FRenderQueryRHIParamRef TargetQueryRHI)
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_FGLQueryBatcher_FlushScan);
bool bFoundQuery = false;
for (int32 Index = 0; Index < Batches.Num() && !bFoundQuery; Index++)
{
FGLQueryBatch* Batch = Batches[Index];
if (Batch->bHasFlushedSinceLastWait)
{
break;
}
bool bAnyUnfinished = false;
for (int32 IndexInner = 0; IndexInner < Batch->BatchContents.Num(); IndexInner++)
{
FQueryItem& Item = Batch->BatchContents[IndexInner];
FRenderQueryRHIParamRef QueryRHI = Item.Query;
FOpenGLRenderQuery* Query = FOpenGLDynamicRHI::ResourceCast(QueryRHI);
if (TargetQueryRHI == QueryRHI)
{
bFoundQuery = true;
}
if (Item.BeginSequence < Query->TotalBegins.GetValue())
{
// stale entry, was never checked, but was reused
Batch->BatchContents.RemoveAtSwap(IndexInner--, 1, false);
continue;
}
RHI.GetRenderQueryResult_OnThisThread(Query, false);
if (Query->TotalResults.GetValue() == Query->TotalBegins.GetValue())
{
Batch->BatchContents.RemoveAtSwap(IndexInner--, 1, false);
}
else
{
bAnyUnfinished = true;
}
}
if (!bAnyUnfinished || Batch->BatchContents.Num() == 0)
{
delete Batch;
Batches.RemoveAt(Index--);
}
else
{
Batch->bHasFlushedSinceLastWait = true;
break;
}
}
}
// this just tries to readback queries until it finds one that is not ready
void SoftFlush(FOpenGLDynamicRHI& RHI, bool bResetHasFlushedSinceLastWait = false)
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_FGLQueryBatcher_SoftFlushScan);
for (int32 Index = 0; Index < Batches.Num(); Index++)
{
FGLQueryBatch* Batch = Batches[Index];
if (bResetHasFlushedSinceLastWait)
{
Batch->bHasFlushedSinceLastWait = false; // we will try a full scan if we get around to initviews
}
if (Batch->FrameNumberRenderThread == NextFrameNumberRenderThread)
{
// do not scan queries issued this frame,
// on some Android devices this causes stalls in the driver (eg. S7 Adreno with Android 7)
break;
}
for (int32 IndexInner = 0; IndexInner < Batch->BatchContents.Num(); IndexInner++)
{
FQueryItem& Item = Batch->BatchContents[IndexInner];
FRenderQueryRHIParamRef QueryRHI = Item.Query;
FOpenGLRenderQuery* Query = FOpenGLDynamicRHI::ResourceCast(QueryRHI);
int32 Begins = Query->TotalBegins.GetValue();
if (Item.BeginSequence < Query->TotalBegins.GetValue())
{
// stale entry, was never checked, but was reused
Batch->BatchContents.RemoveAtSwap(IndexInner--, 1, false);
continue;
}
RHI.GetRenderQueryResult_OnThisThread(Query, false);
if (Query->TotalResults.GetValue() == Query->TotalBegins.GetValue())
{
Batch->BatchContents.RemoveAtSwap(IndexInner--, 1, false);
}
}
if (Batch->BatchContents.Num() == 0)
{
delete Batch;
Batches.RemoveAt(Index--);
}
else
{
break;
}
}
}
void PerFrameFlush()
{
NextFrameNumberRenderThread++;
for (int32 Index = 0; Index < Batches.Num(); Index++)
{
FGLQueryBatch* Batch = Batches[Index];
if (Batch->FrameNumberRenderThread <= NextFrameNumberRenderThread - 5)
{
delete Batch;
Batches.RemoveAt(Index--);
}
}
}
void StartNewBatch(FOpenGLDynamicRHI& RHI)
{
check(!NewBatch);
NewBatch = new FGLQueryBatch();
NewBatch->FrameNumberRenderThread = NextFrameNumberRenderThread;
}
void EndBatch(FOpenGLDynamicRHI& RHI)
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_FGLQueryBatcher_EndBatch);
SoftFlush(RHI, true);
if (NewBatch)
{
Batches.Add(NewBatch);
NewBatch = nullptr;
}
}
} GBatcher;
void BeginFrame_QueryBatchCleanup()
{
GBatcher.PerFrameFlush();
}
void BeginOcclusionQueryBatch(uint32 NumOcclusionQueries)
{
if (IsRunningRHIInSeparateThread())
{
GBatcher.StartNewBatch(*(FOpenGLDynamicRHI*)GDynamicRHI);
}
}
void EndOcclusionQueryBatch()
{
if (IsRunningRHIInSeparateThread())
{
GBatcher.EndBatch(*(FOpenGLDynamicRHI*)GDynamicRHI);
}
}
void FOpenGLDynamicRHI::RHIPollOcclusionQueries()
{
if (IsRunningRHIInSeparateThread())
{
GBatcher.SoftFlush(*(FOpenGLDynamicRHI*)GDynamicRHI);
}
}
FRenderQueryRHIRef FOpenGLDynamicRHI::RHICreateRenderQuery(ERenderQueryType QueryType)
{
check(QueryType == RQT_Occlusion || QueryType == RQT_AbsoluteTime);
if(QueryType == RQT_AbsoluteTime && FOpenGL::SupportsTimestampQueries() == false)
{
return NULL;
}
return new FOpenGLRenderQuery(QueryType);
}
void FOpenGLDynamicRHI::RHIBeginRenderQuery(FRenderQueryRHIParamRef QueryRHI)
{
VERIFY_GL_SCOPE();
FOpenGLRenderQuery* Query = ResourceCast(QueryRHI);
if (Query)
{
BeginRenderQuery_OnThisThread(Query);
GBatcher.Add(QueryRHI);
}
}
void FOpenGLDynamicRHI::RHIEndRenderQuery(FRenderQueryRHIParamRef QueryRHI)
{
VERIFY_GL_SCOPE();
FOpenGLRenderQuery* Query = ResourceCast(QueryRHI);
if (Query)
{
EndRenderQuery_OnThisThread(Query);
}
}
void FOpenGLDynamicRHI::BeginRenderQuery_OnThisThread(FOpenGLRenderQuery* Query)
{
VERIFY_GL_SCOPE();
int32 NewVal = Query->TotalBegins.Increment();
Query->TotalResults.Set(NewVal - 1);
Query->Result = 0;
Query->bResultWasSuccess = false;
if (Query->QueryType == RQT_Occlusion)
{
check(PendingState.RunningOcclusionQuery == 0);
if (!Query->bInvalidResource && !PlatformContextIsCurrent(Query->ResourceContext))
{
PlatformReleaseRenderQuery(Query->Resource, Query->ResourceContext);
Query->bInvalidResource = true;
}
if (Query->bInvalidResource)
{
PlatformGetNewRenderQuery(&Query->Resource, &Query->ResourceContext);
Query->bInvalidResource = false;
}
GLenum QueryType = FOpenGL::SupportsExactOcclusionQueries() ? UGL_SAMPLES_PASSED : UGL_ANY_SAMPLES_PASSED;
FOpenGL::BeginQuery(QueryType, Query->Resource);
PendingState.RunningOcclusionQuery = Query->Resource;
}
else
{
// not supported/needed for RQT_AbsoluteTime
check(0);
}
}
void FOpenGLDynamicRHI::EndRenderQuery_OnThisThread(FOpenGLRenderQuery* Query)
{
VERIFY_GL_SCOPE();
if (Query)
{
if (Query->QueryType == RQT_Occlusion)
{
if (!Query->bInvalidResource && !PlatformContextIsCurrent(Query->ResourceContext))
{
PlatformReleaseRenderQuery(Query->Resource, Query->ResourceContext);
Query->Resource = 0;
Query->bInvalidResource = true;
}
if (!Query->bInvalidResource)
{
check(PendingState.RunningOcclusionQuery == Query->Resource);
PendingState.RunningOcclusionQuery = 0;
GLenum QueryType = FOpenGL::SupportsExactOcclusionQueries() ? UGL_SAMPLES_PASSED : UGL_ANY_SAMPLES_PASSED;
FOpenGL::EndQuery(QueryType);
}
}
else if (Query->QueryType == RQT_AbsoluteTime)
{
int32 NewVal = Query->TotalBegins.Increment();
Query->TotalResults.Set(NewVal - 1);
Query->Result = 0;
Query->bResultWasSuccess = false;
if (!Query->bInvalidResource && !PlatformContextIsCurrent(Query->ResourceContext))
{
PlatformReleaseRenderQuery(Query->Resource, Query->ResourceContext);
Query->Resource = 0;
Query->bInvalidResource = true;
}
// query can be silently invalidated in GetRenderQueryResult
if (Query->bInvalidResource)
{
PlatformGetNewRenderQuery(&Query->Resource, &Query->ResourceContext);
Query->bInvalidResource = false;
}
FOpenGL::QueryTimestampCounter(Query->Resource);
}
}
}
static void GetRenderQueryResult(FOpenGLRenderQuery* Query)
{
VERIFY_GL_SCOPE();
if (Query->QueryType == RQT_AbsoluteTime)
{
FOpenGL::GetQueryObject(Query->Resource, FOpenGL::QM_Result, &Query->Result);
}
else
{
GLuint Result32 = 0;
FOpenGL::GetQueryObject(Query->Resource, FOpenGL::QM_Result, &Result32);
Query->Result = Result32 * (FOpenGL::SupportsExactOcclusionQueries() ? 1 : 500000); // half a mega pixel display
}
Query->bResultWasSuccess = true;
Query->TotalResults.Increment();
}
void FOpenGLDynamicRHI::GetRenderQueryResult_OnThisThread(FOpenGLRenderQuery* Query, bool bWait)
{
if (Query->TotalResults.GetValue() == Query->TotalBegins.GetValue())
{
return;
}
check(Query->TotalResults.GetValue() + 1 == Query->TotalBegins.GetValue());
VERIFY_GL_SCOPE();
if (!Query->bInvalidResource && !PlatformContextIsCurrent(Query->ResourceContext))
{
PlatformReleaseRenderQuery(Query->Resource, Query->ResourceContext);
Query->Resource = 0;
Query->bInvalidResource = true;
}
// Check if the query is valid first
if (Query->bInvalidResource)
{
Query->Result = 0;
Query->TotalResults.Increment();
}
else
{
// Check if the query is finished
GLuint Result = 0;
FOpenGL::GetQueryObject(Query->Resource, FOpenGL::QM_ResultAvailable, &Result);
if (Result == GL_TRUE)
{
GetRenderQueryResult(Query);
}
else if (bWait) // Isn't the query finished yet, and can we wait for it?
{
SCOPE_CYCLE_COUNTER(STAT_RenderQueryResultTime);
uint32 IdleStart = FPlatformTime::Cycles();
GBatcher.Waited();
if (GOpenGLPollRenderQueryResult == 0)
{
// block in the driver waiting for result
GetRenderQueryResult(Query);
}
else
{
// poll result until it's ready
double StartTime = FPlatformTime::Seconds();
do
{
FPlatformProcess::Sleep(0); // yield to other threads - some of them may be OpenGL driver's and we'd be starving them
if (Query->bInvalidResource)
{
// Query got invalidated while we were sleeping.
// Bail out, no sense to wait and generate OpenGL errors,
// we're in a new OpenGL context that knows nothing about us.
Query->Result = 1000; // safe value
Result = GL_FALSE;
bWait = false;
Query->bResultWasSuccess = true;
break;
}
FOpenGL::GetQueryObject(Query->Resource, FOpenGL::QM_ResultAvailable, &Result);
// timer queries are used for Benchmarks which can stall a bit more
double TimeoutValue = (Query->QueryType == RQT_AbsoluteTime) ? 2.0 : 0.5;
if ((FPlatformTime::Seconds() - StartTime) > TimeoutValue)
{
UE_LOG(LogRHI, Log, TEXT("Timed out while waiting for GPU to catch up. (%.1f s)"), TimeoutValue);
break;
}
} while (Result == GL_FALSE);
if (Result == GL_TRUE)
{
GetRenderQueryResult(Query);
}
else
{
Query->Result = 0;
Query->TotalResults.Increment();
}
}
uint32 ThisCycles = FPlatformTime::Cycles() - IdleStart;
if (IsInRHIThread())
{
GWorkingRHIThreadStallTime += ThisCycles;
}
else
{
GRenderThreadIdle[ERenderThreadIdleTypes::WaitingForGPUQuery] += ThisCycles;
GRenderThreadNumIdle[ERenderThreadIdleTypes::WaitingForGPUQuery]++;
}
}
}
}
class FPollQueriesRHIThreadTask
{
FOpenGLRenderQuery* Query;
FOpenGLDynamicRHI* RHI;
bool bWait;
public:
FPollQueriesRHIThreadTask(FOpenGLRenderQuery* InQuery, FOpenGLDynamicRHI* InRHI, bool bInWait)
: Query(InQuery)
, RHI(InRHI)
, bWait(bInWait)
{
}
FORCEINLINE TStatId GetStatId() const
{
RETURN_QUICK_DECLARE_CYCLE_STAT(FPollQueriesRHIThreadTask, STATGROUP_TaskGraphTasks);
}
ENamedThreads::Type GetDesiredThread()
{
return ENamedThreads::SetTaskPriority(ENamedThreads::RHIThread, ENamedThreads::HighTaskPriority);
}
static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }
void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
{
check(IsRunningRHIInDedicatedThread() && IsInRHIThread()); // this should never be used on a platform that doesn't support the RHI thread, and it can't quite work when running the RHI stuff on task threads
if (bWait)
{
RHI->GetRenderQueryResult_OnThisThread(Query, true); // we must get this one if bWait is true;
RHI->RHIPollOcclusionQueries(); // finish any other ones, but don't wait
}
else
{
RHI->GetRenderQueryResult_OnThisThread(Query, false);
if (Query->TotalResults.GetValue() == Query->TotalBegins.GetValue())
{
RHI->RHIPollOcclusionQueries(); // If the target query was ready, then go ahead and scan to see what else is ready.
}
}
}
};
bool FOpenGLDynamicRHI::RHIGetRenderQueryResult(FRenderQueryRHIParamRef QueryRHI,uint64& OutResult,bool bWait)
{
check(IsInRenderingThread() || IsInRHIThread());
FOpenGLRenderQuery* Query = ResourceCast(QueryRHI);
if (!Query)
{
// If timer queries are unsupported, just make sure that OutResult does not contain any random values.
OutResult = 0;
return false;
}
FRHICommandListImmediate& RHICmdList = FRHICommandListExecutor::GetImmediateCommandList();
const bool bCanRunOnThisThread = RHICmdList.Bypass() || (!IsRunningRHIInSeparateThread() && IsInRenderingThread()) || IsInRHIThread();
if (Query->TotalResults.GetValue() != Query->TotalBegins.GetValue())
{
if (bCanRunOnThisThread)
{
GetRenderQueryResult_OnThisThread(Query, bWait);
}
else
{
if (bWait)
{
QUICK_SCOPE_CYCLE_COUNTER(STAT_WaitForRHIThreadOcclusionReadback);
if (IsRunningRHIInDedicatedThread())
{
// send a command that will wait, so if the RHIT runs out of work, it just blocks and waits for the GPU
ALLOC_COMMAND_CL(RHICmdList, FRHICommandGLCommand)([=]() {GetRenderQueryResult_OnThisThread(ResourceCast(QueryRHI), true); });
FGraphEventRef Done = RHICmdList.RHIThreadFence(false);
ALLOC_COMMAND_CL(RHICmdList, FRHICommandGLCommand)([=]() {GBatcher.Flush(*this, QueryRHI); });
RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
while (!Done->IsComplete())
{
FGraphEventRef RHITask = TGraphTask<FPollQueriesRHIThreadTask>::CreateTask().ConstructAndDispatchWhenReady(ResourceCast(QueryRHI), this, false);
FTaskGraphInterface::Get().WaitUntilTaskCompletes(RHITask);
if (Query->TotalResults.GetValue() == Query->TotalBegins.GetValue())
{
break;
}
// We want to keep the RHIT working, but we want keep checking between command lists so that we can get the results as soon as the GPU has them
// this isn't really a spin, the ping-pong between threads will not consume CPU (usually a bad thing, not here).
}
}
else
{
ALLOC_COMMAND_CL(RHICmdList, FRHICommandGLCommand)([=]() {GetRenderQueryResult_OnThisThread(ResourceCast(QueryRHI), true); });
FGraphEventRef Done = RHICmdList.RHIThreadFence(false);
ALLOC_COMMAND_CL(RHICmdList, FRHICommandGLCommand)([=]() {GBatcher.Flush(*this, QueryRHI); });
RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
FRHICommandListExecutor::WaitOnRHIThreadFence(Done);
}
check(Query->TotalResults.GetValue() == Query->TotalBegins.GetValue());
}
else
{
ALLOC_COMMAND_CL(RHICmdList, FRHICommandGLCommand)([=]() {GetRenderQueryResult_OnThisThread(ResourceCast(QueryRHI), false); GBatcher.Flush(*this, QueryRHI); });
}
}
}
if (Query->TotalResults.GetValue() == Query->TotalBegins.GetValue() && Query->bResultWasSuccess)
{
if (Query->QueryType == RQT_AbsoluteTime)
{
// GetTimingFrequency is the number of ticks per second
uint64 Div = FMath::Max(1llu, FOpenGLBufferedGPUTiming::GetTimingFrequency() / (1000 * 1000));
// convert from GPU specific timestamp to micro sec (1 / 1 000 000 s) which seems a reasonable resolution
OutResult = Query->Result / Div;
}
else
{
OutResult = Query->Result;
}
return true;
}
OutResult = 0;
return false;
}
extern void OnQueryCreation( FOpenGLRenderQuery* Query );
extern void OnQueryDeletion( FOpenGLRenderQuery* Query );
FOpenGLRenderQuery::FOpenGLRenderQuery(ERenderQueryType InQueryType)
: Result(0)
, bInvalidResource(true)
, QueryType(InQueryType)
{
FRHICommandListImmediate& RHICmdList = FRHICommandListExecutor::GetImmediateCommandList();
const bool bCanRunOnThisThread = RHICmdList.Bypass() || (!IsRunningRHIInSeparateThread() && IsInRenderingThread()) || IsInRHIThread();
if (bCanRunOnThisThread)
{
AcquireResource();
}
else
{
CreationFence.Reset();
ALLOC_COMMAND_CL(RHICmdList, FRHICommandGLCommand)([=]() {AcquireResource(); CreationFence.WriteAssertFence(); });
CreationFence.SetRHIThreadFence();
}
}
FOpenGLRenderQuery::~FOpenGLRenderQuery()
{
OnQueryDeletion( this );
FRHICommandListImmediate& RHICmdList = FRHICommandListExecutor::GetImmediateCommandList();
const bool bCanRunOnThisThread = RHICmdList.Bypass() || (!IsRunningRHIInSeparateThread() && IsInRenderingThread()) || IsInRHIThread();
if (Resource && !bInvalidResource)
{
bInvalidResource = true;
if (bCanRunOnThisThread)
{
ReleaseResource(Resource, ResourceContext);
}
else
{
CreationFence.WaitFence();
ALLOC_COMMAND_CL(RHICmdList, FRHICommandGLCommand)([Resource = Resource, ResourceContext = ResourceContext]() {VERIFY_GL_SCOPE(); ReleaseResource(Resource, ResourceContext); });
}
}
}
void FOpenGLRenderQuery::AcquireResource()
{
VERIFY_GL_SCOPE();
bInvalidResource = false;
PlatformGetNewRenderQuery(&Resource, &ResourceContext);
OnQueryCreation(this);
}
void FOpenGLRenderQuery::ReleaseResource(GLuint Resource, uint64 ResourceContext)
{
VERIFY_GL_SCOPE();
check(Resource);
PlatformReleaseRenderQuery(Resource, ResourceContext);
}
void FOpenGLEventQuery::IssueEvent()
{
VERIFY_GL_SCOPE();
if(Sync)
{
FOpenGL::DeleteSync(Sync);
Sync = UGLsync();
}
Sync = FOpenGL::FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
#ifndef __EMSCRIPTEN__ // https://answers.unrealengine.com/questions/409649/html5-opengl-backend-doesnt-need-to-flush-gl-comma.html
FOpenGL::Flush();
#endif
checkSlow(FOpenGL::IsSync(Sync));
}
void FOpenGLEventQuery::WaitForCompletion()
{
VERIFY_GL_SCOPE();
QUICK_SCOPE_CYCLE_COUNTER(STAT_FOpenGLEventQuery_WaitForCompletion);
checkSlow(FOpenGL::IsSync(Sync));
// Wait up to 1/2 second for sync execution
FOpenGL::EFenceResult Status = FOpenGL::ClientWaitSync( Sync, 0, 500*1000*1000);
if ( Status != FOpenGL::FR_AlreadySignaled && Status != FOpenGL::FR_ConditionSatisfied )
{
//failure of some type, determine type and send diagnostic message
if ( Status == FOpenGL::FR_TimeoutExpired )
{
UE_LOG(LogRHI, Log, TEXT("Timed out while waiting for GPU to catch up. (500 ms)"));
}
else if ( Status == FOpenGL::FR_WaitFailed )
{
UE_LOG(LogRHI, Log, TEXT("Wait on GPU failed in driver"));
}
else
{
UE_LOG(LogRHI, Log, TEXT("Unknown error while waiting on GPU"));
check(0);
}
}
}
void FOpenGLEventQuery::InitDynamicRHI()
{
FRHICommandListImmediate& RHICmdList = FRHICommandListExecutor::GetImmediateCommandList();
RHITHREAD_GLCOMMAND_PROLOGUE();
VERIFY_GL_SCOPE();
// Initialize the query by issuing an initial event.
IssueEvent();
check(FOpenGL::IsSync(Sync));
RHITHREAD_GLCOMMAND_EPILOGUE();
}
void FOpenGLEventQuery::ReleaseDynamicRHI()
{
FRHICommandListImmediate& RHICmdList = FRHICommandListExecutor::GetImmediateCommandList();
RHITHREAD_GLCOMMAND_PROLOGUE();
VERIFY_GL_SCOPE();
FOpenGL::DeleteSync(Sync);
RHITHREAD_GLCOMMAND_EPILOGUE();
}
/*=============================================================================
* class FOpenGLBufferedGPUTiming
*=============================================================================*/
/**
* Constructor.
*
* @param InOpenGLRHI RHI interface
* @param InBufferSize Number of buffered measurements
*/
FOpenGLBufferedGPUTiming::FOpenGLBufferedGPUTiming( FOpenGLDynamicRHI* InOpenGLRHI, int32 InBufferSize )
: OpenGLRHI( InOpenGLRHI )
, BufferSize( InBufferSize )
, CurrentTimestamp( -1 )
, NumIssuedTimestamps( 0 )
, bIsTiming( false )
{
}
/**
* Initializes the static variables, if necessary.
*/
void FOpenGLBufferedGPUTiming::PlatformStaticInitialize(void* UserData)
{
// Are the static variables initialized?
if ( !GAreGlobalsInitialized )
{
GIsSupported = FOpenGL::SupportsTimestampQueries();
GTimingFrequency = 1000 * 1000 * 1000;
GAreGlobalsInitialized = true;
}
}
/**
* Initializes all OpenGL resources and if necessary, the static variables.
*/
static TArray<FOpenGLRenderQuery*> TimerQueryPool;
static FOpenGLRenderQuery* GetTimeQuery()
{
if (TimerQueryPool.Num())
{
return TimerQueryPool.Pop();
}
return new FOpenGLRenderQuery(RQT_AbsoluteTime);
}
void FOpenGLBufferedGPUTiming::InitResources()
{
VERIFY_GL_SCOPE();
StaticInitialize(OpenGLRHI, PlatformStaticInitialize);
CurrentTimestamp = 0;
NumIssuedTimestamps = 0;
bIsTiming = false;
GIsSupported = FOpenGL::SupportsTimestampQueries();
if ( GIsSupported )
{
StartTimestamps.Reserve(BufferSize);
EndTimestamps.Reserve(BufferSize);
for(int32 BufferIndex = 0; BufferIndex < BufferSize; ++BufferIndex)
{
StartTimestamps.Add(GetTimeQuery());
EndTimestamps.Add(GetTimeQuery());
}
}
}
/**
* Releases all OpenGL resources.
*/
void FOpenGLBufferedGPUTiming::ReleaseResources()
{
VERIFY_GL_SCOPE();
for(FOpenGLRenderQuery* Query : StartTimestamps)
{
TimerQueryPool.Add(Query);
}
for(FOpenGLRenderQuery* Query : EndTimestamps)
{
TimerQueryPool.Add(Query);
}
StartTimestamps.Reset();
EndTimestamps.Reset();
}
/**
* Start a GPU timing measurement.
*/
void FOpenGLBufferedGPUTiming::StartTiming()
{
VERIFY_GL_SCOPE();
// Issue a timestamp query for the 'start' time.
if ( GIsSupported && !bIsTiming )
{
int32 NewTimestampIndex = (CurrentTimestamp + 1) % BufferSize;
FOpenGLRenderQuery* TimerQuery = StartTimestamps[NewTimestampIndex];
{
if (!TimerQuery->bInvalidResource && !PlatformContextIsCurrent(TimerQuery->ResourceContext))
{
PlatformReleaseRenderQuery(TimerQuery->Resource, TimerQuery->ResourceContext);
TimerQuery->bInvalidResource = true;
}
if (TimerQuery->bInvalidResource)
{
PlatformGetNewRenderQuery(&TimerQuery->Resource, &TimerQuery->ResourceContext);
TimerQuery->bInvalidResource = false;
}
}
FOpenGL::QueryTimestampCounter(StartTimestamps[NewTimestampIndex]->Resource);
CurrentTimestamp = NewTimestampIndex;
bIsTiming = true;
}
}
/**
* End a GPU timing measurement.
* The timing for this particular measurement will be resolved at a later time by the GPU.
*/
void FOpenGLBufferedGPUTiming::EndTiming()
{
VERIFY_GL_SCOPE();
// Issue a timestamp query for the 'end' time.
if ( GIsSupported && bIsTiming )
{
checkSlow( CurrentTimestamp >= 0 && CurrentTimestamp < BufferSize );
FOpenGLRenderQuery* TimerQuery = EndTimestamps[CurrentTimestamp];
{
if (!TimerQuery->bInvalidResource && !PlatformContextIsCurrent(TimerQuery->ResourceContext))
{
PlatformReleaseRenderQuery(TimerQuery->Resource, TimerQuery->ResourceContext);
TimerQuery->bInvalidResource = true;
}
if (TimerQuery->bInvalidResource && PlatformOpenGLContextValid())
{
PlatformGetNewRenderQuery(&TimerQuery->Resource, &TimerQuery->ResourceContext);
TimerQuery->bInvalidResource = false;
}
}
FOpenGL::QueryTimestampCounter(EndTimestamps[CurrentTimestamp]->Resource);
NumIssuedTimestamps = FMath::Min<int32>(NumIssuedTimestamps + 1, BufferSize);
bIsTiming = false;
}
}
/**
* Retrieves the most recently resolved timing measurement.
* The unit is the same as for FPlatformTime::Cycles(). Returns 0 if there are no resolved measurements.
*
* @return Value of the most recently resolved timing, or 0 if no measurements have been resolved by the GPU yet.
*/
uint64 FOpenGLBufferedGPUTiming::GetTiming(bool bGetCurrentResultsAndBlock)
{
VERIFY_GL_SCOPE();
if ( GIsSupported )
{
checkSlow( CurrentTimestamp >= 0 && CurrentTimestamp < BufferSize );
GLuint64 StartTime, EndTime;
int32 TimestampIndex = CurrentTimestamp;
{
FOpenGLRenderQuery* EndStamp = EndTimestamps[TimestampIndex];
if (!EndStamp->bInvalidResource && !PlatformContextIsCurrent(EndStamp->ResourceContext))
{
PlatformReleaseRenderQuery(EndStamp->Resource, EndStamp->ResourceContext);
EndStamp->bInvalidResource = true;
}
FOpenGLRenderQuery* StartStamp = StartTimestamps[TimestampIndex];
if (!StartStamp->bInvalidResource && !PlatformContextIsCurrent(StartStamp->ResourceContext))
{
PlatformReleaseRenderQuery(StartStamp->Resource, StartStamp->ResourceContext);
StartStamp->bInvalidResource = true;
}
if(StartStamp->bInvalidResource || EndStamp->bInvalidResource)
{
UE_LOG(LogRHI, Log, TEXT("timing invalid, since the stamp queries have invalid resources"));
return 0.0f;
}
}
if (!bGetCurrentResultsAndBlock)
{
// Quickly check the most recent measurements to see if any of them has been resolved. Do not flush these queries.
for ( int32 IssueIndex = 1; IssueIndex < NumIssuedTimestamps; ++IssueIndex )
{
GLuint EndAvailable = GL_FALSE;
FOpenGL::GetQueryObject(EndTimestamps[TimestampIndex]->Resource, FOpenGL::QM_ResultAvailable, &EndAvailable);
if ( EndAvailable == GL_TRUE )
{
GLuint StartAvailable = GL_FALSE;
FOpenGL::GetQueryObject(StartTimestamps[TimestampIndex]->Resource, FOpenGL::QM_ResultAvailable, &StartAvailable);
if(StartAvailable == GL_TRUE)
{
FOpenGL::GetQueryObject(EndTimestamps[TimestampIndex]->Resource, FOpenGL::QM_Result, &EndTime);
FOpenGL::GetQueryObject(StartTimestamps[TimestampIndex]->Resource, FOpenGL::QM_Result, &StartTime);
if (EndTime > StartTime)
{
return EndTime - StartTime;
}
}
}
TimestampIndex = (TimestampIndex + BufferSize - 1) % BufferSize;
}
}
if ( NumIssuedTimestamps > 0 || bGetCurrentResultsAndBlock )
{
// None of the (NumIssuedTimestamps - 1) measurements were ready yet,
// so check the oldest measurement more thoroughly.
// This really only happens if occlusion and frame sync event queries are disabled, otherwise those will block until the GPU catches up to 1 frame behind
const bool bBlocking = ( NumIssuedTimestamps == BufferSize ) || bGetCurrentResultsAndBlock;
uint32 IdleStart = FPlatformTime::Cycles();
double StartTimeoutTime = FPlatformTime::Seconds();
GLuint EndAvailable = GL_FALSE;
SCOPE_CYCLE_COUNTER( STAT_RenderQueryResultTime );
// If we are blocking, retry until the GPU processes the time stamp command
do
{
FOpenGL::GetQueryObject(EndTimestamps[TimestampIndex]->Resource, FOpenGL::QM_ResultAvailable, &EndAvailable);
if ((FPlatformTime::Seconds() - StartTimeoutTime) > 0.5)
{
UE_LOG(LogRHI, Log, TEXT("Timed out while waiting for GPU to catch up. (500 ms) EndTimeStamp"));
return 0;
}
} while ( EndAvailable == GL_FALSE && bBlocking );
GRenderThreadIdle[ERenderThreadIdleTypes::WaitingForGPUQuery] += FPlatformTime::Cycles() - IdleStart;
GRenderThreadNumIdle[ERenderThreadIdleTypes::WaitingForGPUQuery]++;
if ( EndAvailable == GL_TRUE )
{
IdleStart = FPlatformTime::Cycles();
StartTimeoutTime = FPlatformTime::Seconds();
GLuint StartAvailable = GL_FALSE;
do
{
FOpenGL::GetQueryObject(StartTimestamps[TimestampIndex]->Resource, FOpenGL::QM_ResultAvailable, &StartAvailable);
if ((FPlatformTime::Seconds() - StartTimeoutTime) > 0.5)
{
UE_LOG(LogRHI, Log, TEXT("Timed out while waiting for GPU to catch up. (500 ms) StartTimeStamp"));
return 0;
}
} while ( StartAvailable == GL_FALSE && bBlocking );
GRenderThreadIdle[ERenderThreadIdleTypes::WaitingForGPUQuery] += FPlatformTime::Cycles() - IdleStart;
if(StartAvailable == GL_TRUE)
{
FOpenGL::GetQueryObject(EndTimestamps[TimestampIndex]->Resource, FOpenGL::QM_Result, &EndTime);
FOpenGL::GetQueryObject(StartTimestamps[TimestampIndex]->Resource, FOpenGL::QM_Result, &StartTime);
if (EndTime > StartTime)
{
return EndTime - StartTime;
}
}
}
}
}
return 0;
}
FOpenGLDisjointTimeStampQuery::FOpenGLDisjointTimeStampQuery(class FOpenGLDynamicRHI* InOpenGLRHI)
: bIsResultValid(false)
, DisjointQuery(0)
, Context(0)
, OpenGLRHI(InOpenGLRHI)
{
}
void FOpenGLDisjointTimeStampQuery::StartTracking()
{
VERIFY_GL_SCOPE();
if (IsSupported())
{
if (!PlatformContextIsCurrent(Context))
{
PlatformReleaseRenderQuery(DisjointQuery, Context);
PlatformGetNewRenderQuery(&DisjointQuery, &Context);
}
// Dummy query to reset the driver's internal disjoint status
FOpenGL::TimerQueryDisjoint();
FOpenGL::BeginQuery(UGL_TIME_ELAPSED, DisjointQuery);
}
}
void FOpenGLDisjointTimeStampQuery::EndTracking()
{
VERIFY_GL_SCOPE();
if(IsSupported())
{
FOpenGL::EndQuery( UGL_TIME_ELAPSED );
// Check if the GPU changed clock frequency since the last time GL_GPU_DISJOINT_EXT was checked.
// If so, any timer query will be undefined.
bIsResultValid = !FOpenGL::TimerQueryDisjoint();
}
}
bool FOpenGLDisjointTimeStampQuery::IsResultValid()
{
checkSlow(IsSupported());
return bIsResultValid;
}
bool FOpenGLDisjointTimeStampQuery::GetResult( uint64* OutResult/*=NULL*/ )
{
VERIFY_GL_SCOPE();
if (IsSupported())
{
GLuint Result = 0;
FOpenGL::GetQueryObject(DisjointQuery, FOpenGL::QM_ResultAvailable, &Result);
const double StartTime = FPlatformTime::Seconds();
while (Result == GL_FALSE && (FPlatformTime::Seconds() - StartTime) < 0.5)
{
FPlatformProcess::Sleep(0.005f);
FOpenGL::GetQueryObject(DisjointQuery, FOpenGL::QM_ResultAvailable, &Result);
}
// Presently just discarding the result, because timing is handled by timestamps inside
if (Result != GL_FALSE)
{
GLuint64 ElapsedTime = 0;
FOpenGL::GetQueryObject(DisjointQuery, FOpenGL::QM_Result, &ElapsedTime);
if (OutResult)
{
*OutResult = ElapsedTime;
}
}
bIsResultValid = Result != GL_FALSE;
}
return bIsResultValid;
}
void FOpenGLDisjointTimeStampQuery::InitResources()
{
VERIFY_GL_SCOPE();
if ( IsSupported() )
{
PlatformGetNewRenderQuery(&DisjointQuery, &Context);
}
}
void FOpenGLDisjointTimeStampQuery::ReleaseResources()
{
VERIFY_GL_SCOPE();
if ( IsSupported() )
{
PlatformReleaseRenderQuery(DisjointQuery, Context);
}
}