Integrate changes from Dev-Rendering:

- Integrate new NvAftermath version which supports GPU Crash minidumps now
- Refactor GPU crash handling on D3D12 to unify multiple crash paths (use exception handling when a GPU minidump is created to make sure it is properly saved)
- Cleanup error reporting on GPU crash
- Make sure only one thread will log and show error messages

#rb Emil.Person

#ROBOMERGE-SOURCE: CL 12508032 via CL 12508036 via CL 12508040
#ROBOMERGE-BOT: RELEASE (Release-Engine-Staging -> Main) (v673-12478461)

[CL 12508050 by kenzo terelst in Main branch]
This commit is contained in:
kenzo terelst
2020-04-01 12:46:32 -04:00
parent f56e173c78
commit 6a078547a2
26 changed files with 3050 additions and 334 deletions
@@ -593,7 +593,7 @@ FD3DGPUProfiler::FD3DGPUProfiler(class FD3D11DynamicRHI* InD3DRHI) :
void FD3DGPUProfiler::PushEvent(const TCHAR* Name, FColor Color)
{
#if NV_AFTERMATH
if(GDX11NVAfterMathEnabled && bTrackingGPUCrashData)
if(GDX11NVAfterMathEnabled && bTrackingGPUCrashData && GDX11NVAfterMathMarkers)
{
uint32 CRC = 0;
if (GPUCrashDataDepth < 0 || PushPopStack.Num() < GPUCrashDataDepth)
@@ -633,7 +633,7 @@ void FD3DGPUProfiler::PushEvent(const TCHAR* Name, FColor Color)
void FD3DGPUProfiler::PopEvent()
{
#if NV_AFTERMATH
if (GDX11NVAfterMathEnabled && bTrackingGPUCrashData)
if (GDX11NVAfterMathEnabled && bTrackingGPUCrashData && GDX11NVAfterMathMarkers)
{
PushPopStack.Pop(false);
}
@@ -39,8 +39,10 @@ DECLARE_LOG_CATEGORY_EXTERN(LogD3D11RHI, Log, All);
#if NV_AFTERMATH
#define GFSDK_Aftermath_WITH_DX11 1
#include "GFSDK_Aftermath.h"
#include "GFSDK_Aftermath_GpuCrashdump.h"
#undef GFSDK_Aftermath_WITH_DX11
extern bool GDX11NVAfterMathEnabled;
extern bool GDX11NVAfterMathMarkers;
#endif
#if INTEL_METRICSDISCOVERY
@@ -8,6 +8,7 @@
#include "Misc/CommandLine.h"
#include "Misc/EngineVersion.h"
#include "Windows/AllowWindowsPlatformTypes.h"
#include "Windows/WindowsPlatformCrashContext.h"
#include <delayimp.h>
#if !PLATFORM_HOLOLENS
#include "nvapi.h"
@@ -24,10 +25,12 @@ THIRD_PARTY_INCLUDES_START
#include "dxgi1_6.h"
THIRD_PARTY_INCLUDES_END
#include "RHIValidation.h"
#include "HAL/ExceptionHandling.h"
#if NV_AFTERMATH
// Gates Aftermath marker tracking in FD3DGPUProfiler::PushEvent/PopEvent; StartNVAftermath() clears it when Aftermath initialization fails.
bool GDX11NVAfterMathEnabled = false;
// Checked by EnableNVAftermathCrashDumps() before it registers the GPU crash dump callback.
bool GNVAftermathModuleLoaded = false;
// Set by StartNVAftermath() once Aftermath is enabled and markers (or all features) were requested;
// FD3DGPUProfiler::PushEvent/PopEvent only touch the marker stack while this is true.
bool GDX11NVAfterMathMarkers = false;
#endif
#if INTEL_METRICSDISCOVERY
@@ -1159,9 +1162,25 @@ void FD3D11DynamicRHI::StartNVAftermath()
if (bShouldStart)
{
static IConsoleVariable* MarkersCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("r.GPUCrashDebugging.Aftermath.Markers"));
static IConsoleVariable* CallstackCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("r.GPUCrashDebugging.Aftermath.Callstack"));
static IConsoleVariable* ResourcesCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("r.GPUCrashDebugging.Aftermath.ResourceTracking"));
static IConsoleVariable* TrackAllCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("r.GPUCrashDebugging.Aftermath.TrackAll"));
const bool bEnableMarkers = FParse::Param(FCommandLine::Get(), TEXT("aftermathmarkers")) || (MarkersCVar && MarkersCVar->GetInt());
const bool bEnableCallstack = FParse::Param(FCommandLine::Get(), TEXT("aftermathcallstack")) || (CallstackCVar && CallstackCVar->GetInt());
const bool bEnableResources = FParse::Param(FCommandLine::Get(), TEXT("aftermathresources")) || (ResourcesCVar && ResourcesCVar->GetInt());
const bool bEnableAll = FParse::Param(FCommandLine::Get(), TEXT("aftermathall")) || (TrackAllCVar && TrackAllCVar->GetInt());
uint32 Flags = GFSDK_Aftermath_FeatureFlags_Minimum;
Flags |= bEnableMarkers ? GFSDK_Aftermath_FeatureFlags_EnableMarkers : 0;
Flags |= bEnableCallstack ? GFSDK_Aftermath_FeatureFlags_CallStackCapturing : 0;
Flags |= bEnableResources ? GFSDK_Aftermath_FeatureFlags_EnableResourceTracking : 0;
Flags |= bEnableAll ? GFSDK_Aftermath_FeatureFlags_Maximum : 0;
GFSDK_Aftermath_Result Result = GFSDK_Aftermath_DX11_Initialize(
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_FeatureFlags_Maximum,
Direct3DDevice);
GFSDK_Aftermath_Version_API, (GFSDK_Aftermath_FeatureFlags)Flags, Direct3DDevice);
if (GFSDK_Aftermath_SUCCEED(Result)) //-V547 Expression is always true -- confirmed false positive, fix coming in future PVS version (v6.24)
{
@@ -1170,7 +1189,6 @@ void FD3D11DynamicRHI::StartNVAftermath()
if (GFSDK_Aftermath_SUCCEED(Result)) //-V547 Expression is always true -- confirmed false positive, fix coming in future PVS version (v6.24)
{
UE_LOG(LogD3D11RHI, Log, TEXT("[Aftermath] Enabled and primed"));
SetEmitDrawEvents(true);
}
else
{
@@ -1183,6 +1201,12 @@ void FD3D11DynamicRHI::StartNVAftermath()
UE_LOG(LogD3D11RHI, Log, TEXT("[Aftermath] Failed to initialize. Result=%08x"), Result);
GDX11NVAfterMathEnabled = false;
}
if (GDX11NVAfterMathEnabled && (bEnableMarkers || bEnableAll))
{
SetEmitDrawEvents(true);
GDX11NVAfterMathMarkers = true;
}
}
}
@@ -1212,15 +1236,68 @@ void FD3D11DynamicRHI::StopNVAftermath()
}
}
static void D3D11AftermathCrashCallback(const void* InGPUCrashDump, const size_t InGPUCrashDumpSize, void* InUserData)
{
// decode the GPU marker stack data
if (GDynamicRHI)
{
GDynamicRHI->CheckGpuHeartbeat();
}
// Write out crash dump to project log dir - exception handling code will take care of copying it to the correct location
const FString GPUMiniDumpPath = FPaths::Combine(FPaths::ProjectLogDir(), FWindowsPlatformCrashContext::UE4GPUAftermathMinidumpName);
// Just use raw windows file routines for the GPU minidump (TODO: refactor to our own functions?)
HANDLE FileHandle = CreateFileW(*GPUMiniDumpPath, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
if (FileHandle != INVALID_HANDLE_VALUE)
{
WriteFile(FileHandle, InGPUCrashDump, InGPUCrashDumpSize, nullptr, nullptr);
}
CloseHandle(FileHandle);
// Report the GPU crash which will raise the exception
ReportGPUCrash(TEXT("Aftermath GPU Crash dump Triggered"), 0);
}
void EnableNVAftermathCrashDumps()
{
if (GNVAftermathModuleLoaded)
{
static IConsoleVariable* GPUCrashDump = IConsoleManager::Get().FindConsoleVariable(TEXT("r.GPUCrashDump"));
if (FParse::Param(FCommandLine::Get(), TEXT("gpucrashdump")) || (GPUCrashDump && GPUCrashDump->GetInt()))
{
GFSDK_Aftermath_Result Result = GFSDK_Aftermath_EnableGpuCrashDumps(
GFSDK_Aftermath_Version_API,
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default,
D3D11AftermathCrashCallback,
nullptr, //Shader debug callback
nullptr, // description callback
nullptr); // user data
if (Result == GFSDK_Aftermath_Result_Success)
{
UE_LOG(LogD3D11RHI, Log, TEXT("[Aftermath] Aftermath crash dumping enabled"));
}
else
{
UE_LOG(LogD3D11RHI, Log, TEXT("[Aftermath] Aftermath crash dumping failed to initialize (%x)"), Result);
}
}
}
}
// Call-site wrappers around the NV Aftermath helper functions.
// NOTE(review): the #if that opens this branch is not visible here - presumably NV_AFTERMATH; confirm.
#define CACHE_NV_AFTERMATH_ENABLED() CacheNVAftermathEnabled()
#define START_NV_AFTERMATH() StartNVAftermath()
#define STOP_NV_AFTERMATH() StopNVAftermath()
#define ENABLE_NV_AFTERMATH_CRASH_DUMPS() EnableNVAftermathCrashDumps()
#else
// Fallback branch: every wrapper expands to nothing, so call sites need no conditional compilation of their own.
#define CACHE_NV_AFTERMATH_ENABLED()
#define START_NV_AFTERMATH()
#define STOP_NV_AFTERMATH()
#define ENABLE_NV_AFTERMATH_CRASH_DUMPS()
#endif
@@ -1778,6 +1855,11 @@ void FD3D11DynamicRHI::InitD3DDevice()
CreateIntelMetricsDiscovery();
}
#endif
if (IsRHIDeviceNVIDIA())
{
// crash dump hooks need to be attached before device creation
ENABLE_NV_AFTERMATH_CRASH_DUMPS();
}
if (!bDeviceCreated)
{