Files
UnrealEngineUWP/Engine/Source/Programs/CrashReportClient/Private/Windows/CrashReportClientMainWindows.cpp
Patrick Laflamme 2e5316e1ca Generalized the Editor analytics summary session system to be usable/extendable by other apps.
Engine/Editor changes:

- Split the Editor summary session in two, one summary for the Engine properties and one for the Editor specific properties. Made it easy to extend the Engine summary to create other summaries.
- Made the summary sender as agnostic as possible of the keys it sends.
- Fixed the system-wide lock contention between processes when persisting a session. (One problem caused by the lock is UE-114315).
- Fixed concurrent issue when saving the summary sessions on Linux/Mac
- Fixed performance issue when saving the summary session on Linux/Mac. This enables saving at a higher frequency.
- Fixed cases where the same session summary is sent more than once.
- Fixed Windows registry key overflow that could happen if we accumulated too many sessions (in theory, this can happen).
- Made adding new properties to the summary easy and private to the implementation.
- Brought the Linux/Mac implementation closer to Windows implementation.
- Reduced memory allocation, especially when the session records a crash.
- Improved chances to send the summary non-delayed by allowing the Editor to send the reports if CRC died unexpectedly.
- Generalized the support to collect and aggregate analytics from helper processes. For example, CRC already collects analytics that is merged with the Editor summary as information supplement
- Reserved the disk space required to store the summary ahead of time to prevent failing later.
- Increased the frequency at which the summary is persisted because saving the summary is more efficient. (About every 10 seconds rather than every minute).
- Added unit tests

CrashReportClient changes:

- Created a 'session summary' from the CRC point of view to merge with the Editor summary.
- Moved analytics collection into a separate class to make the crash reporting code leaner and less noisy with all the analytics.
- Merged the CRC diagnostic logger into the class collecting the CRC analytics summary and made the diagnostic log a property in the summary.
- Collected analytics (on behalf of Editor) in a background thread because CRC main thread can be blocked collecting a crash, so it doesn't pay attention to other things
- Added MonitorBatteryLevel and MonitorOnACPower summary properties on Windows. Collected on CRC background thread (never blocked, so we reduce the chances of missing the battery running out).
- Added MonitorSessionDuration summary property to track how long CRC ran.
- Added MonitorQuitSignalRecv summary property to detect when CRC is soft killed like: taskkill /PID 1234
- Added MonitorIsReportingCrash summary property to track when CRC dies reporting a crash.
- Added MonitorIsCollectingCrash summary property to track when CRC dies collecting crash artifacts.
- Added IsProcessingCrash summary property to track when CRC dies processing a crash.
- Added MonitorCrashed summary property to track when CRC exception handler was triggered.
- Added MonitorWasShutdown summary property to track when CRC summary was shutdown
- Added MonitorLoggingOut summary property to track when CRC died because the user was logging out (or as result of shutting down or restarting the computer).
- More accurate value for DeathTimestamp summary property because this is now captured in CRC background thread (which cannot be busy handling a crash)
- Added crash processing timing to CRC diagnostic logs (how long it takes to collect/process a crash).

#rb Jamie.Dale, Wes.Hunt, Johan.Berg
#jira UETOOL-3500
#jira UE-114315

[CL 16324612 by Patrick Laflamme in ue5-main branch]
2021-05-13 21:58:20 -04:00

133 lines
4.9 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "CrashReportClientApp.h"
#include "Windows/WindowsHWrapper.h"
#include "CrashReportClientDefines.h"
#if CRASH_REPORT_WITH_MTBF
#include "HAL/FileManager.h"
#include "HAL/PlatformProcess.h"
#include "HAL/PlatformAtomics.h"
#include "HAL/PlatformStackWalk.h"
#include "Serialization/Archive.h"
#include "CrashReportAnalyticsSessionSummary.h"
#endif
#if CRASH_REPORT_WITH_MTBF && !PLATFORM_SEH_EXCEPTIONS_DISABLED
/** Static scratch buffer that receives the human-readable callstack dumped while CRC itself is crashing. */
static ANSICHAR CrashStackTrace[8*1024] = {0};

/**
 * Reports a crash of CRC itself to the CRC analytics session summary.
 *
 * Only the first caller does any work: every call increments a shared counter and the call that
 * observes the value 1 reports the exception code. For exception codes other than
 * STATUS_HEAP_CORRUPTION, a callstack dump is also attempted and logged as a summary event.
 *
 * @param ExceptionInfo The exception information supplied to the exception filter. Dereferenced without a null check.
 */
void SaveCrcCrashException(EXCEPTION_POINTERS* ExceptionInfo)
{
	// The first crashing thread to increment the counter wins the race; all the others bail out.
	static volatile int32 CrashCount = 0;
	if (FPlatformAtomics::InterlockedIncrement(&CrashCount) != 1)
	{
		return;
	}

	// Write the exception code in the appropriate field of the session summary.
	const EXCEPTION_RECORD& Record = *ExceptionInfo->ExceptionRecord;
	FCrashReportAnalyticsSessionSummary::Get().OnCrcCrashing(Record.ExceptionCode);

	// No callstack is collected for heap corruption.
	// NOTE(review): presumably because the stack walk below allocates memory -- confirm.
	if (Record.ExceptionCode == STATUS_HEAP_CORRUPTION)
	{
		return;
	}

	// Best effort only: this runs inside the crashing process and allocates memory/uses the callstack,
	// so it is not robust, but it may still yield useful data about why CRC crashed.
	if (!FPlatformStackWalk::InitStackWalkingForProcess(FProcHandle()))
	{
		return;
	}

	FPlatformStackWalk::StackWalkAndDump(CrashStackTrace, UE_ARRAY_COUNT(CrashStackTrace), 0);
	if (CrashStackTrace[0] != 0)
	{
		FCrashReportAnalyticsSessionSummary::Get().LogEvent(ANSI_TO_TCHAR(CrashStackTrace));
	}
}
/**
 * Vectored Exception Handler (VEH) used to capture heap corruption exceptions, which do not reach the
 * UnhandledExceptionFilter(). The VEH has the first and only chance to observe heap corruption exceptions
 * before they get 'handled' by the OS.
 *
 * @param ExceptionInfo The exception information supplied by the OS.
 * @return Always EXCEPTION_CONTINUE_SEARCH.
 */
LONG WINAPI CrashReportVectoredExceptionFilter(EXCEPTION_POINTERS* ExceptionInfo)
{
	// Every other exception code is ignored by this handler.
	const bool bIsHeapCorruption = (ExceptionInfo->ExceptionRecord->ExceptionCode == STATUS_HEAP_CORRUPTION);
	if (bIsHeapCorruption)
	{
		SaveCrcCrashException(ExceptionInfo);
	}

	// The exception is never handled here; the OS deals with it (the process will crash).
	return EXCEPTION_CONTINUE_SEARCH;
}
/**
* Invoked when an exception was not handled by vectored exception handler(s) nor structured exception handler(s) (__try/__except).
* For a good understanding of the SEH inner workings, take a look at the EngineUnhandledExceptionFilter documentation in WindowsPlatformCrashContext.cpp.
*
* Forwards the exception to SaveCrcCrashException() and always returns EXCEPTION_CONTINUE_SEARCH.
*/
LONG WINAPI CrashReportUnhandledExceptionFilter(EXCEPTION_POINTERS* ExceptionInfo)
{
SaveCrcCrashException(ExceptionInfo);
// Let the OS deal with the exception. (the process will crash)
return EXCEPTION_CONTINUE_SEARCH;
}
#endif
/**
 * WinMain, called when the application is started.
 *
 * When built with CRASH_REPORT_WITH_MTBF and launched with "-MONITOR=" but without "-RespawnedInstance",
 * this instance only respawns itself (adding "-RespawnedInstance"), writes the respawned process ID to
 * "<UserTempDir>ue-crc-pid-<MonitoredEditorPid>" and exits. Otherwise, it installs the CRC crash filters
 * (when SEH exceptions are enabled) and runs the crash report client with the process command line.
 *
 * @param hInInstance Instance handle, stored in the global hInstance.
 * @param hPrevInstance Unused.
 * @param nCmdShow Unused.
 * @return Always 0.
 */
int WINAPI WinMain(_In_ HINSTANCE hInInstance, _In_opt_ HINSTANCE hPrevInstance, _In_ LPSTR, _In_ int nCmdShow)
{
	hInstance = hInInstance;

#if CRASH_REPORT_WITH_MTBF // For the Editor only.
	FString Arguments(::GetCommandLineW());
	if (Arguments.Contains(TEXT("-MONITOR=")) && !Arguments.Contains(TEXT("-RespawnedInstance")))
	{
		// Pipe handle value passed on the command line; stays 0 if "-READ=" is not present.
		uint64 ChildPipe = 0;
		FParse::Value(GetCommandLineW(), TEXT("-READ="), ChildPipe);

		// Parse the process ID of the Editor that spawned this CRC.
		uint32 MonitoredEditorPid = 0;
		if (FParse::Value(GetCommandLineW(), TEXT("-MONITOR="), MonitoredEditorPid))
		{
			// GetModuleFileName() returns 0 on failure and the buffer size when the path was truncated. In both
			// cases the buffer doesn't hold a usable executable path, so only respawn when the call fully succeeded.
			TCHAR RespawnExePathname[MAX_PATH] = {0};
			const uint32 RespawnExePathnameLen = GetModuleFileName(NULL, RespawnExePathname, MAX_PATH);
			if (RespawnExePathnameLen > 0 && RespawnExePathnameLen < MAX_PATH)
			{
				FString RespawnExeArguments(Arguments);
				RespawnExeArguments.Append(TEXT(" -RespawnedInstance"));

				uint32 RespawnPid = 0;

				// Respawn itself to sever the process grouping with the Editor. If the user kills the Editor process group in task manager,
				// CRC will not die at the same time, will be able to capture the Editor exit code and send the MTBF report to correctly
				// identify the Editor 'AbnormalShutdown' as 'Killed' instead.
				FProcHandle Handle = FPlatformProcess::CreateProc(
					RespawnExePathname,
					*RespawnExeArguments,
					true, false, false,
					&RespawnPid, 0,
					nullptr,
					reinterpret_cast<void*>(ChildPipe), // Ensure the child process inherit this pipe handle that was previously inherited from its parent.
					nullptr);

				if (Handle.IsValid())
				{
					// Persist the respawned process ID in a file named after the monitored Editor process ID.
					FString PidPathname = FString::Printf(TEXT("%sue-crc-pid-%d"), FPlatformProcess::UserTempDir(), MonitoredEditorPid);
					if (TUniquePtr<FArchive> Ar = TUniquePtr<FArchive>(IFileManager::Get().CreateFileWriter(*PidPathname, FILEWRITE_EvenIfReadOnly)))
					{
						*Ar << RespawnPid;
					}

					FPlatformProcess::CloseProc(Handle);
				}
			}
		}

		RequestEngineExit(TEXT("Respawn instance."));
		return 0; // Exit this intermediate instance, the Editor is waiting for it to continue.
	}

#if !PLATFORM_SEH_EXCEPTIONS_DISABLED
	// Install the filters recording CRC own crashes in the analytics session summary.
	::SetUnhandledExceptionFilter(CrashReportUnhandledExceptionFilter);
	::AddVectoredExceptionHandler(0, CrashReportVectoredExceptionFilter);
#endif // !PLATFORM_SEH_EXCEPTIONS_DISABLED
#endif // CRASH_REPORT_WITH_MTBF

	RunCrashReportClient(GetCommandLineW());
	return 0;
}