// Copyright Epic Games, Inc. All Rights Reserved. #include "CrashReportAnalyticsSessionSummary.h" #include "AnalyticsPropertyStore.h" #include "AnalyticsSessionSummaryManager.h" #include "AnalyticsSessionSummarySender.h" #include "IAnalyticsProviderET.h" #include "CrashReportAnalytics.h" #include "Containers/Map.h" #include "GenericPlatform/GenericPlatformMath.h" #include "GenericPlatform/GenericPlatformCrashContext.h" #include "HAL/PlatformTime.h" #include "Logging/LogMacros.h" #include "Misc/Paths.h" #include "Misc/ScopeLock.h" #include "Templates/UnrealTemplate.h" DEFINE_LOG_CATEGORY_STATIC(LogCrashReportClientDiagnostics, Log, All) #if PLATFORM_WINDOWS && CRASH_REPORT_WITH_MTBF #include "Windows/AllowWindowsPlatformTypes.h" #include "Windows.h" /** Handle windows messages. */ LRESULT CALLBACK CrashReportAnalyticsSessionSummaryWindowProc(HWND Hwnd, UINT Msg, WPARAM wParam, LPARAM lParam) { // wParam is true if the user session is going away (and CRC is going to die) if (Msg == WM_ENDSESSION && wParam == TRUE) { FCrashReportAnalyticsSessionSummary::Get().OnUserLoggingOut(); } else if (Msg == WM_CLOSE) { FCrashReportAnalyticsSessionSummary::Get().OnQuitSignal(); } return DefWindowProc(Hwnd, Msg, wParam, lParam); } #include "Windows/HideWindowsPlatformTypes.h" #endif namespace CrcAnalyticsProperties { /** The exit code of the monitored application. */ static const TAnalyticsProperty MonitoredAppExitCode(TEXT("ExitCode")); /** Track when CRC detected the death of the monitored app. */ static const TAnalyticsProperty MonitoredAppDeathTimestamp(TEXT("DeathTimestamp")); /** The CRC startup timestamp. */ static const TAnalyticsProperty StartupTimestamp(TEXT("MonitorStartupTimestamp")); /** The CRC timestamp. */ static const TAnalyticsProperty Timestamp(TEXT("MonitorTimestamp")); /** If CRC raised an exception that was captured by SEH, this is the exception code. */ static const TAnalyticsProperty ExceptCode(TEXT("MonitorExceptCode")); /** If CRC is about to close because the system sent a quit signal. */ static const TAnalyticsProperty QuitSignalRecv(TEXT("MonitorQuitSignalRecv")); /** The CRC diagnostic logs. */ static const TAnalyticsProperty DiagnosticLogs(TEXT("MonitorLog")); /** The CRC session duration in seconds. */ static const TAnalyticsProperty SessionDurationSecs(TEXT("MonitorSessionDuration")); /** The battery level, if known. */ static const TAnalyticsProperty BatteryLevel(TEXT("MonitorBatteryLevel")); /** If the system is connected to AC, if know */ static const TAnalyticsProperty IsOnACPower(TEXT("MonitorOnACPower")); /** Whether CRC is reporting a crash. */ static const TAnalyticsProperty IsReportingCrash(TEXT("MonitorIsReportingCrash")); /** Whether CRC is collecting crash artifacts. */ static const TAnalyticsProperty IsCollectingCrash(TEXT("MonitorIsCollectingCrash")); /** Whether CRC is processing a crash. */ static const TAnalyticsProperty IsProcessingCrash(TEXT("MonitorIsProcessingCrash")); /** If CRC is about to be killed because the user is logging out (system shutdown/reboot). */ static const TAnalyticsProperty UserIsLoggingOut(TEXT("MonitorLoggingOut")); /** Whether CRC crashed. */ static const TAnalyticsProperty IsCrashing(TEXT("MonitorCrashed")); /** Whether CRC was shutdown. */ static const TAnalyticsProperty WasShutdown(TEXT("MonitorWasShutdown")); } namespace CrashReportClientUtils { /** Flush the store peridically. */ static const FTimespan PropertyStoreFlushPeriod = FTimespan::FromSeconds(10); /** Tick periodically to poll new information. */ static const FTimespan TickPeriod = FTimespan::FromSeconds(0.5); /** Maximum length of the diagnostic log. */ static constexpr int32 MaxDiagnosticLogLen = 8 * 1024; #if PLATFORM_WINDOWS && CRASH_REPORT_WITH_MTBF HWND Hwnd = NULL; /** Create a hidden Windows to intercept WM_ messages, especially the WM_ENDSESSION. */ void InitPlatformSpecific() { // Register the window class. const wchar_t CLASS_NAME[] = L"CRC Analytics Session Window Message Interceptor"; WNDCLASS wc = { }; wc.lpfnWndProc = CrashReportAnalyticsSessionSummaryWindowProc; wc.hInstance = hInstance; wc.lpszClassName = CLASS_NAME; RegisterClass(&wc); // Create a window to capture WM_ENDSESSION message (so that we can detect when CRC fails because the user is logging off/shutting down/restarting) Hwnd = CreateWindowEx( 0, // Optional window styles. CLASS_NAME, // Window class L"CRC Message Loop Wnd", // Window text WS_OVERLAPPEDWINDOW, // Window style CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, // Size and position NULL, // Parent window NULL, // Menu hInstance, // Instance handle NULL // Additional application data ); } /** Pump the message from the hidden Windows. */ void TickPlatformSpecific() { if (Hwnd != NULL) { // Pump the messages. MSG Msg = { }; while (::PeekMessage(&Msg, NULL, 0, 0, PM_REMOVE)) { ::TranslateMessage(&Msg); ::DispatchMessage(&Msg); } } } bool GetPowerStatus(TOptional& OutACPowerConnected, TOptional& OutBatteryPct) { bool bAvailable = false; SYSTEM_POWER_STATUS Status; if (GetSystemPowerStatus(&Status)) { switch (Status.ACLineStatus) { case 0: // AC Offline OutACPowerConnected.Emplace(false); bAvailable = true; break; case 1: // AC Online OutACPowerConnected.Emplace(true); bAvailable = true; break; default: // Unknown break; } if (Status.BatteryLifePercent != 255) // Unknown { OutBatteryPct.Emplace(Status.BatteryLifePercent); bAvailable = true; } } return bAvailable; } #else void InitPlatformSpecific(){} void TickPlatformSpecific(){} bool GetPowerStatus(TOptional& OutACPowerConnected, TOptional& OutBatteryPct) { return false; } #endif } // CrashReportClientUtils /** * Augments the default summary senders to perform a short analytis to detect if this is the session ended abnormally. The sender searches * for very specific keys published by the engine. Remember that CRC merges its summary with the monitored process summary (the Editor), so * it gets to see what was recorded (or not) by the monitored process. */ class FCrashReportClientAnalyticsSessionSummarySender : public FAnalyticsSessionSummarySender { public: FCrashReportClientAnalyticsSessionSummarySender(IAnalyticsProviderET& Provider) : FAnalyticsSessionSummarySender(Provider) { } virtual bool SendSessionSummary(const FString& UserId, const FString& AppId, const FString& AppVersion, const FString& SessionId, const TMap& Properties) override { // CRC should only send one session (its own), but do a reset in case this convention changes. bAbnormalShutdown = false; bUserLoggingOut = false; // Analyze the report to be sent and try to figure out if this is an abnormal shutdown. They keys are taken from the engine analytics session summary. To prevent dependencies // between CRC and Engine analytics, we duplicate the keys here. if (const FString* ShutdownTypeCode = Properties.Find(FAnalyticsSessionSummaryManager::ShutdownTypeCodeProperty.Key)) { bAbnormalShutdown = (*ShutdownTypeCode == LexToString((int32)EAnalyticsSessionShutdownType::Abnormal)); if (bAbnormalShutdown) { if (const FString* UserLoggingOut = Properties.Find(FAnalyticsSessionSummaryManager::IsUserLoggingOutProperty.Key)) { bUserLoggingOut = (*UserLoggingOut == LexToString(true)); } } } // Send the report unmodified. return FAnalyticsSessionSummarySender::SendSessionSummary(UserId, AppId, AppVersion, SessionId, Properties); } /** Returns whether the last session sent was abnormally terminated. */ bool IsAbnormalShutdown() const { return bAbnormalShutdown && !bUserLoggingOut; } private: bool bAbnormalShutdown = false; bool bUserLoggingOut = false; }; FCrashReportAnalyticsSessionSummary::FCrashReportAnalyticsSessionSummary() : SessionStartTimeSecs(FPlatformTime::Seconds()) , bMonitoredAppDeathRecorded(false) , bShutdown(false) { if (IsEnabled()) { CrashReportClientUtils::InitPlatformSpecific(); // Reserve the memory for the log string. DiagnosticLog.Reset(CrashReportClientUtils::MaxDiagnosticLogLen); DiagnosticLog.Append(FString::Printf(TEXT("CRC/Init:%s"), *FDateTime::UtcNow().ToString())); } } FCrashReportAnalyticsSessionSummary& FCrashReportAnalyticsSessionSummary::FCrashReportAnalyticsSessionSummary::Get() { static FCrashReportAnalyticsSessionSummary Instance; return Instance; } void FCrashReportAnalyticsSessionSummary::Initialize(const FString& ProcessGroupId, uint32 ForProcessId) { if (IsEnabled() && !SessionSummaryManager && !ProcessGroupId.IsEmpty()) { SessionSummaryManager = MakeUnique(TEXT("CrashReportClient"), ProcessGroupId, ForProcessId); if (SessionSummaryManager) { constexpr uint32 ReservedFileCapacity = CrashReportClientUtils::MaxDiagnosticLogLen + (4 * 1024); PropertyStore = SessionSummaryManager->MakeStore(ReservedFileCapacity); if (PropertyStore) { FCoreDelegates::ApplicationWillTerminateDelegate.AddRaw(this, &FCrashReportAnalyticsSessionSummary::OnApplicationWillTerminate); FCoreDelegates::OnHandleSystemError.AddRaw(this, &FCrashReportAnalyticsSessionSummary::OnHandleSystemError); CrcAnalyticsProperties::StartupTimestamp.Set(PropertyStore.Get(), FDateTime::UtcNow()); CrcAnalyticsProperties::Timestamp.Set(PropertyStore.Get(), FDateTime::UtcNow()); CrcAnalyticsProperties::SessionDurationSecs.Set(PropertyStore.Get(), FMath::FloorToInt(static_cast(FPlatformTime::Seconds() - SessionStartTimeSecs))); CrcAnalyticsProperties::DiagnosticLogs.Set(PropertyStore.Get(), DiagnosticLog, CrashReportClientUtils::MaxDiagnosticLogLen); CrcAnalyticsProperties::IsReportingCrash.Set(PropertyStore.Get(), false); CrcAnalyticsProperties::IsCollectingCrash.Set(PropertyStore.Get(), false); CrcAnalyticsProperties::IsProcessingCrash.Set(PropertyStore.Get(), false); CrcAnalyticsProperties::UserIsLoggingOut.Set(PropertyStore.Get(), false); CrcAnalyticsProperties::IsCrashing.Set(PropertyStore.Get(), false); CrcAnalyticsProperties::WasShutdown.Set(PropertyStore.Get(), false); CrcAnalyticsProperties::QuitSignalRecv.Set(PropertyStore.Get(), false); UpdatePowerStatus(); Flush(); GLog->AddOutputDevice(this); // CRC main thread might be busy processing a crash, so use a background thread to record important events that could otherwise be missed. AnalyticsThread = MakeUnique(TEXT("AnalyticsMonitorThread"), [this, ForProcessId]() { // Try to open the process. FProcHandle MonitoredProcessHandle = FPlatformProcess::OpenProcess(ForProcessId); if (!MonitoredProcessHandle.IsValid()) { LogEvent(TEXT("CRC/OpenProcessFailed")); return; } else { LogEvent(TEXT("CRC/Monitoring")); } CrashReportClientUtils::InitPlatformSpecific(); double NextFlushTimeSecs = FPlatformTime::Seconds(); bool bFlushedLowBattery = false; while (!bShutdown) { CrashReportClientUtils::TickPlatformSpecific(); // Monitor the power level. bool bShouldFlush = UpdatePowerStatus(); if (!FPlatformProcess::IsProcRunning(MonitoredProcessHandle)) { OnMonitoredAppDeath(MonitoredProcessHandle); break; } else if (FPlatformTime::Seconds() > NextFlushTimeSecs || bShouldFlush) { // Flush to timestamp the session. Flush(); NextFlushTimeSecs += CrashReportClientUtils::PropertyStoreFlushPeriod.GetTotalSeconds(); } // Throttle the thread. FPlatformProcess::Sleep(CrashReportClientUtils::TickPeriod.GetTotalSeconds()); } FPlatformProcess::CloseProc(MonitoredProcessHandle); }); } } } } bool FCrashReportAnalyticsSessionSummary::IsValid() const { return IsEnabled() && PropertyStore.IsValid(); } void FCrashReportAnalyticsSessionSummary::Shutdown(IAnalyticsProviderET* AnalyticsProvider, TFunction HandleAbnormalShutdownFn) { if (!IsValid()) { return; } if (AnalyticsThread) { bShutdown = true; AnalyticsThread->Join(); AnalyticsThread.Reset(); } CrcAnalyticsProperties::WasShutdown.Set(PropertyStore.Get(), true); LogEvent(FString::Printf(TEXT("CRC/Shutdown:%s:%.1fs"), *FDateTime::UtcNow().ToString(), FPlatformTime::Seconds() - SessionStartTimeSecs)); // Unregister from the core. FCoreDelegates::ApplicationWillTerminateDelegate.RemoveAll(this); FCoreDelegates::OnHandleSystemError.RemoveAll(this); GLog->RemoveOutputDevice(this); // Flush and close CRC analytics summary. Flush(); PropertyStore.Reset(); // If CRC is allowed to send summary report on behalf of the monitored application. if (AnalyticsProvider) { // Set a summary sender that will intercept the set of properties and perform an analysis to detect an abnormal shutdown. TSharedPtr SummarySender = MakeShared(*AnalyticsProvider); SessionSummaryManager->SetSender(SummarySender); // Merge the principal process summary with CRC process summary and sends the session summary if CRC is the last process to exit. SessionSummaryManager->Shutdown(); // If the summary sender detected an abnormal shutdown by checking the summary properties, report it. if (SummarySender->IsAbnormalShutdown() && HandleAbnormalShutdownFn) { HandleAbnormalShutdownFn(); } } else // Discard CRC summary. { SessionSummaryManager->Shutdown(/*bDiscard*/true); } SessionSummaryManager.Reset(); } void FCrashReportAnalyticsSessionSummary::Serialize(const TCHAR* V, ELogVerbosity::Type Verbosity, const FName& Category, const double Time) { Serialize(V, Verbosity, Category); } void FCrashReportAnalyticsSessionSummary::Serialize(const TCHAR* V, ELogVerbosity::Type Verbosity, const FName& Category) { // Log the errors, especially the failed 'check()' with the callstack/message. if (Verbosity == ELogVerbosity::Error || Verbosity == ELogVerbosity::Fatal) { // Log but don't forward to UE logging system. The log is already originate from the logging system. LogEvent(TEXT("CRC/Error"), /*bForwardToUELog*/false); LogEvent(V, /*bForwardToUELog*/false); } } bool FCrashReportAnalyticsSessionSummary::CanBeUsedOnAnyThread() const { return true; } bool FCrashReportAnalyticsSessionSummary::CanBeUsedOnMultipleThreads() const { return true; } void FCrashReportAnalyticsSessionSummary::LogEvent(const FString& Event) { LogEvent(*Event); } void FCrashReportAnalyticsSessionSummary::LogEvent(const TCHAR* Event, bool bForwardToUELog) { if (IsValid()) { FScopeLock ScopedLock(&LoggerLock); TGuardValue ReentrantGuard(bLoggerReentrantGuard, true); if (*ReentrantGuard) // Read the old value. { return; // Prevent renentrant logging. } AppendLog(Event); } // Prevent error logs coming from the logging system to be duplicated. if (bForwardToUELog) { UE_LOG(LogCrashReportClientDiagnostics, Log, TEXT("%s"), Event); } } void FCrashReportAnalyticsSessionSummary::AppendLog(const TCHAR* Event) { // Add the separator if some text is already logged. if (DiagnosticLog.Len()) { DiagnosticLog.Append(TEXT("|")); } // Rotate the log if it gets too long. int32 FreeLen = CrashReportClientUtils::MaxDiagnosticLogLen - DiagnosticLog.Len(); int32 EventLen = FCString::Strlen(Event); if (EventLen > FreeLen) { if (EventLen > CrashReportClientUtils::MaxDiagnosticLogLen) { DiagnosticLog.Reset(CrashReportClientUtils::MaxDiagnosticLogLen); EventLen = CrashReportClientUtils::MaxDiagnosticLogLen; } else { DiagnosticLog.RemoveAt(0, EventLen - FreeLen, /*bAllowShrinking*/false); // Free space, remove the chars from the oldest events (in front). } } // Append the log entry and dump the log to the file. DiagnosticLog.AppendChars(Event, EventLen); // Update the diagnostic field into the session summary store. CrcAnalyticsProperties::DiagnosticLogs.Set(PropertyStore.Get(), DiagnosticLog); // Flush the store. Flush(); } void FCrashReportAnalyticsSessionSummary::OnMonitoredAppDeath(FProcHandle& Handle) { // The first thread to exchange successfull is allowed to update. No need to update twice for the same monitored process. bool Expected = false; if (IsValid() && bMonitoredAppDeathRecorded.compare_exchange_strong(Expected, true)) { CrcAnalyticsProperties::MonitoredAppDeathTimestamp.Set(PropertyStore.Get(), FDateTime::UtcNow()); int32 ExitCode; if (Handle.IsValid() && FPlatformProcess::GetProcReturnCode(Handle, &ExitCode)) { CrcAnalyticsProperties::MonitoredAppExitCode.Set(PropertyStore.Get(), ExitCode); LogEvent(FString::Printf(TEXT("App/ExitCode:%d"), ExitCode)); } else { CrcAnalyticsProperties::MonitoredAppExitCode.Set(PropertyStore.Get(), ECrashExitCodes::MonitoredApplicationExitCodeNotAvailable); LogEvent(TEXT("App/ExitCode:N/A")); } Flush(); } } void FCrashReportAnalyticsSessionSummary::OnUserLoggingOut() { if (IsValid()) { // The user is logging out and CRC is going to die. CrcAnalyticsProperties::UserIsLoggingOut.Set(PropertyStore.Get(), true); // Log the event (this also flush the session). LogEvent(TEXT("CRC/EndSession")); } } void FCrashReportAnalyticsSessionSummary::OnQuitSignal() { // The system has requested the app to close. (Like if the user gently kills the application) CrcAnalyticsProperties::QuitSignalRecv.Set(PropertyStore.Get(), true); // Log the event (this also flush the session). LogEvent(TEXT("CRC/QuitSignal")); } void FCrashReportAnalyticsSessionSummary::OnCrcCrashing(int32 ExceptCode) { if (IsValid()) { CrcAnalyticsProperties::IsCrashing.Set(PropertyStore.Get(), true); CrcAnalyticsProperties::ExceptCode.Set(PropertyStore.Get(), ExceptCode); TCHAR CrashEventLog[64]; FCString::Sprintf(CrashEventLog, TEXT("CRC/Crash:%d"), ExceptCode); LogEvent(CrashEventLog); // This also flush the session. } } void FCrashReportAnalyticsSessionSummary::Flush() { if (IsValid()) { // Update the session progression. CrcAnalyticsProperties::Timestamp.Set(PropertyStore.Get(), FDateTime::UtcNow(), [](const FDateTime* Actual, const FDateTime& Proposed) { return Proposed > *Actual; }); CrcAnalyticsProperties::SessionDurationSecs.Set(PropertyStore.Get(), FMath::FloorToInt(FPlatformTime::Seconds() - SessionStartTimeSecs), [](const int32* Actual, const int32& Proposed) { return Proposed > *Actual; }); // Flush the store to disk. PropertyStore->Flush(); } } void FCrashReportAnalyticsSessionSummary::OnApplicationWillTerminate() { LogEvent(FString::Printf(TEXT("CRC/Terminate:%s"), *FDateTime::UtcNow().ToString())); } void FCrashReportAnalyticsSessionSummary::OnHandleSystemError() { CrcAnalyticsProperties::IsCrashing.Set(PropertyStore.Get(), true); LogEvent(FString::Printf(TEXT("CRC/SysError:%s"), *FDateTime::UtcNow().ToString())); } void FCrashReportAnalyticsSessionSummary::OnCrashReportStarted(ECrashContextType CrashType, const TCHAR* ErrorMsg) { CrashReportStartTimeSecs = FPlatformTime::Seconds(); CrcAnalyticsProperties::IsReportingCrash.Set(PropertyStore.Get(), true); LogEvent(FString::Printf(TEXT("Report/Start:%s"), *FDateTime::UtcNow().ToString())); // Log the assert and ensure condition/file/line/message to the diagnostic log gathered by the analytics to enable searching/grouping them later on. if (CrashType == ECrashContextType::Assert) { LogEvent(FString::Printf(TEXT("Assert/Msg: %s"), ErrorMsg)); } else if (CrashType == ECrashContextType::Ensure) { LogEvent(FString::Printf(TEXT("Ensure/Msg: %s"), ErrorMsg)); } } void FCrashReportAnalyticsSessionSummary::OnCrashReportCollecting() { CrashReportCollectingStartTimeSecs = FPlatformTime::Seconds(); CrcAnalyticsProperties::IsCollectingCrash.Set(PropertyStore.Get(), true); FCrashReportAnalyticsSessionSummary::Get().LogEvent(TEXT("Report/Collect")); } void FCrashReportAnalyticsSessionSummary::OnCrashReportProcessing(bool bUserInteractive) { CrcAnalyticsProperties::IsCollectingCrash.Set(PropertyStore.Get(), false); CrcAnalyticsProperties::IsProcessingCrash.Set(PropertyStore.Get(), true); CrashReportProcessingStartTimeSecs = FPlatformTime::Seconds(); FCrashReportAnalyticsSessionSummary::Get().LogEvent(FString::Printf(TEXT("Report/Process:%s"), bUserInteractive ? TEXT("Interactive") : TEXT("Unattended"))); } void FCrashReportAnalyticsSessionSummary::OnCrashReportCompleted(bool bSubmitted) { CrcAnalyticsProperties::IsCollectingCrash.Set(PropertyStore.Get(), false); CrcAnalyticsProperties::IsProcessingCrash.Set(PropertyStore.Get(), false); CrcAnalyticsProperties::IsReportingCrash.Set(PropertyStore.Get(), false); double CurrTimeSecs = FPlatformTime::Seconds(); double CollectTimeSecs = CrashReportProcessingStartTimeSecs - CrashReportCollectingStartTimeSecs; double ProcessTimeSecs = CurrTimeSecs - CrashReportProcessingStartTimeSecs; double TotalTimeSecs = CurrTimeSecs - CrashReportStartTimeSecs; const TCHAR* Event = bSubmitted ? TEXT("Report/Sent") : TEXT("Report/Discarded"); LogEvent(FString::Printf(TEXT("%s:Collect=%.1fs:Process=%.1fs:Total=%.1fs"), Event, CollectTimeSecs, ProcessTimeSecs, TotalTimeSecs)); } bool FCrashReportAnalyticsSessionSummary::UpdatePowerStatus() { bool bShouldFlush = false; // Monitor the power level. TOptional ConnectedToACPower; TOptional BatteryPercentage; if (CrashReportClientUtils::GetPowerStatus(ConnectedToACPower, BatteryPercentage)) { if (ConnectedToACPower) { CrcAnalyticsProperties::IsOnACPower.Set(PropertyStore.Get(), *ConnectedToACPower); } if (BatteryPercentage) { CrcAnalyticsProperties::BatteryLevel.Set(PropertyStore.Get(), *BatteryPercentage, [&bShouldFlush](const uint32* PreviousLevel, const uint32& NewLevel) { // Detects when the batter level goes from above to below or equal a threshonld. constexpr uint32 LowBatteryPct = 2; if (PreviousLevel && *PreviousLevel > LowBatteryPct && NewLevel <= LowBatteryPct) { // Last time, it was above the threshold, but now it dipped below, save the state before the battery runs out. bShouldFlush = true; } return true; }); } } return bShouldFlush; }