/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Implement TimeStamp::Now() with QueryPerformanceCounter() controlled with
// values of GetTickCount().

// XXX Forcing log to be able to catch issues in the field.  Should be removed
// before this reaches the Release or even Beta channel.
#define FORCE_PR_LOG

#include "mozilla/TimeStamp.h"
#include "mozilla/Mutex.h"
#include "mozilla/Services.h"

#include "nsIObserver.h"
#include "nsIObserverService.h"
#include "nsThreadUtils.h"
#include "nsAutoPtr.h"

#include <stdio.h>
#include <intrin.h>

#include "prlog.h"

#include <windows.h>
#include <mmsystem.h>

static bool
HasStableTSC()
{
  union {
    int regs[4];
    struct {
      int nIds;
      char cpuString[12];
    };
  } cpuInfo;

  __cpuid(cpuInfo.regs, 0);
  // Only allow Intel CPUs for now.
  // The vendor string "GenuineIntel" is spread over EBX, EDX, ECX, i.e.
  // regs[1], regs[3], regs[2], while in memory the registers are laid out as
  // regs[1], regs[2], regs[3].  We just adjust the comparison string so that
  // we can compare in one go.
  if (_strnicmp(cpuInfo.cpuString, "GenuntelineI", sizeof(cpuInfo.cpuString)))
    return false;

  int regs[4];

  // detect if the Advanced Power Management feature is supported
  __cpuid(regs, 0x80000000);
  if (regs[0] < 0x80000007)
    return false;

  __cpuid(regs, 0x80000007);
  // if bit 8 is set, the TSC runs at a constant rate
  // in all ACPI P-states, C-states and T-states
  return regs[3] & (1 << 8);
}

#if defined(PR_LOGGING)
// Log module for mozilla::TimeStamp for Windows logging...
//
// To enable logging (see prlog.h for full details):
//
//    set NSPR_LOG_MODULES=TimeStampWindows:5
//    set NSPR_LOG_FILE=nspr.log
//
// this enables PR_LOG_DEBUG level information and places all output in
// the file nspr.log
static PRLogModuleInfo*
GetTimeStampLog()
{
  static PRLogModuleInfo *sLog;
  if (!sLog)
    sLog = PR_NewLogModule("TimeStampWindows");
  return sLog;
}
#define LOG(x)  PR_LOG(GetTimeStampLog(), PR_LOG_DEBUG, x)
#else
#define LOG(x)
#endif /* PR_LOGGING */

// Estimate of the smallest duration of time we can measure.
static volatile ULONGLONG sResolution;
static volatile ULONGLONG sResolutionSigDigs;

static const double   kNsPerSecd     = 1000000000.0;
static const LONGLONG kNsPerSec      = 1000000000;
static const LONGLONG kNsPerMillisec = 1000000;

static bool sHasStableTSC = false;

// ----------------------------------------------------------------------------
// Global constants
// ----------------------------------------------------------------------------

// After this time we always recalibrate the skew.
//
// On most platforms QPC and GTC do not have quite the same slope, so after
// some time the two values will drift apart.  The 4s calibration interval has
// been chosen mostly arbitrarily based on tests.
//
// Mostly, 4 seconds has been chosen based on the sleep/wake issue - timers
// shift after wakeup.  I wanted to make the time as reasonably short as
// possible to always recalibrate after even a very short standby time (quite
// a reasonable test case).  So, there is a lot of space to prolong it
// to say 20 seconds or even more; needs testing in the field, though.
//
// Value is number of [ms].
static const ULONGLONG kCalibrationInterval = 4000;

// On every read of QPC we check that the overflow of the skew difference does
// not go over this number of milliseconds.  Both timer functions jitter, so we
// have to have some limit.  The value is based on tests.
//
// Changing kCalibrationInterval influences this limit: prolonging
// kCalibrationInterval alone makes the check more sensitive to threshold
// overflows.
//
// How this constant is used (also see the CheckCalibration function):
// First, adjust the limit linearly to the time elapsed since the last
// calibration:
//   LIMIT = kOverflowLimit * (GTC_now - GTC_calib) / kCalibrationInterval
// Then, check the skew difference overflow is within this adjusted limit:
//   ABS((QPC_now - GTC_now) - (QPC_calib - GTC_calib)) - THRESHOLD < LIMIT
//
// Thresholds are calculated dynamically, see sUnderrunThreshold and
// sOverrunThreshold below.
//
// Value is number of [ms].
static const ULONGLONG kOverflowLimit = 100;

// If we are not able to get the value of the GTC time increment, use this
// value, which is the most usual increment.
static const DWORD kDefaultTimeIncrement = 156001;

// ----------------------------------------------------------------------------
// Global variables, not changing at runtime
// ----------------------------------------------------------------------------

/**
 * The [mt] unit:
 *
 * Many values are kept in ticks of the Performance Counter x 1000,
 * further just referred to as [mt], meaning milli-ticks.
 *
 * This is needed to preserve maximum precision of the performance frequency
 * representation.  GetTickCount values in milliseconds are multiplied with
 * frequency per second.  Therefore we need to multiply the QPC value by 1000
 * to have the same units, which allows simple arithmetic with both QPC and
 * GTC.
 */

#define ms2mt(x) ((x) * sFrequencyPerSec)
#define mt2ms(x) ((x) / sFrequencyPerSec)
#define mt2ms_d(x) (double(x) / sFrequencyPerSec)

// Result of QueryPerformanceFrequency
static LONGLONG sFrequencyPerSec = 0;

// Lower and upper bound that QueryPerformanceCounter - GetTickCount must not
// go under or over when compared to the calibrated QPC - GTC difference (skew).
// Values are based on the GetTickCount update interval.
//
// Schematically, QPC works correctly if ((QPC_now - GTC_now) -
// (QPC_calib - GTC_calib)) is in the [sUnderrunThreshold, sOverrunThreshold]
// interval every time we access them.
//
// Kept in [mt]
static LONGLONG sUnderrunThreshold;
static LONGLONG sOverrunThreshold;

// ----------------------------------------------------------------------------
// Global lock
// ----------------------------------------------------------------------------

// Thread spin count before entering the full wait state for sTimeStampLock.
// Inspired by Rob Arnold's work on PRMJ_Now().
static const DWORD kLockSpinCount = 4096;

// Common mutex (thanks to the relative complexity of the logic, this is better
// than using CMPXCHG8B.)
// It is protecting the globals below.
CRITICAL_SECTION sTimeStampLock;

// ----------------------------------------------------------------------------
// Globals heavily changing at runtime, protected with sTimeStampLock mutex
// ----------------------------------------------------------------------------

// The calibrated difference between QPC and GTC.
//
// Kept in [mt]
static LONGLONG sSkew = 0;

// Keeps the last result we have returned from sGetTickCount64 (below).
// Protects from rolling over and going backward.
//
// Kept in [ms]
static ULONGLONG sLastGTCResult = 0;

// Holder of the last result of our main hi-res function.  Protects from going
// backward.
//
// Kept in [mt]
static ULONGLONG sLastResult = 0;

// Time of the last performed calibration.
//
// Kept in [ms]
static ULONGLONG sLastCalibrated;

// The following variable stores two booleans, both initialized to false.
//
// The lower word is fallbackToGTC:
// After we have detected a run out of boundaries, set this to true.  This
// then disallows use of the QPC result for the hi-res timer.
//
// The higher word is forceRecalibrate:
// Set to true to force recalibration on the next QPC read.  This is generally
// set after system wake up, during which the skew can change a lot.
static union CalibrationFlags {
  struct {
    bool fallBackToGTC;
    bool forceRecalibrate;
  } flags;
  uint32_t dwordValue;
} sCalibrationFlags;

namespace mozilla {

static ULONGLONG CalibratedPerformanceCounter();

typedef ULONGLONG (WINAPI* GetTickCount64_t)();
static GetTickCount64_t sGetTickCount64 = nullptr;

static inline ULONGLONG
InterlockedRead64(volatile ULONGLONG* destination)
{
#ifdef _WIN64
  // Aligned 64-bit reads on x86-64 are atomic
  return *destination;
#else
  // Dirty hack since Windows doesn't provide an atomic 64-bit read function
  return _InterlockedCompareExchange64(
    reinterpret_cast<volatile LONGLONG*>(destination), 0, 0);
#endif
}

// ----------------------------------------------------------------------------
// Critical Section helper class
// ----------------------------------------------------------------------------

class AutoCriticalSection
{
public:
  AutoCriticalSection(LPCRITICAL_SECTION section)
    : mSection(section)
  {
    ::EnterCriticalSection(mSection);
  }
  ~AutoCriticalSection()
  {
    ::LeaveCriticalSection(mSection);
  }
private:
  LPCRITICAL_SECTION mSection;
};

// ----------------------------------------------------------------------------
// System standby and wakeup status observer.  Needed to ignore the skew jump
// after the system has been woken up; happens mostly on XP.
// ----------------------------------------------------------------------------

class StandbyObserver : public nsIObserver
{
  NS_DECL_ISUPPORTS
  NS_DECL_NSIOBSERVER

public:
  StandbyObserver()
  {
    LOG(("TimeStamp: StandbyObserver::StandbyObserver()"));
  }

  ~StandbyObserver()
  {
    LOG(("TimeStamp: StandbyObserver::~StandbyObserver()"));
  }

  static inline void Ensure()
  {
    if (sInitialized)
      return;

    // Cannot init on a thread other than the main thread since we are
    // using the ObserverService.
    if (!NS_IsMainThread())
      return;

    nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
    if (!obs)
      return; // Too soon...

    sInitialized = true;

    nsRefPtr<StandbyObserver> observer = new StandbyObserver();
    obs->AddObserver(observer, "wake_notification", false);

    // There is no need to remove the observer; the observer service is the
    // only referrer and we don't hold a reference back to the observer
    // service.
  }

private:
  static bool sInitialized;
};

NS_IMPL_THREADSAFE_ISUPPORTS1(StandbyObserver, nsIObserver)

bool
StandbyObserver::sInitialized = false;

NS_IMETHODIMP
StandbyObserver::Observe(nsISupports *subject,
                         const char *topic,
                         const PRUnichar *data)
{
  AutoCriticalSection lock(&sTimeStampLock);

  // Clear the potential fallback flag now and try using
  // QPC again after wake up.
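  // Both flags live in one 32-bit union, so the single aligned store below
  // publishes them together atomically; readers never observe a torn pair.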
  CalibrationFlags value;
  value.flags.fallBackToGTC = false;
  value.flags.forceRecalibrate = true;
  sCalibrationFlags.dwordValue = value.dwordValue; // aligned 32-bit writes are atomic

  LOG(("TimeStamp: system has woken up, reset GTC fallback"));

  return NS_OK;
}

// ----------------------------------------------------------------------------
// The timer core implementation
// ----------------------------------------------------------------------------

static void
InitThresholds()
{
  DWORD timeAdjustment = 0, timeIncrement = 0;
  BOOL timeAdjustmentDisabled;
  GetSystemTimeAdjustment(&timeAdjustment,
                          &timeIncrement,
                          &timeAdjustmentDisabled);

  if (!timeIncrement)
    timeIncrement = kDefaultTimeIncrement;

  // Ceiling to a millisecond
  // Example values: 156001, 210000
  DWORD timeIncrementCeil = timeIncrement;
  // Don't want to round up if already rounded, values will be: 156000, 209999
  timeIncrementCeil -= 1;
  // Convert to ms, values will be: 15, 20
  timeIncrementCeil /= 10000;
  // Round up, values will be: 16, 21
  timeIncrementCeil += 1;
  // Convert back to 100ns, values will be: 160000, 210000
  timeIncrementCeil *= 10000;

  // How many milli-ticks the interval has
  LONGLONG ticksPerGetTickCountResolution =
    (int64_t(timeIncrement) * sFrequencyPerSec) / 10000LL;

  // How many milli-ticks the interval rounded up has
  LONGLONG ticksPerGetTickCountResolutionCeiling =
    (int64_t(timeIncrementCeil) * sFrequencyPerSec) / 10000LL;

  // I observed differences of about 2 times the GTC resolution.  GTC may
  // jump by 32 ms in two steps, therefore use the ceiling value.
  sUnderrunThreshold =
    LONGLONG((-2) * ticksPerGetTickCountResolutionCeiling);

  // QPC should go no further than 2 * GTC resolution
  sOverrunThreshold =
    LONGLONG((+2) * ticksPerGetTickCountResolution);
}

static void
InitResolution()
{
  // 10 total trials is arbitrary: what we're trying to avoid by
  // looping is getting unlucky and being interrupted by a context
  // switch or signal, or being bitten by paging/cache effects

  ULONGLONG minres = ~0ULL;
  int loops = 10;
  do {
    ULONGLONG start = CalibratedPerformanceCounter();
    ULONGLONG end = CalibratedPerformanceCounter();

    ULONGLONG candidate = (end - start);
    if (candidate < minres)
      minres = candidate;
  } while (--loops && minres);

  if (0 == minres) {
    minres = 1;
  }

  // Converting minres, which is in [mt], to nanoseconds, multiplying
  // the argument first to preserve resolution.
  ULONGLONG result = mt2ms(minres * kNsPerMillisec);
  if (0 == result) {
    result = 1;
  }

  sResolution = result;

  // find the number of significant digits in sResolution, for the
  // sake of ToSecondsSigDigits()
  ULONGLONG sigDigs;
  for (sigDigs = 1;
       !(sigDigs == result || 10*sigDigs > result);
       sigDigs *= 10);

  sResolutionSigDigs = sigDigs;
}

// Function protecting the GetTickCount result from rolling over; the result
// is in [ms].
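//
// Worked example of the rollover handling below (values are illustrative):
// if the last published value was 0x00000001fffffff0 [ms] and GetTickCount()
// has since wrapped around to 0x00000010, the low 32 bits went backward, so we
// bump the high word and publish 0x0000000200000010 [ms].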
static ULONGLONG WINAPI
GetTickCount64Fallback()
{
  ULONGLONG old, newValue;
  do {
    old = InterlockedRead64(&sLastGTCResult);
    ULONGLONG oldTop = old & 0xffffffff00000000;
    ULONG oldBottom = old & 0xffffffff;
    ULONG newBottom = GetTickCount();
    if (newBottom < oldBottom) {
      // handle overflow
      newValue = (oldTop + (1ULL << 32)) | newBottom;
    } else {
      newValue = oldTop | newBottom;
    }
  } while (old != _InterlockedCompareExchange64(
             reinterpret_cast<volatile LONGLONG*>(&sLastGTCResult),
             newValue, old));

  return newValue;
}

// Result is in [mt]
static inline ULONGLONG
PerformanceCounter()
{
  LARGE_INTEGER pc;
  ::QueryPerformanceCounter(&pc);
  return pc.QuadPart * 1000ULL;
}

// Called when we detect a larger deviation of QPC to disable it.
static inline void
RecordFlaw()
{
  sCalibrationFlags.flags.fallBackToGTC = true;

  LOG(("TimeStamp: falling back to GTC :("));

#if 0
  // This code has been disabled, because we:
  // 0. must not call InitResolution under the lock (it would reenter), while
  //    we shouldn't release the lock here just to allow it
  // 1. may return back to using QPC after system wake up
  // 2. InitResolution for GTC will probably return 0 anyway (it increments
  //    only every 15 or 16 ms.)
  //
  // There is no need to drop sFrequencyPerSec to 1; the result of
  // sGetTickCount64 is multiplied and later divided with sFrequencyPerSec.
  // Changing it here may introduce sync problems.  Syncing access to
  // sFrequencyPerSec is overkill.  The drawback is we lose some bits from the
  // upper bound of the 64-bit timer value, usually up to 7, meaning the app
  // cannot run more than some 4'000'000 years :)
  InitResolution();
#endif
}

// Check the current skew is within boundaries and occasionally recalculate it.
// Return true if QPC is OK to use, return false to use GTC only.
//
// Arguments:
// overflow - the calculated overflow beyond the boundaries for the skew
//            difference
// qpc - current value of QueryPerformanceCounter
// gtc - current value of GetTickCount, more up to date considering a possible
//       system sleep between the reads of QPC and GTC
static inline bool
CheckCalibration(LONGLONG overflow, ULONGLONG qpc, ULONGLONG gtc)
{
  CalibrationFlags value;
  value.dwordValue = sCalibrationFlags.dwordValue; // aligned 32-bit reads are atomic
  if (value.flags.fallBackToGTC) {
    // We are forbidden to use QPC
    return false;
  }

  ULONGLONG sinceLastCalibration = gtc - sLastCalibrated;

  if (overflow && !value.flags.forceRecalibrate) {
    // Scale the overflow to correspond to the calibration interval; we may get
    // here long after the last calibration because we either didn't read the
    // hi-res function or the system was suspended.
    ULONGLONG trend = LONGLONG(overflow *
      (double(kCalibrationInterval) / sinceLastCalibration));

    LOG(("TimeStamp: calibration after %llus with overflow %1.4fms"
         ", adjusted trend per calibration interval is %1.4fms",
         sinceLastCalibration / 1000,
         mt2ms_d(overflow),
         mt2ms_d(trend)));

    if (trend > ms2mt(kOverflowLimit)) {
      // This sets fallBackToGTC; we have detected
      // an unreliability of QPC, stop using it.
      AutoCriticalSection lock(&sTimeStampLock);
      RecordFlaw();
      return false;
    }
  }

  if (sinceLastCalibration > kCalibrationInterval ||
      value.flags.forceRecalibrate) {
    // Recalculate the skew now
    AutoCriticalSection lock(&sTimeStampLock);
    sSkew = qpc - ms2mt(gtc);
    sLastCalibrated = gtc;
    LOG(("TimeStamp: new skew is %1.2fms (force:%d)",
         mt2ms_d(sSkew), value.flags.forceRecalibrate));

    sCalibrationFlags.flags.forceRecalibrate = false;
  }

  return true;
}

// AtomicStoreIfGreaterThan tries to store the maximum of two values in one of
// them without locking.
// The only scenario in which two racing threads may corrupt the
// maximum value is when they both try to increase the value without knowing
// about each other, like below:
//
// Thread 1 reads 1000.  newValue in thread 1 is 1005.
// Thread 2 reads 1000.  newValue in thread 2 is 1001.
// Thread 1 tries to store.  Its value is less than newValue, so the store
// happens.  *destination is now 1005.
// Thread 2 tries to store.  Its value is less than newValue, so the store
// happens.  *destination is now 1001.
//
// The correct value to be stored if this was happening serially is 1005.  The
// following algorithm achieves that.
//
// The return value is the maximum value.
ULONGLONG
AtomicStoreIfGreaterThan(ULONGLONG* destination, ULONGLONG newValue)
{
  ULONGLONG readValue;
  do {
    readValue = InterlockedRead64(destination);
    if (readValue > newValue)
      return readValue;
  } while (readValue != _InterlockedCompareExchange64(
             reinterpret_cast<volatile LONGLONG*>(destination),
             newValue, readValue));

  return newValue;
}

// The main function.  The result is in [mt]; it is guaranteed not to go
// backward and to be mostly reliable with the highest possible resolution.
static ULONGLONG
CalibratedPerformanceCounter()
{
  // XXX This uses the ObserverService and cannot be instantiated during static
  // startup; it really needs better initialization code here.
  StandbyObserver::Ensure();

  // Don't hold the lock over the call to QueryPerformanceCounter, since it is
  // the largest bottleneck; let threads read the value concurrently for
  // possibly better performance.

  ULONGLONG qpc = PerformanceCounter();

  // Rollover protection
  ULONGLONG gtc = sGetTickCount64();

  LONGLONG diff = qpc - ms2mt(gtc) - sSkew;
  LONGLONG overflow = 0;

  if (diff < sUnderrunThreshold) {
    overflow = sUnderrunThreshold - diff;
  } else if (diff > sOverrunThreshold) {
    overflow = diff - sOverrunThreshold;
  }

  ULONGLONG result = qpc;
  if (!CheckCalibration(overflow, qpc, gtc)) {
    // We are back on GTC, QPC has been observed to be unreliable
    result = ms2mt(gtc) + sSkew;
  }

#if 0
  LOG(("TimeStamp: result = %1.2fms, diff = %1.4fms",
       mt2ms_d(result), mt2ms_d(diff)));
#endif

  return AtomicStoreIfGreaterThan(&sLastResult, result);
}

// ----------------------------------------------------------------------------
// TimeDuration and TimeStamp implementation
// ----------------------------------------------------------------------------

double
TimeDuration::ToSeconds() const
{
  // Converting before arithmetic avoids blocked store forward
  return double(mValue) / (double(sFrequencyPerSec) * 1000.0);
}

double
TimeDuration::ToSecondsSigDigits() const
{
  AutoCriticalSection lock(&sTimeStampLock);

  // don't report a value < sResolution ...
  LONGLONG resolution = sResolution;
  LONGLONG resolutionSigDigs = sResolutionSigDigs;
  LONGLONG valueSigDigs = resolution * (mValue / resolution);
  // and chop off insignificant digits
  valueSigDigs = resolutionSigDigs * (valueSigDigs / resolutionSigDigs);
  return double(valueSigDigs) / kNsPerSecd;
}

TimeDuration
TimeDuration::FromMilliseconds(double aMilliseconds)
{
  return TimeDuration::FromTicks(int64_t(ms2mt(aMilliseconds)));
}

TimeDuration
TimeDuration::Resolution()
{
  AutoCriticalSection lock(&sTimeStampLock);

  return TimeDuration::FromTicks(int64_t(sResolution));
}

struct TimeStampInitialization
{
  TimeStampInitialization()
  {
    TimeStamp::Startup();
  }
  ~TimeStampInitialization()
  {
    TimeStamp::Shutdown();
  }
};

static TimeStampInitialization initOnce;

nsresult
TimeStamp::Startup()
{
  // Decide which implementation to use for the high-performance timer.
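  // Prefer the native GetTickCount64 (Vista and later); on Windows XP fall
  // back to our rollover-protected wrapper around GetTickCount.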
  HMODULE kernelDLL = GetModuleHandleW(L"kernel32.dll");
  sGetTickCount64 = reinterpret_cast<GetTickCount64_t>(
    GetProcAddress(kernelDLL, "GetTickCount64"));
  if (!sGetTickCount64) {
    // If the platform does not support GetTickCount64 (Windows XP doesn't),
    // then use our fallback implementation based on GetTickCount.
    sGetTickCount64 = GetTickCount64Fallback;
  }

  InitializeCriticalSectionAndSpinCount(&sTimeStampLock, kLockSpinCount);

  LARGE_INTEGER freq;
  BOOL QPCAvailable = ::QueryPerformanceFrequency(&freq);
  if (!QPCAvailable) {
    // No Performance Counter.  Fall back to using GetTickCount.
    sFrequencyPerSec = 1;
    sCalibrationFlags.flags.fallBackToGTC = true;
    InitResolution();

    LOG(("TimeStamp: using GetTickCount"));
    return NS_OK;
  }

  sFrequencyPerSec = freq.QuadPart;

  ULONGLONG qpc = PerformanceCounter();
  sLastCalibrated = sGetTickCount64();
  sSkew = qpc - ms2mt(sLastCalibrated);

  InitThresholds();
  InitResolution();

  sHasStableTSC = HasStableTSC();

  LOG(("TimeStamp: initial skew is %1.2fms", mt2ms_d(sSkew)));

  return NS_OK;
}

void
TimeStamp::Shutdown()
{
  DeleteCriticalSection(&sTimeStampLock);
}

TimeStamp
TimeStamp::Now()
{
  if (sHasStableTSC) {
    return TimeStamp(uint64_t(PerformanceCounter()));
  }

  return TimeStamp(uint64_t(CalibratedPerformanceCounter()));
}

} // namespace mozilla
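
#if 0
// Illustrative sketch only (not part of this implementation): how a caller is
// expected to use the timer implemented above.  TimeStamp::Now() returns an
// opaque monotonic stamp; subtracting two stamps yields a TimeDuration.
// DoSomeWork() is a hypothetical placeholder for the code being measured.
static void
ExampleUsage()
{
  mozilla::TimeStamp start = mozilla::TimeStamp::Now();
  DoSomeWork();
  mozilla::TimeDuration elapsed = mozilla::TimeStamp::Now() - start;
  printf("elapsed: %1.2f ms\n", elapsed.ToSeconds() * 1000.0);
}
#endif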