From 74daf13fb29f6147971a5e5fc63b3c795c5bffdd Mon Sep 17 00:00:00 2001 From: Rodrigo Kumpera <kumpera@users.noreply.github.com> Date: Fri, 23 Mar 2018 09:43:30 -0700 Subject: [PATCH] Harden JIT job control against cctors deadlocking. (#7423) * [mini] Don't wait for other threads to finish JITing if the current thread is running a cctor. This eliminates a possible source of deadlocks found during AOT runs of Roslyn. * [mini] Introduce a 1s timeout to JIT job control to ensure we don't simply deadlock in case of execution dependency bugs. * [mini] fix spelling5DD --- mono/metadata/threads-types.h | 3 +++ mono/metadata/threads.c | 8 ++++++++ mono/mini/mini-runtime.c | 32 +++++++++++++++++++++++++++----- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/mono/metadata/threads-types.h b/mono/metadata/threads-types.h index b55aa6479a1..194d928e4ff 100644 --- a/mono/metadata/threads-types.h +++ b/mono/metadata/threads-types.h @@ -292,4 +292,7 @@ mono_thread_internal_describe (MonoInternalThread *internal, GString *str); gboolean mono_thread_internal_is_current (MonoInternalThread *internal); +gboolean +mono_threads_is_current_thread_in_protected_block (void); + #endif /* _MONO_METADATA_THREADS_TYPES_H_ */ diff --git a/mono/metadata/threads.c b/mono/metadata/threads.c index cf819395e0e..30743de78fa 100644 --- a/mono/metadata/threads.c +++ b/mono/metadata/threads.c @@ -257,6 +257,14 @@ mono_thread_get_abort_prot_block_count (MonoInternalThread *thread) return (state & ABORT_PROT_BLOCK_MASK) >> ABORT_PROT_BLOCK_SHIFT; } +gboolean +mono_threads_is_current_thread_in_protected_block (void) +{ + MonoInternalThread *thread = mono_thread_internal_current (); + + return mono_thread_get_abort_prot_block_count (thread) > 0; +} + void mono_threads_begin_abort_protected_block (void) { diff --git a/mono/mini/mini-runtime.c b/mono/mini/mini-runtime.c index 22dab7313d6..90fa3a452bf 100644 --- a/mono/mini/mini-runtime.c +++ b/mono/mini/mini-runtime.c @@ -1910,8 +1910,15 @@ typedef struct { MonoCoopMutex lock; } JitCompilationData; +/* +Timeout, in millisecounds, that we wait other threads to finish JITing. +This value can't be too small or we won't see enough methods being reused and it can't be too big to cause massive stalls due to unforseable circunstances. +*/ +#define MAX_JIT_TIMEOUT_MS 1000 + + static JitCompilationData compilation_data; -static int jit_methods_waited, jit_methods_multiple, jit_methods_overload, jit_spurious_wakeups; +static int jit_methods_waited, jit_methods_multiple, jit_methods_overload, jit_spurious_wakeups_or_timeouts; static void mini_jit_init_job_control (void) @@ -1976,7 +1983,7 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain) mono_counters_register ("JIT compile waited others", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_methods_waited); mono_counters_register ("JIT compile 1+ jobs", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_methods_multiple); mono_counters_register ("JIT compile overload wait", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_methods_overload); - mono_counters_register ("JIT compile spurious wakeups", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_spurious_wakeups); + mono_counters_register ("JIT compile spurious wakeups or timeouts", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_spurious_wakeups_or_timeouts); inited = TRUE; } @@ -1993,9 +2000,14 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain) unlock_compilation_data (); return FALSE; - } else if (jit_tls->active_jit_methods > 0) { + } else if (jit_tls->active_jit_methods > 0 || mono_threads_is_current_thread_in_protected_block ()) { //We can't suspend the current thread if it's already JITing a method. //Dependency management is too compilated and we want to get rid of this anyways. + + //We can't suspend the current thread if it's running a protected block (such as a cctor) + //We can't rely only on JIT nesting as cctor's can be run from outside the JIT. + + //Finally, he hit a timeout or spurious wakeup. We're better off just giving up and keep recompiling ++entry->compilation_count; ++jit_methods_multiple; ++jit_tls->active_jit_methods; @@ -2015,7 +2027,7 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain) ++entry->threads_waiting; g_assert (entry->has_cond); - mono_coop_cond_wait (&entry->cond, &compilation_data.lock); + mono_coop_cond_timedwait (&entry->cond, &compilation_data.lock, MAX_JIT_TIMEOUT_MS); --entry->threads_waiting; if (entry->done) { @@ -2023,7 +2035,17 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain) unlock_compilation_data (); return TRUE; } else { - ++jit_spurious_wakeups; + //We hit the timeout or a spurious wakeup, fallback to JITing + g_assert (entry->ref_count > 1); + unref_jit_entry (entry); + ++jit_spurious_wakeups_or_timeouts; + + ++entry->compilation_count; + ++jit_methods_multiple; + ++jit_tls->active_jit_methods; + + unlock_compilation_data (); + return FALSE; } } } -- 2.14.1