From 74daf13fb29f6147971a5e5fc63b3c795c5bffdd Mon Sep 17 00:00:00 2001
From: Rodrigo Kumpera <kumpera@users.noreply.github.com>
Date: Fri, 23 Mar 2018 09:43:30 -0700
Subject: [PATCH] Harden JIT job control against cctors deadlocking. (#7423)

* [mini] Don't wait for other threads to finish JITing if the current thread is running a cctor.

This eliminates a possible source of deadlocks found during AOT runs of Roslyn.

* [mini] Introduce a 1s timeout to JIT job control to ensure we don't simply deadlock in case of execution dependency bugs.

* [mini] fix spelling5DD
---
 mono/metadata/threads-types.h |  3 +++
 mono/metadata/threads.c       |  8 ++++++++
 mono/mini/mini-runtime.c      | 32 +++++++++++++++++++++++++++-----
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/mono/metadata/threads-types.h b/mono/metadata/threads-types.h
index b55aa6479a1..194d928e4ff 100644
--- a/mono/metadata/threads-types.h
+++ b/mono/metadata/threads-types.h
@@ -292,4 +292,7 @@ mono_thread_internal_describe (MonoInternalThread *internal, GString *str);
 gboolean
 mono_thread_internal_is_current (MonoInternalThread *internal);
 
+gboolean
+mono_threads_is_current_thread_in_protected_block (void);
+
 #endif /* _MONO_METADATA_THREADS_TYPES_H_ */
diff --git a/mono/metadata/threads.c b/mono/metadata/threads.c
index cf819395e0e..30743de78fa 100644
--- a/mono/metadata/threads.c
+++ b/mono/metadata/threads.c
@@ -257,6 +257,14 @@ mono_thread_get_abort_prot_block_count (MonoInternalThread *thread)
 	return (state & ABORT_PROT_BLOCK_MASK) >> ABORT_PROT_BLOCK_SHIFT;
 }
 
+gboolean
+mono_threads_is_current_thread_in_protected_block (void)
+{
+	MonoInternalThread *thread = mono_thread_internal_current ();
+
+	return mono_thread_get_abort_prot_block_count (thread) > 0;
+}
+
 void
 mono_threads_begin_abort_protected_block (void)
 {
diff --git a/mono/mini/mini-runtime.c b/mono/mini/mini-runtime.c
index 22dab7313d6..90fa3a452bf 100644
--- a/mono/mini/mini-runtime.c
+++ b/mono/mini/mini-runtime.c
@@ -1910,8 +1910,15 @@ typedef struct {
 	MonoCoopMutex lock;
 } JitCompilationData;
 
+/*
+Timeout, in millisecounds, that we wait other threads to finish JITing.
+This value can't be too small or we won't see enough methods being reused and it can't be too big to cause massive stalls due to unforseable circunstances.
+*/
+#define MAX_JIT_TIMEOUT_MS 1000
+
+
 static JitCompilationData compilation_data;
-static int jit_methods_waited, jit_methods_multiple, jit_methods_overload, jit_spurious_wakeups;
+static int jit_methods_waited, jit_methods_multiple, jit_methods_overload, jit_spurious_wakeups_or_timeouts;
 
 static void
 mini_jit_init_job_control (void)
@@ -1976,7 +1983,7 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain)
 		mono_counters_register ("JIT compile waited others", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_methods_waited);
 		mono_counters_register ("JIT compile 1+ jobs", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_methods_multiple);
 		mono_counters_register ("JIT compile overload wait", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_methods_overload);
-		mono_counters_register ("JIT compile spurious wakeups", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_spurious_wakeups);
+		mono_counters_register ("JIT compile spurious wakeups or timeouts", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_spurious_wakeups_or_timeouts);
 		inited = TRUE;
 	}
 
@@ -1993,9 +2000,14 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain)
 
 		unlock_compilation_data ();
 		return FALSE;
-	} else if (jit_tls->active_jit_methods > 0) {
+	} else if (jit_tls->active_jit_methods > 0 || mono_threads_is_current_thread_in_protected_block ()) {
 		//We can't suspend the current thread if it's already JITing a method.
 		//Dependency management is too compilated and we want to get rid of this anyways.
+
+		//We can't suspend the current thread if it's running a protected block (such as a cctor)
+		//We can't rely only on JIT nesting as cctor's can be run from outside the JIT.
+
+		//Finally, he hit a timeout or spurious wakeup. We're better off just giving up and keep recompiling
 		++entry->compilation_count;
 		++jit_methods_multiple;
 		++jit_tls->active_jit_methods;
@@ -2015,7 +2027,7 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain)
 			++entry->threads_waiting;
 
 			g_assert (entry->has_cond);
-			mono_coop_cond_wait (&entry->cond, &compilation_data.lock);
+			mono_coop_cond_timedwait (&entry->cond, &compilation_data.lock, MAX_JIT_TIMEOUT_MS);
 			--entry->threads_waiting;
 
 			if (entry->done) {
@@ -2023,7 +2035,17 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain)
 				unlock_compilation_data ();
 				return TRUE;
 			} else {
-				++jit_spurious_wakeups;
+				//We hit the timeout or a spurious wakeup, fallback to JITing
+				g_assert (entry->ref_count > 1);
+				unref_jit_entry (entry);
+				++jit_spurious_wakeups_or_timeouts;
+
+				++entry->compilation_count;
+				++jit_methods_multiple;
+				++jit_tls->active_jit_methods;
+
+				unlock_compilation_data ();
+				return FALSE;
 			}
 		}
 	}
-- 
2.14.1