ntdll-NtAlertThreadByThreadId: Use a lock-free (list of) arrays to map thread IDs to addresses.

This commit is contained in:
Zebediah Figura
2020-12-28 12:22:10 -06:00
parent 350eb136a5
commit b8ca0eae9f
5 changed files with 165 additions and 250 deletions

View File

@ -1,156 +0,0 @@
From a9f438755a74e85cfb3b9dd122058c722ffa2f3c Mon Sep 17 00:00:00 2001
From: Zebediah Figura <z.figura12@gmail.com>
Date: Wed, 28 Oct 2020 17:31:00 -0500
Subject: [PATCH 08/13] ntdll: Use a separate lock to access the PE TEB list.
Signed-off-by: Zebediah Figura <z.figura12@gmail.com>
---
dlls/ntdll/loader.c | 70 +++++++++++++++++++++++++++++++++++++++++----
1 file changed, 64 insertions(+), 6 deletions(-)
diff --git a/dlls/ntdll/loader.c b/dlls/ntdll/loader.c
index 45ad496fd64..26443a8c4f4 100644
--- a/dlls/ntdll/loader.c
+++ b/dlls/ntdll/loader.c
@@ -127,7 +127,6 @@ typedef struct _wine_modref
static UINT tls_module_count; /* number of modules with TLS directory */
static IMAGE_TLS_DIRECTORY *tls_dirs; /* array of TLS directories */
-LIST_ENTRY tls_links = { &tls_links, &tls_links };
static RTL_CRITICAL_SECTION loader_section;
static RTL_CRITICAL_SECTION_DEBUG critsect_debug =
@@ -184,6 +183,55 @@ static inline BOOL contains_path( LPCWSTR name )
return ((*name && (name[1] == ':')) || wcschr(name, '/') || wcschr(name, '\\'));
}
+/* We need to access this lock from inside the implementation of (process-local)
+ * synchronization functions, which makes actually scheduling more than a little
+ * difficult.
+ *
+ * On the other hand, we want to make this a read/write lock, not just to avoid
+ * lock contention, but also because lock contention is particularly expensive
+ * for a spinlock.
+ */
+LIST_ENTRY tls_links = { &tls_links, &tls_links };
+static LONG teb_spinlock;
+
+#define TEB_SPINLOCK_WRITE (-1)
+
+static inline void small_pause(void)
+{
+#ifdef __i386__
+ __asm__ __volatile__( "rep;nop" : : : "memory" );
+#else
+ __asm__ __volatile__( "" : : : "memory" );
+#endif
+}
+
+static void teb_list_rdlock(void)
+{
+ for (;;)
+ {
+ LONG old = teb_spinlock;
+ if (old != TEB_SPINLOCK_WRITE && InterlockedCompareExchange( &teb_spinlock, old + 1, old ) == old)
+ return;
+ small_pause();
+ }
+}
+
+static void teb_list_rdunlock(void)
+{
+ InterlockedDecrement( &teb_spinlock );
+}
+
+static void teb_list_wrlock(void)
+{
+ while (InterlockedCompareExchange( &teb_spinlock, TEB_SPINLOCK_WRITE, 0 ))
+ small_pause();
+}
+
+static void teb_list_wrunlock(void)
+{
+ InterlockedExchange( &teb_spinlock, 0 );
+}
+
#define RTL_UNLOAD_EVENT_TRACE_NUMBER 64
typedef struct _RTL_UNLOAD_EVENT_TRACE
@@ -991,13 +1039,18 @@ static SHORT alloc_tls_slot( LDR_DATA_TABLE_ENTRY *mod )
if (!new_ptr) return -1;
/* resize the pointer block in all running threads */
+ teb_list_rdlock();
for (entry = tls_links.Flink; entry != &tls_links; entry = entry->Flink)
{
TEB *teb = CONTAINING_RECORD( entry, TEB, TlsLinks );
void **old = teb->ThreadLocalStoragePointer;
void **new = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY, new_count * sizeof(*new));
- if (!new) return -1;
+ if (!new)
+ {
+ teb_list_rdunlock();
+ return -1;
+ }
if (old) memcpy( new, old, tls_module_count * sizeof(*new) );
teb->ThreadLocalStoragePointer = new;
#ifdef __x86_64__ /* macOS-specific hack */
@@ -1006,12 +1059,14 @@ static SHORT alloc_tls_slot( LDR_DATA_TABLE_ENTRY *mod )
TRACE( "thread %04lx tls block %p -> %p\n", (ULONG_PTR)teb->ClientId.UniqueThread, old, new );
/* FIXME: can't free old block here, should be freed at thread exit */
}
+ teb_list_rdunlock();
tls_dirs = new_ptr;
tls_module_count = new_count;
}
/* allocate the data block in all running threads */
+ teb_list_rdlock();
for (entry = tls_links.Flink; entry != &tls_links; entry = entry->Flink)
{
TEB *teb = CONTAINING_RECORD( entry, TEB, TlsLinks );
@@ -1026,6 +1081,7 @@ static SHORT alloc_tls_slot( LDR_DATA_TABLE_ENTRY *mod )
RtlFreeHeap( GetProcessHeap(), 0,
InterlockedExchangePointer( (void **)teb->ThreadLocalStoragePointer + i, new_ptr ));
}
+ teb_list_rdunlock();
*(DWORD *)dir->AddressOfIndex = i;
tls_dirs[i] = *dir;
@@ -3243,7 +3299,9 @@ void WINAPI LdrShutdownThread(void)
if (wm->ldr.TlsIndex != -1) call_tls_callbacks( wm->ldr.DllBase, DLL_THREAD_DETACH );
RtlAcquirePebLock();
+ teb_list_wrlock();
RemoveEntryList( &NtCurrentTeb()->TlsLinks );
+ teb_list_wrunlock();
if ((pointers = NtCurrentTeb()->ThreadLocalStoragePointer))
{
for (i = 0; i < tls_module_count; i++) RtlFreeHeap( GetProcessHeap(), 0, pointers[i] );
@@ -3480,6 +3538,10 @@ void WINAPI LdrInitializeThunk( CONTEXT *context, ULONG_PTR unknown2, ULONG_PTR
if (process_detaching) NtTerminateThread( GetCurrentThread(), 0 );
+ teb_list_wrlock();
+ InsertHeadList( &tls_links, &NtCurrentTeb()->TlsLinks );
+ teb_list_wrunlock();
+
RtlEnterCriticalSection( &loader_section );
wm = get_modref( NtCurrentTeb()->Peb->ImageBaseAddress );
@@ -3502,10 +3564,6 @@ void WINAPI LdrInitializeThunk( CONTEXT *context, ULONG_PTR unknown2, ULONG_PTR
imports_fixup_done = TRUE;
}
- RtlAcquirePebLock();
- InsertHeadList( &tls_links, &NtCurrentTeb()->TlsLinks );
- RtlReleasePebLock();
-
NtCurrentTeb()->FlsSlots = fls_alloc_data();
if (!attach_done) /* first time around */
--
2.29.2

View File

@ -1,56 +1,18 @@
From e27be2eef2738f6c7f213d70ee5554b052d07ec1 Mon Sep 17 00:00:00 2001
From c5833a431cf9ba59f1f04081a2be058833b4cbb8 Mon Sep 17 00:00:00 2001
From: Zebediah Figura <z.figura12@gmail.com>
Date: Mon, 2 Nov 2020 20:24:07 -0600
Subject: [PATCH 09/13] ntdll: Reimplement Win32 futexes on top of thread-ID
alerts.
Subject: [PATCH] ntdll: Reimplement Win32 futexes on top of thread-ID alerts.
Signed-off-by: Zebediah Figura <z.figura12@gmail.com>
---
dlls/ntdll/loader.c | 4 +-
dlls/ntdll/ntdll_misc.h | 2 +
dlls/ntdll/sync.c | 77 ++++++++++++++++++-
dlls/ntdll/sync.c | 147 ++++++++++++++++++++++++++++++++++-
dlls/ntdll/unix/loader.c | 3 -
dlls/ntdll/unix/sync.c | 162 ---------------------------------------
dlls/ntdll/unixlib.h | 6 +-
6 files changed, 79 insertions(+), 175 deletions(-)
4 files changed, 145 insertions(+), 173 deletions(-)
diff --git a/dlls/ntdll/loader.c b/dlls/ntdll/loader.c
index 26443a8c4f4..440a77d4063 100644
--- a/dlls/ntdll/loader.c
+++ b/dlls/ntdll/loader.c
@@ -205,7 +205,7 @@ static inline void small_pause(void)
#endif
}
-static void teb_list_rdlock(void)
+void teb_list_rdlock(void)
{
for (;;)
{
@@ -216,7 +216,7 @@ static void teb_list_rdlock(void)
}
}
-static void teb_list_rdunlock(void)
+void teb_list_rdunlock(void)
{
InterlockedDecrement( &teb_spinlock );
}
diff --git a/dlls/ntdll/ntdll_misc.h b/dlls/ntdll/ntdll_misc.h
index 41e8666a25c..6a46946b218 100644
--- a/dlls/ntdll/ntdll_misc.h
+++ b/dlls/ntdll/ntdll_misc.h
@@ -71,6 +71,8 @@ extern BOOL is_wow64 DECLSPEC_HIDDEN;
/* module handling */
extern LIST_ENTRY tls_links DECLSPEC_HIDDEN;
+extern void teb_list_rdlock(void) DECLSPEC_HIDDEN;
+extern void teb_list_rdunlock(void) DECLSPEC_HIDDEN;
extern FARPROC RELAY_GetProcAddress( HMODULE module, const IMAGE_EXPORT_DIRECTORY *exports,
DWORD exp_size, FARPROC proc, DWORD ordinal, const WCHAR *user ) DECLSPEC_HIDDEN;
extern FARPROC SNOOP_GetProcAddress( HMODULE hmod, const IMAGE_EXPORT_DIRECTORY *exports, DWORD exp_size,
diff --git a/dlls/ntdll/sync.c b/dlls/ntdll/sync.c
index 8df7015df9f..05bccf698b6 100644
index 8df7015df9f..be462f27881 100644
--- a/dlls/ntdll/sync.c
+++ b/dlls/ntdll/sync.c
@@ -37,6 +37,13 @@
@ -67,10 +29,70 @@ index 8df7015df9f..05bccf698b6 100644
/******************************************************************
* RtlRunOnceInitialize (NTDLL.@)
@@ -531,13 +538,48 @@ NTSTATUS WINAPI RtlSleepConditionVariableSRW( RTL_CONDITION_VARIABLE *variable,
@@ -531,13 +538,116 @@ NTSTATUS WINAPI RtlSleepConditionVariableSRW( RTL_CONDITION_VARIABLE *variable,
return status;
}
+/* The following functions define a lock-free array mapping thread IDs to
+ * values, which can be grown but not shrunk. We do this by allocating one slice
+ * of the array at a time, and storing a pointer to the next slice at the end.
+ *
+ * This is both for efficiency (we want this function to be as fast as possible)
+ * and because locking the TEB list is hard otherwise—we need to safely access
+ * the TEB list, but cannot do so using any of these synchronization primitives,
+ * and we may need to access the TEB list before being inserted into it (e.g.
+ * from heap locks, or the TEB list lock itself.)
+ */
+
+struct addr_wait_entry
+{
+ void *addr;
+ HANDLE tid;
+};
+
+struct addr_wait_array
+{
+ struct addr_wait_entry entries[(0x1000 - sizeof(struct addr_wait_entry *)) / sizeof(struct addr_wait_entry)];
+ struct addr_wait_array *next;
+};
+
+static struct addr_wait_array first_addr_wait_array;
+
+static struct addr_wait_entry *addr_wait_allocate_entry( HANDLE tid )
+{
+ struct addr_wait_array *array = &first_addr_wait_array;
+
+ for (;;)
+ {
+ struct addr_wait_array *new_array = NULL;
+ SIZE_T size = sizeof(*new_array);
+ unsigned int i;
+
+ for (;;)
+ {
+ for (i = 0; i < ARRAY_SIZE(array->entries); ++i)
+ {
+ if (!array->entries[i].tid && !InterlockedCompareExchangePointer( &array->entries[i].tid, tid, NULL ))
+ return &array->entries[i];
+ }
+
+ if (!array->next) break;
+ array = array->next;
+ }
+
+ if (NtAllocateVirtualMemory( NtCurrentProcess(), (void **)&new_array, 0, &size, MEM_COMMIT, PAGE_READWRITE ))
+ return NULL;
+
+ if (InterlockedCompareExchangePointer( (void **)&array->next, new_array, NULL ))
+ {
+ /* another thread beat us to it */
+ NtFreeVirtualMemory( NtCurrentProcess(), (void **)&new_array, &size, MEM_RELEASE );
+ }
+ /* start searching again from the new array */
+ array = array->next;
+ }
+}
+
+static BOOL compare_addr( const void *addr, const void *cmp, SIZE_T size )
+{
+ switch (size)
@ -95,6 +117,7 @@ index 8df7015df9f..05bccf698b6 100644
const LARGE_INTEGER *timeout )
{
- return unix_funcs->RtlWaitOnAddress( addr, cmp, size, timeout );
+ struct addr_wait_entry *entry = NtCurrentTeb()->ReservedForPerf;
+ NTSTATUS ret;
+
+ TRACE("addr %p cmp %p size %#Ix timeout %s\n", addr, cmp, size, debugstr_timeout( timeout ));
@ -102,65 +125,74 @@ index 8df7015df9f..05bccf698b6 100644
+ if (size != 1 && size != 2 && size != 4 && size != 8)
+ return STATUS_INVALID_PARAMETER;
+
+ InterlockedExchangePointer( &NtCurrentTeb()->ReservedForPerf, (void *)addr );
+ if (!entry)
+ {
+ if (!(entry = addr_wait_allocate_entry( NtCurrentTeb()->ClientId.UniqueThread )))
+ return STATUS_NO_MEMORY;
+ NtCurrentTeb()->ReservedForPerf = entry;
+ }
+
+ InterlockedExchangePointer( &entry->addr, (void *)addr );
+
+ if (!compare_addr( addr, cmp, size ))
+ {
+ InterlockedExchangePointer( &NtCurrentTeb()->ReservedForPerf, NULL );
+ InterlockedExchangePointer( &entry->addr, NULL );
+ return STATUS_SUCCESS;
+ }
+
+ ret = NtWaitForAlertByThreadId( addr, timeout );
+ InterlockedExchangePointer( &NtCurrentTeb()->ReservedForPerf, NULL );
+ ret = NtWaitForAlertByThreadId( NULL, timeout );
+ InterlockedExchangePointer( &entry->addr, NULL );
+ if (ret == STATUS_ALERTED) ret = STATUS_SUCCESS;
+ return ret;
}
/***********************************************************************
@@ -545,7 +587,20 @@ NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size
@@ -545,7 +655,21 @@ NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size
*/
void WINAPI RtlWakeAddressAll( const void *addr )
{
- return unix_funcs->RtlWakeAddressAll( addr );
+ LIST_ENTRY *entry;
+ struct addr_wait_array *array;
+ unsigned int i;
+
+ TRACE("%p\n", addr);
+
+ if (!addr) return;
+
+ teb_list_rdlock();
+ for (entry = tls_links.Flink; entry != &tls_links; entry = entry->Flink)
+ for (array = &first_addr_wait_array; array != NULL; array = array->next)
+ {
+ const TEB *teb = CONTAINING_RECORD( entry, TEB, TlsLinks );
+ if (teb->ReservedForPerf == addr)
+ NtAlertThreadByThreadId( teb->ClientId.UniqueThread );
+ for (i = 0; i < ARRAY_SIZE(array->entries); ++i)
+ {
+ if (array->entries[i].addr == addr)
+ NtAlertThreadByThreadId( array->entries[i].tid );
+ }
+ }
+ teb_list_rdunlock();
}
/***********************************************************************
@@ -553,5 +608,21 @@ void WINAPI RtlWakeAddressAll( const void *addr )
@@ -553,5 +677,22 @@ void WINAPI RtlWakeAddressAll( const void *addr )
*/
void WINAPI RtlWakeAddressSingle( const void *addr )
{
- return unix_funcs->RtlWakeAddressSingle( addr );
+ LIST_ENTRY *entry;
+ struct addr_wait_array *array;
+ unsigned int i;
+
+ TRACE("%p\n", addr);
+
+ if (!addr) return;
+
+ teb_list_rdlock();
+ for (entry = tls_links.Flink; entry != &tls_links; entry = entry->Flink)
+ for (array = &first_addr_wait_array; array != NULL; array = array->next)
+ {
+ const TEB *teb = CONTAINING_RECORD( entry, TEB, TlsLinks );
+ if (teb->ReservedForPerf == addr)
+ for (i = 0; i < ARRAY_SIZE(array->entries); ++i)
+ {
+ NtAlertThreadByThreadId( teb->ClientId.UniqueThread );
+ break;
+ if (array->entries[i].addr == addr)
+ {
+ NtAlertThreadByThreadId( array->entries[i].tid );
+ return;
+ }
+ }
+ }
+ teb_list_rdunlock();
}
diff --git a/dlls/ntdll/unix/loader.c b/dlls/ntdll/unix/loader.c
index 31cfb43d11f..5ab3121dd2c 100644

View File

@ -1,7 +1,7 @@
From 9e9f51741ee04bcf8123ae23241ba3c4f8c8b2cf Mon Sep 17 00:00:00 2001
From 9141708f675ec4aca0551249082dc7b494135bad Mon Sep 17 00:00:00 2001
From: Zebediah Figura <z.figura12@gmail.com>
Date: Mon, 31 Aug 2020 23:30:52 -0500
Subject: [PATCH 10/13] ntdll: Merge critsection.c into sync.c.
Subject: [PATCH] ntdll: Merge critsection.c into sync.c.
Signed-off-by: Zebediah Figura <z.figura12@gmail.com>
---
@ -573,7 +573,7 @@ index fe7d933c0fa..00000000000
- return STATUS_SUCCESS;
-}
diff --git a/dlls/ntdll/sync.c b/dlls/ntdll/sync.c
index 05bccf698b6..ea327172b86 100644
index be462f27881..b8465c53832 100644
--- a/dlls/ntdll/sync.c
+++ b/dlls/ntdll/sync.c
@@ -2,7 +2,7 @@
@ -593,9 +593,9 @@ index 05bccf698b6..ea327172b86 100644
static const char *debugstr_timeout( const LARGE_INTEGER *timeout )
{
@@ -626,3 +627,334 @@ void WINAPI RtlWakeAddressSingle( const void *addr )
@@ -696,3 +697,334 @@ void WINAPI RtlWakeAddressSingle( const void *addr )
}
}
teb_list_rdunlock();
}
+
+

View File

@ -1,3 +1,2 @@
Fixes: [50292] Process-local synchronization objects use private interfaces into the Unix library
Depends: server-Object_Types
Disabled: True